18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (C) 2007,2008 Oracle. All rights reserved. 48c2ecf20Sopenharmony_ci */ 58c2ecf20Sopenharmony_ci 68c2ecf20Sopenharmony_ci#include <linux/sched.h> 78c2ecf20Sopenharmony_ci#include <linux/slab.h> 88c2ecf20Sopenharmony_ci#include <linux/rbtree.h> 98c2ecf20Sopenharmony_ci#include <linux/mm.h> 108c2ecf20Sopenharmony_ci#include "ctree.h" 118c2ecf20Sopenharmony_ci#include "disk-io.h" 128c2ecf20Sopenharmony_ci#include "transaction.h" 138c2ecf20Sopenharmony_ci#include "print-tree.h" 148c2ecf20Sopenharmony_ci#include "locking.h" 158c2ecf20Sopenharmony_ci#include "volumes.h" 168c2ecf20Sopenharmony_ci#include "qgroup.h" 178c2ecf20Sopenharmony_ci 188c2ecf20Sopenharmony_cistatic int split_node(struct btrfs_trans_handle *trans, struct btrfs_root 198c2ecf20Sopenharmony_ci *root, struct btrfs_path *path, int level); 208c2ecf20Sopenharmony_cistatic int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, 218c2ecf20Sopenharmony_ci const struct btrfs_key *ins_key, struct btrfs_path *path, 228c2ecf20Sopenharmony_ci int data_size, int extend); 238c2ecf20Sopenharmony_cistatic int push_node_left(struct btrfs_trans_handle *trans, 248c2ecf20Sopenharmony_ci struct extent_buffer *dst, 258c2ecf20Sopenharmony_ci struct extent_buffer *src, int empty); 268c2ecf20Sopenharmony_cistatic int balance_node_right(struct btrfs_trans_handle *trans, 278c2ecf20Sopenharmony_ci struct extent_buffer *dst_buf, 288c2ecf20Sopenharmony_ci struct extent_buffer *src_buf); 298c2ecf20Sopenharmony_cistatic void del_ptr(struct btrfs_root *root, struct btrfs_path *path, 308c2ecf20Sopenharmony_ci int level, int slot); 318c2ecf20Sopenharmony_ci 328c2ecf20Sopenharmony_cistatic const struct btrfs_csums { 338c2ecf20Sopenharmony_ci u16 size; 348c2ecf20Sopenharmony_ci const char name[10]; 358c2ecf20Sopenharmony_ci const char driver[12]; 368c2ecf20Sopenharmony_ci} btrfs_csums[] = { 378c2ecf20Sopenharmony_ci [BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" }, 388c2ecf20Sopenharmony_ci [BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" }, 398c2ecf20Sopenharmony_ci [BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" }, 408c2ecf20Sopenharmony_ci [BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b", 418c2ecf20Sopenharmony_ci .driver = "blake2b-256" }, 428c2ecf20Sopenharmony_ci}; 438c2ecf20Sopenharmony_ci 448c2ecf20Sopenharmony_ciint btrfs_super_csum_size(const struct btrfs_super_block *s) 458c2ecf20Sopenharmony_ci{ 468c2ecf20Sopenharmony_ci u16 t = btrfs_super_csum_type(s); 478c2ecf20Sopenharmony_ci /* 488c2ecf20Sopenharmony_ci * csum type is validated at mount time 498c2ecf20Sopenharmony_ci */ 508c2ecf20Sopenharmony_ci return btrfs_csums[t].size; 518c2ecf20Sopenharmony_ci} 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_ciconst char *btrfs_super_csum_name(u16 csum_type) 548c2ecf20Sopenharmony_ci{ 558c2ecf20Sopenharmony_ci /* csum type is validated at mount time */ 568c2ecf20Sopenharmony_ci return btrfs_csums[csum_type].name; 578c2ecf20Sopenharmony_ci} 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_ci/* 608c2ecf20Sopenharmony_ci * Return driver name if defined, otherwise the name that's also a valid driver 618c2ecf20Sopenharmony_ci * name 628c2ecf20Sopenharmony_ci */ 638c2ecf20Sopenharmony_ciconst char *btrfs_super_csum_driver(u16 csum_type) 648c2ecf20Sopenharmony_ci{ 658c2ecf20Sopenharmony_ci /* csum type is validated at mount time */ 668c2ecf20Sopenharmony_ci return btrfs_csums[csum_type].driver[0] ? 678c2ecf20Sopenharmony_ci btrfs_csums[csum_type].driver : 688c2ecf20Sopenharmony_ci btrfs_csums[csum_type].name; 698c2ecf20Sopenharmony_ci} 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_cisize_t __attribute_const__ btrfs_get_num_csums(void) 728c2ecf20Sopenharmony_ci{ 738c2ecf20Sopenharmony_ci return ARRAY_SIZE(btrfs_csums); 748c2ecf20Sopenharmony_ci} 758c2ecf20Sopenharmony_ci 768c2ecf20Sopenharmony_cistruct btrfs_path *btrfs_alloc_path(void) 778c2ecf20Sopenharmony_ci{ 788c2ecf20Sopenharmony_ci return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); 798c2ecf20Sopenharmony_ci} 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_ci/* this also releases the path */ 828c2ecf20Sopenharmony_civoid btrfs_free_path(struct btrfs_path *p) 838c2ecf20Sopenharmony_ci{ 848c2ecf20Sopenharmony_ci if (!p) 858c2ecf20Sopenharmony_ci return; 868c2ecf20Sopenharmony_ci btrfs_release_path(p); 878c2ecf20Sopenharmony_ci kmem_cache_free(btrfs_path_cachep, p); 888c2ecf20Sopenharmony_ci} 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci/* 918c2ecf20Sopenharmony_ci * path release drops references on the extent buffers in the path 928c2ecf20Sopenharmony_ci * and it drops any locks held by this path 938c2ecf20Sopenharmony_ci * 948c2ecf20Sopenharmony_ci * It is safe to call this on paths that no locks or extent buffers held. 958c2ecf20Sopenharmony_ci */ 968c2ecf20Sopenharmony_cinoinline void btrfs_release_path(struct btrfs_path *p) 978c2ecf20Sopenharmony_ci{ 988c2ecf20Sopenharmony_ci int i; 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 1018c2ecf20Sopenharmony_ci p->slots[i] = 0; 1028c2ecf20Sopenharmony_ci if (!p->nodes[i]) 1038c2ecf20Sopenharmony_ci continue; 1048c2ecf20Sopenharmony_ci if (p->locks[i]) { 1058c2ecf20Sopenharmony_ci btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]); 1068c2ecf20Sopenharmony_ci p->locks[i] = 0; 1078c2ecf20Sopenharmony_ci } 1088c2ecf20Sopenharmony_ci free_extent_buffer(p->nodes[i]); 1098c2ecf20Sopenharmony_ci p->nodes[i] = NULL; 1108c2ecf20Sopenharmony_ci } 1118c2ecf20Sopenharmony_ci} 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci/* 1148c2ecf20Sopenharmony_ci * safely gets a reference on the root node of a tree. A lock 1158c2ecf20Sopenharmony_ci * is not taken, so a concurrent writer may put a different node 1168c2ecf20Sopenharmony_ci * at the root of the tree. See btrfs_lock_root_node for the 1178c2ecf20Sopenharmony_ci * looping required. 1188c2ecf20Sopenharmony_ci * 1198c2ecf20Sopenharmony_ci * The extent buffer returned by this has a reference taken, so 1208c2ecf20Sopenharmony_ci * it won't disappear. It may stop being the root of the tree 1218c2ecf20Sopenharmony_ci * at any time because there are no locks held. 1228c2ecf20Sopenharmony_ci */ 1238c2ecf20Sopenharmony_cistruct extent_buffer *btrfs_root_node(struct btrfs_root *root) 1248c2ecf20Sopenharmony_ci{ 1258c2ecf20Sopenharmony_ci struct extent_buffer *eb; 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci while (1) { 1288c2ecf20Sopenharmony_ci rcu_read_lock(); 1298c2ecf20Sopenharmony_ci eb = rcu_dereference(root->node); 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_ci /* 1328c2ecf20Sopenharmony_ci * RCU really hurts here, we could free up the root node because 1338c2ecf20Sopenharmony_ci * it was COWed but we may not get the new root node yet so do 1348c2ecf20Sopenharmony_ci * the inc_not_zero dance and if it doesn't work then 1358c2ecf20Sopenharmony_ci * synchronize_rcu and try again. 1368c2ecf20Sopenharmony_ci */ 1378c2ecf20Sopenharmony_ci if (atomic_inc_not_zero(&eb->refs)) { 1388c2ecf20Sopenharmony_ci rcu_read_unlock(); 1398c2ecf20Sopenharmony_ci break; 1408c2ecf20Sopenharmony_ci } 1418c2ecf20Sopenharmony_ci rcu_read_unlock(); 1428c2ecf20Sopenharmony_ci synchronize_rcu(); 1438c2ecf20Sopenharmony_ci } 1448c2ecf20Sopenharmony_ci return eb; 1458c2ecf20Sopenharmony_ci} 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci/* 1488c2ecf20Sopenharmony_ci * Cowonly root (not-shareable trees, everything not subvolume or reloc roots), 1498c2ecf20Sopenharmony_ci * just get put onto a simple dirty list. Transaction walks this list to make 1508c2ecf20Sopenharmony_ci * sure they get properly updated on disk. 1518c2ecf20Sopenharmony_ci */ 1528c2ecf20Sopenharmony_cistatic void add_root_to_dirty_list(struct btrfs_root *root) 1538c2ecf20Sopenharmony_ci{ 1548c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci if (test_bit(BTRFS_ROOT_DIRTY, &root->state) || 1578c2ecf20Sopenharmony_ci !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state)) 1588c2ecf20Sopenharmony_ci return; 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci spin_lock(&fs_info->trans_lock); 1618c2ecf20Sopenharmony_ci if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) { 1628c2ecf20Sopenharmony_ci /* Want the extent tree to be the last on the list */ 1638c2ecf20Sopenharmony_ci if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID) 1648c2ecf20Sopenharmony_ci list_move_tail(&root->dirty_list, 1658c2ecf20Sopenharmony_ci &fs_info->dirty_cowonly_roots); 1668c2ecf20Sopenharmony_ci else 1678c2ecf20Sopenharmony_ci list_move(&root->dirty_list, 1688c2ecf20Sopenharmony_ci &fs_info->dirty_cowonly_roots); 1698c2ecf20Sopenharmony_ci } 1708c2ecf20Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 1718c2ecf20Sopenharmony_ci} 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_ci/* 1748c2ecf20Sopenharmony_ci * used by snapshot creation to make a copy of a root for a tree with 1758c2ecf20Sopenharmony_ci * a given objectid. The buffer with the new root node is returned in 1768c2ecf20Sopenharmony_ci * cow_ret, and this func returns zero on success or a negative error code. 1778c2ecf20Sopenharmony_ci */ 1788c2ecf20Sopenharmony_ciint btrfs_copy_root(struct btrfs_trans_handle *trans, 1798c2ecf20Sopenharmony_ci struct btrfs_root *root, 1808c2ecf20Sopenharmony_ci struct extent_buffer *buf, 1818c2ecf20Sopenharmony_ci struct extent_buffer **cow_ret, u64 new_root_objectid) 1828c2ecf20Sopenharmony_ci{ 1838c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 1848c2ecf20Sopenharmony_ci struct extent_buffer *cow; 1858c2ecf20Sopenharmony_ci int ret = 0; 1868c2ecf20Sopenharmony_ci int level; 1878c2ecf20Sopenharmony_ci struct btrfs_disk_key disk_key; 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ci WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && 1908c2ecf20Sopenharmony_ci trans->transid != fs_info->running_transaction->transid); 1918c2ecf20Sopenharmony_ci WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && 1928c2ecf20Sopenharmony_ci trans->transid != root->last_trans); 1938c2ecf20Sopenharmony_ci 1948c2ecf20Sopenharmony_ci level = btrfs_header_level(buf); 1958c2ecf20Sopenharmony_ci if (level == 0) 1968c2ecf20Sopenharmony_ci btrfs_item_key(buf, &disk_key, 0); 1978c2ecf20Sopenharmony_ci else 1988c2ecf20Sopenharmony_ci btrfs_node_key(buf, &disk_key, 0); 1998c2ecf20Sopenharmony_ci 2008c2ecf20Sopenharmony_ci cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid, 2018c2ecf20Sopenharmony_ci &disk_key, level, buf->start, 0, 2028c2ecf20Sopenharmony_ci BTRFS_NESTING_NEW_ROOT); 2038c2ecf20Sopenharmony_ci if (IS_ERR(cow)) 2048c2ecf20Sopenharmony_ci return PTR_ERR(cow); 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_ci copy_extent_buffer_full(cow, buf); 2078c2ecf20Sopenharmony_ci btrfs_set_header_bytenr(cow, cow->start); 2088c2ecf20Sopenharmony_ci btrfs_set_header_generation(cow, trans->transid); 2098c2ecf20Sopenharmony_ci btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); 2108c2ecf20Sopenharmony_ci btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | 2118c2ecf20Sopenharmony_ci BTRFS_HEADER_FLAG_RELOC); 2128c2ecf20Sopenharmony_ci if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) 2138c2ecf20Sopenharmony_ci btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); 2148c2ecf20Sopenharmony_ci else 2158c2ecf20Sopenharmony_ci btrfs_set_header_owner(cow, new_root_objectid); 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_ci write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid); 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_ci WARN_ON(btrfs_header_generation(buf) > trans->transid); 2208c2ecf20Sopenharmony_ci if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) 2218c2ecf20Sopenharmony_ci ret = btrfs_inc_ref(trans, root, cow, 1); 2228c2ecf20Sopenharmony_ci else 2238c2ecf20Sopenharmony_ci ret = btrfs_inc_ref(trans, root, cow, 0); 2248c2ecf20Sopenharmony_ci if (ret) { 2258c2ecf20Sopenharmony_ci btrfs_tree_unlock(cow); 2268c2ecf20Sopenharmony_ci free_extent_buffer(cow); 2278c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 2288c2ecf20Sopenharmony_ci return ret; 2298c2ecf20Sopenharmony_ci } 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(cow); 2328c2ecf20Sopenharmony_ci *cow_ret = cow; 2338c2ecf20Sopenharmony_ci return 0; 2348c2ecf20Sopenharmony_ci} 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_cienum mod_log_op { 2378c2ecf20Sopenharmony_ci MOD_LOG_KEY_REPLACE, 2388c2ecf20Sopenharmony_ci MOD_LOG_KEY_ADD, 2398c2ecf20Sopenharmony_ci MOD_LOG_KEY_REMOVE, 2408c2ecf20Sopenharmony_ci MOD_LOG_KEY_REMOVE_WHILE_FREEING, 2418c2ecf20Sopenharmony_ci MOD_LOG_KEY_REMOVE_WHILE_MOVING, 2428c2ecf20Sopenharmony_ci MOD_LOG_MOVE_KEYS, 2438c2ecf20Sopenharmony_ci MOD_LOG_ROOT_REPLACE, 2448c2ecf20Sopenharmony_ci}; 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_cistruct tree_mod_root { 2478c2ecf20Sopenharmony_ci u64 logical; 2488c2ecf20Sopenharmony_ci u8 level; 2498c2ecf20Sopenharmony_ci}; 2508c2ecf20Sopenharmony_ci 2518c2ecf20Sopenharmony_cistruct tree_mod_elem { 2528c2ecf20Sopenharmony_ci struct rb_node node; 2538c2ecf20Sopenharmony_ci u64 logical; 2548c2ecf20Sopenharmony_ci u64 seq; 2558c2ecf20Sopenharmony_ci enum mod_log_op op; 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */ 2588c2ecf20Sopenharmony_ci int slot; 2598c2ecf20Sopenharmony_ci 2608c2ecf20Sopenharmony_ci /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */ 2618c2ecf20Sopenharmony_ci u64 generation; 2628c2ecf20Sopenharmony_ci 2638c2ecf20Sopenharmony_ci /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */ 2648c2ecf20Sopenharmony_ci struct btrfs_disk_key key; 2658c2ecf20Sopenharmony_ci u64 blockptr; 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_ci /* this is used for op == MOD_LOG_MOVE_KEYS */ 2688c2ecf20Sopenharmony_ci struct { 2698c2ecf20Sopenharmony_ci int dst_slot; 2708c2ecf20Sopenharmony_ci int nr_items; 2718c2ecf20Sopenharmony_ci } move; 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_ci /* this is used for op == MOD_LOG_ROOT_REPLACE */ 2748c2ecf20Sopenharmony_ci struct tree_mod_root old_root; 2758c2ecf20Sopenharmony_ci}; 2768c2ecf20Sopenharmony_ci 2778c2ecf20Sopenharmony_ci/* 2788c2ecf20Sopenharmony_ci * Pull a new tree mod seq number for our operation. 2798c2ecf20Sopenharmony_ci */ 2808c2ecf20Sopenharmony_cistatic inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info) 2818c2ecf20Sopenharmony_ci{ 2828c2ecf20Sopenharmony_ci return atomic64_inc_return(&fs_info->tree_mod_seq); 2838c2ecf20Sopenharmony_ci} 2848c2ecf20Sopenharmony_ci 2858c2ecf20Sopenharmony_ci/* 2868c2ecf20Sopenharmony_ci * This adds a new blocker to the tree mod log's blocker list if the @elem 2878c2ecf20Sopenharmony_ci * passed does not already have a sequence number set. So when a caller expects 2888c2ecf20Sopenharmony_ci * to record tree modifications, it should ensure to set elem->seq to zero 2898c2ecf20Sopenharmony_ci * before calling btrfs_get_tree_mod_seq. 2908c2ecf20Sopenharmony_ci * Returns a fresh, unused tree log modification sequence number, even if no new 2918c2ecf20Sopenharmony_ci * blocker was added. 2928c2ecf20Sopenharmony_ci */ 2938c2ecf20Sopenharmony_ciu64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, 2948c2ecf20Sopenharmony_ci struct seq_list *elem) 2958c2ecf20Sopenharmony_ci{ 2968c2ecf20Sopenharmony_ci write_lock(&fs_info->tree_mod_log_lock); 2978c2ecf20Sopenharmony_ci if (!elem->seq) { 2988c2ecf20Sopenharmony_ci elem->seq = btrfs_inc_tree_mod_seq(fs_info); 2998c2ecf20Sopenharmony_ci list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); 3008c2ecf20Sopenharmony_ci } 3018c2ecf20Sopenharmony_ci write_unlock(&fs_info->tree_mod_log_lock); 3028c2ecf20Sopenharmony_ci 3038c2ecf20Sopenharmony_ci return elem->seq; 3048c2ecf20Sopenharmony_ci} 3058c2ecf20Sopenharmony_ci 3068c2ecf20Sopenharmony_civoid btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, 3078c2ecf20Sopenharmony_ci struct seq_list *elem) 3088c2ecf20Sopenharmony_ci{ 3098c2ecf20Sopenharmony_ci struct rb_root *tm_root; 3108c2ecf20Sopenharmony_ci struct rb_node *node; 3118c2ecf20Sopenharmony_ci struct rb_node *next; 3128c2ecf20Sopenharmony_ci struct tree_mod_elem *tm; 3138c2ecf20Sopenharmony_ci u64 min_seq = (u64)-1; 3148c2ecf20Sopenharmony_ci u64 seq_putting = elem->seq; 3158c2ecf20Sopenharmony_ci 3168c2ecf20Sopenharmony_ci if (!seq_putting) 3178c2ecf20Sopenharmony_ci return; 3188c2ecf20Sopenharmony_ci 3198c2ecf20Sopenharmony_ci write_lock(&fs_info->tree_mod_log_lock); 3208c2ecf20Sopenharmony_ci list_del(&elem->list); 3218c2ecf20Sopenharmony_ci elem->seq = 0; 3228c2ecf20Sopenharmony_ci 3238c2ecf20Sopenharmony_ci if (!list_empty(&fs_info->tree_mod_seq_list)) { 3248c2ecf20Sopenharmony_ci struct seq_list *first; 3258c2ecf20Sopenharmony_ci 3268c2ecf20Sopenharmony_ci first = list_first_entry(&fs_info->tree_mod_seq_list, 3278c2ecf20Sopenharmony_ci struct seq_list, list); 3288c2ecf20Sopenharmony_ci if (seq_putting > first->seq) { 3298c2ecf20Sopenharmony_ci /* 3308c2ecf20Sopenharmony_ci * Blocker with lower sequence number exists, we 3318c2ecf20Sopenharmony_ci * cannot remove anything from the log. 3328c2ecf20Sopenharmony_ci */ 3338c2ecf20Sopenharmony_ci write_unlock(&fs_info->tree_mod_log_lock); 3348c2ecf20Sopenharmony_ci return; 3358c2ecf20Sopenharmony_ci } 3368c2ecf20Sopenharmony_ci min_seq = first->seq; 3378c2ecf20Sopenharmony_ci } 3388c2ecf20Sopenharmony_ci 3398c2ecf20Sopenharmony_ci /* 3408c2ecf20Sopenharmony_ci * anything that's lower than the lowest existing (read: blocked) 3418c2ecf20Sopenharmony_ci * sequence number can be removed from the tree. 3428c2ecf20Sopenharmony_ci */ 3438c2ecf20Sopenharmony_ci tm_root = &fs_info->tree_mod_log; 3448c2ecf20Sopenharmony_ci for (node = rb_first(tm_root); node; node = next) { 3458c2ecf20Sopenharmony_ci next = rb_next(node); 3468c2ecf20Sopenharmony_ci tm = rb_entry(node, struct tree_mod_elem, node); 3478c2ecf20Sopenharmony_ci if (tm->seq >= min_seq) 3488c2ecf20Sopenharmony_ci continue; 3498c2ecf20Sopenharmony_ci rb_erase(node, tm_root); 3508c2ecf20Sopenharmony_ci kfree(tm); 3518c2ecf20Sopenharmony_ci } 3528c2ecf20Sopenharmony_ci write_unlock(&fs_info->tree_mod_log_lock); 3538c2ecf20Sopenharmony_ci} 3548c2ecf20Sopenharmony_ci 3558c2ecf20Sopenharmony_ci/* 3568c2ecf20Sopenharmony_ci * key order of the log: 3578c2ecf20Sopenharmony_ci * node/leaf start address -> sequence 3588c2ecf20Sopenharmony_ci * 3598c2ecf20Sopenharmony_ci * The 'start address' is the logical address of the *new* root node 3608c2ecf20Sopenharmony_ci * for root replace operations, or the logical address of the affected 3618c2ecf20Sopenharmony_ci * block for all other operations. 3628c2ecf20Sopenharmony_ci */ 3638c2ecf20Sopenharmony_cistatic noinline int 3648c2ecf20Sopenharmony_ci__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) 3658c2ecf20Sopenharmony_ci{ 3668c2ecf20Sopenharmony_ci struct rb_root *tm_root; 3678c2ecf20Sopenharmony_ci struct rb_node **new; 3688c2ecf20Sopenharmony_ci struct rb_node *parent = NULL; 3698c2ecf20Sopenharmony_ci struct tree_mod_elem *cur; 3708c2ecf20Sopenharmony_ci 3718c2ecf20Sopenharmony_ci lockdep_assert_held_write(&fs_info->tree_mod_log_lock); 3728c2ecf20Sopenharmony_ci 3738c2ecf20Sopenharmony_ci tm->seq = btrfs_inc_tree_mod_seq(fs_info); 3748c2ecf20Sopenharmony_ci 3758c2ecf20Sopenharmony_ci tm_root = &fs_info->tree_mod_log; 3768c2ecf20Sopenharmony_ci new = &tm_root->rb_node; 3778c2ecf20Sopenharmony_ci while (*new) { 3788c2ecf20Sopenharmony_ci cur = rb_entry(*new, struct tree_mod_elem, node); 3798c2ecf20Sopenharmony_ci parent = *new; 3808c2ecf20Sopenharmony_ci if (cur->logical < tm->logical) 3818c2ecf20Sopenharmony_ci new = &((*new)->rb_left); 3828c2ecf20Sopenharmony_ci else if (cur->logical > tm->logical) 3838c2ecf20Sopenharmony_ci new = &((*new)->rb_right); 3848c2ecf20Sopenharmony_ci else if (cur->seq < tm->seq) 3858c2ecf20Sopenharmony_ci new = &((*new)->rb_left); 3868c2ecf20Sopenharmony_ci else if (cur->seq > tm->seq) 3878c2ecf20Sopenharmony_ci new = &((*new)->rb_right); 3888c2ecf20Sopenharmony_ci else 3898c2ecf20Sopenharmony_ci return -EEXIST; 3908c2ecf20Sopenharmony_ci } 3918c2ecf20Sopenharmony_ci 3928c2ecf20Sopenharmony_ci rb_link_node(&tm->node, parent, new); 3938c2ecf20Sopenharmony_ci rb_insert_color(&tm->node, tm_root); 3948c2ecf20Sopenharmony_ci return 0; 3958c2ecf20Sopenharmony_ci} 3968c2ecf20Sopenharmony_ci 3978c2ecf20Sopenharmony_ci/* 3988c2ecf20Sopenharmony_ci * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it 3998c2ecf20Sopenharmony_ci * returns zero with the tree_mod_log_lock acquired. The caller must hold 4008c2ecf20Sopenharmony_ci * this until all tree mod log insertions are recorded in the rb tree and then 4018c2ecf20Sopenharmony_ci * write unlock fs_info::tree_mod_log_lock. 4028c2ecf20Sopenharmony_ci */ 4038c2ecf20Sopenharmony_cistatic inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, 4048c2ecf20Sopenharmony_ci struct extent_buffer *eb) { 4058c2ecf20Sopenharmony_ci smp_mb(); 4068c2ecf20Sopenharmony_ci if (list_empty(&(fs_info)->tree_mod_seq_list)) 4078c2ecf20Sopenharmony_ci return 1; 4088c2ecf20Sopenharmony_ci if (eb && btrfs_header_level(eb) == 0) 4098c2ecf20Sopenharmony_ci return 1; 4108c2ecf20Sopenharmony_ci 4118c2ecf20Sopenharmony_ci write_lock(&fs_info->tree_mod_log_lock); 4128c2ecf20Sopenharmony_ci if (list_empty(&(fs_info)->tree_mod_seq_list)) { 4138c2ecf20Sopenharmony_ci write_unlock(&fs_info->tree_mod_log_lock); 4148c2ecf20Sopenharmony_ci return 1; 4158c2ecf20Sopenharmony_ci } 4168c2ecf20Sopenharmony_ci 4178c2ecf20Sopenharmony_ci return 0; 4188c2ecf20Sopenharmony_ci} 4198c2ecf20Sopenharmony_ci 4208c2ecf20Sopenharmony_ci/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */ 4218c2ecf20Sopenharmony_cistatic inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info, 4228c2ecf20Sopenharmony_ci struct extent_buffer *eb) 4238c2ecf20Sopenharmony_ci{ 4248c2ecf20Sopenharmony_ci smp_mb(); 4258c2ecf20Sopenharmony_ci if (list_empty(&(fs_info)->tree_mod_seq_list)) 4268c2ecf20Sopenharmony_ci return 0; 4278c2ecf20Sopenharmony_ci if (eb && btrfs_header_level(eb) == 0) 4288c2ecf20Sopenharmony_ci return 0; 4298c2ecf20Sopenharmony_ci 4308c2ecf20Sopenharmony_ci return 1; 4318c2ecf20Sopenharmony_ci} 4328c2ecf20Sopenharmony_ci 4338c2ecf20Sopenharmony_cistatic struct tree_mod_elem * 4348c2ecf20Sopenharmony_cialloc_tree_mod_elem(struct extent_buffer *eb, int slot, 4358c2ecf20Sopenharmony_ci enum mod_log_op op, gfp_t flags) 4368c2ecf20Sopenharmony_ci{ 4378c2ecf20Sopenharmony_ci struct tree_mod_elem *tm; 4388c2ecf20Sopenharmony_ci 4398c2ecf20Sopenharmony_ci tm = kzalloc(sizeof(*tm), flags); 4408c2ecf20Sopenharmony_ci if (!tm) 4418c2ecf20Sopenharmony_ci return NULL; 4428c2ecf20Sopenharmony_ci 4438c2ecf20Sopenharmony_ci tm->logical = eb->start; 4448c2ecf20Sopenharmony_ci if (op != MOD_LOG_KEY_ADD) { 4458c2ecf20Sopenharmony_ci btrfs_node_key(eb, &tm->key, slot); 4468c2ecf20Sopenharmony_ci tm->blockptr = btrfs_node_blockptr(eb, slot); 4478c2ecf20Sopenharmony_ci } 4488c2ecf20Sopenharmony_ci tm->op = op; 4498c2ecf20Sopenharmony_ci tm->slot = slot; 4508c2ecf20Sopenharmony_ci tm->generation = btrfs_node_ptr_generation(eb, slot); 4518c2ecf20Sopenharmony_ci RB_CLEAR_NODE(&tm->node); 4528c2ecf20Sopenharmony_ci 4538c2ecf20Sopenharmony_ci return tm; 4548c2ecf20Sopenharmony_ci} 4558c2ecf20Sopenharmony_ci 4568c2ecf20Sopenharmony_cistatic noinline int tree_mod_log_insert_key(struct extent_buffer *eb, int slot, 4578c2ecf20Sopenharmony_ci enum mod_log_op op, gfp_t flags) 4588c2ecf20Sopenharmony_ci{ 4598c2ecf20Sopenharmony_ci struct tree_mod_elem *tm; 4608c2ecf20Sopenharmony_ci int ret; 4618c2ecf20Sopenharmony_ci 4628c2ecf20Sopenharmony_ci if (!tree_mod_need_log(eb->fs_info, eb)) 4638c2ecf20Sopenharmony_ci return 0; 4648c2ecf20Sopenharmony_ci 4658c2ecf20Sopenharmony_ci tm = alloc_tree_mod_elem(eb, slot, op, flags); 4668c2ecf20Sopenharmony_ci if (!tm) 4678c2ecf20Sopenharmony_ci return -ENOMEM; 4688c2ecf20Sopenharmony_ci 4698c2ecf20Sopenharmony_ci if (tree_mod_dont_log(eb->fs_info, eb)) { 4708c2ecf20Sopenharmony_ci kfree(tm); 4718c2ecf20Sopenharmony_ci return 0; 4728c2ecf20Sopenharmony_ci } 4738c2ecf20Sopenharmony_ci 4748c2ecf20Sopenharmony_ci ret = __tree_mod_log_insert(eb->fs_info, tm); 4758c2ecf20Sopenharmony_ci write_unlock(&eb->fs_info->tree_mod_log_lock); 4768c2ecf20Sopenharmony_ci if (ret) 4778c2ecf20Sopenharmony_ci kfree(tm); 4788c2ecf20Sopenharmony_ci 4798c2ecf20Sopenharmony_ci return ret; 4808c2ecf20Sopenharmony_ci} 4818c2ecf20Sopenharmony_ci 4828c2ecf20Sopenharmony_cistatic noinline int tree_mod_log_insert_move(struct extent_buffer *eb, 4838c2ecf20Sopenharmony_ci int dst_slot, int src_slot, int nr_items) 4848c2ecf20Sopenharmony_ci{ 4858c2ecf20Sopenharmony_ci struct tree_mod_elem *tm = NULL; 4868c2ecf20Sopenharmony_ci struct tree_mod_elem **tm_list = NULL; 4878c2ecf20Sopenharmony_ci int ret = 0; 4888c2ecf20Sopenharmony_ci int i; 4898c2ecf20Sopenharmony_ci int locked = 0; 4908c2ecf20Sopenharmony_ci 4918c2ecf20Sopenharmony_ci if (!tree_mod_need_log(eb->fs_info, eb)) 4928c2ecf20Sopenharmony_ci return 0; 4938c2ecf20Sopenharmony_ci 4948c2ecf20Sopenharmony_ci tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), GFP_NOFS); 4958c2ecf20Sopenharmony_ci if (!tm_list) 4968c2ecf20Sopenharmony_ci return -ENOMEM; 4978c2ecf20Sopenharmony_ci 4988c2ecf20Sopenharmony_ci tm = kzalloc(sizeof(*tm), GFP_NOFS); 4998c2ecf20Sopenharmony_ci if (!tm) { 5008c2ecf20Sopenharmony_ci ret = -ENOMEM; 5018c2ecf20Sopenharmony_ci goto free_tms; 5028c2ecf20Sopenharmony_ci } 5038c2ecf20Sopenharmony_ci 5048c2ecf20Sopenharmony_ci tm->logical = eb->start; 5058c2ecf20Sopenharmony_ci tm->slot = src_slot; 5068c2ecf20Sopenharmony_ci tm->move.dst_slot = dst_slot; 5078c2ecf20Sopenharmony_ci tm->move.nr_items = nr_items; 5088c2ecf20Sopenharmony_ci tm->op = MOD_LOG_MOVE_KEYS; 5098c2ecf20Sopenharmony_ci 5108c2ecf20Sopenharmony_ci for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { 5118c2ecf20Sopenharmony_ci tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot, 5128c2ecf20Sopenharmony_ci MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS); 5138c2ecf20Sopenharmony_ci if (!tm_list[i]) { 5148c2ecf20Sopenharmony_ci ret = -ENOMEM; 5158c2ecf20Sopenharmony_ci goto free_tms; 5168c2ecf20Sopenharmony_ci } 5178c2ecf20Sopenharmony_ci } 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_ci if (tree_mod_dont_log(eb->fs_info, eb)) 5208c2ecf20Sopenharmony_ci goto free_tms; 5218c2ecf20Sopenharmony_ci locked = 1; 5228c2ecf20Sopenharmony_ci 5238c2ecf20Sopenharmony_ci /* 5248c2ecf20Sopenharmony_ci * When we override something during the move, we log these removals. 5258c2ecf20Sopenharmony_ci * This can only happen when we move towards the beginning of the 5268c2ecf20Sopenharmony_ci * buffer, i.e. dst_slot < src_slot. 5278c2ecf20Sopenharmony_ci */ 5288c2ecf20Sopenharmony_ci for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) { 5298c2ecf20Sopenharmony_ci ret = __tree_mod_log_insert(eb->fs_info, tm_list[i]); 5308c2ecf20Sopenharmony_ci if (ret) 5318c2ecf20Sopenharmony_ci goto free_tms; 5328c2ecf20Sopenharmony_ci } 5338c2ecf20Sopenharmony_ci 5348c2ecf20Sopenharmony_ci ret = __tree_mod_log_insert(eb->fs_info, tm); 5358c2ecf20Sopenharmony_ci if (ret) 5368c2ecf20Sopenharmony_ci goto free_tms; 5378c2ecf20Sopenharmony_ci write_unlock(&eb->fs_info->tree_mod_log_lock); 5388c2ecf20Sopenharmony_ci kfree(tm_list); 5398c2ecf20Sopenharmony_ci 5408c2ecf20Sopenharmony_ci return 0; 5418c2ecf20Sopenharmony_cifree_tms: 5428c2ecf20Sopenharmony_ci for (i = 0; i < nr_items; i++) { 5438c2ecf20Sopenharmony_ci if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node)) 5448c2ecf20Sopenharmony_ci rb_erase(&tm_list[i]->node, &eb->fs_info->tree_mod_log); 5458c2ecf20Sopenharmony_ci kfree(tm_list[i]); 5468c2ecf20Sopenharmony_ci } 5478c2ecf20Sopenharmony_ci if (locked) 5488c2ecf20Sopenharmony_ci write_unlock(&eb->fs_info->tree_mod_log_lock); 5498c2ecf20Sopenharmony_ci kfree(tm_list); 5508c2ecf20Sopenharmony_ci kfree(tm); 5518c2ecf20Sopenharmony_ci 5528c2ecf20Sopenharmony_ci return ret; 5538c2ecf20Sopenharmony_ci} 5548c2ecf20Sopenharmony_ci 5558c2ecf20Sopenharmony_cistatic inline int 5568c2ecf20Sopenharmony_ci__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, 5578c2ecf20Sopenharmony_ci struct tree_mod_elem **tm_list, 5588c2ecf20Sopenharmony_ci int nritems) 5598c2ecf20Sopenharmony_ci{ 5608c2ecf20Sopenharmony_ci int i, j; 5618c2ecf20Sopenharmony_ci int ret; 5628c2ecf20Sopenharmony_ci 5638c2ecf20Sopenharmony_ci for (i = nritems - 1; i >= 0; i--) { 5648c2ecf20Sopenharmony_ci ret = __tree_mod_log_insert(fs_info, tm_list[i]); 5658c2ecf20Sopenharmony_ci if (ret) { 5668c2ecf20Sopenharmony_ci for (j = nritems - 1; j > i; j--) 5678c2ecf20Sopenharmony_ci rb_erase(&tm_list[j]->node, 5688c2ecf20Sopenharmony_ci &fs_info->tree_mod_log); 5698c2ecf20Sopenharmony_ci return ret; 5708c2ecf20Sopenharmony_ci } 5718c2ecf20Sopenharmony_ci } 5728c2ecf20Sopenharmony_ci 5738c2ecf20Sopenharmony_ci return 0; 5748c2ecf20Sopenharmony_ci} 5758c2ecf20Sopenharmony_ci 5768c2ecf20Sopenharmony_cistatic noinline int tree_mod_log_insert_root(struct extent_buffer *old_root, 5778c2ecf20Sopenharmony_ci struct extent_buffer *new_root, int log_removal) 5788c2ecf20Sopenharmony_ci{ 5798c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = old_root->fs_info; 5808c2ecf20Sopenharmony_ci struct tree_mod_elem *tm = NULL; 5818c2ecf20Sopenharmony_ci struct tree_mod_elem **tm_list = NULL; 5828c2ecf20Sopenharmony_ci int nritems = 0; 5838c2ecf20Sopenharmony_ci int ret = 0; 5848c2ecf20Sopenharmony_ci int i; 5858c2ecf20Sopenharmony_ci 5868c2ecf20Sopenharmony_ci if (!tree_mod_need_log(fs_info, NULL)) 5878c2ecf20Sopenharmony_ci return 0; 5888c2ecf20Sopenharmony_ci 5898c2ecf20Sopenharmony_ci if (log_removal && btrfs_header_level(old_root) > 0) { 5908c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(old_root); 5918c2ecf20Sopenharmony_ci tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), 5928c2ecf20Sopenharmony_ci GFP_NOFS); 5938c2ecf20Sopenharmony_ci if (!tm_list) { 5948c2ecf20Sopenharmony_ci ret = -ENOMEM; 5958c2ecf20Sopenharmony_ci goto free_tms; 5968c2ecf20Sopenharmony_ci } 5978c2ecf20Sopenharmony_ci for (i = 0; i < nritems; i++) { 5988c2ecf20Sopenharmony_ci tm_list[i] = alloc_tree_mod_elem(old_root, i, 5998c2ecf20Sopenharmony_ci MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS); 6008c2ecf20Sopenharmony_ci if (!tm_list[i]) { 6018c2ecf20Sopenharmony_ci ret = -ENOMEM; 6028c2ecf20Sopenharmony_ci goto free_tms; 6038c2ecf20Sopenharmony_ci } 6048c2ecf20Sopenharmony_ci } 6058c2ecf20Sopenharmony_ci } 6068c2ecf20Sopenharmony_ci 6078c2ecf20Sopenharmony_ci tm = kzalloc(sizeof(*tm), GFP_NOFS); 6088c2ecf20Sopenharmony_ci if (!tm) { 6098c2ecf20Sopenharmony_ci ret = -ENOMEM; 6108c2ecf20Sopenharmony_ci goto free_tms; 6118c2ecf20Sopenharmony_ci } 6128c2ecf20Sopenharmony_ci 6138c2ecf20Sopenharmony_ci tm->logical = new_root->start; 6148c2ecf20Sopenharmony_ci tm->old_root.logical = old_root->start; 6158c2ecf20Sopenharmony_ci tm->old_root.level = btrfs_header_level(old_root); 6168c2ecf20Sopenharmony_ci tm->generation = btrfs_header_generation(old_root); 6178c2ecf20Sopenharmony_ci tm->op = MOD_LOG_ROOT_REPLACE; 6188c2ecf20Sopenharmony_ci 6198c2ecf20Sopenharmony_ci if (tree_mod_dont_log(fs_info, NULL)) 6208c2ecf20Sopenharmony_ci goto free_tms; 6218c2ecf20Sopenharmony_ci 6228c2ecf20Sopenharmony_ci if (tm_list) 6238c2ecf20Sopenharmony_ci ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems); 6248c2ecf20Sopenharmony_ci if (!ret) 6258c2ecf20Sopenharmony_ci ret = __tree_mod_log_insert(fs_info, tm); 6268c2ecf20Sopenharmony_ci 6278c2ecf20Sopenharmony_ci write_unlock(&fs_info->tree_mod_log_lock); 6288c2ecf20Sopenharmony_ci if (ret) 6298c2ecf20Sopenharmony_ci goto free_tms; 6308c2ecf20Sopenharmony_ci kfree(tm_list); 6318c2ecf20Sopenharmony_ci 6328c2ecf20Sopenharmony_ci return ret; 6338c2ecf20Sopenharmony_ci 6348c2ecf20Sopenharmony_cifree_tms: 6358c2ecf20Sopenharmony_ci if (tm_list) { 6368c2ecf20Sopenharmony_ci for (i = 0; i < nritems; i++) 6378c2ecf20Sopenharmony_ci kfree(tm_list[i]); 6388c2ecf20Sopenharmony_ci kfree(tm_list); 6398c2ecf20Sopenharmony_ci } 6408c2ecf20Sopenharmony_ci kfree(tm); 6418c2ecf20Sopenharmony_ci 6428c2ecf20Sopenharmony_ci return ret; 6438c2ecf20Sopenharmony_ci} 6448c2ecf20Sopenharmony_ci 6458c2ecf20Sopenharmony_cistatic struct tree_mod_elem * 6468c2ecf20Sopenharmony_ci__tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, 6478c2ecf20Sopenharmony_ci int smallest) 6488c2ecf20Sopenharmony_ci{ 6498c2ecf20Sopenharmony_ci struct rb_root *tm_root; 6508c2ecf20Sopenharmony_ci struct rb_node *node; 6518c2ecf20Sopenharmony_ci struct tree_mod_elem *cur = NULL; 6528c2ecf20Sopenharmony_ci struct tree_mod_elem *found = NULL; 6538c2ecf20Sopenharmony_ci 6548c2ecf20Sopenharmony_ci read_lock(&fs_info->tree_mod_log_lock); 6558c2ecf20Sopenharmony_ci tm_root = &fs_info->tree_mod_log; 6568c2ecf20Sopenharmony_ci node = tm_root->rb_node; 6578c2ecf20Sopenharmony_ci while (node) { 6588c2ecf20Sopenharmony_ci cur = rb_entry(node, struct tree_mod_elem, node); 6598c2ecf20Sopenharmony_ci if (cur->logical < start) { 6608c2ecf20Sopenharmony_ci node = node->rb_left; 6618c2ecf20Sopenharmony_ci } else if (cur->logical > start) { 6628c2ecf20Sopenharmony_ci node = node->rb_right; 6638c2ecf20Sopenharmony_ci } else if (cur->seq < min_seq) { 6648c2ecf20Sopenharmony_ci node = node->rb_left; 6658c2ecf20Sopenharmony_ci } else if (!smallest) { 6668c2ecf20Sopenharmony_ci /* we want the node with the highest seq */ 6678c2ecf20Sopenharmony_ci if (found) 6688c2ecf20Sopenharmony_ci BUG_ON(found->seq > cur->seq); 6698c2ecf20Sopenharmony_ci found = cur; 6708c2ecf20Sopenharmony_ci node = node->rb_left; 6718c2ecf20Sopenharmony_ci } else if (cur->seq > min_seq) { 6728c2ecf20Sopenharmony_ci /* we want the node with the smallest seq */ 6738c2ecf20Sopenharmony_ci if (found) 6748c2ecf20Sopenharmony_ci BUG_ON(found->seq < cur->seq); 6758c2ecf20Sopenharmony_ci found = cur; 6768c2ecf20Sopenharmony_ci node = node->rb_right; 6778c2ecf20Sopenharmony_ci } else { 6788c2ecf20Sopenharmony_ci found = cur; 6798c2ecf20Sopenharmony_ci break; 6808c2ecf20Sopenharmony_ci } 6818c2ecf20Sopenharmony_ci } 6828c2ecf20Sopenharmony_ci read_unlock(&fs_info->tree_mod_log_lock); 6838c2ecf20Sopenharmony_ci 6848c2ecf20Sopenharmony_ci return found; 6858c2ecf20Sopenharmony_ci} 6868c2ecf20Sopenharmony_ci 6878c2ecf20Sopenharmony_ci/* 6888c2ecf20Sopenharmony_ci * this returns the element from the log with the smallest time sequence 6898c2ecf20Sopenharmony_ci * value that's in the log (the oldest log item). any element with a time 6908c2ecf20Sopenharmony_ci * sequence lower than min_seq will be ignored. 6918c2ecf20Sopenharmony_ci */ 6928c2ecf20Sopenharmony_cistatic struct tree_mod_elem * 6938c2ecf20Sopenharmony_citree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start, 6948c2ecf20Sopenharmony_ci u64 min_seq) 6958c2ecf20Sopenharmony_ci{ 6968c2ecf20Sopenharmony_ci return __tree_mod_log_search(fs_info, start, min_seq, 1); 6978c2ecf20Sopenharmony_ci} 6988c2ecf20Sopenharmony_ci 6998c2ecf20Sopenharmony_ci/* 7008c2ecf20Sopenharmony_ci * this returns the element from the log with the largest time sequence 7018c2ecf20Sopenharmony_ci * value that's in the log (the most recent log item). any element with 7028c2ecf20Sopenharmony_ci * a time sequence lower than min_seq will be ignored. 7038c2ecf20Sopenharmony_ci */ 7048c2ecf20Sopenharmony_cistatic struct tree_mod_elem * 7058c2ecf20Sopenharmony_citree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq) 7068c2ecf20Sopenharmony_ci{ 7078c2ecf20Sopenharmony_ci return __tree_mod_log_search(fs_info, start, min_seq, 0); 7088c2ecf20Sopenharmony_ci} 7098c2ecf20Sopenharmony_ci 7108c2ecf20Sopenharmony_cistatic noinline int tree_mod_log_eb_copy(struct extent_buffer *dst, 7118c2ecf20Sopenharmony_ci struct extent_buffer *src, unsigned long dst_offset, 7128c2ecf20Sopenharmony_ci unsigned long src_offset, int nr_items) 7138c2ecf20Sopenharmony_ci{ 7148c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = dst->fs_info; 7158c2ecf20Sopenharmony_ci int ret = 0; 7168c2ecf20Sopenharmony_ci struct tree_mod_elem **tm_list = NULL; 7178c2ecf20Sopenharmony_ci struct tree_mod_elem **tm_list_add, **tm_list_rem; 7188c2ecf20Sopenharmony_ci int i; 7198c2ecf20Sopenharmony_ci int locked = 0; 7208c2ecf20Sopenharmony_ci 7218c2ecf20Sopenharmony_ci if (!tree_mod_need_log(fs_info, NULL)) 7228c2ecf20Sopenharmony_ci return 0; 7238c2ecf20Sopenharmony_ci 7248c2ecf20Sopenharmony_ci if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) 7258c2ecf20Sopenharmony_ci return 0; 7268c2ecf20Sopenharmony_ci 7278c2ecf20Sopenharmony_ci tm_list = kcalloc(nr_items * 2, sizeof(struct tree_mod_elem *), 7288c2ecf20Sopenharmony_ci GFP_NOFS); 7298c2ecf20Sopenharmony_ci if (!tm_list) 7308c2ecf20Sopenharmony_ci return -ENOMEM; 7318c2ecf20Sopenharmony_ci 7328c2ecf20Sopenharmony_ci tm_list_add = tm_list; 7338c2ecf20Sopenharmony_ci tm_list_rem = tm_list + nr_items; 7348c2ecf20Sopenharmony_ci for (i = 0; i < nr_items; i++) { 7358c2ecf20Sopenharmony_ci tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset, 7368c2ecf20Sopenharmony_ci MOD_LOG_KEY_REMOVE, GFP_NOFS); 7378c2ecf20Sopenharmony_ci if (!tm_list_rem[i]) { 7388c2ecf20Sopenharmony_ci ret = -ENOMEM; 7398c2ecf20Sopenharmony_ci goto free_tms; 7408c2ecf20Sopenharmony_ci } 7418c2ecf20Sopenharmony_ci 7428c2ecf20Sopenharmony_ci tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset, 7438c2ecf20Sopenharmony_ci MOD_LOG_KEY_ADD, GFP_NOFS); 7448c2ecf20Sopenharmony_ci if (!tm_list_add[i]) { 7458c2ecf20Sopenharmony_ci ret = -ENOMEM; 7468c2ecf20Sopenharmony_ci goto free_tms; 7478c2ecf20Sopenharmony_ci } 7488c2ecf20Sopenharmony_ci } 7498c2ecf20Sopenharmony_ci 7508c2ecf20Sopenharmony_ci if (tree_mod_dont_log(fs_info, NULL)) 7518c2ecf20Sopenharmony_ci goto free_tms; 7528c2ecf20Sopenharmony_ci locked = 1; 7538c2ecf20Sopenharmony_ci 7548c2ecf20Sopenharmony_ci for (i = 0; i < nr_items; i++) { 7558c2ecf20Sopenharmony_ci ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]); 7568c2ecf20Sopenharmony_ci if (ret) 7578c2ecf20Sopenharmony_ci goto free_tms; 7588c2ecf20Sopenharmony_ci ret = __tree_mod_log_insert(fs_info, tm_list_add[i]); 7598c2ecf20Sopenharmony_ci if (ret) 7608c2ecf20Sopenharmony_ci goto free_tms; 7618c2ecf20Sopenharmony_ci } 7628c2ecf20Sopenharmony_ci 7638c2ecf20Sopenharmony_ci write_unlock(&fs_info->tree_mod_log_lock); 7648c2ecf20Sopenharmony_ci kfree(tm_list); 7658c2ecf20Sopenharmony_ci 7668c2ecf20Sopenharmony_ci return 0; 7678c2ecf20Sopenharmony_ci 7688c2ecf20Sopenharmony_cifree_tms: 7698c2ecf20Sopenharmony_ci for (i = 0; i < nr_items * 2; i++) { 7708c2ecf20Sopenharmony_ci if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node)) 7718c2ecf20Sopenharmony_ci rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log); 7728c2ecf20Sopenharmony_ci kfree(tm_list[i]); 7738c2ecf20Sopenharmony_ci } 7748c2ecf20Sopenharmony_ci if (locked) 7758c2ecf20Sopenharmony_ci write_unlock(&fs_info->tree_mod_log_lock); 7768c2ecf20Sopenharmony_ci kfree(tm_list); 7778c2ecf20Sopenharmony_ci 7788c2ecf20Sopenharmony_ci return ret; 7798c2ecf20Sopenharmony_ci} 7808c2ecf20Sopenharmony_ci 7818c2ecf20Sopenharmony_cistatic noinline int tree_mod_log_free_eb(struct extent_buffer *eb) 7828c2ecf20Sopenharmony_ci{ 7838c2ecf20Sopenharmony_ci struct tree_mod_elem **tm_list = NULL; 7848c2ecf20Sopenharmony_ci int nritems = 0; 7858c2ecf20Sopenharmony_ci int i; 7868c2ecf20Sopenharmony_ci int ret = 0; 7878c2ecf20Sopenharmony_ci 7888c2ecf20Sopenharmony_ci if (btrfs_header_level(eb) == 0) 7898c2ecf20Sopenharmony_ci return 0; 7908c2ecf20Sopenharmony_ci 7918c2ecf20Sopenharmony_ci if (!tree_mod_need_log(eb->fs_info, NULL)) 7928c2ecf20Sopenharmony_ci return 0; 7938c2ecf20Sopenharmony_ci 7948c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(eb); 7958c2ecf20Sopenharmony_ci tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), GFP_NOFS); 7968c2ecf20Sopenharmony_ci if (!tm_list) 7978c2ecf20Sopenharmony_ci return -ENOMEM; 7988c2ecf20Sopenharmony_ci 7998c2ecf20Sopenharmony_ci for (i = 0; i < nritems; i++) { 8008c2ecf20Sopenharmony_ci tm_list[i] = alloc_tree_mod_elem(eb, i, 8018c2ecf20Sopenharmony_ci MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS); 8028c2ecf20Sopenharmony_ci if (!tm_list[i]) { 8038c2ecf20Sopenharmony_ci ret = -ENOMEM; 8048c2ecf20Sopenharmony_ci goto free_tms; 8058c2ecf20Sopenharmony_ci } 8068c2ecf20Sopenharmony_ci } 8078c2ecf20Sopenharmony_ci 8088c2ecf20Sopenharmony_ci if (tree_mod_dont_log(eb->fs_info, eb)) 8098c2ecf20Sopenharmony_ci goto free_tms; 8108c2ecf20Sopenharmony_ci 8118c2ecf20Sopenharmony_ci ret = __tree_mod_log_free_eb(eb->fs_info, tm_list, nritems); 8128c2ecf20Sopenharmony_ci write_unlock(&eb->fs_info->tree_mod_log_lock); 8138c2ecf20Sopenharmony_ci if (ret) 8148c2ecf20Sopenharmony_ci goto free_tms; 8158c2ecf20Sopenharmony_ci kfree(tm_list); 8168c2ecf20Sopenharmony_ci 8178c2ecf20Sopenharmony_ci return 0; 8188c2ecf20Sopenharmony_ci 8198c2ecf20Sopenharmony_cifree_tms: 8208c2ecf20Sopenharmony_ci for (i = 0; i < nritems; i++) 8218c2ecf20Sopenharmony_ci kfree(tm_list[i]); 8228c2ecf20Sopenharmony_ci kfree(tm_list); 8238c2ecf20Sopenharmony_ci 8248c2ecf20Sopenharmony_ci return ret; 8258c2ecf20Sopenharmony_ci} 8268c2ecf20Sopenharmony_ci 8278c2ecf20Sopenharmony_ci/* 8288c2ecf20Sopenharmony_ci * check if the tree block can be shared by multiple trees 8298c2ecf20Sopenharmony_ci */ 8308c2ecf20Sopenharmony_ciint btrfs_block_can_be_shared(struct btrfs_root *root, 8318c2ecf20Sopenharmony_ci struct extent_buffer *buf) 8328c2ecf20Sopenharmony_ci{ 8338c2ecf20Sopenharmony_ci /* 8348c2ecf20Sopenharmony_ci * Tree blocks not in shareable trees and tree roots are never shared. 8358c2ecf20Sopenharmony_ci * If a block was allocated after the last snapshot and the block was 8368c2ecf20Sopenharmony_ci * not allocated by tree relocation, we know the block is not shared. 8378c2ecf20Sopenharmony_ci */ 8388c2ecf20Sopenharmony_ci if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && 8398c2ecf20Sopenharmony_ci buf != root->node && buf != root->commit_root && 8408c2ecf20Sopenharmony_ci (btrfs_header_generation(buf) <= 8418c2ecf20Sopenharmony_ci btrfs_root_last_snapshot(&root->root_item) || 8428c2ecf20Sopenharmony_ci btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) 8438c2ecf20Sopenharmony_ci return 1; 8448c2ecf20Sopenharmony_ci 8458c2ecf20Sopenharmony_ci return 0; 8468c2ecf20Sopenharmony_ci} 8478c2ecf20Sopenharmony_ci 8488c2ecf20Sopenharmony_cistatic noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, 8498c2ecf20Sopenharmony_ci struct btrfs_root *root, 8508c2ecf20Sopenharmony_ci struct extent_buffer *buf, 8518c2ecf20Sopenharmony_ci struct extent_buffer *cow, 8528c2ecf20Sopenharmony_ci int *last_ref) 8538c2ecf20Sopenharmony_ci{ 8548c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 8558c2ecf20Sopenharmony_ci u64 refs; 8568c2ecf20Sopenharmony_ci u64 owner; 8578c2ecf20Sopenharmony_ci u64 flags; 8588c2ecf20Sopenharmony_ci u64 new_flags = 0; 8598c2ecf20Sopenharmony_ci int ret; 8608c2ecf20Sopenharmony_ci 8618c2ecf20Sopenharmony_ci /* 8628c2ecf20Sopenharmony_ci * Backrefs update rules: 8638c2ecf20Sopenharmony_ci * 8648c2ecf20Sopenharmony_ci * Always use full backrefs for extent pointers in tree block 8658c2ecf20Sopenharmony_ci * allocated by tree relocation. 8668c2ecf20Sopenharmony_ci * 8678c2ecf20Sopenharmony_ci * If a shared tree block is no longer referenced by its owner 8688c2ecf20Sopenharmony_ci * tree (btrfs_header_owner(buf) == root->root_key.objectid), 8698c2ecf20Sopenharmony_ci * use full backrefs for extent pointers in tree block. 8708c2ecf20Sopenharmony_ci * 8718c2ecf20Sopenharmony_ci * If a tree block is been relocating 8728c2ecf20Sopenharmony_ci * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID), 8738c2ecf20Sopenharmony_ci * use full backrefs for extent pointers in tree block. 8748c2ecf20Sopenharmony_ci * The reason for this is some operations (such as drop tree) 8758c2ecf20Sopenharmony_ci * are only allowed for blocks use full backrefs. 8768c2ecf20Sopenharmony_ci */ 8778c2ecf20Sopenharmony_ci 8788c2ecf20Sopenharmony_ci if (btrfs_block_can_be_shared(root, buf)) { 8798c2ecf20Sopenharmony_ci ret = btrfs_lookup_extent_info(trans, fs_info, buf->start, 8808c2ecf20Sopenharmony_ci btrfs_header_level(buf), 1, 8818c2ecf20Sopenharmony_ci &refs, &flags); 8828c2ecf20Sopenharmony_ci if (ret) 8838c2ecf20Sopenharmony_ci return ret; 8848c2ecf20Sopenharmony_ci if (refs == 0) { 8858c2ecf20Sopenharmony_ci ret = -EROFS; 8868c2ecf20Sopenharmony_ci btrfs_handle_fs_error(fs_info, ret, NULL); 8878c2ecf20Sopenharmony_ci return ret; 8888c2ecf20Sopenharmony_ci } 8898c2ecf20Sopenharmony_ci } else { 8908c2ecf20Sopenharmony_ci refs = 1; 8918c2ecf20Sopenharmony_ci if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || 8928c2ecf20Sopenharmony_ci btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) 8938c2ecf20Sopenharmony_ci flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; 8948c2ecf20Sopenharmony_ci else 8958c2ecf20Sopenharmony_ci flags = 0; 8968c2ecf20Sopenharmony_ci } 8978c2ecf20Sopenharmony_ci 8988c2ecf20Sopenharmony_ci owner = btrfs_header_owner(buf); 8998c2ecf20Sopenharmony_ci BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID && 9008c2ecf20Sopenharmony_ci !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 9018c2ecf20Sopenharmony_ci 9028c2ecf20Sopenharmony_ci if (refs > 1) { 9038c2ecf20Sopenharmony_ci if ((owner == root->root_key.objectid || 9048c2ecf20Sopenharmony_ci root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && 9058c2ecf20Sopenharmony_ci !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { 9068c2ecf20Sopenharmony_ci ret = btrfs_inc_ref(trans, root, buf, 1); 9078c2ecf20Sopenharmony_ci if (ret) 9088c2ecf20Sopenharmony_ci return ret; 9098c2ecf20Sopenharmony_ci 9108c2ecf20Sopenharmony_ci if (root->root_key.objectid == 9118c2ecf20Sopenharmony_ci BTRFS_TREE_RELOC_OBJECTID) { 9128c2ecf20Sopenharmony_ci ret = btrfs_dec_ref(trans, root, buf, 0); 9138c2ecf20Sopenharmony_ci if (ret) 9148c2ecf20Sopenharmony_ci return ret; 9158c2ecf20Sopenharmony_ci ret = btrfs_inc_ref(trans, root, cow, 1); 9168c2ecf20Sopenharmony_ci if (ret) 9178c2ecf20Sopenharmony_ci return ret; 9188c2ecf20Sopenharmony_ci } 9198c2ecf20Sopenharmony_ci new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 9208c2ecf20Sopenharmony_ci } else { 9218c2ecf20Sopenharmony_ci 9228c2ecf20Sopenharmony_ci if (root->root_key.objectid == 9238c2ecf20Sopenharmony_ci BTRFS_TREE_RELOC_OBJECTID) 9248c2ecf20Sopenharmony_ci ret = btrfs_inc_ref(trans, root, cow, 1); 9258c2ecf20Sopenharmony_ci else 9268c2ecf20Sopenharmony_ci ret = btrfs_inc_ref(trans, root, cow, 0); 9278c2ecf20Sopenharmony_ci if (ret) 9288c2ecf20Sopenharmony_ci return ret; 9298c2ecf20Sopenharmony_ci } 9308c2ecf20Sopenharmony_ci if (new_flags != 0) { 9318c2ecf20Sopenharmony_ci int level = btrfs_header_level(buf); 9328c2ecf20Sopenharmony_ci 9338c2ecf20Sopenharmony_ci ret = btrfs_set_disk_extent_flags(trans, buf, 9348c2ecf20Sopenharmony_ci new_flags, level, 0); 9358c2ecf20Sopenharmony_ci if (ret) 9368c2ecf20Sopenharmony_ci return ret; 9378c2ecf20Sopenharmony_ci } 9388c2ecf20Sopenharmony_ci } else { 9398c2ecf20Sopenharmony_ci if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { 9408c2ecf20Sopenharmony_ci if (root->root_key.objectid == 9418c2ecf20Sopenharmony_ci BTRFS_TREE_RELOC_OBJECTID) 9428c2ecf20Sopenharmony_ci ret = btrfs_inc_ref(trans, root, cow, 1); 9438c2ecf20Sopenharmony_ci else 9448c2ecf20Sopenharmony_ci ret = btrfs_inc_ref(trans, root, cow, 0); 9458c2ecf20Sopenharmony_ci if (ret) 9468c2ecf20Sopenharmony_ci return ret; 9478c2ecf20Sopenharmony_ci ret = btrfs_dec_ref(trans, root, buf, 1); 9488c2ecf20Sopenharmony_ci if (ret) 9498c2ecf20Sopenharmony_ci return ret; 9508c2ecf20Sopenharmony_ci } 9518c2ecf20Sopenharmony_ci btrfs_clean_tree_block(buf); 9528c2ecf20Sopenharmony_ci *last_ref = 1; 9538c2ecf20Sopenharmony_ci } 9548c2ecf20Sopenharmony_ci return 0; 9558c2ecf20Sopenharmony_ci} 9568c2ecf20Sopenharmony_ci 9578c2ecf20Sopenharmony_cistatic struct extent_buffer *alloc_tree_block_no_bg_flush( 9588c2ecf20Sopenharmony_ci struct btrfs_trans_handle *trans, 9598c2ecf20Sopenharmony_ci struct btrfs_root *root, 9608c2ecf20Sopenharmony_ci u64 parent_start, 9618c2ecf20Sopenharmony_ci const struct btrfs_disk_key *disk_key, 9628c2ecf20Sopenharmony_ci int level, 9638c2ecf20Sopenharmony_ci u64 hint, 9648c2ecf20Sopenharmony_ci u64 empty_size, 9658c2ecf20Sopenharmony_ci enum btrfs_lock_nesting nest) 9668c2ecf20Sopenharmony_ci{ 9678c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 9688c2ecf20Sopenharmony_ci struct extent_buffer *ret; 9698c2ecf20Sopenharmony_ci 9708c2ecf20Sopenharmony_ci /* 9718c2ecf20Sopenharmony_ci * If we are COWing a node/leaf from the extent, chunk, device or free 9728c2ecf20Sopenharmony_ci * space trees, make sure that we do not finish block group creation of 9738c2ecf20Sopenharmony_ci * pending block groups. We do this to avoid a deadlock. 9748c2ecf20Sopenharmony_ci * COWing can result in allocation of a new chunk, and flushing pending 9758c2ecf20Sopenharmony_ci * block groups (btrfs_create_pending_block_groups()) can be triggered 9768c2ecf20Sopenharmony_ci * when finishing allocation of a new chunk. Creation of a pending block 9778c2ecf20Sopenharmony_ci * group modifies the extent, chunk, device and free space trees, 9788c2ecf20Sopenharmony_ci * therefore we could deadlock with ourselves since we are holding a 9798c2ecf20Sopenharmony_ci * lock on an extent buffer that btrfs_create_pending_block_groups() may 9808c2ecf20Sopenharmony_ci * try to COW later. 9818c2ecf20Sopenharmony_ci * For similar reasons, we also need to delay flushing pending block 9828c2ecf20Sopenharmony_ci * groups when splitting a leaf or node, from one of those trees, since 9838c2ecf20Sopenharmony_ci * we are holding a write lock on it and its parent or when inserting a 9848c2ecf20Sopenharmony_ci * new root node for one of those trees. 9858c2ecf20Sopenharmony_ci */ 9868c2ecf20Sopenharmony_ci if (root == fs_info->extent_root || 9878c2ecf20Sopenharmony_ci root == fs_info->chunk_root || 9888c2ecf20Sopenharmony_ci root == fs_info->dev_root || 9898c2ecf20Sopenharmony_ci root == fs_info->free_space_root) 9908c2ecf20Sopenharmony_ci trans->can_flush_pending_bgs = false; 9918c2ecf20Sopenharmony_ci 9928c2ecf20Sopenharmony_ci ret = btrfs_alloc_tree_block(trans, root, parent_start, 9938c2ecf20Sopenharmony_ci root->root_key.objectid, disk_key, level, 9948c2ecf20Sopenharmony_ci hint, empty_size, nest); 9958c2ecf20Sopenharmony_ci trans->can_flush_pending_bgs = true; 9968c2ecf20Sopenharmony_ci 9978c2ecf20Sopenharmony_ci return ret; 9988c2ecf20Sopenharmony_ci} 9998c2ecf20Sopenharmony_ci 10008c2ecf20Sopenharmony_ci/* 10018c2ecf20Sopenharmony_ci * does the dirty work in cow of a single block. The parent block (if 10028c2ecf20Sopenharmony_ci * supplied) is updated to point to the new cow copy. The new buffer is marked 10038c2ecf20Sopenharmony_ci * dirty and returned locked. If you modify the block it needs to be marked 10048c2ecf20Sopenharmony_ci * dirty again. 10058c2ecf20Sopenharmony_ci * 10068c2ecf20Sopenharmony_ci * search_start -- an allocation hint for the new block 10078c2ecf20Sopenharmony_ci * 10088c2ecf20Sopenharmony_ci * empty_size -- a hint that you plan on doing more cow. This is the size in 10098c2ecf20Sopenharmony_ci * bytes the allocator should try to find free next to the block it returns. 10108c2ecf20Sopenharmony_ci * This is just a hint and may be ignored by the allocator. 10118c2ecf20Sopenharmony_ci */ 10128c2ecf20Sopenharmony_cistatic noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, 10138c2ecf20Sopenharmony_ci struct btrfs_root *root, 10148c2ecf20Sopenharmony_ci struct extent_buffer *buf, 10158c2ecf20Sopenharmony_ci struct extent_buffer *parent, int parent_slot, 10168c2ecf20Sopenharmony_ci struct extent_buffer **cow_ret, 10178c2ecf20Sopenharmony_ci u64 search_start, u64 empty_size, 10188c2ecf20Sopenharmony_ci enum btrfs_lock_nesting nest) 10198c2ecf20Sopenharmony_ci{ 10208c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 10218c2ecf20Sopenharmony_ci struct btrfs_disk_key disk_key; 10228c2ecf20Sopenharmony_ci struct extent_buffer *cow; 10238c2ecf20Sopenharmony_ci int level, ret; 10248c2ecf20Sopenharmony_ci int last_ref = 0; 10258c2ecf20Sopenharmony_ci int unlock_orig = 0; 10268c2ecf20Sopenharmony_ci u64 parent_start = 0; 10278c2ecf20Sopenharmony_ci 10288c2ecf20Sopenharmony_ci if (*cow_ret == buf) 10298c2ecf20Sopenharmony_ci unlock_orig = 1; 10308c2ecf20Sopenharmony_ci 10318c2ecf20Sopenharmony_ci btrfs_assert_tree_locked(buf); 10328c2ecf20Sopenharmony_ci 10338c2ecf20Sopenharmony_ci WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && 10348c2ecf20Sopenharmony_ci trans->transid != fs_info->running_transaction->transid); 10358c2ecf20Sopenharmony_ci WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && 10368c2ecf20Sopenharmony_ci trans->transid != root->last_trans); 10378c2ecf20Sopenharmony_ci 10388c2ecf20Sopenharmony_ci level = btrfs_header_level(buf); 10398c2ecf20Sopenharmony_ci 10408c2ecf20Sopenharmony_ci if (level == 0) 10418c2ecf20Sopenharmony_ci btrfs_item_key(buf, &disk_key, 0); 10428c2ecf20Sopenharmony_ci else 10438c2ecf20Sopenharmony_ci btrfs_node_key(buf, &disk_key, 0); 10448c2ecf20Sopenharmony_ci 10458c2ecf20Sopenharmony_ci if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent) 10468c2ecf20Sopenharmony_ci parent_start = parent->start; 10478c2ecf20Sopenharmony_ci 10488c2ecf20Sopenharmony_ci cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key, 10498c2ecf20Sopenharmony_ci level, search_start, empty_size, nest); 10508c2ecf20Sopenharmony_ci if (IS_ERR(cow)) 10518c2ecf20Sopenharmony_ci return PTR_ERR(cow); 10528c2ecf20Sopenharmony_ci 10538c2ecf20Sopenharmony_ci /* cow is set to blocking by btrfs_init_new_buffer */ 10548c2ecf20Sopenharmony_ci 10558c2ecf20Sopenharmony_ci copy_extent_buffer_full(cow, buf); 10568c2ecf20Sopenharmony_ci btrfs_set_header_bytenr(cow, cow->start); 10578c2ecf20Sopenharmony_ci btrfs_set_header_generation(cow, trans->transid); 10588c2ecf20Sopenharmony_ci btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); 10598c2ecf20Sopenharmony_ci btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | 10608c2ecf20Sopenharmony_ci BTRFS_HEADER_FLAG_RELOC); 10618c2ecf20Sopenharmony_ci if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 10628c2ecf20Sopenharmony_ci btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); 10638c2ecf20Sopenharmony_ci else 10648c2ecf20Sopenharmony_ci btrfs_set_header_owner(cow, root->root_key.objectid); 10658c2ecf20Sopenharmony_ci 10668c2ecf20Sopenharmony_ci write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid); 10678c2ecf20Sopenharmony_ci 10688c2ecf20Sopenharmony_ci ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); 10698c2ecf20Sopenharmony_ci if (ret) { 10708c2ecf20Sopenharmony_ci btrfs_tree_unlock(cow); 10718c2ecf20Sopenharmony_ci free_extent_buffer(cow); 10728c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 10738c2ecf20Sopenharmony_ci return ret; 10748c2ecf20Sopenharmony_ci } 10758c2ecf20Sopenharmony_ci 10768c2ecf20Sopenharmony_ci if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) { 10778c2ecf20Sopenharmony_ci ret = btrfs_reloc_cow_block(trans, root, buf, cow); 10788c2ecf20Sopenharmony_ci if (ret) { 10798c2ecf20Sopenharmony_ci btrfs_tree_unlock(cow); 10808c2ecf20Sopenharmony_ci free_extent_buffer(cow); 10818c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 10828c2ecf20Sopenharmony_ci return ret; 10838c2ecf20Sopenharmony_ci } 10848c2ecf20Sopenharmony_ci } 10858c2ecf20Sopenharmony_ci 10868c2ecf20Sopenharmony_ci if (buf == root->node) { 10878c2ecf20Sopenharmony_ci WARN_ON(parent && parent != buf); 10888c2ecf20Sopenharmony_ci if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || 10898c2ecf20Sopenharmony_ci btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) 10908c2ecf20Sopenharmony_ci parent_start = buf->start; 10918c2ecf20Sopenharmony_ci 10928c2ecf20Sopenharmony_ci atomic_inc(&cow->refs); 10938c2ecf20Sopenharmony_ci ret = tree_mod_log_insert_root(root->node, cow, 1); 10948c2ecf20Sopenharmony_ci BUG_ON(ret < 0); 10958c2ecf20Sopenharmony_ci rcu_assign_pointer(root->node, cow); 10968c2ecf20Sopenharmony_ci 10978c2ecf20Sopenharmony_ci btrfs_free_tree_block(trans, root, buf, parent_start, 10988c2ecf20Sopenharmony_ci last_ref); 10998c2ecf20Sopenharmony_ci free_extent_buffer(buf); 11008c2ecf20Sopenharmony_ci add_root_to_dirty_list(root); 11018c2ecf20Sopenharmony_ci } else { 11028c2ecf20Sopenharmony_ci WARN_ON(trans->transid != btrfs_header_generation(parent)); 11038c2ecf20Sopenharmony_ci tree_mod_log_insert_key(parent, parent_slot, 11048c2ecf20Sopenharmony_ci MOD_LOG_KEY_REPLACE, GFP_NOFS); 11058c2ecf20Sopenharmony_ci btrfs_set_node_blockptr(parent, parent_slot, 11068c2ecf20Sopenharmony_ci cow->start); 11078c2ecf20Sopenharmony_ci btrfs_set_node_ptr_generation(parent, parent_slot, 11088c2ecf20Sopenharmony_ci trans->transid); 11098c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(parent); 11108c2ecf20Sopenharmony_ci if (last_ref) { 11118c2ecf20Sopenharmony_ci ret = tree_mod_log_free_eb(buf); 11128c2ecf20Sopenharmony_ci if (ret) { 11138c2ecf20Sopenharmony_ci btrfs_tree_unlock(cow); 11148c2ecf20Sopenharmony_ci free_extent_buffer(cow); 11158c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 11168c2ecf20Sopenharmony_ci return ret; 11178c2ecf20Sopenharmony_ci } 11188c2ecf20Sopenharmony_ci } 11198c2ecf20Sopenharmony_ci btrfs_free_tree_block(trans, root, buf, parent_start, 11208c2ecf20Sopenharmony_ci last_ref); 11218c2ecf20Sopenharmony_ci } 11228c2ecf20Sopenharmony_ci if (unlock_orig) 11238c2ecf20Sopenharmony_ci btrfs_tree_unlock(buf); 11248c2ecf20Sopenharmony_ci free_extent_buffer_stale(buf); 11258c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(cow); 11268c2ecf20Sopenharmony_ci *cow_ret = cow; 11278c2ecf20Sopenharmony_ci return 0; 11288c2ecf20Sopenharmony_ci} 11298c2ecf20Sopenharmony_ci 11308c2ecf20Sopenharmony_ci/* 11318c2ecf20Sopenharmony_ci * returns the logical address of the oldest predecessor of the given root. 11328c2ecf20Sopenharmony_ci * entries older than time_seq are ignored. 11338c2ecf20Sopenharmony_ci */ 11348c2ecf20Sopenharmony_cistatic struct tree_mod_elem *__tree_mod_log_oldest_root( 11358c2ecf20Sopenharmony_ci struct extent_buffer *eb_root, u64 time_seq) 11368c2ecf20Sopenharmony_ci{ 11378c2ecf20Sopenharmony_ci struct tree_mod_elem *tm; 11388c2ecf20Sopenharmony_ci struct tree_mod_elem *found = NULL; 11398c2ecf20Sopenharmony_ci u64 root_logical = eb_root->start; 11408c2ecf20Sopenharmony_ci int looped = 0; 11418c2ecf20Sopenharmony_ci 11428c2ecf20Sopenharmony_ci if (!time_seq) 11438c2ecf20Sopenharmony_ci return NULL; 11448c2ecf20Sopenharmony_ci 11458c2ecf20Sopenharmony_ci /* 11468c2ecf20Sopenharmony_ci * the very last operation that's logged for a root is the 11478c2ecf20Sopenharmony_ci * replacement operation (if it is replaced at all). this has 11488c2ecf20Sopenharmony_ci * the logical address of the *new* root, making it the very 11498c2ecf20Sopenharmony_ci * first operation that's logged for this root. 11508c2ecf20Sopenharmony_ci */ 11518c2ecf20Sopenharmony_ci while (1) { 11528c2ecf20Sopenharmony_ci tm = tree_mod_log_search_oldest(eb_root->fs_info, root_logical, 11538c2ecf20Sopenharmony_ci time_seq); 11548c2ecf20Sopenharmony_ci if (!looped && !tm) 11558c2ecf20Sopenharmony_ci return NULL; 11568c2ecf20Sopenharmony_ci /* 11578c2ecf20Sopenharmony_ci * if there are no tree operation for the oldest root, we simply 11588c2ecf20Sopenharmony_ci * return it. this should only happen if that (old) root is at 11598c2ecf20Sopenharmony_ci * level 0. 11608c2ecf20Sopenharmony_ci */ 11618c2ecf20Sopenharmony_ci if (!tm) 11628c2ecf20Sopenharmony_ci break; 11638c2ecf20Sopenharmony_ci 11648c2ecf20Sopenharmony_ci /* 11658c2ecf20Sopenharmony_ci * if there's an operation that's not a root replacement, we 11668c2ecf20Sopenharmony_ci * found the oldest version of our root. normally, we'll find a 11678c2ecf20Sopenharmony_ci * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here. 11688c2ecf20Sopenharmony_ci */ 11698c2ecf20Sopenharmony_ci if (tm->op != MOD_LOG_ROOT_REPLACE) 11708c2ecf20Sopenharmony_ci break; 11718c2ecf20Sopenharmony_ci 11728c2ecf20Sopenharmony_ci found = tm; 11738c2ecf20Sopenharmony_ci root_logical = tm->old_root.logical; 11748c2ecf20Sopenharmony_ci looped = 1; 11758c2ecf20Sopenharmony_ci } 11768c2ecf20Sopenharmony_ci 11778c2ecf20Sopenharmony_ci /* if there's no old root to return, return what we found instead */ 11788c2ecf20Sopenharmony_ci if (!found) 11798c2ecf20Sopenharmony_ci found = tm; 11808c2ecf20Sopenharmony_ci 11818c2ecf20Sopenharmony_ci return found; 11828c2ecf20Sopenharmony_ci} 11838c2ecf20Sopenharmony_ci 11848c2ecf20Sopenharmony_ci/* 11858c2ecf20Sopenharmony_ci * tm is a pointer to the first operation to rewind within eb. then, all 11868c2ecf20Sopenharmony_ci * previous operations will be rewound (until we reach something older than 11878c2ecf20Sopenharmony_ci * time_seq). 11888c2ecf20Sopenharmony_ci */ 11898c2ecf20Sopenharmony_cistatic void 11908c2ecf20Sopenharmony_ci__tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, 11918c2ecf20Sopenharmony_ci u64 time_seq, struct tree_mod_elem *first_tm) 11928c2ecf20Sopenharmony_ci{ 11938c2ecf20Sopenharmony_ci u32 n; 11948c2ecf20Sopenharmony_ci struct rb_node *next; 11958c2ecf20Sopenharmony_ci struct tree_mod_elem *tm = first_tm; 11968c2ecf20Sopenharmony_ci unsigned long o_dst; 11978c2ecf20Sopenharmony_ci unsigned long o_src; 11988c2ecf20Sopenharmony_ci unsigned long p_size = sizeof(struct btrfs_key_ptr); 11998c2ecf20Sopenharmony_ci 12008c2ecf20Sopenharmony_ci n = btrfs_header_nritems(eb); 12018c2ecf20Sopenharmony_ci read_lock(&fs_info->tree_mod_log_lock); 12028c2ecf20Sopenharmony_ci while (tm && tm->seq >= time_seq) { 12038c2ecf20Sopenharmony_ci /* 12048c2ecf20Sopenharmony_ci * all the operations are recorded with the operator used for 12058c2ecf20Sopenharmony_ci * the modification. as we're going backwards, we do the 12068c2ecf20Sopenharmony_ci * opposite of each operation here. 12078c2ecf20Sopenharmony_ci */ 12088c2ecf20Sopenharmony_ci switch (tm->op) { 12098c2ecf20Sopenharmony_ci case MOD_LOG_KEY_REMOVE_WHILE_FREEING: 12108c2ecf20Sopenharmony_ci BUG_ON(tm->slot < n); 12118c2ecf20Sopenharmony_ci fallthrough; 12128c2ecf20Sopenharmony_ci case MOD_LOG_KEY_REMOVE_WHILE_MOVING: 12138c2ecf20Sopenharmony_ci case MOD_LOG_KEY_REMOVE: 12148c2ecf20Sopenharmony_ci btrfs_set_node_key(eb, &tm->key, tm->slot); 12158c2ecf20Sopenharmony_ci btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); 12168c2ecf20Sopenharmony_ci btrfs_set_node_ptr_generation(eb, tm->slot, 12178c2ecf20Sopenharmony_ci tm->generation); 12188c2ecf20Sopenharmony_ci n++; 12198c2ecf20Sopenharmony_ci break; 12208c2ecf20Sopenharmony_ci case MOD_LOG_KEY_REPLACE: 12218c2ecf20Sopenharmony_ci BUG_ON(tm->slot >= n); 12228c2ecf20Sopenharmony_ci btrfs_set_node_key(eb, &tm->key, tm->slot); 12238c2ecf20Sopenharmony_ci btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr); 12248c2ecf20Sopenharmony_ci btrfs_set_node_ptr_generation(eb, tm->slot, 12258c2ecf20Sopenharmony_ci tm->generation); 12268c2ecf20Sopenharmony_ci break; 12278c2ecf20Sopenharmony_ci case MOD_LOG_KEY_ADD: 12288c2ecf20Sopenharmony_ci /* if a move operation is needed it's in the log */ 12298c2ecf20Sopenharmony_ci n--; 12308c2ecf20Sopenharmony_ci break; 12318c2ecf20Sopenharmony_ci case MOD_LOG_MOVE_KEYS: 12328c2ecf20Sopenharmony_ci o_dst = btrfs_node_key_ptr_offset(tm->slot); 12338c2ecf20Sopenharmony_ci o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot); 12348c2ecf20Sopenharmony_ci memmove_extent_buffer(eb, o_dst, o_src, 12358c2ecf20Sopenharmony_ci tm->move.nr_items * p_size); 12368c2ecf20Sopenharmony_ci break; 12378c2ecf20Sopenharmony_ci case MOD_LOG_ROOT_REPLACE: 12388c2ecf20Sopenharmony_ci /* 12398c2ecf20Sopenharmony_ci * this operation is special. for roots, this must be 12408c2ecf20Sopenharmony_ci * handled explicitly before rewinding. 12418c2ecf20Sopenharmony_ci * for non-roots, this operation may exist if the node 12428c2ecf20Sopenharmony_ci * was a root: root A -> child B; then A gets empty and 12438c2ecf20Sopenharmony_ci * B is promoted to the new root. in the mod log, we'll 12448c2ecf20Sopenharmony_ci * have a root-replace operation for B, a tree block 12458c2ecf20Sopenharmony_ci * that is no root. we simply ignore that operation. 12468c2ecf20Sopenharmony_ci */ 12478c2ecf20Sopenharmony_ci break; 12488c2ecf20Sopenharmony_ci } 12498c2ecf20Sopenharmony_ci next = rb_next(&tm->node); 12508c2ecf20Sopenharmony_ci if (!next) 12518c2ecf20Sopenharmony_ci break; 12528c2ecf20Sopenharmony_ci tm = rb_entry(next, struct tree_mod_elem, node); 12538c2ecf20Sopenharmony_ci if (tm->logical != first_tm->logical) 12548c2ecf20Sopenharmony_ci break; 12558c2ecf20Sopenharmony_ci } 12568c2ecf20Sopenharmony_ci read_unlock(&fs_info->tree_mod_log_lock); 12578c2ecf20Sopenharmony_ci btrfs_set_header_nritems(eb, n); 12588c2ecf20Sopenharmony_ci} 12598c2ecf20Sopenharmony_ci 12608c2ecf20Sopenharmony_ci/* 12618c2ecf20Sopenharmony_ci * Called with eb read locked. If the buffer cannot be rewound, the same buffer 12628c2ecf20Sopenharmony_ci * is returned. If rewind operations happen, a fresh buffer is returned. The 12638c2ecf20Sopenharmony_ci * returned buffer is always read-locked. If the returned buffer is not the 12648c2ecf20Sopenharmony_ci * input buffer, the lock on the input buffer is released and the input buffer 12658c2ecf20Sopenharmony_ci * is freed (its refcount is decremented). 12668c2ecf20Sopenharmony_ci */ 12678c2ecf20Sopenharmony_cistatic struct extent_buffer * 12688c2ecf20Sopenharmony_citree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, 12698c2ecf20Sopenharmony_ci struct extent_buffer *eb, u64 time_seq) 12708c2ecf20Sopenharmony_ci{ 12718c2ecf20Sopenharmony_ci struct extent_buffer *eb_rewin; 12728c2ecf20Sopenharmony_ci struct tree_mod_elem *tm; 12738c2ecf20Sopenharmony_ci 12748c2ecf20Sopenharmony_ci if (!time_seq) 12758c2ecf20Sopenharmony_ci return eb; 12768c2ecf20Sopenharmony_ci 12778c2ecf20Sopenharmony_ci if (btrfs_header_level(eb) == 0) 12788c2ecf20Sopenharmony_ci return eb; 12798c2ecf20Sopenharmony_ci 12808c2ecf20Sopenharmony_ci tm = tree_mod_log_search(fs_info, eb->start, time_seq); 12818c2ecf20Sopenharmony_ci if (!tm) 12828c2ecf20Sopenharmony_ci return eb; 12838c2ecf20Sopenharmony_ci 12848c2ecf20Sopenharmony_ci btrfs_set_path_blocking(path); 12858c2ecf20Sopenharmony_ci btrfs_set_lock_blocking_read(eb); 12868c2ecf20Sopenharmony_ci 12878c2ecf20Sopenharmony_ci if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { 12888c2ecf20Sopenharmony_ci BUG_ON(tm->slot != 0); 12898c2ecf20Sopenharmony_ci eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start); 12908c2ecf20Sopenharmony_ci if (!eb_rewin) { 12918c2ecf20Sopenharmony_ci btrfs_tree_read_unlock_blocking(eb); 12928c2ecf20Sopenharmony_ci free_extent_buffer(eb); 12938c2ecf20Sopenharmony_ci return NULL; 12948c2ecf20Sopenharmony_ci } 12958c2ecf20Sopenharmony_ci btrfs_set_header_bytenr(eb_rewin, eb->start); 12968c2ecf20Sopenharmony_ci btrfs_set_header_backref_rev(eb_rewin, 12978c2ecf20Sopenharmony_ci btrfs_header_backref_rev(eb)); 12988c2ecf20Sopenharmony_ci btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb)); 12998c2ecf20Sopenharmony_ci btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); 13008c2ecf20Sopenharmony_ci } else { 13018c2ecf20Sopenharmony_ci eb_rewin = btrfs_clone_extent_buffer(eb); 13028c2ecf20Sopenharmony_ci if (!eb_rewin) { 13038c2ecf20Sopenharmony_ci btrfs_tree_read_unlock_blocking(eb); 13048c2ecf20Sopenharmony_ci free_extent_buffer(eb); 13058c2ecf20Sopenharmony_ci return NULL; 13068c2ecf20Sopenharmony_ci } 13078c2ecf20Sopenharmony_ci } 13088c2ecf20Sopenharmony_ci 13098c2ecf20Sopenharmony_ci btrfs_tree_read_unlock_blocking(eb); 13108c2ecf20Sopenharmony_ci free_extent_buffer(eb); 13118c2ecf20Sopenharmony_ci 13128c2ecf20Sopenharmony_ci btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin), 13138c2ecf20Sopenharmony_ci eb_rewin, btrfs_header_level(eb_rewin)); 13148c2ecf20Sopenharmony_ci btrfs_tree_read_lock(eb_rewin); 13158c2ecf20Sopenharmony_ci __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm); 13168c2ecf20Sopenharmony_ci WARN_ON(btrfs_header_nritems(eb_rewin) > 13178c2ecf20Sopenharmony_ci BTRFS_NODEPTRS_PER_BLOCK(fs_info)); 13188c2ecf20Sopenharmony_ci 13198c2ecf20Sopenharmony_ci return eb_rewin; 13208c2ecf20Sopenharmony_ci} 13218c2ecf20Sopenharmony_ci 13228c2ecf20Sopenharmony_ci/* 13238c2ecf20Sopenharmony_ci * get_old_root() rewinds the state of @root's root node to the given @time_seq 13248c2ecf20Sopenharmony_ci * value. If there are no changes, the current root->root_node is returned. If 13258c2ecf20Sopenharmony_ci * anything changed in between, there's a fresh buffer allocated on which the 13268c2ecf20Sopenharmony_ci * rewind operations are done. In any case, the returned buffer is read locked. 13278c2ecf20Sopenharmony_ci * Returns NULL on error (with no locks held). 13288c2ecf20Sopenharmony_ci */ 13298c2ecf20Sopenharmony_cistatic inline struct extent_buffer * 13308c2ecf20Sopenharmony_ciget_old_root(struct btrfs_root *root, u64 time_seq) 13318c2ecf20Sopenharmony_ci{ 13328c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 13338c2ecf20Sopenharmony_ci struct tree_mod_elem *tm; 13348c2ecf20Sopenharmony_ci struct extent_buffer *eb = NULL; 13358c2ecf20Sopenharmony_ci struct extent_buffer *eb_root; 13368c2ecf20Sopenharmony_ci u64 eb_root_owner = 0; 13378c2ecf20Sopenharmony_ci struct extent_buffer *old; 13388c2ecf20Sopenharmony_ci struct tree_mod_root *old_root = NULL; 13398c2ecf20Sopenharmony_ci u64 old_generation = 0; 13408c2ecf20Sopenharmony_ci u64 logical; 13418c2ecf20Sopenharmony_ci int level; 13428c2ecf20Sopenharmony_ci 13438c2ecf20Sopenharmony_ci eb_root = btrfs_read_lock_root_node(root); 13448c2ecf20Sopenharmony_ci tm = __tree_mod_log_oldest_root(eb_root, time_seq); 13458c2ecf20Sopenharmony_ci if (!tm) 13468c2ecf20Sopenharmony_ci return eb_root; 13478c2ecf20Sopenharmony_ci 13488c2ecf20Sopenharmony_ci if (tm->op == MOD_LOG_ROOT_REPLACE) { 13498c2ecf20Sopenharmony_ci old_root = &tm->old_root; 13508c2ecf20Sopenharmony_ci old_generation = tm->generation; 13518c2ecf20Sopenharmony_ci logical = old_root->logical; 13528c2ecf20Sopenharmony_ci level = old_root->level; 13538c2ecf20Sopenharmony_ci } else { 13548c2ecf20Sopenharmony_ci logical = eb_root->start; 13558c2ecf20Sopenharmony_ci level = btrfs_header_level(eb_root); 13568c2ecf20Sopenharmony_ci } 13578c2ecf20Sopenharmony_ci 13588c2ecf20Sopenharmony_ci tm = tree_mod_log_search(fs_info, logical, time_seq); 13598c2ecf20Sopenharmony_ci if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { 13608c2ecf20Sopenharmony_ci btrfs_tree_read_unlock(eb_root); 13618c2ecf20Sopenharmony_ci free_extent_buffer(eb_root); 13628c2ecf20Sopenharmony_ci old = read_tree_block(fs_info, logical, 0, level, NULL); 13638c2ecf20Sopenharmony_ci if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) { 13648c2ecf20Sopenharmony_ci if (!IS_ERR(old)) 13658c2ecf20Sopenharmony_ci free_extent_buffer(old); 13668c2ecf20Sopenharmony_ci btrfs_warn(fs_info, 13678c2ecf20Sopenharmony_ci "failed to read tree block %llu from get_old_root", 13688c2ecf20Sopenharmony_ci logical); 13698c2ecf20Sopenharmony_ci } else { 13708c2ecf20Sopenharmony_ci struct tree_mod_elem *tm2; 13718c2ecf20Sopenharmony_ci 13728c2ecf20Sopenharmony_ci btrfs_tree_read_lock(old); 13738c2ecf20Sopenharmony_ci eb = btrfs_clone_extent_buffer(old); 13748c2ecf20Sopenharmony_ci /* 13758c2ecf20Sopenharmony_ci * After the lookup for the most recent tree mod operation 13768c2ecf20Sopenharmony_ci * above and before we locked and cloned the extent buffer 13778c2ecf20Sopenharmony_ci * 'old', a new tree mod log operation may have been added. 13788c2ecf20Sopenharmony_ci * So lookup for a more recent one to make sure the number 13798c2ecf20Sopenharmony_ci * of mod log operations we replay is consistent with the 13808c2ecf20Sopenharmony_ci * number of items we have in the cloned extent buffer, 13818c2ecf20Sopenharmony_ci * otherwise we can hit a BUG_ON when rewinding the extent 13828c2ecf20Sopenharmony_ci * buffer. 13838c2ecf20Sopenharmony_ci */ 13848c2ecf20Sopenharmony_ci tm2 = tree_mod_log_search(fs_info, logical, time_seq); 13858c2ecf20Sopenharmony_ci btrfs_tree_read_unlock(old); 13868c2ecf20Sopenharmony_ci free_extent_buffer(old); 13878c2ecf20Sopenharmony_ci ASSERT(tm2); 13888c2ecf20Sopenharmony_ci ASSERT(tm2 == tm || tm2->seq > tm->seq); 13898c2ecf20Sopenharmony_ci if (!tm2 || tm2->seq < tm->seq) { 13908c2ecf20Sopenharmony_ci free_extent_buffer(eb); 13918c2ecf20Sopenharmony_ci return NULL; 13928c2ecf20Sopenharmony_ci } 13938c2ecf20Sopenharmony_ci tm = tm2; 13948c2ecf20Sopenharmony_ci } 13958c2ecf20Sopenharmony_ci } else if (old_root) { 13968c2ecf20Sopenharmony_ci eb_root_owner = btrfs_header_owner(eb_root); 13978c2ecf20Sopenharmony_ci btrfs_tree_read_unlock(eb_root); 13988c2ecf20Sopenharmony_ci free_extent_buffer(eb_root); 13998c2ecf20Sopenharmony_ci eb = alloc_dummy_extent_buffer(fs_info, logical); 14008c2ecf20Sopenharmony_ci } else { 14018c2ecf20Sopenharmony_ci btrfs_set_lock_blocking_read(eb_root); 14028c2ecf20Sopenharmony_ci eb = btrfs_clone_extent_buffer(eb_root); 14038c2ecf20Sopenharmony_ci btrfs_tree_read_unlock_blocking(eb_root); 14048c2ecf20Sopenharmony_ci free_extent_buffer(eb_root); 14058c2ecf20Sopenharmony_ci } 14068c2ecf20Sopenharmony_ci 14078c2ecf20Sopenharmony_ci if (!eb) 14088c2ecf20Sopenharmony_ci return NULL; 14098c2ecf20Sopenharmony_ci if (old_root) { 14108c2ecf20Sopenharmony_ci btrfs_set_header_bytenr(eb, eb->start); 14118c2ecf20Sopenharmony_ci btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV); 14128c2ecf20Sopenharmony_ci btrfs_set_header_owner(eb, eb_root_owner); 14138c2ecf20Sopenharmony_ci btrfs_set_header_level(eb, old_root->level); 14148c2ecf20Sopenharmony_ci btrfs_set_header_generation(eb, old_generation); 14158c2ecf20Sopenharmony_ci } 14168c2ecf20Sopenharmony_ci btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb, 14178c2ecf20Sopenharmony_ci btrfs_header_level(eb)); 14188c2ecf20Sopenharmony_ci btrfs_tree_read_lock(eb); 14198c2ecf20Sopenharmony_ci if (tm) 14208c2ecf20Sopenharmony_ci __tree_mod_log_rewind(fs_info, eb, time_seq, tm); 14218c2ecf20Sopenharmony_ci else 14228c2ecf20Sopenharmony_ci WARN_ON(btrfs_header_level(eb) != 0); 14238c2ecf20Sopenharmony_ci WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(fs_info)); 14248c2ecf20Sopenharmony_ci 14258c2ecf20Sopenharmony_ci return eb; 14268c2ecf20Sopenharmony_ci} 14278c2ecf20Sopenharmony_ci 14288c2ecf20Sopenharmony_ciint btrfs_old_root_level(struct btrfs_root *root, u64 time_seq) 14298c2ecf20Sopenharmony_ci{ 14308c2ecf20Sopenharmony_ci struct tree_mod_elem *tm; 14318c2ecf20Sopenharmony_ci int level; 14328c2ecf20Sopenharmony_ci struct extent_buffer *eb_root = btrfs_root_node(root); 14338c2ecf20Sopenharmony_ci 14348c2ecf20Sopenharmony_ci tm = __tree_mod_log_oldest_root(eb_root, time_seq); 14358c2ecf20Sopenharmony_ci if (tm && tm->op == MOD_LOG_ROOT_REPLACE) { 14368c2ecf20Sopenharmony_ci level = tm->old_root.level; 14378c2ecf20Sopenharmony_ci } else { 14388c2ecf20Sopenharmony_ci level = btrfs_header_level(eb_root); 14398c2ecf20Sopenharmony_ci } 14408c2ecf20Sopenharmony_ci free_extent_buffer(eb_root); 14418c2ecf20Sopenharmony_ci 14428c2ecf20Sopenharmony_ci return level; 14438c2ecf20Sopenharmony_ci} 14448c2ecf20Sopenharmony_ci 14458c2ecf20Sopenharmony_cistatic inline int should_cow_block(struct btrfs_trans_handle *trans, 14468c2ecf20Sopenharmony_ci struct btrfs_root *root, 14478c2ecf20Sopenharmony_ci struct extent_buffer *buf) 14488c2ecf20Sopenharmony_ci{ 14498c2ecf20Sopenharmony_ci if (btrfs_is_testing(root->fs_info)) 14508c2ecf20Sopenharmony_ci return 0; 14518c2ecf20Sopenharmony_ci 14528c2ecf20Sopenharmony_ci /* Ensure we can see the FORCE_COW bit */ 14538c2ecf20Sopenharmony_ci smp_mb__before_atomic(); 14548c2ecf20Sopenharmony_ci 14558c2ecf20Sopenharmony_ci /* 14568c2ecf20Sopenharmony_ci * We do not need to cow a block if 14578c2ecf20Sopenharmony_ci * 1) this block is not created or changed in this transaction; 14588c2ecf20Sopenharmony_ci * 2) this block does not belong to TREE_RELOC tree; 14598c2ecf20Sopenharmony_ci * 3) the root is not forced COW. 14608c2ecf20Sopenharmony_ci * 14618c2ecf20Sopenharmony_ci * What is forced COW: 14628c2ecf20Sopenharmony_ci * when we create snapshot during committing the transaction, 14638c2ecf20Sopenharmony_ci * after we've finished copying src root, we must COW the shared 14648c2ecf20Sopenharmony_ci * block to ensure the metadata consistency. 14658c2ecf20Sopenharmony_ci */ 14668c2ecf20Sopenharmony_ci if (btrfs_header_generation(buf) == trans->transid && 14678c2ecf20Sopenharmony_ci !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) && 14688c2ecf20Sopenharmony_ci !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && 14698c2ecf20Sopenharmony_ci btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) && 14708c2ecf20Sopenharmony_ci !test_bit(BTRFS_ROOT_FORCE_COW, &root->state)) 14718c2ecf20Sopenharmony_ci return 0; 14728c2ecf20Sopenharmony_ci return 1; 14738c2ecf20Sopenharmony_ci} 14748c2ecf20Sopenharmony_ci 14758c2ecf20Sopenharmony_ci/* 14768c2ecf20Sopenharmony_ci * cows a single block, see __btrfs_cow_block for the real work. 14778c2ecf20Sopenharmony_ci * This version of it has extra checks so that a block isn't COWed more than 14788c2ecf20Sopenharmony_ci * once per transaction, as long as it hasn't been written yet 14798c2ecf20Sopenharmony_ci */ 14808c2ecf20Sopenharmony_cinoinline int btrfs_cow_block(struct btrfs_trans_handle *trans, 14818c2ecf20Sopenharmony_ci struct btrfs_root *root, struct extent_buffer *buf, 14828c2ecf20Sopenharmony_ci struct extent_buffer *parent, int parent_slot, 14838c2ecf20Sopenharmony_ci struct extent_buffer **cow_ret, 14848c2ecf20Sopenharmony_ci enum btrfs_lock_nesting nest) 14858c2ecf20Sopenharmony_ci{ 14868c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 14878c2ecf20Sopenharmony_ci u64 search_start; 14888c2ecf20Sopenharmony_ci int ret; 14898c2ecf20Sopenharmony_ci 14908c2ecf20Sopenharmony_ci if (test_bit(BTRFS_ROOT_DELETING, &root->state)) 14918c2ecf20Sopenharmony_ci btrfs_err(fs_info, 14928c2ecf20Sopenharmony_ci "COW'ing blocks on a fs root that's being dropped"); 14938c2ecf20Sopenharmony_ci 14948c2ecf20Sopenharmony_ci if (trans->transaction != fs_info->running_transaction) 14958c2ecf20Sopenharmony_ci WARN(1, KERN_CRIT "trans %llu running %llu\n", 14968c2ecf20Sopenharmony_ci trans->transid, 14978c2ecf20Sopenharmony_ci fs_info->running_transaction->transid); 14988c2ecf20Sopenharmony_ci 14998c2ecf20Sopenharmony_ci if (trans->transid != fs_info->generation) 15008c2ecf20Sopenharmony_ci WARN(1, KERN_CRIT "trans %llu running %llu\n", 15018c2ecf20Sopenharmony_ci trans->transid, fs_info->generation); 15028c2ecf20Sopenharmony_ci 15038c2ecf20Sopenharmony_ci if (!should_cow_block(trans, root, buf)) { 15048c2ecf20Sopenharmony_ci trans->dirty = true; 15058c2ecf20Sopenharmony_ci *cow_ret = buf; 15068c2ecf20Sopenharmony_ci return 0; 15078c2ecf20Sopenharmony_ci } 15088c2ecf20Sopenharmony_ci 15098c2ecf20Sopenharmony_ci search_start = buf->start & ~((u64)SZ_1G - 1); 15108c2ecf20Sopenharmony_ci 15118c2ecf20Sopenharmony_ci if (parent) 15128c2ecf20Sopenharmony_ci btrfs_set_lock_blocking_write(parent); 15138c2ecf20Sopenharmony_ci btrfs_set_lock_blocking_write(buf); 15148c2ecf20Sopenharmony_ci 15158c2ecf20Sopenharmony_ci /* 15168c2ecf20Sopenharmony_ci * Before CoWing this block for later modification, check if it's 15178c2ecf20Sopenharmony_ci * the subtree root and do the delayed subtree trace if needed. 15188c2ecf20Sopenharmony_ci * 15198c2ecf20Sopenharmony_ci * Also We don't care about the error, as it's handled internally. 15208c2ecf20Sopenharmony_ci */ 15218c2ecf20Sopenharmony_ci btrfs_qgroup_trace_subtree_after_cow(trans, root, buf); 15228c2ecf20Sopenharmony_ci ret = __btrfs_cow_block(trans, root, buf, parent, 15238c2ecf20Sopenharmony_ci parent_slot, cow_ret, search_start, 0, nest); 15248c2ecf20Sopenharmony_ci 15258c2ecf20Sopenharmony_ci trace_btrfs_cow_block(root, buf, *cow_ret); 15268c2ecf20Sopenharmony_ci 15278c2ecf20Sopenharmony_ci return ret; 15288c2ecf20Sopenharmony_ci} 15298c2ecf20Sopenharmony_ci 15308c2ecf20Sopenharmony_ci/* 15318c2ecf20Sopenharmony_ci * helper function for defrag to decide if two blocks pointed to by a 15328c2ecf20Sopenharmony_ci * node are actually close by 15338c2ecf20Sopenharmony_ci */ 15348c2ecf20Sopenharmony_cistatic int close_blocks(u64 blocknr, u64 other, u32 blocksize) 15358c2ecf20Sopenharmony_ci{ 15368c2ecf20Sopenharmony_ci if (blocknr < other && other - (blocknr + blocksize) < 32768) 15378c2ecf20Sopenharmony_ci return 1; 15388c2ecf20Sopenharmony_ci if (blocknr > other && blocknr - (other + blocksize) < 32768) 15398c2ecf20Sopenharmony_ci return 1; 15408c2ecf20Sopenharmony_ci return 0; 15418c2ecf20Sopenharmony_ci} 15428c2ecf20Sopenharmony_ci 15438c2ecf20Sopenharmony_ci#ifdef __LITTLE_ENDIAN 15448c2ecf20Sopenharmony_ci 15458c2ecf20Sopenharmony_ci/* 15468c2ecf20Sopenharmony_ci * Compare two keys, on little-endian the disk order is same as CPU order and 15478c2ecf20Sopenharmony_ci * we can avoid the conversion. 15488c2ecf20Sopenharmony_ci */ 15498c2ecf20Sopenharmony_cistatic int comp_keys(const struct btrfs_disk_key *disk_key, 15508c2ecf20Sopenharmony_ci const struct btrfs_key *k2) 15518c2ecf20Sopenharmony_ci{ 15528c2ecf20Sopenharmony_ci const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key; 15538c2ecf20Sopenharmony_ci 15548c2ecf20Sopenharmony_ci return btrfs_comp_cpu_keys(k1, k2); 15558c2ecf20Sopenharmony_ci} 15568c2ecf20Sopenharmony_ci 15578c2ecf20Sopenharmony_ci#else 15588c2ecf20Sopenharmony_ci 15598c2ecf20Sopenharmony_ci/* 15608c2ecf20Sopenharmony_ci * compare two keys in a memcmp fashion 15618c2ecf20Sopenharmony_ci */ 15628c2ecf20Sopenharmony_cistatic int comp_keys(const struct btrfs_disk_key *disk, 15638c2ecf20Sopenharmony_ci const struct btrfs_key *k2) 15648c2ecf20Sopenharmony_ci{ 15658c2ecf20Sopenharmony_ci struct btrfs_key k1; 15668c2ecf20Sopenharmony_ci 15678c2ecf20Sopenharmony_ci btrfs_disk_key_to_cpu(&k1, disk); 15688c2ecf20Sopenharmony_ci 15698c2ecf20Sopenharmony_ci return btrfs_comp_cpu_keys(&k1, k2); 15708c2ecf20Sopenharmony_ci} 15718c2ecf20Sopenharmony_ci#endif 15728c2ecf20Sopenharmony_ci 15738c2ecf20Sopenharmony_ci/* 15748c2ecf20Sopenharmony_ci * same as comp_keys only with two btrfs_key's 15758c2ecf20Sopenharmony_ci */ 15768c2ecf20Sopenharmony_ciint __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2) 15778c2ecf20Sopenharmony_ci{ 15788c2ecf20Sopenharmony_ci if (k1->objectid > k2->objectid) 15798c2ecf20Sopenharmony_ci return 1; 15808c2ecf20Sopenharmony_ci if (k1->objectid < k2->objectid) 15818c2ecf20Sopenharmony_ci return -1; 15828c2ecf20Sopenharmony_ci if (k1->type > k2->type) 15838c2ecf20Sopenharmony_ci return 1; 15848c2ecf20Sopenharmony_ci if (k1->type < k2->type) 15858c2ecf20Sopenharmony_ci return -1; 15868c2ecf20Sopenharmony_ci if (k1->offset > k2->offset) 15878c2ecf20Sopenharmony_ci return 1; 15888c2ecf20Sopenharmony_ci if (k1->offset < k2->offset) 15898c2ecf20Sopenharmony_ci return -1; 15908c2ecf20Sopenharmony_ci return 0; 15918c2ecf20Sopenharmony_ci} 15928c2ecf20Sopenharmony_ci 15938c2ecf20Sopenharmony_ci/* 15948c2ecf20Sopenharmony_ci * this is used by the defrag code to go through all the 15958c2ecf20Sopenharmony_ci * leaves pointed to by a node and reallocate them so that 15968c2ecf20Sopenharmony_ci * disk order is close to key order 15978c2ecf20Sopenharmony_ci */ 15988c2ecf20Sopenharmony_ciint btrfs_realloc_node(struct btrfs_trans_handle *trans, 15998c2ecf20Sopenharmony_ci struct btrfs_root *root, struct extent_buffer *parent, 16008c2ecf20Sopenharmony_ci int start_slot, u64 *last_ret, 16018c2ecf20Sopenharmony_ci struct btrfs_key *progress) 16028c2ecf20Sopenharmony_ci{ 16038c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 16048c2ecf20Sopenharmony_ci struct extent_buffer *cur; 16058c2ecf20Sopenharmony_ci u64 blocknr; 16068c2ecf20Sopenharmony_ci u64 gen; 16078c2ecf20Sopenharmony_ci u64 search_start = *last_ret; 16088c2ecf20Sopenharmony_ci u64 last_block = 0; 16098c2ecf20Sopenharmony_ci u64 other; 16108c2ecf20Sopenharmony_ci u32 parent_nritems; 16118c2ecf20Sopenharmony_ci int end_slot; 16128c2ecf20Sopenharmony_ci int i; 16138c2ecf20Sopenharmony_ci int err = 0; 16148c2ecf20Sopenharmony_ci int parent_level; 16158c2ecf20Sopenharmony_ci int uptodate; 16168c2ecf20Sopenharmony_ci u32 blocksize; 16178c2ecf20Sopenharmony_ci int progress_passed = 0; 16188c2ecf20Sopenharmony_ci struct btrfs_disk_key disk_key; 16198c2ecf20Sopenharmony_ci 16208c2ecf20Sopenharmony_ci parent_level = btrfs_header_level(parent); 16218c2ecf20Sopenharmony_ci 16228c2ecf20Sopenharmony_ci WARN_ON(trans->transaction != fs_info->running_transaction); 16238c2ecf20Sopenharmony_ci WARN_ON(trans->transid != fs_info->generation); 16248c2ecf20Sopenharmony_ci 16258c2ecf20Sopenharmony_ci parent_nritems = btrfs_header_nritems(parent); 16268c2ecf20Sopenharmony_ci blocksize = fs_info->nodesize; 16278c2ecf20Sopenharmony_ci end_slot = parent_nritems - 1; 16288c2ecf20Sopenharmony_ci 16298c2ecf20Sopenharmony_ci if (parent_nritems <= 1) 16308c2ecf20Sopenharmony_ci return 0; 16318c2ecf20Sopenharmony_ci 16328c2ecf20Sopenharmony_ci btrfs_set_lock_blocking_write(parent); 16338c2ecf20Sopenharmony_ci 16348c2ecf20Sopenharmony_ci for (i = start_slot; i <= end_slot; i++) { 16358c2ecf20Sopenharmony_ci struct btrfs_key first_key; 16368c2ecf20Sopenharmony_ci int close = 1; 16378c2ecf20Sopenharmony_ci 16388c2ecf20Sopenharmony_ci btrfs_node_key(parent, &disk_key, i); 16398c2ecf20Sopenharmony_ci if (!progress_passed && comp_keys(&disk_key, progress) < 0) 16408c2ecf20Sopenharmony_ci continue; 16418c2ecf20Sopenharmony_ci 16428c2ecf20Sopenharmony_ci progress_passed = 1; 16438c2ecf20Sopenharmony_ci blocknr = btrfs_node_blockptr(parent, i); 16448c2ecf20Sopenharmony_ci gen = btrfs_node_ptr_generation(parent, i); 16458c2ecf20Sopenharmony_ci btrfs_node_key_to_cpu(parent, &first_key, i); 16468c2ecf20Sopenharmony_ci if (last_block == 0) 16478c2ecf20Sopenharmony_ci last_block = blocknr; 16488c2ecf20Sopenharmony_ci 16498c2ecf20Sopenharmony_ci if (i > 0) { 16508c2ecf20Sopenharmony_ci other = btrfs_node_blockptr(parent, i - 1); 16518c2ecf20Sopenharmony_ci close = close_blocks(blocknr, other, blocksize); 16528c2ecf20Sopenharmony_ci } 16538c2ecf20Sopenharmony_ci if (!close && i < end_slot) { 16548c2ecf20Sopenharmony_ci other = btrfs_node_blockptr(parent, i + 1); 16558c2ecf20Sopenharmony_ci close = close_blocks(blocknr, other, blocksize); 16568c2ecf20Sopenharmony_ci } 16578c2ecf20Sopenharmony_ci if (close) { 16588c2ecf20Sopenharmony_ci last_block = blocknr; 16598c2ecf20Sopenharmony_ci continue; 16608c2ecf20Sopenharmony_ci } 16618c2ecf20Sopenharmony_ci 16628c2ecf20Sopenharmony_ci cur = find_extent_buffer(fs_info, blocknr); 16638c2ecf20Sopenharmony_ci if (cur) 16648c2ecf20Sopenharmony_ci uptodate = btrfs_buffer_uptodate(cur, gen, 0); 16658c2ecf20Sopenharmony_ci else 16668c2ecf20Sopenharmony_ci uptodate = 0; 16678c2ecf20Sopenharmony_ci if (!cur || !uptodate) { 16688c2ecf20Sopenharmony_ci if (!cur) { 16698c2ecf20Sopenharmony_ci cur = read_tree_block(fs_info, blocknr, gen, 16708c2ecf20Sopenharmony_ci parent_level - 1, 16718c2ecf20Sopenharmony_ci &first_key); 16728c2ecf20Sopenharmony_ci if (IS_ERR(cur)) { 16738c2ecf20Sopenharmony_ci return PTR_ERR(cur); 16748c2ecf20Sopenharmony_ci } else if (!extent_buffer_uptodate(cur)) { 16758c2ecf20Sopenharmony_ci free_extent_buffer(cur); 16768c2ecf20Sopenharmony_ci return -EIO; 16778c2ecf20Sopenharmony_ci } 16788c2ecf20Sopenharmony_ci } else if (!uptodate) { 16798c2ecf20Sopenharmony_ci err = btrfs_read_buffer(cur, gen, 16808c2ecf20Sopenharmony_ci parent_level - 1,&first_key); 16818c2ecf20Sopenharmony_ci if (err) { 16828c2ecf20Sopenharmony_ci free_extent_buffer(cur); 16838c2ecf20Sopenharmony_ci return err; 16848c2ecf20Sopenharmony_ci } 16858c2ecf20Sopenharmony_ci } 16868c2ecf20Sopenharmony_ci } 16878c2ecf20Sopenharmony_ci if (search_start == 0) 16888c2ecf20Sopenharmony_ci search_start = last_block; 16898c2ecf20Sopenharmony_ci 16908c2ecf20Sopenharmony_ci btrfs_tree_lock(cur); 16918c2ecf20Sopenharmony_ci btrfs_set_lock_blocking_write(cur); 16928c2ecf20Sopenharmony_ci err = __btrfs_cow_block(trans, root, cur, parent, i, 16938c2ecf20Sopenharmony_ci &cur, search_start, 16948c2ecf20Sopenharmony_ci min(16 * blocksize, 16958c2ecf20Sopenharmony_ci (end_slot - i) * blocksize), 16968c2ecf20Sopenharmony_ci BTRFS_NESTING_COW); 16978c2ecf20Sopenharmony_ci if (err) { 16988c2ecf20Sopenharmony_ci btrfs_tree_unlock(cur); 16998c2ecf20Sopenharmony_ci free_extent_buffer(cur); 17008c2ecf20Sopenharmony_ci break; 17018c2ecf20Sopenharmony_ci } 17028c2ecf20Sopenharmony_ci search_start = cur->start; 17038c2ecf20Sopenharmony_ci last_block = cur->start; 17048c2ecf20Sopenharmony_ci *last_ret = search_start; 17058c2ecf20Sopenharmony_ci btrfs_tree_unlock(cur); 17068c2ecf20Sopenharmony_ci free_extent_buffer(cur); 17078c2ecf20Sopenharmony_ci } 17088c2ecf20Sopenharmony_ci return err; 17098c2ecf20Sopenharmony_ci} 17108c2ecf20Sopenharmony_ci 17118c2ecf20Sopenharmony_ci/* 17128c2ecf20Sopenharmony_ci * search for key in the extent_buffer. The items start at offset p, 17138c2ecf20Sopenharmony_ci * and they are item_size apart. There are 'max' items in p. 17148c2ecf20Sopenharmony_ci * 17158c2ecf20Sopenharmony_ci * the slot in the array is returned via slot, and it points to 17168c2ecf20Sopenharmony_ci * the place where you would insert key if it is not found in 17178c2ecf20Sopenharmony_ci * the array. 17188c2ecf20Sopenharmony_ci * 17198c2ecf20Sopenharmony_ci * slot may point to max if the key is bigger than all of the keys 17208c2ecf20Sopenharmony_ci */ 17218c2ecf20Sopenharmony_cistatic noinline int generic_bin_search(struct extent_buffer *eb, 17228c2ecf20Sopenharmony_ci unsigned long p, int item_size, 17238c2ecf20Sopenharmony_ci const struct btrfs_key *key, 17248c2ecf20Sopenharmony_ci int max, int *slot) 17258c2ecf20Sopenharmony_ci{ 17268c2ecf20Sopenharmony_ci int low = 0; 17278c2ecf20Sopenharmony_ci int high = max; 17288c2ecf20Sopenharmony_ci int ret; 17298c2ecf20Sopenharmony_ci const int key_size = sizeof(struct btrfs_disk_key); 17308c2ecf20Sopenharmony_ci 17318c2ecf20Sopenharmony_ci if (low > high) { 17328c2ecf20Sopenharmony_ci btrfs_err(eb->fs_info, 17338c2ecf20Sopenharmony_ci "%s: low (%d) > high (%d) eb %llu owner %llu level %d", 17348c2ecf20Sopenharmony_ci __func__, low, high, eb->start, 17358c2ecf20Sopenharmony_ci btrfs_header_owner(eb), btrfs_header_level(eb)); 17368c2ecf20Sopenharmony_ci return -EINVAL; 17378c2ecf20Sopenharmony_ci } 17388c2ecf20Sopenharmony_ci 17398c2ecf20Sopenharmony_ci while (low < high) { 17408c2ecf20Sopenharmony_ci unsigned long oip; 17418c2ecf20Sopenharmony_ci unsigned long offset; 17428c2ecf20Sopenharmony_ci struct btrfs_disk_key *tmp; 17438c2ecf20Sopenharmony_ci struct btrfs_disk_key unaligned; 17448c2ecf20Sopenharmony_ci int mid; 17458c2ecf20Sopenharmony_ci 17468c2ecf20Sopenharmony_ci mid = (low + high) / 2; 17478c2ecf20Sopenharmony_ci offset = p + mid * item_size; 17488c2ecf20Sopenharmony_ci oip = offset_in_page(offset); 17498c2ecf20Sopenharmony_ci 17508c2ecf20Sopenharmony_ci if (oip + key_size <= PAGE_SIZE) { 17518c2ecf20Sopenharmony_ci const unsigned long idx = offset >> PAGE_SHIFT; 17528c2ecf20Sopenharmony_ci char *kaddr = page_address(eb->pages[idx]); 17538c2ecf20Sopenharmony_ci 17548c2ecf20Sopenharmony_ci tmp = (struct btrfs_disk_key *)(kaddr + oip); 17558c2ecf20Sopenharmony_ci } else { 17568c2ecf20Sopenharmony_ci read_extent_buffer(eb, &unaligned, offset, key_size); 17578c2ecf20Sopenharmony_ci tmp = &unaligned; 17588c2ecf20Sopenharmony_ci } 17598c2ecf20Sopenharmony_ci 17608c2ecf20Sopenharmony_ci ret = comp_keys(tmp, key); 17618c2ecf20Sopenharmony_ci 17628c2ecf20Sopenharmony_ci if (ret < 0) 17638c2ecf20Sopenharmony_ci low = mid + 1; 17648c2ecf20Sopenharmony_ci else if (ret > 0) 17658c2ecf20Sopenharmony_ci high = mid; 17668c2ecf20Sopenharmony_ci else { 17678c2ecf20Sopenharmony_ci *slot = mid; 17688c2ecf20Sopenharmony_ci return 0; 17698c2ecf20Sopenharmony_ci } 17708c2ecf20Sopenharmony_ci } 17718c2ecf20Sopenharmony_ci *slot = low; 17728c2ecf20Sopenharmony_ci return 1; 17738c2ecf20Sopenharmony_ci} 17748c2ecf20Sopenharmony_ci 17758c2ecf20Sopenharmony_ci/* 17768c2ecf20Sopenharmony_ci * simple bin_search frontend that does the right thing for 17778c2ecf20Sopenharmony_ci * leaves vs nodes 17788c2ecf20Sopenharmony_ci */ 17798c2ecf20Sopenharmony_ciint btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key, 17808c2ecf20Sopenharmony_ci int *slot) 17818c2ecf20Sopenharmony_ci{ 17828c2ecf20Sopenharmony_ci if (btrfs_header_level(eb) == 0) 17838c2ecf20Sopenharmony_ci return generic_bin_search(eb, 17848c2ecf20Sopenharmony_ci offsetof(struct btrfs_leaf, items), 17858c2ecf20Sopenharmony_ci sizeof(struct btrfs_item), 17868c2ecf20Sopenharmony_ci key, btrfs_header_nritems(eb), 17878c2ecf20Sopenharmony_ci slot); 17888c2ecf20Sopenharmony_ci else 17898c2ecf20Sopenharmony_ci return generic_bin_search(eb, 17908c2ecf20Sopenharmony_ci offsetof(struct btrfs_node, ptrs), 17918c2ecf20Sopenharmony_ci sizeof(struct btrfs_key_ptr), 17928c2ecf20Sopenharmony_ci key, btrfs_header_nritems(eb), 17938c2ecf20Sopenharmony_ci slot); 17948c2ecf20Sopenharmony_ci} 17958c2ecf20Sopenharmony_ci 17968c2ecf20Sopenharmony_cistatic void root_add_used(struct btrfs_root *root, u32 size) 17978c2ecf20Sopenharmony_ci{ 17988c2ecf20Sopenharmony_ci spin_lock(&root->accounting_lock); 17998c2ecf20Sopenharmony_ci btrfs_set_root_used(&root->root_item, 18008c2ecf20Sopenharmony_ci btrfs_root_used(&root->root_item) + size); 18018c2ecf20Sopenharmony_ci spin_unlock(&root->accounting_lock); 18028c2ecf20Sopenharmony_ci} 18038c2ecf20Sopenharmony_ci 18048c2ecf20Sopenharmony_cistatic void root_sub_used(struct btrfs_root *root, u32 size) 18058c2ecf20Sopenharmony_ci{ 18068c2ecf20Sopenharmony_ci spin_lock(&root->accounting_lock); 18078c2ecf20Sopenharmony_ci btrfs_set_root_used(&root->root_item, 18088c2ecf20Sopenharmony_ci btrfs_root_used(&root->root_item) - size); 18098c2ecf20Sopenharmony_ci spin_unlock(&root->accounting_lock); 18108c2ecf20Sopenharmony_ci} 18118c2ecf20Sopenharmony_ci 18128c2ecf20Sopenharmony_ci/* given a node and slot number, this reads the blocks it points to. The 18138c2ecf20Sopenharmony_ci * extent buffer is returned with a reference taken (but unlocked). 18148c2ecf20Sopenharmony_ci */ 18158c2ecf20Sopenharmony_cistruct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent, 18168c2ecf20Sopenharmony_ci int slot) 18178c2ecf20Sopenharmony_ci{ 18188c2ecf20Sopenharmony_ci int level = btrfs_header_level(parent); 18198c2ecf20Sopenharmony_ci struct extent_buffer *eb; 18208c2ecf20Sopenharmony_ci struct btrfs_key first_key; 18218c2ecf20Sopenharmony_ci 18228c2ecf20Sopenharmony_ci if (slot < 0 || slot >= btrfs_header_nritems(parent)) 18238c2ecf20Sopenharmony_ci return ERR_PTR(-ENOENT); 18248c2ecf20Sopenharmony_ci 18258c2ecf20Sopenharmony_ci BUG_ON(level == 0); 18268c2ecf20Sopenharmony_ci 18278c2ecf20Sopenharmony_ci btrfs_node_key_to_cpu(parent, &first_key, slot); 18288c2ecf20Sopenharmony_ci eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot), 18298c2ecf20Sopenharmony_ci btrfs_node_ptr_generation(parent, slot), 18308c2ecf20Sopenharmony_ci level - 1, &first_key); 18318c2ecf20Sopenharmony_ci if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) { 18328c2ecf20Sopenharmony_ci free_extent_buffer(eb); 18338c2ecf20Sopenharmony_ci eb = ERR_PTR(-EIO); 18348c2ecf20Sopenharmony_ci } 18358c2ecf20Sopenharmony_ci 18368c2ecf20Sopenharmony_ci return eb; 18378c2ecf20Sopenharmony_ci} 18388c2ecf20Sopenharmony_ci 18398c2ecf20Sopenharmony_ci/* 18408c2ecf20Sopenharmony_ci * node level balancing, used to make sure nodes are in proper order for 18418c2ecf20Sopenharmony_ci * item deletion. We balance from the top down, so we have to make sure 18428c2ecf20Sopenharmony_ci * that a deletion won't leave an node completely empty later on. 18438c2ecf20Sopenharmony_ci */ 18448c2ecf20Sopenharmony_cistatic noinline int balance_level(struct btrfs_trans_handle *trans, 18458c2ecf20Sopenharmony_ci struct btrfs_root *root, 18468c2ecf20Sopenharmony_ci struct btrfs_path *path, int level) 18478c2ecf20Sopenharmony_ci{ 18488c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 18498c2ecf20Sopenharmony_ci struct extent_buffer *right = NULL; 18508c2ecf20Sopenharmony_ci struct extent_buffer *mid; 18518c2ecf20Sopenharmony_ci struct extent_buffer *left = NULL; 18528c2ecf20Sopenharmony_ci struct extent_buffer *parent = NULL; 18538c2ecf20Sopenharmony_ci int ret = 0; 18548c2ecf20Sopenharmony_ci int wret; 18558c2ecf20Sopenharmony_ci int pslot; 18568c2ecf20Sopenharmony_ci int orig_slot = path->slots[level]; 18578c2ecf20Sopenharmony_ci u64 orig_ptr; 18588c2ecf20Sopenharmony_ci 18598c2ecf20Sopenharmony_ci ASSERT(level > 0); 18608c2ecf20Sopenharmony_ci 18618c2ecf20Sopenharmony_ci mid = path->nodes[level]; 18628c2ecf20Sopenharmony_ci 18638c2ecf20Sopenharmony_ci WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK && 18648c2ecf20Sopenharmony_ci path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING); 18658c2ecf20Sopenharmony_ci WARN_ON(btrfs_header_generation(mid) != trans->transid); 18668c2ecf20Sopenharmony_ci 18678c2ecf20Sopenharmony_ci orig_ptr = btrfs_node_blockptr(mid, orig_slot); 18688c2ecf20Sopenharmony_ci 18698c2ecf20Sopenharmony_ci if (level < BTRFS_MAX_LEVEL - 1) { 18708c2ecf20Sopenharmony_ci parent = path->nodes[level + 1]; 18718c2ecf20Sopenharmony_ci pslot = path->slots[level + 1]; 18728c2ecf20Sopenharmony_ci } 18738c2ecf20Sopenharmony_ci 18748c2ecf20Sopenharmony_ci /* 18758c2ecf20Sopenharmony_ci * deal with the case where there is only one pointer in the root 18768c2ecf20Sopenharmony_ci * by promoting the node below to a root 18778c2ecf20Sopenharmony_ci */ 18788c2ecf20Sopenharmony_ci if (!parent) { 18798c2ecf20Sopenharmony_ci struct extent_buffer *child; 18808c2ecf20Sopenharmony_ci 18818c2ecf20Sopenharmony_ci if (btrfs_header_nritems(mid) != 1) 18828c2ecf20Sopenharmony_ci return 0; 18838c2ecf20Sopenharmony_ci 18848c2ecf20Sopenharmony_ci /* promote the child to a root */ 18858c2ecf20Sopenharmony_ci child = btrfs_read_node_slot(mid, 0); 18868c2ecf20Sopenharmony_ci if (IS_ERR(child)) { 18878c2ecf20Sopenharmony_ci ret = PTR_ERR(child); 18888c2ecf20Sopenharmony_ci btrfs_handle_fs_error(fs_info, ret, NULL); 18898c2ecf20Sopenharmony_ci goto enospc; 18908c2ecf20Sopenharmony_ci } 18918c2ecf20Sopenharmony_ci 18928c2ecf20Sopenharmony_ci btrfs_tree_lock(child); 18938c2ecf20Sopenharmony_ci btrfs_set_lock_blocking_write(child); 18948c2ecf20Sopenharmony_ci ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 18958c2ecf20Sopenharmony_ci BTRFS_NESTING_COW); 18968c2ecf20Sopenharmony_ci if (ret) { 18978c2ecf20Sopenharmony_ci btrfs_tree_unlock(child); 18988c2ecf20Sopenharmony_ci free_extent_buffer(child); 18998c2ecf20Sopenharmony_ci goto enospc; 19008c2ecf20Sopenharmony_ci } 19018c2ecf20Sopenharmony_ci 19028c2ecf20Sopenharmony_ci ret = tree_mod_log_insert_root(root->node, child, 1); 19038c2ecf20Sopenharmony_ci BUG_ON(ret < 0); 19048c2ecf20Sopenharmony_ci rcu_assign_pointer(root->node, child); 19058c2ecf20Sopenharmony_ci 19068c2ecf20Sopenharmony_ci add_root_to_dirty_list(root); 19078c2ecf20Sopenharmony_ci btrfs_tree_unlock(child); 19088c2ecf20Sopenharmony_ci 19098c2ecf20Sopenharmony_ci path->locks[level] = 0; 19108c2ecf20Sopenharmony_ci path->nodes[level] = NULL; 19118c2ecf20Sopenharmony_ci btrfs_clean_tree_block(mid); 19128c2ecf20Sopenharmony_ci btrfs_tree_unlock(mid); 19138c2ecf20Sopenharmony_ci /* once for the path */ 19148c2ecf20Sopenharmony_ci free_extent_buffer(mid); 19158c2ecf20Sopenharmony_ci 19168c2ecf20Sopenharmony_ci root_sub_used(root, mid->len); 19178c2ecf20Sopenharmony_ci btrfs_free_tree_block(trans, root, mid, 0, 1); 19188c2ecf20Sopenharmony_ci /* once for the root ptr */ 19198c2ecf20Sopenharmony_ci free_extent_buffer_stale(mid); 19208c2ecf20Sopenharmony_ci return 0; 19218c2ecf20Sopenharmony_ci } 19228c2ecf20Sopenharmony_ci if (btrfs_header_nritems(mid) > 19238c2ecf20Sopenharmony_ci BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4) 19248c2ecf20Sopenharmony_ci return 0; 19258c2ecf20Sopenharmony_ci 19268c2ecf20Sopenharmony_ci left = btrfs_read_node_slot(parent, pslot - 1); 19278c2ecf20Sopenharmony_ci if (IS_ERR(left)) 19288c2ecf20Sopenharmony_ci left = NULL; 19298c2ecf20Sopenharmony_ci 19308c2ecf20Sopenharmony_ci if (left) { 19318c2ecf20Sopenharmony_ci __btrfs_tree_lock(left, BTRFS_NESTING_LEFT); 19328c2ecf20Sopenharmony_ci btrfs_set_lock_blocking_write(left); 19338c2ecf20Sopenharmony_ci wret = btrfs_cow_block(trans, root, left, 19348c2ecf20Sopenharmony_ci parent, pslot - 1, &left, 19358c2ecf20Sopenharmony_ci BTRFS_NESTING_LEFT_COW); 19368c2ecf20Sopenharmony_ci if (wret) { 19378c2ecf20Sopenharmony_ci ret = wret; 19388c2ecf20Sopenharmony_ci goto enospc; 19398c2ecf20Sopenharmony_ci } 19408c2ecf20Sopenharmony_ci } 19418c2ecf20Sopenharmony_ci 19428c2ecf20Sopenharmony_ci right = btrfs_read_node_slot(parent, pslot + 1); 19438c2ecf20Sopenharmony_ci if (IS_ERR(right)) 19448c2ecf20Sopenharmony_ci right = NULL; 19458c2ecf20Sopenharmony_ci 19468c2ecf20Sopenharmony_ci if (right) { 19478c2ecf20Sopenharmony_ci __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT); 19488c2ecf20Sopenharmony_ci btrfs_set_lock_blocking_write(right); 19498c2ecf20Sopenharmony_ci wret = btrfs_cow_block(trans, root, right, 19508c2ecf20Sopenharmony_ci parent, pslot + 1, &right, 19518c2ecf20Sopenharmony_ci BTRFS_NESTING_RIGHT_COW); 19528c2ecf20Sopenharmony_ci if (wret) { 19538c2ecf20Sopenharmony_ci ret = wret; 19548c2ecf20Sopenharmony_ci goto enospc; 19558c2ecf20Sopenharmony_ci } 19568c2ecf20Sopenharmony_ci } 19578c2ecf20Sopenharmony_ci 19588c2ecf20Sopenharmony_ci /* first, try to make some room in the middle buffer */ 19598c2ecf20Sopenharmony_ci if (left) { 19608c2ecf20Sopenharmony_ci orig_slot += btrfs_header_nritems(left); 19618c2ecf20Sopenharmony_ci wret = push_node_left(trans, left, mid, 1); 19628c2ecf20Sopenharmony_ci if (wret < 0) 19638c2ecf20Sopenharmony_ci ret = wret; 19648c2ecf20Sopenharmony_ci } 19658c2ecf20Sopenharmony_ci 19668c2ecf20Sopenharmony_ci /* 19678c2ecf20Sopenharmony_ci * then try to empty the right most buffer into the middle 19688c2ecf20Sopenharmony_ci */ 19698c2ecf20Sopenharmony_ci if (right) { 19708c2ecf20Sopenharmony_ci wret = push_node_left(trans, mid, right, 1); 19718c2ecf20Sopenharmony_ci if (wret < 0 && wret != -ENOSPC) 19728c2ecf20Sopenharmony_ci ret = wret; 19738c2ecf20Sopenharmony_ci if (btrfs_header_nritems(right) == 0) { 19748c2ecf20Sopenharmony_ci btrfs_clean_tree_block(right); 19758c2ecf20Sopenharmony_ci btrfs_tree_unlock(right); 19768c2ecf20Sopenharmony_ci del_ptr(root, path, level + 1, pslot + 1); 19778c2ecf20Sopenharmony_ci root_sub_used(root, right->len); 19788c2ecf20Sopenharmony_ci btrfs_free_tree_block(trans, root, right, 0, 1); 19798c2ecf20Sopenharmony_ci free_extent_buffer_stale(right); 19808c2ecf20Sopenharmony_ci right = NULL; 19818c2ecf20Sopenharmony_ci } else { 19828c2ecf20Sopenharmony_ci struct btrfs_disk_key right_key; 19838c2ecf20Sopenharmony_ci btrfs_node_key(right, &right_key, 0); 19848c2ecf20Sopenharmony_ci ret = tree_mod_log_insert_key(parent, pslot + 1, 19858c2ecf20Sopenharmony_ci MOD_LOG_KEY_REPLACE, GFP_NOFS); 19868c2ecf20Sopenharmony_ci BUG_ON(ret < 0); 19878c2ecf20Sopenharmony_ci btrfs_set_node_key(parent, &right_key, pslot + 1); 19888c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(parent); 19898c2ecf20Sopenharmony_ci } 19908c2ecf20Sopenharmony_ci } 19918c2ecf20Sopenharmony_ci if (btrfs_header_nritems(mid) == 1) { 19928c2ecf20Sopenharmony_ci /* 19938c2ecf20Sopenharmony_ci * we're not allowed to leave a node with one item in the 19948c2ecf20Sopenharmony_ci * tree during a delete. A deletion from lower in the tree 19958c2ecf20Sopenharmony_ci * could try to delete the only pointer in this node. 19968c2ecf20Sopenharmony_ci * So, pull some keys from the left. 19978c2ecf20Sopenharmony_ci * There has to be a left pointer at this point because 19988c2ecf20Sopenharmony_ci * otherwise we would have pulled some pointers from the 19998c2ecf20Sopenharmony_ci * right 20008c2ecf20Sopenharmony_ci */ 20018c2ecf20Sopenharmony_ci if (!left) { 20028c2ecf20Sopenharmony_ci ret = -EROFS; 20038c2ecf20Sopenharmony_ci btrfs_handle_fs_error(fs_info, ret, NULL); 20048c2ecf20Sopenharmony_ci goto enospc; 20058c2ecf20Sopenharmony_ci } 20068c2ecf20Sopenharmony_ci wret = balance_node_right(trans, mid, left); 20078c2ecf20Sopenharmony_ci if (wret < 0) { 20088c2ecf20Sopenharmony_ci ret = wret; 20098c2ecf20Sopenharmony_ci goto enospc; 20108c2ecf20Sopenharmony_ci } 20118c2ecf20Sopenharmony_ci if (wret == 1) { 20128c2ecf20Sopenharmony_ci wret = push_node_left(trans, left, mid, 1); 20138c2ecf20Sopenharmony_ci if (wret < 0) 20148c2ecf20Sopenharmony_ci ret = wret; 20158c2ecf20Sopenharmony_ci } 20168c2ecf20Sopenharmony_ci BUG_ON(wret == 1); 20178c2ecf20Sopenharmony_ci } 20188c2ecf20Sopenharmony_ci if (btrfs_header_nritems(mid) == 0) { 20198c2ecf20Sopenharmony_ci btrfs_clean_tree_block(mid); 20208c2ecf20Sopenharmony_ci btrfs_tree_unlock(mid); 20218c2ecf20Sopenharmony_ci del_ptr(root, path, level + 1, pslot); 20228c2ecf20Sopenharmony_ci root_sub_used(root, mid->len); 20238c2ecf20Sopenharmony_ci btrfs_free_tree_block(trans, root, mid, 0, 1); 20248c2ecf20Sopenharmony_ci free_extent_buffer_stale(mid); 20258c2ecf20Sopenharmony_ci mid = NULL; 20268c2ecf20Sopenharmony_ci } else { 20278c2ecf20Sopenharmony_ci /* update the parent key to reflect our changes */ 20288c2ecf20Sopenharmony_ci struct btrfs_disk_key mid_key; 20298c2ecf20Sopenharmony_ci btrfs_node_key(mid, &mid_key, 0); 20308c2ecf20Sopenharmony_ci ret = tree_mod_log_insert_key(parent, pslot, 20318c2ecf20Sopenharmony_ci MOD_LOG_KEY_REPLACE, GFP_NOFS); 20328c2ecf20Sopenharmony_ci BUG_ON(ret < 0); 20338c2ecf20Sopenharmony_ci btrfs_set_node_key(parent, &mid_key, pslot); 20348c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(parent); 20358c2ecf20Sopenharmony_ci } 20368c2ecf20Sopenharmony_ci 20378c2ecf20Sopenharmony_ci /* update the path */ 20388c2ecf20Sopenharmony_ci if (left) { 20398c2ecf20Sopenharmony_ci if (btrfs_header_nritems(left) > orig_slot) { 20408c2ecf20Sopenharmony_ci atomic_inc(&left->refs); 20418c2ecf20Sopenharmony_ci /* left was locked after cow */ 20428c2ecf20Sopenharmony_ci path->nodes[level] = left; 20438c2ecf20Sopenharmony_ci path->slots[level + 1] -= 1; 20448c2ecf20Sopenharmony_ci path->slots[level] = orig_slot; 20458c2ecf20Sopenharmony_ci if (mid) { 20468c2ecf20Sopenharmony_ci btrfs_tree_unlock(mid); 20478c2ecf20Sopenharmony_ci free_extent_buffer(mid); 20488c2ecf20Sopenharmony_ci } 20498c2ecf20Sopenharmony_ci } else { 20508c2ecf20Sopenharmony_ci orig_slot -= btrfs_header_nritems(left); 20518c2ecf20Sopenharmony_ci path->slots[level] = orig_slot; 20528c2ecf20Sopenharmony_ci } 20538c2ecf20Sopenharmony_ci } 20548c2ecf20Sopenharmony_ci /* double check we haven't messed things up */ 20558c2ecf20Sopenharmony_ci if (orig_ptr != 20568c2ecf20Sopenharmony_ci btrfs_node_blockptr(path->nodes[level], path->slots[level])) 20578c2ecf20Sopenharmony_ci BUG(); 20588c2ecf20Sopenharmony_cienospc: 20598c2ecf20Sopenharmony_ci if (right) { 20608c2ecf20Sopenharmony_ci btrfs_tree_unlock(right); 20618c2ecf20Sopenharmony_ci free_extent_buffer(right); 20628c2ecf20Sopenharmony_ci } 20638c2ecf20Sopenharmony_ci if (left) { 20648c2ecf20Sopenharmony_ci if (path->nodes[level] != left) 20658c2ecf20Sopenharmony_ci btrfs_tree_unlock(left); 20668c2ecf20Sopenharmony_ci free_extent_buffer(left); 20678c2ecf20Sopenharmony_ci } 20688c2ecf20Sopenharmony_ci return ret; 20698c2ecf20Sopenharmony_ci} 20708c2ecf20Sopenharmony_ci 20718c2ecf20Sopenharmony_ci/* Node balancing for insertion. Here we only split or push nodes around 20728c2ecf20Sopenharmony_ci * when they are completely full. This is also done top down, so we 20738c2ecf20Sopenharmony_ci * have to be pessimistic. 20748c2ecf20Sopenharmony_ci */ 20758c2ecf20Sopenharmony_cistatic noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, 20768c2ecf20Sopenharmony_ci struct btrfs_root *root, 20778c2ecf20Sopenharmony_ci struct btrfs_path *path, int level) 20788c2ecf20Sopenharmony_ci{ 20798c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 20808c2ecf20Sopenharmony_ci struct extent_buffer *right = NULL; 20818c2ecf20Sopenharmony_ci struct extent_buffer *mid; 20828c2ecf20Sopenharmony_ci struct extent_buffer *left = NULL; 20838c2ecf20Sopenharmony_ci struct extent_buffer *parent = NULL; 20848c2ecf20Sopenharmony_ci int ret = 0; 20858c2ecf20Sopenharmony_ci int wret; 20868c2ecf20Sopenharmony_ci int pslot; 20878c2ecf20Sopenharmony_ci int orig_slot = path->slots[level]; 20888c2ecf20Sopenharmony_ci 20898c2ecf20Sopenharmony_ci if (level == 0) 20908c2ecf20Sopenharmony_ci return 1; 20918c2ecf20Sopenharmony_ci 20928c2ecf20Sopenharmony_ci mid = path->nodes[level]; 20938c2ecf20Sopenharmony_ci WARN_ON(btrfs_header_generation(mid) != trans->transid); 20948c2ecf20Sopenharmony_ci 20958c2ecf20Sopenharmony_ci if (level < BTRFS_MAX_LEVEL - 1) { 20968c2ecf20Sopenharmony_ci parent = path->nodes[level + 1]; 20978c2ecf20Sopenharmony_ci pslot = path->slots[level + 1]; 20988c2ecf20Sopenharmony_ci } 20998c2ecf20Sopenharmony_ci 21008c2ecf20Sopenharmony_ci if (!parent) 21018c2ecf20Sopenharmony_ci return 1; 21028c2ecf20Sopenharmony_ci 21038c2ecf20Sopenharmony_ci left = btrfs_read_node_slot(parent, pslot - 1); 21048c2ecf20Sopenharmony_ci if (IS_ERR(left)) 21058c2ecf20Sopenharmony_ci left = NULL; 21068c2ecf20Sopenharmony_ci 21078c2ecf20Sopenharmony_ci /* first, try to make some room in the middle buffer */ 21088c2ecf20Sopenharmony_ci if (left) { 21098c2ecf20Sopenharmony_ci u32 left_nr; 21108c2ecf20Sopenharmony_ci 21118c2ecf20Sopenharmony_ci __btrfs_tree_lock(left, BTRFS_NESTING_LEFT); 21128c2ecf20Sopenharmony_ci btrfs_set_lock_blocking_write(left); 21138c2ecf20Sopenharmony_ci 21148c2ecf20Sopenharmony_ci left_nr = btrfs_header_nritems(left); 21158c2ecf20Sopenharmony_ci if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) { 21168c2ecf20Sopenharmony_ci wret = 1; 21178c2ecf20Sopenharmony_ci } else { 21188c2ecf20Sopenharmony_ci ret = btrfs_cow_block(trans, root, left, parent, 21198c2ecf20Sopenharmony_ci pslot - 1, &left, 21208c2ecf20Sopenharmony_ci BTRFS_NESTING_LEFT_COW); 21218c2ecf20Sopenharmony_ci if (ret) 21228c2ecf20Sopenharmony_ci wret = 1; 21238c2ecf20Sopenharmony_ci else { 21248c2ecf20Sopenharmony_ci wret = push_node_left(trans, left, mid, 0); 21258c2ecf20Sopenharmony_ci } 21268c2ecf20Sopenharmony_ci } 21278c2ecf20Sopenharmony_ci if (wret < 0) 21288c2ecf20Sopenharmony_ci ret = wret; 21298c2ecf20Sopenharmony_ci if (wret == 0) { 21308c2ecf20Sopenharmony_ci struct btrfs_disk_key disk_key; 21318c2ecf20Sopenharmony_ci orig_slot += left_nr; 21328c2ecf20Sopenharmony_ci btrfs_node_key(mid, &disk_key, 0); 21338c2ecf20Sopenharmony_ci ret = tree_mod_log_insert_key(parent, pslot, 21348c2ecf20Sopenharmony_ci MOD_LOG_KEY_REPLACE, GFP_NOFS); 21358c2ecf20Sopenharmony_ci BUG_ON(ret < 0); 21368c2ecf20Sopenharmony_ci btrfs_set_node_key(parent, &disk_key, pslot); 21378c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(parent); 21388c2ecf20Sopenharmony_ci if (btrfs_header_nritems(left) > orig_slot) { 21398c2ecf20Sopenharmony_ci path->nodes[level] = left; 21408c2ecf20Sopenharmony_ci path->slots[level + 1] -= 1; 21418c2ecf20Sopenharmony_ci path->slots[level] = orig_slot; 21428c2ecf20Sopenharmony_ci btrfs_tree_unlock(mid); 21438c2ecf20Sopenharmony_ci free_extent_buffer(mid); 21448c2ecf20Sopenharmony_ci } else { 21458c2ecf20Sopenharmony_ci orig_slot -= 21468c2ecf20Sopenharmony_ci btrfs_header_nritems(left); 21478c2ecf20Sopenharmony_ci path->slots[level] = orig_slot; 21488c2ecf20Sopenharmony_ci btrfs_tree_unlock(left); 21498c2ecf20Sopenharmony_ci free_extent_buffer(left); 21508c2ecf20Sopenharmony_ci } 21518c2ecf20Sopenharmony_ci return 0; 21528c2ecf20Sopenharmony_ci } 21538c2ecf20Sopenharmony_ci btrfs_tree_unlock(left); 21548c2ecf20Sopenharmony_ci free_extent_buffer(left); 21558c2ecf20Sopenharmony_ci } 21568c2ecf20Sopenharmony_ci right = btrfs_read_node_slot(parent, pslot + 1); 21578c2ecf20Sopenharmony_ci if (IS_ERR(right)) 21588c2ecf20Sopenharmony_ci right = NULL; 21598c2ecf20Sopenharmony_ci 21608c2ecf20Sopenharmony_ci /* 21618c2ecf20Sopenharmony_ci * then try to empty the right most buffer into the middle 21628c2ecf20Sopenharmony_ci */ 21638c2ecf20Sopenharmony_ci if (right) { 21648c2ecf20Sopenharmony_ci u32 right_nr; 21658c2ecf20Sopenharmony_ci 21668c2ecf20Sopenharmony_ci __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT); 21678c2ecf20Sopenharmony_ci btrfs_set_lock_blocking_write(right); 21688c2ecf20Sopenharmony_ci 21698c2ecf20Sopenharmony_ci right_nr = btrfs_header_nritems(right); 21708c2ecf20Sopenharmony_ci if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) { 21718c2ecf20Sopenharmony_ci wret = 1; 21728c2ecf20Sopenharmony_ci } else { 21738c2ecf20Sopenharmony_ci ret = btrfs_cow_block(trans, root, right, 21748c2ecf20Sopenharmony_ci parent, pslot + 1, 21758c2ecf20Sopenharmony_ci &right, BTRFS_NESTING_RIGHT_COW); 21768c2ecf20Sopenharmony_ci if (ret) 21778c2ecf20Sopenharmony_ci wret = 1; 21788c2ecf20Sopenharmony_ci else { 21798c2ecf20Sopenharmony_ci wret = balance_node_right(trans, right, mid); 21808c2ecf20Sopenharmony_ci } 21818c2ecf20Sopenharmony_ci } 21828c2ecf20Sopenharmony_ci if (wret < 0) 21838c2ecf20Sopenharmony_ci ret = wret; 21848c2ecf20Sopenharmony_ci if (wret == 0) { 21858c2ecf20Sopenharmony_ci struct btrfs_disk_key disk_key; 21868c2ecf20Sopenharmony_ci 21878c2ecf20Sopenharmony_ci btrfs_node_key(right, &disk_key, 0); 21888c2ecf20Sopenharmony_ci ret = tree_mod_log_insert_key(parent, pslot + 1, 21898c2ecf20Sopenharmony_ci MOD_LOG_KEY_REPLACE, GFP_NOFS); 21908c2ecf20Sopenharmony_ci BUG_ON(ret < 0); 21918c2ecf20Sopenharmony_ci btrfs_set_node_key(parent, &disk_key, pslot + 1); 21928c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(parent); 21938c2ecf20Sopenharmony_ci 21948c2ecf20Sopenharmony_ci if (btrfs_header_nritems(mid) <= orig_slot) { 21958c2ecf20Sopenharmony_ci path->nodes[level] = right; 21968c2ecf20Sopenharmony_ci path->slots[level + 1] += 1; 21978c2ecf20Sopenharmony_ci path->slots[level] = orig_slot - 21988c2ecf20Sopenharmony_ci btrfs_header_nritems(mid); 21998c2ecf20Sopenharmony_ci btrfs_tree_unlock(mid); 22008c2ecf20Sopenharmony_ci free_extent_buffer(mid); 22018c2ecf20Sopenharmony_ci } else { 22028c2ecf20Sopenharmony_ci btrfs_tree_unlock(right); 22038c2ecf20Sopenharmony_ci free_extent_buffer(right); 22048c2ecf20Sopenharmony_ci } 22058c2ecf20Sopenharmony_ci return 0; 22068c2ecf20Sopenharmony_ci } 22078c2ecf20Sopenharmony_ci btrfs_tree_unlock(right); 22088c2ecf20Sopenharmony_ci free_extent_buffer(right); 22098c2ecf20Sopenharmony_ci } 22108c2ecf20Sopenharmony_ci return 1; 22118c2ecf20Sopenharmony_ci} 22128c2ecf20Sopenharmony_ci 22138c2ecf20Sopenharmony_ci/* 22148c2ecf20Sopenharmony_ci * readahead one full node of leaves, finding things that are close 22158c2ecf20Sopenharmony_ci * to the block in 'slot', and triggering ra on them. 22168c2ecf20Sopenharmony_ci */ 22178c2ecf20Sopenharmony_cistatic void reada_for_search(struct btrfs_fs_info *fs_info, 22188c2ecf20Sopenharmony_ci struct btrfs_path *path, 22198c2ecf20Sopenharmony_ci int level, int slot, u64 objectid) 22208c2ecf20Sopenharmony_ci{ 22218c2ecf20Sopenharmony_ci struct extent_buffer *node; 22228c2ecf20Sopenharmony_ci struct btrfs_disk_key disk_key; 22238c2ecf20Sopenharmony_ci u32 nritems; 22248c2ecf20Sopenharmony_ci u64 search; 22258c2ecf20Sopenharmony_ci u64 target; 22268c2ecf20Sopenharmony_ci u64 nread = 0; 22278c2ecf20Sopenharmony_ci struct extent_buffer *eb; 22288c2ecf20Sopenharmony_ci u32 nr; 22298c2ecf20Sopenharmony_ci u32 blocksize; 22308c2ecf20Sopenharmony_ci u32 nscan = 0; 22318c2ecf20Sopenharmony_ci 22328c2ecf20Sopenharmony_ci if (level != 1) 22338c2ecf20Sopenharmony_ci return; 22348c2ecf20Sopenharmony_ci 22358c2ecf20Sopenharmony_ci if (!path->nodes[level]) 22368c2ecf20Sopenharmony_ci return; 22378c2ecf20Sopenharmony_ci 22388c2ecf20Sopenharmony_ci node = path->nodes[level]; 22398c2ecf20Sopenharmony_ci 22408c2ecf20Sopenharmony_ci search = btrfs_node_blockptr(node, slot); 22418c2ecf20Sopenharmony_ci blocksize = fs_info->nodesize; 22428c2ecf20Sopenharmony_ci eb = find_extent_buffer(fs_info, search); 22438c2ecf20Sopenharmony_ci if (eb) { 22448c2ecf20Sopenharmony_ci free_extent_buffer(eb); 22458c2ecf20Sopenharmony_ci return; 22468c2ecf20Sopenharmony_ci } 22478c2ecf20Sopenharmony_ci 22488c2ecf20Sopenharmony_ci target = search; 22498c2ecf20Sopenharmony_ci 22508c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(node); 22518c2ecf20Sopenharmony_ci nr = slot; 22528c2ecf20Sopenharmony_ci 22538c2ecf20Sopenharmony_ci while (1) { 22548c2ecf20Sopenharmony_ci if (path->reada == READA_BACK) { 22558c2ecf20Sopenharmony_ci if (nr == 0) 22568c2ecf20Sopenharmony_ci break; 22578c2ecf20Sopenharmony_ci nr--; 22588c2ecf20Sopenharmony_ci } else if (path->reada == READA_FORWARD) { 22598c2ecf20Sopenharmony_ci nr++; 22608c2ecf20Sopenharmony_ci if (nr >= nritems) 22618c2ecf20Sopenharmony_ci break; 22628c2ecf20Sopenharmony_ci } 22638c2ecf20Sopenharmony_ci if (path->reada == READA_BACK && objectid) { 22648c2ecf20Sopenharmony_ci btrfs_node_key(node, &disk_key, nr); 22658c2ecf20Sopenharmony_ci if (btrfs_disk_key_objectid(&disk_key) != objectid) 22668c2ecf20Sopenharmony_ci break; 22678c2ecf20Sopenharmony_ci } 22688c2ecf20Sopenharmony_ci search = btrfs_node_blockptr(node, nr); 22698c2ecf20Sopenharmony_ci if ((search <= target && target - search <= 65536) || 22708c2ecf20Sopenharmony_ci (search > target && search - target <= 65536)) { 22718c2ecf20Sopenharmony_ci readahead_tree_block(fs_info, search); 22728c2ecf20Sopenharmony_ci nread += blocksize; 22738c2ecf20Sopenharmony_ci } 22748c2ecf20Sopenharmony_ci nscan++; 22758c2ecf20Sopenharmony_ci if ((nread > 65536 || nscan > 32)) 22768c2ecf20Sopenharmony_ci break; 22778c2ecf20Sopenharmony_ci } 22788c2ecf20Sopenharmony_ci} 22798c2ecf20Sopenharmony_ci 22808c2ecf20Sopenharmony_cistatic noinline void reada_for_balance(struct btrfs_fs_info *fs_info, 22818c2ecf20Sopenharmony_ci struct btrfs_path *path, int level) 22828c2ecf20Sopenharmony_ci{ 22838c2ecf20Sopenharmony_ci int slot; 22848c2ecf20Sopenharmony_ci int nritems; 22858c2ecf20Sopenharmony_ci struct extent_buffer *parent; 22868c2ecf20Sopenharmony_ci struct extent_buffer *eb; 22878c2ecf20Sopenharmony_ci u64 gen; 22888c2ecf20Sopenharmony_ci u64 block1 = 0; 22898c2ecf20Sopenharmony_ci u64 block2 = 0; 22908c2ecf20Sopenharmony_ci 22918c2ecf20Sopenharmony_ci parent = path->nodes[level + 1]; 22928c2ecf20Sopenharmony_ci if (!parent) 22938c2ecf20Sopenharmony_ci return; 22948c2ecf20Sopenharmony_ci 22958c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(parent); 22968c2ecf20Sopenharmony_ci slot = path->slots[level + 1]; 22978c2ecf20Sopenharmony_ci 22988c2ecf20Sopenharmony_ci if (slot > 0) { 22998c2ecf20Sopenharmony_ci block1 = btrfs_node_blockptr(parent, slot - 1); 23008c2ecf20Sopenharmony_ci gen = btrfs_node_ptr_generation(parent, slot - 1); 23018c2ecf20Sopenharmony_ci eb = find_extent_buffer(fs_info, block1); 23028c2ecf20Sopenharmony_ci /* 23038c2ecf20Sopenharmony_ci * if we get -eagain from btrfs_buffer_uptodate, we 23048c2ecf20Sopenharmony_ci * don't want to return eagain here. That will loop 23058c2ecf20Sopenharmony_ci * forever 23068c2ecf20Sopenharmony_ci */ 23078c2ecf20Sopenharmony_ci if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0) 23088c2ecf20Sopenharmony_ci block1 = 0; 23098c2ecf20Sopenharmony_ci free_extent_buffer(eb); 23108c2ecf20Sopenharmony_ci } 23118c2ecf20Sopenharmony_ci if (slot + 1 < nritems) { 23128c2ecf20Sopenharmony_ci block2 = btrfs_node_blockptr(parent, slot + 1); 23138c2ecf20Sopenharmony_ci gen = btrfs_node_ptr_generation(parent, slot + 1); 23148c2ecf20Sopenharmony_ci eb = find_extent_buffer(fs_info, block2); 23158c2ecf20Sopenharmony_ci if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0) 23168c2ecf20Sopenharmony_ci block2 = 0; 23178c2ecf20Sopenharmony_ci free_extent_buffer(eb); 23188c2ecf20Sopenharmony_ci } 23198c2ecf20Sopenharmony_ci 23208c2ecf20Sopenharmony_ci if (block1) 23218c2ecf20Sopenharmony_ci readahead_tree_block(fs_info, block1); 23228c2ecf20Sopenharmony_ci if (block2) 23238c2ecf20Sopenharmony_ci readahead_tree_block(fs_info, block2); 23248c2ecf20Sopenharmony_ci} 23258c2ecf20Sopenharmony_ci 23268c2ecf20Sopenharmony_ci 23278c2ecf20Sopenharmony_ci/* 23288c2ecf20Sopenharmony_ci * when we walk down the tree, it is usually safe to unlock the higher layers 23298c2ecf20Sopenharmony_ci * in the tree. The exceptions are when our path goes through slot 0, because 23308c2ecf20Sopenharmony_ci * operations on the tree might require changing key pointers higher up in the 23318c2ecf20Sopenharmony_ci * tree. 23328c2ecf20Sopenharmony_ci * 23338c2ecf20Sopenharmony_ci * callers might also have set path->keep_locks, which tells this code to keep 23348c2ecf20Sopenharmony_ci * the lock if the path points to the last slot in the block. This is part of 23358c2ecf20Sopenharmony_ci * walking through the tree, and selecting the next slot in the higher block. 23368c2ecf20Sopenharmony_ci * 23378c2ecf20Sopenharmony_ci * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so 23388c2ecf20Sopenharmony_ci * if lowest_unlock is 1, level 0 won't be unlocked 23398c2ecf20Sopenharmony_ci */ 23408c2ecf20Sopenharmony_cistatic noinline void unlock_up(struct btrfs_path *path, int level, 23418c2ecf20Sopenharmony_ci int lowest_unlock, int min_write_lock_level, 23428c2ecf20Sopenharmony_ci int *write_lock_level) 23438c2ecf20Sopenharmony_ci{ 23448c2ecf20Sopenharmony_ci int i; 23458c2ecf20Sopenharmony_ci int skip_level = level; 23468c2ecf20Sopenharmony_ci int no_skips = 0; 23478c2ecf20Sopenharmony_ci struct extent_buffer *t; 23488c2ecf20Sopenharmony_ci 23498c2ecf20Sopenharmony_ci for (i = level; i < BTRFS_MAX_LEVEL; i++) { 23508c2ecf20Sopenharmony_ci if (!path->nodes[i]) 23518c2ecf20Sopenharmony_ci break; 23528c2ecf20Sopenharmony_ci if (!path->locks[i]) 23538c2ecf20Sopenharmony_ci break; 23548c2ecf20Sopenharmony_ci if (!no_skips && path->slots[i] == 0) { 23558c2ecf20Sopenharmony_ci skip_level = i + 1; 23568c2ecf20Sopenharmony_ci continue; 23578c2ecf20Sopenharmony_ci } 23588c2ecf20Sopenharmony_ci if (!no_skips && path->keep_locks) { 23598c2ecf20Sopenharmony_ci u32 nritems; 23608c2ecf20Sopenharmony_ci t = path->nodes[i]; 23618c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(t); 23628c2ecf20Sopenharmony_ci if (nritems < 1 || path->slots[i] >= nritems - 1) { 23638c2ecf20Sopenharmony_ci skip_level = i + 1; 23648c2ecf20Sopenharmony_ci continue; 23658c2ecf20Sopenharmony_ci } 23668c2ecf20Sopenharmony_ci } 23678c2ecf20Sopenharmony_ci if (skip_level < i && i >= lowest_unlock) 23688c2ecf20Sopenharmony_ci no_skips = 1; 23698c2ecf20Sopenharmony_ci 23708c2ecf20Sopenharmony_ci t = path->nodes[i]; 23718c2ecf20Sopenharmony_ci if (i >= lowest_unlock && i > skip_level) { 23728c2ecf20Sopenharmony_ci btrfs_tree_unlock_rw(t, path->locks[i]); 23738c2ecf20Sopenharmony_ci path->locks[i] = 0; 23748c2ecf20Sopenharmony_ci if (write_lock_level && 23758c2ecf20Sopenharmony_ci i > min_write_lock_level && 23768c2ecf20Sopenharmony_ci i <= *write_lock_level) { 23778c2ecf20Sopenharmony_ci *write_lock_level = i - 1; 23788c2ecf20Sopenharmony_ci } 23798c2ecf20Sopenharmony_ci } 23808c2ecf20Sopenharmony_ci } 23818c2ecf20Sopenharmony_ci} 23828c2ecf20Sopenharmony_ci 23838c2ecf20Sopenharmony_ci/* 23848c2ecf20Sopenharmony_ci * helper function for btrfs_search_slot. The goal is to find a block 23858c2ecf20Sopenharmony_ci * in cache without setting the path to blocking. If we find the block 23868c2ecf20Sopenharmony_ci * we return zero and the path is unchanged. 23878c2ecf20Sopenharmony_ci * 23888c2ecf20Sopenharmony_ci * If we can't find the block, we set the path blocking and do some 23898c2ecf20Sopenharmony_ci * reada. -EAGAIN is returned and the search must be repeated. 23908c2ecf20Sopenharmony_ci */ 23918c2ecf20Sopenharmony_cistatic int 23928c2ecf20Sopenharmony_ciread_block_for_search(struct btrfs_root *root, struct btrfs_path *p, 23938c2ecf20Sopenharmony_ci struct extent_buffer **eb_ret, int level, int slot, 23948c2ecf20Sopenharmony_ci const struct btrfs_key *key) 23958c2ecf20Sopenharmony_ci{ 23968c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 23978c2ecf20Sopenharmony_ci u64 blocknr; 23988c2ecf20Sopenharmony_ci u64 gen; 23998c2ecf20Sopenharmony_ci struct extent_buffer *tmp; 24008c2ecf20Sopenharmony_ci struct btrfs_key first_key; 24018c2ecf20Sopenharmony_ci int ret; 24028c2ecf20Sopenharmony_ci int parent_level; 24038c2ecf20Sopenharmony_ci 24048c2ecf20Sopenharmony_ci blocknr = btrfs_node_blockptr(*eb_ret, slot); 24058c2ecf20Sopenharmony_ci gen = btrfs_node_ptr_generation(*eb_ret, slot); 24068c2ecf20Sopenharmony_ci parent_level = btrfs_header_level(*eb_ret); 24078c2ecf20Sopenharmony_ci btrfs_node_key_to_cpu(*eb_ret, &first_key, slot); 24088c2ecf20Sopenharmony_ci 24098c2ecf20Sopenharmony_ci tmp = find_extent_buffer(fs_info, blocknr); 24108c2ecf20Sopenharmony_ci if (tmp) { 24118c2ecf20Sopenharmony_ci /* first we do an atomic uptodate check */ 24128c2ecf20Sopenharmony_ci if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { 24138c2ecf20Sopenharmony_ci /* 24148c2ecf20Sopenharmony_ci * Do extra check for first_key, eb can be stale due to 24158c2ecf20Sopenharmony_ci * being cached, read from scrub, or have multiple 24168c2ecf20Sopenharmony_ci * parents (shared tree blocks). 24178c2ecf20Sopenharmony_ci */ 24188c2ecf20Sopenharmony_ci if (btrfs_verify_level_key(tmp, 24198c2ecf20Sopenharmony_ci parent_level - 1, &first_key, gen)) { 24208c2ecf20Sopenharmony_ci free_extent_buffer(tmp); 24218c2ecf20Sopenharmony_ci return -EUCLEAN; 24228c2ecf20Sopenharmony_ci } 24238c2ecf20Sopenharmony_ci *eb_ret = tmp; 24248c2ecf20Sopenharmony_ci return 0; 24258c2ecf20Sopenharmony_ci } 24268c2ecf20Sopenharmony_ci 24278c2ecf20Sopenharmony_ci /* the pages were up to date, but we failed 24288c2ecf20Sopenharmony_ci * the generation number check. Do a full 24298c2ecf20Sopenharmony_ci * read for the generation number that is correct. 24308c2ecf20Sopenharmony_ci * We must do this without dropping locks so 24318c2ecf20Sopenharmony_ci * we can trust our generation number 24328c2ecf20Sopenharmony_ci */ 24338c2ecf20Sopenharmony_ci btrfs_set_path_blocking(p); 24348c2ecf20Sopenharmony_ci 24358c2ecf20Sopenharmony_ci /* now we're allowed to do a blocking uptodate check */ 24368c2ecf20Sopenharmony_ci ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key); 24378c2ecf20Sopenharmony_ci if (!ret) { 24388c2ecf20Sopenharmony_ci *eb_ret = tmp; 24398c2ecf20Sopenharmony_ci return 0; 24408c2ecf20Sopenharmony_ci } 24418c2ecf20Sopenharmony_ci free_extent_buffer(tmp); 24428c2ecf20Sopenharmony_ci btrfs_release_path(p); 24438c2ecf20Sopenharmony_ci return -EIO; 24448c2ecf20Sopenharmony_ci } 24458c2ecf20Sopenharmony_ci 24468c2ecf20Sopenharmony_ci /* 24478c2ecf20Sopenharmony_ci * reduce lock contention at high levels 24488c2ecf20Sopenharmony_ci * of the btree by dropping locks before 24498c2ecf20Sopenharmony_ci * we read. Don't release the lock on the current 24508c2ecf20Sopenharmony_ci * level because we need to walk this node to figure 24518c2ecf20Sopenharmony_ci * out which blocks to read. 24528c2ecf20Sopenharmony_ci */ 24538c2ecf20Sopenharmony_ci btrfs_unlock_up_safe(p, level + 1); 24548c2ecf20Sopenharmony_ci btrfs_set_path_blocking(p); 24558c2ecf20Sopenharmony_ci 24568c2ecf20Sopenharmony_ci if (p->reada != READA_NONE) 24578c2ecf20Sopenharmony_ci reada_for_search(fs_info, p, level, slot, key->objectid); 24588c2ecf20Sopenharmony_ci 24598c2ecf20Sopenharmony_ci ret = -EAGAIN; 24608c2ecf20Sopenharmony_ci tmp = read_tree_block(fs_info, blocknr, gen, parent_level - 1, 24618c2ecf20Sopenharmony_ci &first_key); 24628c2ecf20Sopenharmony_ci if (!IS_ERR(tmp)) { 24638c2ecf20Sopenharmony_ci /* 24648c2ecf20Sopenharmony_ci * If the read above didn't mark this buffer up to date, 24658c2ecf20Sopenharmony_ci * it will never end up being up to date. Set ret to EIO now 24668c2ecf20Sopenharmony_ci * and give up so that our caller doesn't loop forever 24678c2ecf20Sopenharmony_ci * on our EAGAINs. 24688c2ecf20Sopenharmony_ci */ 24698c2ecf20Sopenharmony_ci if (!extent_buffer_uptodate(tmp)) 24708c2ecf20Sopenharmony_ci ret = -EIO; 24718c2ecf20Sopenharmony_ci free_extent_buffer(tmp); 24728c2ecf20Sopenharmony_ci } else { 24738c2ecf20Sopenharmony_ci ret = PTR_ERR(tmp); 24748c2ecf20Sopenharmony_ci } 24758c2ecf20Sopenharmony_ci 24768c2ecf20Sopenharmony_ci btrfs_release_path(p); 24778c2ecf20Sopenharmony_ci return ret; 24788c2ecf20Sopenharmony_ci} 24798c2ecf20Sopenharmony_ci 24808c2ecf20Sopenharmony_ci/* 24818c2ecf20Sopenharmony_ci * helper function for btrfs_search_slot. This does all of the checks 24828c2ecf20Sopenharmony_ci * for node-level blocks and does any balancing required based on 24838c2ecf20Sopenharmony_ci * the ins_len. 24848c2ecf20Sopenharmony_ci * 24858c2ecf20Sopenharmony_ci * If no extra work was required, zero is returned. If we had to 24868c2ecf20Sopenharmony_ci * drop the path, -EAGAIN is returned and btrfs_search_slot must 24878c2ecf20Sopenharmony_ci * start over 24888c2ecf20Sopenharmony_ci */ 24898c2ecf20Sopenharmony_cistatic int 24908c2ecf20Sopenharmony_cisetup_nodes_for_search(struct btrfs_trans_handle *trans, 24918c2ecf20Sopenharmony_ci struct btrfs_root *root, struct btrfs_path *p, 24928c2ecf20Sopenharmony_ci struct extent_buffer *b, int level, int ins_len, 24938c2ecf20Sopenharmony_ci int *write_lock_level) 24948c2ecf20Sopenharmony_ci{ 24958c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 24968c2ecf20Sopenharmony_ci int ret; 24978c2ecf20Sopenharmony_ci 24988c2ecf20Sopenharmony_ci if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >= 24998c2ecf20Sopenharmony_ci BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) { 25008c2ecf20Sopenharmony_ci int sret; 25018c2ecf20Sopenharmony_ci 25028c2ecf20Sopenharmony_ci if (*write_lock_level < level + 1) { 25038c2ecf20Sopenharmony_ci *write_lock_level = level + 1; 25048c2ecf20Sopenharmony_ci btrfs_release_path(p); 25058c2ecf20Sopenharmony_ci goto again; 25068c2ecf20Sopenharmony_ci } 25078c2ecf20Sopenharmony_ci 25088c2ecf20Sopenharmony_ci btrfs_set_path_blocking(p); 25098c2ecf20Sopenharmony_ci reada_for_balance(fs_info, p, level); 25108c2ecf20Sopenharmony_ci sret = split_node(trans, root, p, level); 25118c2ecf20Sopenharmony_ci 25128c2ecf20Sopenharmony_ci BUG_ON(sret > 0); 25138c2ecf20Sopenharmony_ci if (sret) { 25148c2ecf20Sopenharmony_ci ret = sret; 25158c2ecf20Sopenharmony_ci goto done; 25168c2ecf20Sopenharmony_ci } 25178c2ecf20Sopenharmony_ci b = p->nodes[level]; 25188c2ecf20Sopenharmony_ci } else if (ins_len < 0 && btrfs_header_nritems(b) < 25198c2ecf20Sopenharmony_ci BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) { 25208c2ecf20Sopenharmony_ci int sret; 25218c2ecf20Sopenharmony_ci 25228c2ecf20Sopenharmony_ci if (*write_lock_level < level + 1) { 25238c2ecf20Sopenharmony_ci *write_lock_level = level + 1; 25248c2ecf20Sopenharmony_ci btrfs_release_path(p); 25258c2ecf20Sopenharmony_ci goto again; 25268c2ecf20Sopenharmony_ci } 25278c2ecf20Sopenharmony_ci 25288c2ecf20Sopenharmony_ci btrfs_set_path_blocking(p); 25298c2ecf20Sopenharmony_ci reada_for_balance(fs_info, p, level); 25308c2ecf20Sopenharmony_ci sret = balance_level(trans, root, p, level); 25318c2ecf20Sopenharmony_ci 25328c2ecf20Sopenharmony_ci if (sret) { 25338c2ecf20Sopenharmony_ci ret = sret; 25348c2ecf20Sopenharmony_ci goto done; 25358c2ecf20Sopenharmony_ci } 25368c2ecf20Sopenharmony_ci b = p->nodes[level]; 25378c2ecf20Sopenharmony_ci if (!b) { 25388c2ecf20Sopenharmony_ci btrfs_release_path(p); 25398c2ecf20Sopenharmony_ci goto again; 25408c2ecf20Sopenharmony_ci } 25418c2ecf20Sopenharmony_ci BUG_ON(btrfs_header_nritems(b) == 1); 25428c2ecf20Sopenharmony_ci } 25438c2ecf20Sopenharmony_ci return 0; 25448c2ecf20Sopenharmony_ci 25458c2ecf20Sopenharmony_ciagain: 25468c2ecf20Sopenharmony_ci ret = -EAGAIN; 25478c2ecf20Sopenharmony_cidone: 25488c2ecf20Sopenharmony_ci return ret; 25498c2ecf20Sopenharmony_ci} 25508c2ecf20Sopenharmony_ci 25518c2ecf20Sopenharmony_ciint btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path, 25528c2ecf20Sopenharmony_ci u64 iobjectid, u64 ioff, u8 key_type, 25538c2ecf20Sopenharmony_ci struct btrfs_key *found_key) 25548c2ecf20Sopenharmony_ci{ 25558c2ecf20Sopenharmony_ci int ret; 25568c2ecf20Sopenharmony_ci struct btrfs_key key; 25578c2ecf20Sopenharmony_ci struct extent_buffer *eb; 25588c2ecf20Sopenharmony_ci 25598c2ecf20Sopenharmony_ci ASSERT(path); 25608c2ecf20Sopenharmony_ci ASSERT(found_key); 25618c2ecf20Sopenharmony_ci 25628c2ecf20Sopenharmony_ci key.type = key_type; 25638c2ecf20Sopenharmony_ci key.objectid = iobjectid; 25648c2ecf20Sopenharmony_ci key.offset = ioff; 25658c2ecf20Sopenharmony_ci 25668c2ecf20Sopenharmony_ci ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0); 25678c2ecf20Sopenharmony_ci if (ret < 0) 25688c2ecf20Sopenharmony_ci return ret; 25698c2ecf20Sopenharmony_ci 25708c2ecf20Sopenharmony_ci eb = path->nodes[0]; 25718c2ecf20Sopenharmony_ci if (ret && path->slots[0] >= btrfs_header_nritems(eb)) { 25728c2ecf20Sopenharmony_ci ret = btrfs_next_leaf(fs_root, path); 25738c2ecf20Sopenharmony_ci if (ret) 25748c2ecf20Sopenharmony_ci return ret; 25758c2ecf20Sopenharmony_ci eb = path->nodes[0]; 25768c2ecf20Sopenharmony_ci } 25778c2ecf20Sopenharmony_ci 25788c2ecf20Sopenharmony_ci btrfs_item_key_to_cpu(eb, found_key, path->slots[0]); 25798c2ecf20Sopenharmony_ci if (found_key->type != key.type || 25808c2ecf20Sopenharmony_ci found_key->objectid != key.objectid) 25818c2ecf20Sopenharmony_ci return 1; 25828c2ecf20Sopenharmony_ci 25838c2ecf20Sopenharmony_ci return 0; 25848c2ecf20Sopenharmony_ci} 25858c2ecf20Sopenharmony_ci 25868c2ecf20Sopenharmony_cistatic struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root, 25878c2ecf20Sopenharmony_ci struct btrfs_path *p, 25888c2ecf20Sopenharmony_ci int write_lock_level) 25898c2ecf20Sopenharmony_ci{ 25908c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 25918c2ecf20Sopenharmony_ci struct extent_buffer *b; 25928c2ecf20Sopenharmony_ci int root_lock = 0; 25938c2ecf20Sopenharmony_ci int level = 0; 25948c2ecf20Sopenharmony_ci 25958c2ecf20Sopenharmony_ci if (p->search_commit_root) { 25968c2ecf20Sopenharmony_ci /* 25978c2ecf20Sopenharmony_ci * The commit roots are read only so we always do read locks, 25988c2ecf20Sopenharmony_ci * and we always must hold the commit_root_sem when doing 25998c2ecf20Sopenharmony_ci * searches on them, the only exception is send where we don't 26008c2ecf20Sopenharmony_ci * want to block transaction commits for a long time, so 26018c2ecf20Sopenharmony_ci * we need to clone the commit root in order to avoid races 26028c2ecf20Sopenharmony_ci * with transaction commits that create a snapshot of one of 26038c2ecf20Sopenharmony_ci * the roots used by a send operation. 26048c2ecf20Sopenharmony_ci */ 26058c2ecf20Sopenharmony_ci if (p->need_commit_sem) { 26068c2ecf20Sopenharmony_ci down_read(&fs_info->commit_root_sem); 26078c2ecf20Sopenharmony_ci b = btrfs_clone_extent_buffer(root->commit_root); 26088c2ecf20Sopenharmony_ci up_read(&fs_info->commit_root_sem); 26098c2ecf20Sopenharmony_ci if (!b) 26108c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 26118c2ecf20Sopenharmony_ci 26128c2ecf20Sopenharmony_ci } else { 26138c2ecf20Sopenharmony_ci b = root->commit_root; 26148c2ecf20Sopenharmony_ci atomic_inc(&b->refs); 26158c2ecf20Sopenharmony_ci } 26168c2ecf20Sopenharmony_ci level = btrfs_header_level(b); 26178c2ecf20Sopenharmony_ci /* 26188c2ecf20Sopenharmony_ci * Ensure that all callers have set skip_locking when 26198c2ecf20Sopenharmony_ci * p->search_commit_root = 1. 26208c2ecf20Sopenharmony_ci */ 26218c2ecf20Sopenharmony_ci ASSERT(p->skip_locking == 1); 26228c2ecf20Sopenharmony_ci 26238c2ecf20Sopenharmony_ci goto out; 26248c2ecf20Sopenharmony_ci } 26258c2ecf20Sopenharmony_ci 26268c2ecf20Sopenharmony_ci if (p->skip_locking) { 26278c2ecf20Sopenharmony_ci b = btrfs_root_node(root); 26288c2ecf20Sopenharmony_ci level = btrfs_header_level(b); 26298c2ecf20Sopenharmony_ci goto out; 26308c2ecf20Sopenharmony_ci } 26318c2ecf20Sopenharmony_ci 26328c2ecf20Sopenharmony_ci /* We try very hard to do read locks on the root */ 26338c2ecf20Sopenharmony_ci root_lock = BTRFS_READ_LOCK; 26348c2ecf20Sopenharmony_ci 26358c2ecf20Sopenharmony_ci /* 26368c2ecf20Sopenharmony_ci * If the level is set to maximum, we can skip trying to get the read 26378c2ecf20Sopenharmony_ci * lock. 26388c2ecf20Sopenharmony_ci */ 26398c2ecf20Sopenharmony_ci if (write_lock_level < BTRFS_MAX_LEVEL) { 26408c2ecf20Sopenharmony_ci /* 26418c2ecf20Sopenharmony_ci * We don't know the level of the root node until we actually 26428c2ecf20Sopenharmony_ci * have it read locked 26438c2ecf20Sopenharmony_ci */ 26448c2ecf20Sopenharmony_ci b = __btrfs_read_lock_root_node(root, p->recurse); 26458c2ecf20Sopenharmony_ci level = btrfs_header_level(b); 26468c2ecf20Sopenharmony_ci if (level > write_lock_level) 26478c2ecf20Sopenharmony_ci goto out; 26488c2ecf20Sopenharmony_ci 26498c2ecf20Sopenharmony_ci /* Whoops, must trade for write lock */ 26508c2ecf20Sopenharmony_ci btrfs_tree_read_unlock(b); 26518c2ecf20Sopenharmony_ci free_extent_buffer(b); 26528c2ecf20Sopenharmony_ci } 26538c2ecf20Sopenharmony_ci 26548c2ecf20Sopenharmony_ci b = btrfs_lock_root_node(root); 26558c2ecf20Sopenharmony_ci root_lock = BTRFS_WRITE_LOCK; 26568c2ecf20Sopenharmony_ci 26578c2ecf20Sopenharmony_ci /* The level might have changed, check again */ 26588c2ecf20Sopenharmony_ci level = btrfs_header_level(b); 26598c2ecf20Sopenharmony_ci 26608c2ecf20Sopenharmony_ciout: 26618c2ecf20Sopenharmony_ci /* 26628c2ecf20Sopenharmony_ci * The root may have failed to write out at some point, and thus is no 26638c2ecf20Sopenharmony_ci * longer valid, return an error in this case. 26648c2ecf20Sopenharmony_ci */ 26658c2ecf20Sopenharmony_ci if (!extent_buffer_uptodate(b)) { 26668c2ecf20Sopenharmony_ci if (root_lock) 26678c2ecf20Sopenharmony_ci btrfs_tree_unlock_rw(b, root_lock); 26688c2ecf20Sopenharmony_ci free_extent_buffer(b); 26698c2ecf20Sopenharmony_ci return ERR_PTR(-EIO); 26708c2ecf20Sopenharmony_ci } 26718c2ecf20Sopenharmony_ci 26728c2ecf20Sopenharmony_ci p->nodes[level] = b; 26738c2ecf20Sopenharmony_ci if (!p->skip_locking) 26748c2ecf20Sopenharmony_ci p->locks[level] = root_lock; 26758c2ecf20Sopenharmony_ci /* 26768c2ecf20Sopenharmony_ci * Callers are responsible for dropping b's references. 26778c2ecf20Sopenharmony_ci */ 26788c2ecf20Sopenharmony_ci return b; 26798c2ecf20Sopenharmony_ci} 26808c2ecf20Sopenharmony_ci 26818c2ecf20Sopenharmony_ci 26828c2ecf20Sopenharmony_ci/* 26838c2ecf20Sopenharmony_ci * btrfs_search_slot - look for a key in a tree and perform necessary 26848c2ecf20Sopenharmony_ci * modifications to preserve tree invariants. 26858c2ecf20Sopenharmony_ci * 26868c2ecf20Sopenharmony_ci * @trans: Handle of transaction, used when modifying the tree 26878c2ecf20Sopenharmony_ci * @p: Holds all btree nodes along the search path 26888c2ecf20Sopenharmony_ci * @root: The root node of the tree 26898c2ecf20Sopenharmony_ci * @key: The key we are looking for 26908c2ecf20Sopenharmony_ci * @ins_len: Indicates purpose of search, for inserts it is 1, for 26918c2ecf20Sopenharmony_ci * deletions it's -1. 0 for plain searches 26928c2ecf20Sopenharmony_ci * @cow: boolean should CoW operations be performed. Must always be 1 26938c2ecf20Sopenharmony_ci * when modifying the tree. 26948c2ecf20Sopenharmony_ci * 26958c2ecf20Sopenharmony_ci * If @ins_len > 0, nodes and leaves will be split as we walk down the tree. 26968c2ecf20Sopenharmony_ci * If @ins_len < 0, nodes will be merged as we walk down the tree (if possible) 26978c2ecf20Sopenharmony_ci * 26988c2ecf20Sopenharmony_ci * If @key is found, 0 is returned and you can find the item in the leaf level 26998c2ecf20Sopenharmony_ci * of the path (level 0) 27008c2ecf20Sopenharmony_ci * 27018c2ecf20Sopenharmony_ci * If @key isn't found, 1 is returned and the leaf level of the path (level 0) 27028c2ecf20Sopenharmony_ci * points to the slot where it should be inserted 27038c2ecf20Sopenharmony_ci * 27048c2ecf20Sopenharmony_ci * If an error is encountered while searching the tree a negative error number 27058c2ecf20Sopenharmony_ci * is returned 27068c2ecf20Sopenharmony_ci */ 27078c2ecf20Sopenharmony_ciint btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, 27088c2ecf20Sopenharmony_ci const struct btrfs_key *key, struct btrfs_path *p, 27098c2ecf20Sopenharmony_ci int ins_len, int cow) 27108c2ecf20Sopenharmony_ci{ 27118c2ecf20Sopenharmony_ci struct extent_buffer *b; 27128c2ecf20Sopenharmony_ci int slot; 27138c2ecf20Sopenharmony_ci int ret; 27148c2ecf20Sopenharmony_ci int err; 27158c2ecf20Sopenharmony_ci int level; 27168c2ecf20Sopenharmony_ci int lowest_unlock = 1; 27178c2ecf20Sopenharmony_ci /* everything at write_lock_level or lower must be write locked */ 27188c2ecf20Sopenharmony_ci int write_lock_level = 0; 27198c2ecf20Sopenharmony_ci u8 lowest_level = 0; 27208c2ecf20Sopenharmony_ci int min_write_lock_level; 27218c2ecf20Sopenharmony_ci int prev_cmp; 27228c2ecf20Sopenharmony_ci 27238c2ecf20Sopenharmony_ci lowest_level = p->lowest_level; 27248c2ecf20Sopenharmony_ci WARN_ON(lowest_level && ins_len > 0); 27258c2ecf20Sopenharmony_ci WARN_ON(p->nodes[0] != NULL); 27268c2ecf20Sopenharmony_ci BUG_ON(!cow && ins_len); 27278c2ecf20Sopenharmony_ci 27288c2ecf20Sopenharmony_ci if (ins_len < 0) { 27298c2ecf20Sopenharmony_ci lowest_unlock = 2; 27308c2ecf20Sopenharmony_ci 27318c2ecf20Sopenharmony_ci /* when we are removing items, we might have to go up to level 27328c2ecf20Sopenharmony_ci * two as we update tree pointers Make sure we keep write 27338c2ecf20Sopenharmony_ci * for those levels as well 27348c2ecf20Sopenharmony_ci */ 27358c2ecf20Sopenharmony_ci write_lock_level = 2; 27368c2ecf20Sopenharmony_ci } else if (ins_len > 0) { 27378c2ecf20Sopenharmony_ci /* 27388c2ecf20Sopenharmony_ci * for inserting items, make sure we have a write lock on 27398c2ecf20Sopenharmony_ci * level 1 so we can update keys 27408c2ecf20Sopenharmony_ci */ 27418c2ecf20Sopenharmony_ci write_lock_level = 1; 27428c2ecf20Sopenharmony_ci } 27438c2ecf20Sopenharmony_ci 27448c2ecf20Sopenharmony_ci if (!cow) 27458c2ecf20Sopenharmony_ci write_lock_level = -1; 27468c2ecf20Sopenharmony_ci 27478c2ecf20Sopenharmony_ci if (cow && (p->keep_locks || p->lowest_level)) 27488c2ecf20Sopenharmony_ci write_lock_level = BTRFS_MAX_LEVEL; 27498c2ecf20Sopenharmony_ci 27508c2ecf20Sopenharmony_ci min_write_lock_level = write_lock_level; 27518c2ecf20Sopenharmony_ci 27528c2ecf20Sopenharmony_ciagain: 27538c2ecf20Sopenharmony_ci prev_cmp = -1; 27548c2ecf20Sopenharmony_ci b = btrfs_search_slot_get_root(root, p, write_lock_level); 27558c2ecf20Sopenharmony_ci if (IS_ERR(b)) { 27568c2ecf20Sopenharmony_ci ret = PTR_ERR(b); 27578c2ecf20Sopenharmony_ci goto done; 27588c2ecf20Sopenharmony_ci } 27598c2ecf20Sopenharmony_ci 27608c2ecf20Sopenharmony_ci while (b) { 27618c2ecf20Sopenharmony_ci int dec = 0; 27628c2ecf20Sopenharmony_ci 27638c2ecf20Sopenharmony_ci level = btrfs_header_level(b); 27648c2ecf20Sopenharmony_ci 27658c2ecf20Sopenharmony_ci if (cow) { 27668c2ecf20Sopenharmony_ci bool last_level = (level == (BTRFS_MAX_LEVEL - 1)); 27678c2ecf20Sopenharmony_ci 27688c2ecf20Sopenharmony_ci /* 27698c2ecf20Sopenharmony_ci * if we don't really need to cow this block 27708c2ecf20Sopenharmony_ci * then we don't want to set the path blocking, 27718c2ecf20Sopenharmony_ci * so we test it here 27728c2ecf20Sopenharmony_ci */ 27738c2ecf20Sopenharmony_ci if (!should_cow_block(trans, root, b)) { 27748c2ecf20Sopenharmony_ci trans->dirty = true; 27758c2ecf20Sopenharmony_ci goto cow_done; 27768c2ecf20Sopenharmony_ci } 27778c2ecf20Sopenharmony_ci 27788c2ecf20Sopenharmony_ci /* 27798c2ecf20Sopenharmony_ci * must have write locks on this node and the 27808c2ecf20Sopenharmony_ci * parent 27818c2ecf20Sopenharmony_ci */ 27828c2ecf20Sopenharmony_ci if (level > write_lock_level || 27838c2ecf20Sopenharmony_ci (level + 1 > write_lock_level && 27848c2ecf20Sopenharmony_ci level + 1 < BTRFS_MAX_LEVEL && 27858c2ecf20Sopenharmony_ci p->nodes[level + 1])) { 27868c2ecf20Sopenharmony_ci write_lock_level = level + 1; 27878c2ecf20Sopenharmony_ci btrfs_release_path(p); 27888c2ecf20Sopenharmony_ci goto again; 27898c2ecf20Sopenharmony_ci } 27908c2ecf20Sopenharmony_ci 27918c2ecf20Sopenharmony_ci btrfs_set_path_blocking(p); 27928c2ecf20Sopenharmony_ci if (last_level) 27938c2ecf20Sopenharmony_ci err = btrfs_cow_block(trans, root, b, NULL, 0, 27948c2ecf20Sopenharmony_ci &b, 27958c2ecf20Sopenharmony_ci BTRFS_NESTING_COW); 27968c2ecf20Sopenharmony_ci else 27978c2ecf20Sopenharmony_ci err = btrfs_cow_block(trans, root, b, 27988c2ecf20Sopenharmony_ci p->nodes[level + 1], 27998c2ecf20Sopenharmony_ci p->slots[level + 1], &b, 28008c2ecf20Sopenharmony_ci BTRFS_NESTING_COW); 28018c2ecf20Sopenharmony_ci if (err) { 28028c2ecf20Sopenharmony_ci ret = err; 28038c2ecf20Sopenharmony_ci goto done; 28048c2ecf20Sopenharmony_ci } 28058c2ecf20Sopenharmony_ci } 28068c2ecf20Sopenharmony_cicow_done: 28078c2ecf20Sopenharmony_ci p->nodes[level] = b; 28088c2ecf20Sopenharmony_ci /* 28098c2ecf20Sopenharmony_ci * Leave path with blocking locks to avoid massive 28108c2ecf20Sopenharmony_ci * lock context switch, this is made on purpose. 28118c2ecf20Sopenharmony_ci */ 28128c2ecf20Sopenharmony_ci 28138c2ecf20Sopenharmony_ci /* 28148c2ecf20Sopenharmony_ci * we have a lock on b and as long as we aren't changing 28158c2ecf20Sopenharmony_ci * the tree, there is no way to for the items in b to change. 28168c2ecf20Sopenharmony_ci * It is safe to drop the lock on our parent before we 28178c2ecf20Sopenharmony_ci * go through the expensive btree search on b. 28188c2ecf20Sopenharmony_ci * 28198c2ecf20Sopenharmony_ci * If we're inserting or deleting (ins_len != 0), then we might 28208c2ecf20Sopenharmony_ci * be changing slot zero, which may require changing the parent. 28218c2ecf20Sopenharmony_ci * So, we can't drop the lock until after we know which slot 28228c2ecf20Sopenharmony_ci * we're operating on. 28238c2ecf20Sopenharmony_ci */ 28248c2ecf20Sopenharmony_ci if (!ins_len && !p->keep_locks) { 28258c2ecf20Sopenharmony_ci int u = level + 1; 28268c2ecf20Sopenharmony_ci 28278c2ecf20Sopenharmony_ci if (u < BTRFS_MAX_LEVEL && p->locks[u]) { 28288c2ecf20Sopenharmony_ci btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]); 28298c2ecf20Sopenharmony_ci p->locks[u] = 0; 28308c2ecf20Sopenharmony_ci } 28318c2ecf20Sopenharmony_ci } 28328c2ecf20Sopenharmony_ci 28338c2ecf20Sopenharmony_ci /* 28348c2ecf20Sopenharmony_ci * If btrfs_bin_search returns an exact match (prev_cmp == 0) 28358c2ecf20Sopenharmony_ci * we can safely assume the target key will always be in slot 0 28368c2ecf20Sopenharmony_ci * on lower levels due to the invariants BTRFS' btree provides, 28378c2ecf20Sopenharmony_ci * namely that a btrfs_key_ptr entry always points to the 28388c2ecf20Sopenharmony_ci * lowest key in the child node, thus we can skip searching 28398c2ecf20Sopenharmony_ci * lower levels 28408c2ecf20Sopenharmony_ci */ 28418c2ecf20Sopenharmony_ci if (prev_cmp == 0) { 28428c2ecf20Sopenharmony_ci slot = 0; 28438c2ecf20Sopenharmony_ci ret = 0; 28448c2ecf20Sopenharmony_ci } else { 28458c2ecf20Sopenharmony_ci ret = btrfs_bin_search(b, key, &slot); 28468c2ecf20Sopenharmony_ci prev_cmp = ret; 28478c2ecf20Sopenharmony_ci if (ret < 0) 28488c2ecf20Sopenharmony_ci goto done; 28498c2ecf20Sopenharmony_ci } 28508c2ecf20Sopenharmony_ci 28518c2ecf20Sopenharmony_ci if (level == 0) { 28528c2ecf20Sopenharmony_ci p->slots[level] = slot; 28538c2ecf20Sopenharmony_ci if (ins_len > 0 && 28548c2ecf20Sopenharmony_ci btrfs_leaf_free_space(b) < ins_len) { 28558c2ecf20Sopenharmony_ci if (write_lock_level < 1) { 28568c2ecf20Sopenharmony_ci write_lock_level = 1; 28578c2ecf20Sopenharmony_ci btrfs_release_path(p); 28588c2ecf20Sopenharmony_ci goto again; 28598c2ecf20Sopenharmony_ci } 28608c2ecf20Sopenharmony_ci 28618c2ecf20Sopenharmony_ci btrfs_set_path_blocking(p); 28628c2ecf20Sopenharmony_ci err = split_leaf(trans, root, key, 28638c2ecf20Sopenharmony_ci p, ins_len, ret == 0); 28648c2ecf20Sopenharmony_ci 28658c2ecf20Sopenharmony_ci BUG_ON(err > 0); 28668c2ecf20Sopenharmony_ci if (err) { 28678c2ecf20Sopenharmony_ci ret = err; 28688c2ecf20Sopenharmony_ci goto done; 28698c2ecf20Sopenharmony_ci } 28708c2ecf20Sopenharmony_ci } 28718c2ecf20Sopenharmony_ci if (!p->search_for_split) 28728c2ecf20Sopenharmony_ci unlock_up(p, level, lowest_unlock, 28738c2ecf20Sopenharmony_ci min_write_lock_level, NULL); 28748c2ecf20Sopenharmony_ci goto done; 28758c2ecf20Sopenharmony_ci } 28768c2ecf20Sopenharmony_ci if (ret && slot > 0) { 28778c2ecf20Sopenharmony_ci dec = 1; 28788c2ecf20Sopenharmony_ci slot--; 28798c2ecf20Sopenharmony_ci } 28808c2ecf20Sopenharmony_ci p->slots[level] = slot; 28818c2ecf20Sopenharmony_ci err = setup_nodes_for_search(trans, root, p, b, level, ins_len, 28828c2ecf20Sopenharmony_ci &write_lock_level); 28838c2ecf20Sopenharmony_ci if (err == -EAGAIN) 28848c2ecf20Sopenharmony_ci goto again; 28858c2ecf20Sopenharmony_ci if (err) { 28868c2ecf20Sopenharmony_ci ret = err; 28878c2ecf20Sopenharmony_ci goto done; 28888c2ecf20Sopenharmony_ci } 28898c2ecf20Sopenharmony_ci b = p->nodes[level]; 28908c2ecf20Sopenharmony_ci slot = p->slots[level]; 28918c2ecf20Sopenharmony_ci 28928c2ecf20Sopenharmony_ci /* 28938c2ecf20Sopenharmony_ci * Slot 0 is special, if we change the key we have to update 28948c2ecf20Sopenharmony_ci * the parent pointer which means we must have a write lock on 28958c2ecf20Sopenharmony_ci * the parent 28968c2ecf20Sopenharmony_ci */ 28978c2ecf20Sopenharmony_ci if (slot == 0 && ins_len && write_lock_level < level + 1) { 28988c2ecf20Sopenharmony_ci write_lock_level = level + 1; 28998c2ecf20Sopenharmony_ci btrfs_release_path(p); 29008c2ecf20Sopenharmony_ci goto again; 29018c2ecf20Sopenharmony_ci } 29028c2ecf20Sopenharmony_ci 29038c2ecf20Sopenharmony_ci unlock_up(p, level, lowest_unlock, min_write_lock_level, 29048c2ecf20Sopenharmony_ci &write_lock_level); 29058c2ecf20Sopenharmony_ci 29068c2ecf20Sopenharmony_ci if (level == lowest_level) { 29078c2ecf20Sopenharmony_ci if (dec) 29088c2ecf20Sopenharmony_ci p->slots[level]++; 29098c2ecf20Sopenharmony_ci goto done; 29108c2ecf20Sopenharmony_ci } 29118c2ecf20Sopenharmony_ci 29128c2ecf20Sopenharmony_ci err = read_block_for_search(root, p, &b, level, slot, key); 29138c2ecf20Sopenharmony_ci if (err == -EAGAIN) 29148c2ecf20Sopenharmony_ci goto again; 29158c2ecf20Sopenharmony_ci if (err) { 29168c2ecf20Sopenharmony_ci ret = err; 29178c2ecf20Sopenharmony_ci goto done; 29188c2ecf20Sopenharmony_ci } 29198c2ecf20Sopenharmony_ci 29208c2ecf20Sopenharmony_ci if (!p->skip_locking) { 29218c2ecf20Sopenharmony_ci level = btrfs_header_level(b); 29228c2ecf20Sopenharmony_ci if (level <= write_lock_level) { 29238c2ecf20Sopenharmony_ci if (!btrfs_try_tree_write_lock(b)) { 29248c2ecf20Sopenharmony_ci btrfs_set_path_blocking(p); 29258c2ecf20Sopenharmony_ci btrfs_tree_lock(b); 29268c2ecf20Sopenharmony_ci } 29278c2ecf20Sopenharmony_ci p->locks[level] = BTRFS_WRITE_LOCK; 29288c2ecf20Sopenharmony_ci } else { 29298c2ecf20Sopenharmony_ci if (!btrfs_tree_read_lock_atomic(b)) { 29308c2ecf20Sopenharmony_ci btrfs_set_path_blocking(p); 29318c2ecf20Sopenharmony_ci __btrfs_tree_read_lock(b, BTRFS_NESTING_NORMAL, 29328c2ecf20Sopenharmony_ci p->recurse); 29338c2ecf20Sopenharmony_ci } 29348c2ecf20Sopenharmony_ci p->locks[level] = BTRFS_READ_LOCK; 29358c2ecf20Sopenharmony_ci } 29368c2ecf20Sopenharmony_ci p->nodes[level] = b; 29378c2ecf20Sopenharmony_ci } 29388c2ecf20Sopenharmony_ci } 29398c2ecf20Sopenharmony_ci ret = 1; 29408c2ecf20Sopenharmony_cidone: 29418c2ecf20Sopenharmony_ci /* 29428c2ecf20Sopenharmony_ci * we don't really know what they plan on doing with the path 29438c2ecf20Sopenharmony_ci * from here on, so for now just mark it as blocking 29448c2ecf20Sopenharmony_ci */ 29458c2ecf20Sopenharmony_ci if (!p->leave_spinning) 29468c2ecf20Sopenharmony_ci btrfs_set_path_blocking(p); 29478c2ecf20Sopenharmony_ci if (ret < 0 && !p->skip_release_on_error) 29488c2ecf20Sopenharmony_ci btrfs_release_path(p); 29498c2ecf20Sopenharmony_ci return ret; 29508c2ecf20Sopenharmony_ci} 29518c2ecf20Sopenharmony_ci 29528c2ecf20Sopenharmony_ci/* 29538c2ecf20Sopenharmony_ci * Like btrfs_search_slot, this looks for a key in the given tree. It uses the 29548c2ecf20Sopenharmony_ci * current state of the tree together with the operations recorded in the tree 29558c2ecf20Sopenharmony_ci * modification log to search for the key in a previous version of this tree, as 29568c2ecf20Sopenharmony_ci * denoted by the time_seq parameter. 29578c2ecf20Sopenharmony_ci * 29588c2ecf20Sopenharmony_ci * Naturally, there is no support for insert, delete or cow operations. 29598c2ecf20Sopenharmony_ci * 29608c2ecf20Sopenharmony_ci * The resulting path and return value will be set up as if we called 29618c2ecf20Sopenharmony_ci * btrfs_search_slot at that point in time with ins_len and cow both set to 0. 29628c2ecf20Sopenharmony_ci */ 29638c2ecf20Sopenharmony_ciint btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key, 29648c2ecf20Sopenharmony_ci struct btrfs_path *p, u64 time_seq) 29658c2ecf20Sopenharmony_ci{ 29668c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 29678c2ecf20Sopenharmony_ci struct extent_buffer *b; 29688c2ecf20Sopenharmony_ci int slot; 29698c2ecf20Sopenharmony_ci int ret; 29708c2ecf20Sopenharmony_ci int err; 29718c2ecf20Sopenharmony_ci int level; 29728c2ecf20Sopenharmony_ci int lowest_unlock = 1; 29738c2ecf20Sopenharmony_ci u8 lowest_level = 0; 29748c2ecf20Sopenharmony_ci 29758c2ecf20Sopenharmony_ci lowest_level = p->lowest_level; 29768c2ecf20Sopenharmony_ci WARN_ON(p->nodes[0] != NULL); 29778c2ecf20Sopenharmony_ci 29788c2ecf20Sopenharmony_ci if (p->search_commit_root) { 29798c2ecf20Sopenharmony_ci BUG_ON(time_seq); 29808c2ecf20Sopenharmony_ci return btrfs_search_slot(NULL, root, key, p, 0, 0); 29818c2ecf20Sopenharmony_ci } 29828c2ecf20Sopenharmony_ci 29838c2ecf20Sopenharmony_ciagain: 29848c2ecf20Sopenharmony_ci b = get_old_root(root, time_seq); 29858c2ecf20Sopenharmony_ci if (!b) { 29868c2ecf20Sopenharmony_ci ret = -EIO; 29878c2ecf20Sopenharmony_ci goto done; 29888c2ecf20Sopenharmony_ci } 29898c2ecf20Sopenharmony_ci level = btrfs_header_level(b); 29908c2ecf20Sopenharmony_ci p->locks[level] = BTRFS_READ_LOCK; 29918c2ecf20Sopenharmony_ci 29928c2ecf20Sopenharmony_ci while (b) { 29938c2ecf20Sopenharmony_ci int dec = 0; 29948c2ecf20Sopenharmony_ci 29958c2ecf20Sopenharmony_ci level = btrfs_header_level(b); 29968c2ecf20Sopenharmony_ci p->nodes[level] = b; 29978c2ecf20Sopenharmony_ci 29988c2ecf20Sopenharmony_ci /* 29998c2ecf20Sopenharmony_ci * we have a lock on b and as long as we aren't changing 30008c2ecf20Sopenharmony_ci * the tree, there is no way to for the items in b to change. 30018c2ecf20Sopenharmony_ci * It is safe to drop the lock on our parent before we 30028c2ecf20Sopenharmony_ci * go through the expensive btree search on b. 30038c2ecf20Sopenharmony_ci */ 30048c2ecf20Sopenharmony_ci btrfs_unlock_up_safe(p, level + 1); 30058c2ecf20Sopenharmony_ci 30068c2ecf20Sopenharmony_ci ret = btrfs_bin_search(b, key, &slot); 30078c2ecf20Sopenharmony_ci if (ret < 0) 30088c2ecf20Sopenharmony_ci goto done; 30098c2ecf20Sopenharmony_ci 30108c2ecf20Sopenharmony_ci if (level == 0) { 30118c2ecf20Sopenharmony_ci p->slots[level] = slot; 30128c2ecf20Sopenharmony_ci unlock_up(p, level, lowest_unlock, 0, NULL); 30138c2ecf20Sopenharmony_ci goto done; 30148c2ecf20Sopenharmony_ci } 30158c2ecf20Sopenharmony_ci 30168c2ecf20Sopenharmony_ci if (ret && slot > 0) { 30178c2ecf20Sopenharmony_ci dec = 1; 30188c2ecf20Sopenharmony_ci slot--; 30198c2ecf20Sopenharmony_ci } 30208c2ecf20Sopenharmony_ci p->slots[level] = slot; 30218c2ecf20Sopenharmony_ci unlock_up(p, level, lowest_unlock, 0, NULL); 30228c2ecf20Sopenharmony_ci 30238c2ecf20Sopenharmony_ci if (level == lowest_level) { 30248c2ecf20Sopenharmony_ci if (dec) 30258c2ecf20Sopenharmony_ci p->slots[level]++; 30268c2ecf20Sopenharmony_ci goto done; 30278c2ecf20Sopenharmony_ci } 30288c2ecf20Sopenharmony_ci 30298c2ecf20Sopenharmony_ci err = read_block_for_search(root, p, &b, level, slot, key); 30308c2ecf20Sopenharmony_ci if (err == -EAGAIN) 30318c2ecf20Sopenharmony_ci goto again; 30328c2ecf20Sopenharmony_ci if (err) { 30338c2ecf20Sopenharmony_ci ret = err; 30348c2ecf20Sopenharmony_ci goto done; 30358c2ecf20Sopenharmony_ci } 30368c2ecf20Sopenharmony_ci 30378c2ecf20Sopenharmony_ci level = btrfs_header_level(b); 30388c2ecf20Sopenharmony_ci if (!btrfs_tree_read_lock_atomic(b)) { 30398c2ecf20Sopenharmony_ci btrfs_set_path_blocking(p); 30408c2ecf20Sopenharmony_ci btrfs_tree_read_lock(b); 30418c2ecf20Sopenharmony_ci } 30428c2ecf20Sopenharmony_ci b = tree_mod_log_rewind(fs_info, p, b, time_seq); 30438c2ecf20Sopenharmony_ci if (!b) { 30448c2ecf20Sopenharmony_ci ret = -ENOMEM; 30458c2ecf20Sopenharmony_ci goto done; 30468c2ecf20Sopenharmony_ci } 30478c2ecf20Sopenharmony_ci p->locks[level] = BTRFS_READ_LOCK; 30488c2ecf20Sopenharmony_ci p->nodes[level] = b; 30498c2ecf20Sopenharmony_ci } 30508c2ecf20Sopenharmony_ci ret = 1; 30518c2ecf20Sopenharmony_cidone: 30528c2ecf20Sopenharmony_ci if (!p->leave_spinning) 30538c2ecf20Sopenharmony_ci btrfs_set_path_blocking(p); 30548c2ecf20Sopenharmony_ci if (ret < 0) 30558c2ecf20Sopenharmony_ci btrfs_release_path(p); 30568c2ecf20Sopenharmony_ci 30578c2ecf20Sopenharmony_ci return ret; 30588c2ecf20Sopenharmony_ci} 30598c2ecf20Sopenharmony_ci 30608c2ecf20Sopenharmony_ci/* 30618c2ecf20Sopenharmony_ci * helper to use instead of search slot if no exact match is needed but 30628c2ecf20Sopenharmony_ci * instead the next or previous item should be returned. 30638c2ecf20Sopenharmony_ci * When find_higher is true, the next higher item is returned, the next lower 30648c2ecf20Sopenharmony_ci * otherwise. 30658c2ecf20Sopenharmony_ci * When return_any and find_higher are both true, and no higher item is found, 30668c2ecf20Sopenharmony_ci * return the next lower instead. 30678c2ecf20Sopenharmony_ci * When return_any is true and find_higher is false, and no lower item is found, 30688c2ecf20Sopenharmony_ci * return the next higher instead. 30698c2ecf20Sopenharmony_ci * It returns 0 if any item is found, 1 if none is found (tree empty), and 30708c2ecf20Sopenharmony_ci * < 0 on error 30718c2ecf20Sopenharmony_ci */ 30728c2ecf20Sopenharmony_ciint btrfs_search_slot_for_read(struct btrfs_root *root, 30738c2ecf20Sopenharmony_ci const struct btrfs_key *key, 30748c2ecf20Sopenharmony_ci struct btrfs_path *p, int find_higher, 30758c2ecf20Sopenharmony_ci int return_any) 30768c2ecf20Sopenharmony_ci{ 30778c2ecf20Sopenharmony_ci int ret; 30788c2ecf20Sopenharmony_ci struct extent_buffer *leaf; 30798c2ecf20Sopenharmony_ci 30808c2ecf20Sopenharmony_ciagain: 30818c2ecf20Sopenharmony_ci ret = btrfs_search_slot(NULL, root, key, p, 0, 0); 30828c2ecf20Sopenharmony_ci if (ret <= 0) 30838c2ecf20Sopenharmony_ci return ret; 30848c2ecf20Sopenharmony_ci /* 30858c2ecf20Sopenharmony_ci * a return value of 1 means the path is at the position where the 30868c2ecf20Sopenharmony_ci * item should be inserted. Normally this is the next bigger item, 30878c2ecf20Sopenharmony_ci * but in case the previous item is the last in a leaf, path points 30888c2ecf20Sopenharmony_ci * to the first free slot in the previous leaf, i.e. at an invalid 30898c2ecf20Sopenharmony_ci * item. 30908c2ecf20Sopenharmony_ci */ 30918c2ecf20Sopenharmony_ci leaf = p->nodes[0]; 30928c2ecf20Sopenharmony_ci 30938c2ecf20Sopenharmony_ci if (find_higher) { 30948c2ecf20Sopenharmony_ci if (p->slots[0] >= btrfs_header_nritems(leaf)) { 30958c2ecf20Sopenharmony_ci ret = btrfs_next_leaf(root, p); 30968c2ecf20Sopenharmony_ci if (ret <= 0) 30978c2ecf20Sopenharmony_ci return ret; 30988c2ecf20Sopenharmony_ci if (!return_any) 30998c2ecf20Sopenharmony_ci return 1; 31008c2ecf20Sopenharmony_ci /* 31018c2ecf20Sopenharmony_ci * no higher item found, return the next 31028c2ecf20Sopenharmony_ci * lower instead 31038c2ecf20Sopenharmony_ci */ 31048c2ecf20Sopenharmony_ci return_any = 0; 31058c2ecf20Sopenharmony_ci find_higher = 0; 31068c2ecf20Sopenharmony_ci btrfs_release_path(p); 31078c2ecf20Sopenharmony_ci goto again; 31088c2ecf20Sopenharmony_ci } 31098c2ecf20Sopenharmony_ci } else { 31108c2ecf20Sopenharmony_ci if (p->slots[0] == 0) { 31118c2ecf20Sopenharmony_ci ret = btrfs_prev_leaf(root, p); 31128c2ecf20Sopenharmony_ci if (ret < 0) 31138c2ecf20Sopenharmony_ci return ret; 31148c2ecf20Sopenharmony_ci if (!ret) { 31158c2ecf20Sopenharmony_ci leaf = p->nodes[0]; 31168c2ecf20Sopenharmony_ci if (p->slots[0] == btrfs_header_nritems(leaf)) 31178c2ecf20Sopenharmony_ci p->slots[0]--; 31188c2ecf20Sopenharmony_ci return 0; 31198c2ecf20Sopenharmony_ci } 31208c2ecf20Sopenharmony_ci if (!return_any) 31218c2ecf20Sopenharmony_ci return 1; 31228c2ecf20Sopenharmony_ci /* 31238c2ecf20Sopenharmony_ci * no lower item found, return the next 31248c2ecf20Sopenharmony_ci * higher instead 31258c2ecf20Sopenharmony_ci */ 31268c2ecf20Sopenharmony_ci return_any = 0; 31278c2ecf20Sopenharmony_ci find_higher = 1; 31288c2ecf20Sopenharmony_ci btrfs_release_path(p); 31298c2ecf20Sopenharmony_ci goto again; 31308c2ecf20Sopenharmony_ci } else { 31318c2ecf20Sopenharmony_ci --p->slots[0]; 31328c2ecf20Sopenharmony_ci } 31338c2ecf20Sopenharmony_ci } 31348c2ecf20Sopenharmony_ci return 0; 31358c2ecf20Sopenharmony_ci} 31368c2ecf20Sopenharmony_ci 31378c2ecf20Sopenharmony_ci/* 31388c2ecf20Sopenharmony_ci * adjust the pointers going up the tree, starting at level 31398c2ecf20Sopenharmony_ci * making sure the right key of each node is points to 'key'. 31408c2ecf20Sopenharmony_ci * This is used after shifting pointers to the left, so it stops 31418c2ecf20Sopenharmony_ci * fixing up pointers when a given leaf/node is not in slot 0 of the 31428c2ecf20Sopenharmony_ci * higher levels 31438c2ecf20Sopenharmony_ci * 31448c2ecf20Sopenharmony_ci */ 31458c2ecf20Sopenharmony_cistatic void fixup_low_keys(struct btrfs_path *path, 31468c2ecf20Sopenharmony_ci struct btrfs_disk_key *key, int level) 31478c2ecf20Sopenharmony_ci{ 31488c2ecf20Sopenharmony_ci int i; 31498c2ecf20Sopenharmony_ci struct extent_buffer *t; 31508c2ecf20Sopenharmony_ci int ret; 31518c2ecf20Sopenharmony_ci 31528c2ecf20Sopenharmony_ci for (i = level; i < BTRFS_MAX_LEVEL; i++) { 31538c2ecf20Sopenharmony_ci int tslot = path->slots[i]; 31548c2ecf20Sopenharmony_ci 31558c2ecf20Sopenharmony_ci if (!path->nodes[i]) 31568c2ecf20Sopenharmony_ci break; 31578c2ecf20Sopenharmony_ci t = path->nodes[i]; 31588c2ecf20Sopenharmony_ci ret = tree_mod_log_insert_key(t, tslot, MOD_LOG_KEY_REPLACE, 31598c2ecf20Sopenharmony_ci GFP_ATOMIC); 31608c2ecf20Sopenharmony_ci BUG_ON(ret < 0); 31618c2ecf20Sopenharmony_ci btrfs_set_node_key(t, key, tslot); 31628c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(path->nodes[i]); 31638c2ecf20Sopenharmony_ci if (tslot != 0) 31648c2ecf20Sopenharmony_ci break; 31658c2ecf20Sopenharmony_ci } 31668c2ecf20Sopenharmony_ci} 31678c2ecf20Sopenharmony_ci 31688c2ecf20Sopenharmony_ci/* 31698c2ecf20Sopenharmony_ci * update item key. 31708c2ecf20Sopenharmony_ci * 31718c2ecf20Sopenharmony_ci * This function isn't completely safe. It's the caller's responsibility 31728c2ecf20Sopenharmony_ci * that the new key won't break the order 31738c2ecf20Sopenharmony_ci */ 31748c2ecf20Sopenharmony_civoid btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info, 31758c2ecf20Sopenharmony_ci struct btrfs_path *path, 31768c2ecf20Sopenharmony_ci const struct btrfs_key *new_key) 31778c2ecf20Sopenharmony_ci{ 31788c2ecf20Sopenharmony_ci struct btrfs_disk_key disk_key; 31798c2ecf20Sopenharmony_ci struct extent_buffer *eb; 31808c2ecf20Sopenharmony_ci int slot; 31818c2ecf20Sopenharmony_ci 31828c2ecf20Sopenharmony_ci eb = path->nodes[0]; 31838c2ecf20Sopenharmony_ci slot = path->slots[0]; 31848c2ecf20Sopenharmony_ci if (slot > 0) { 31858c2ecf20Sopenharmony_ci btrfs_item_key(eb, &disk_key, slot - 1); 31868c2ecf20Sopenharmony_ci if (unlikely(comp_keys(&disk_key, new_key) >= 0)) { 31878c2ecf20Sopenharmony_ci btrfs_crit(fs_info, 31888c2ecf20Sopenharmony_ci "slot %u key (%llu %u %llu) new key (%llu %u %llu)", 31898c2ecf20Sopenharmony_ci slot, btrfs_disk_key_objectid(&disk_key), 31908c2ecf20Sopenharmony_ci btrfs_disk_key_type(&disk_key), 31918c2ecf20Sopenharmony_ci btrfs_disk_key_offset(&disk_key), 31928c2ecf20Sopenharmony_ci new_key->objectid, new_key->type, 31938c2ecf20Sopenharmony_ci new_key->offset); 31948c2ecf20Sopenharmony_ci btrfs_print_leaf(eb); 31958c2ecf20Sopenharmony_ci BUG(); 31968c2ecf20Sopenharmony_ci } 31978c2ecf20Sopenharmony_ci } 31988c2ecf20Sopenharmony_ci if (slot < btrfs_header_nritems(eb) - 1) { 31998c2ecf20Sopenharmony_ci btrfs_item_key(eb, &disk_key, slot + 1); 32008c2ecf20Sopenharmony_ci if (unlikely(comp_keys(&disk_key, new_key) <= 0)) { 32018c2ecf20Sopenharmony_ci btrfs_crit(fs_info, 32028c2ecf20Sopenharmony_ci "slot %u key (%llu %u %llu) new key (%llu %u %llu)", 32038c2ecf20Sopenharmony_ci slot, btrfs_disk_key_objectid(&disk_key), 32048c2ecf20Sopenharmony_ci btrfs_disk_key_type(&disk_key), 32058c2ecf20Sopenharmony_ci btrfs_disk_key_offset(&disk_key), 32068c2ecf20Sopenharmony_ci new_key->objectid, new_key->type, 32078c2ecf20Sopenharmony_ci new_key->offset); 32088c2ecf20Sopenharmony_ci btrfs_print_leaf(eb); 32098c2ecf20Sopenharmony_ci BUG(); 32108c2ecf20Sopenharmony_ci } 32118c2ecf20Sopenharmony_ci } 32128c2ecf20Sopenharmony_ci 32138c2ecf20Sopenharmony_ci btrfs_cpu_key_to_disk(&disk_key, new_key); 32148c2ecf20Sopenharmony_ci btrfs_set_item_key(eb, &disk_key, slot); 32158c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(eb); 32168c2ecf20Sopenharmony_ci if (slot == 0) 32178c2ecf20Sopenharmony_ci fixup_low_keys(path, &disk_key, 1); 32188c2ecf20Sopenharmony_ci} 32198c2ecf20Sopenharmony_ci 32208c2ecf20Sopenharmony_ci/* 32218c2ecf20Sopenharmony_ci * Check key order of two sibling extent buffers. 32228c2ecf20Sopenharmony_ci * 32238c2ecf20Sopenharmony_ci * Return true if something is wrong. 32248c2ecf20Sopenharmony_ci * Return false if everything is fine. 32258c2ecf20Sopenharmony_ci * 32268c2ecf20Sopenharmony_ci * Tree-checker only works inside one tree block, thus the following 32278c2ecf20Sopenharmony_ci * corruption can not be detected by tree-checker: 32288c2ecf20Sopenharmony_ci * 32298c2ecf20Sopenharmony_ci * Leaf @left | Leaf @right 32308c2ecf20Sopenharmony_ci * -------------------------------------------------------------- 32318c2ecf20Sopenharmony_ci * | 1 | 2 | 3 | 4 | 5 | f6 | | 7 | 8 | 32328c2ecf20Sopenharmony_ci * 32338c2ecf20Sopenharmony_ci * Key f6 in leaf @left itself is valid, but not valid when the next 32348c2ecf20Sopenharmony_ci * key in leaf @right is 7. 32358c2ecf20Sopenharmony_ci * This can only be checked at tree block merge time. 32368c2ecf20Sopenharmony_ci * And since tree checker has ensured all key order in each tree block 32378c2ecf20Sopenharmony_ci * is correct, we only need to bother the last key of @left and the first 32388c2ecf20Sopenharmony_ci * key of @right. 32398c2ecf20Sopenharmony_ci */ 32408c2ecf20Sopenharmony_cistatic bool check_sibling_keys(struct extent_buffer *left, 32418c2ecf20Sopenharmony_ci struct extent_buffer *right) 32428c2ecf20Sopenharmony_ci{ 32438c2ecf20Sopenharmony_ci struct btrfs_key left_last; 32448c2ecf20Sopenharmony_ci struct btrfs_key right_first; 32458c2ecf20Sopenharmony_ci int level = btrfs_header_level(left); 32468c2ecf20Sopenharmony_ci int nr_left = btrfs_header_nritems(left); 32478c2ecf20Sopenharmony_ci int nr_right = btrfs_header_nritems(right); 32488c2ecf20Sopenharmony_ci 32498c2ecf20Sopenharmony_ci /* No key to check in one of the tree blocks */ 32508c2ecf20Sopenharmony_ci if (!nr_left || !nr_right) 32518c2ecf20Sopenharmony_ci return false; 32528c2ecf20Sopenharmony_ci 32538c2ecf20Sopenharmony_ci if (level) { 32548c2ecf20Sopenharmony_ci btrfs_node_key_to_cpu(left, &left_last, nr_left - 1); 32558c2ecf20Sopenharmony_ci btrfs_node_key_to_cpu(right, &right_first, 0); 32568c2ecf20Sopenharmony_ci } else { 32578c2ecf20Sopenharmony_ci btrfs_item_key_to_cpu(left, &left_last, nr_left - 1); 32588c2ecf20Sopenharmony_ci btrfs_item_key_to_cpu(right, &right_first, 0); 32598c2ecf20Sopenharmony_ci } 32608c2ecf20Sopenharmony_ci 32618c2ecf20Sopenharmony_ci if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) { 32628c2ecf20Sopenharmony_ci btrfs_crit(left->fs_info, 32638c2ecf20Sopenharmony_ci"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)", 32648c2ecf20Sopenharmony_ci left_last.objectid, left_last.type, 32658c2ecf20Sopenharmony_ci left_last.offset, right_first.objectid, 32668c2ecf20Sopenharmony_ci right_first.type, right_first.offset); 32678c2ecf20Sopenharmony_ci return true; 32688c2ecf20Sopenharmony_ci } 32698c2ecf20Sopenharmony_ci return false; 32708c2ecf20Sopenharmony_ci} 32718c2ecf20Sopenharmony_ci 32728c2ecf20Sopenharmony_ci/* 32738c2ecf20Sopenharmony_ci * try to push data from one node into the next node left in the 32748c2ecf20Sopenharmony_ci * tree. 32758c2ecf20Sopenharmony_ci * 32768c2ecf20Sopenharmony_ci * returns 0 if some ptrs were pushed left, < 0 if there was some horrible 32778c2ecf20Sopenharmony_ci * error, and > 0 if there was no room in the left hand block. 32788c2ecf20Sopenharmony_ci */ 32798c2ecf20Sopenharmony_cistatic int push_node_left(struct btrfs_trans_handle *trans, 32808c2ecf20Sopenharmony_ci struct extent_buffer *dst, 32818c2ecf20Sopenharmony_ci struct extent_buffer *src, int empty) 32828c2ecf20Sopenharmony_ci{ 32838c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = trans->fs_info; 32848c2ecf20Sopenharmony_ci int push_items = 0; 32858c2ecf20Sopenharmony_ci int src_nritems; 32868c2ecf20Sopenharmony_ci int dst_nritems; 32878c2ecf20Sopenharmony_ci int ret = 0; 32888c2ecf20Sopenharmony_ci 32898c2ecf20Sopenharmony_ci src_nritems = btrfs_header_nritems(src); 32908c2ecf20Sopenharmony_ci dst_nritems = btrfs_header_nritems(dst); 32918c2ecf20Sopenharmony_ci push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems; 32928c2ecf20Sopenharmony_ci WARN_ON(btrfs_header_generation(src) != trans->transid); 32938c2ecf20Sopenharmony_ci WARN_ON(btrfs_header_generation(dst) != trans->transid); 32948c2ecf20Sopenharmony_ci 32958c2ecf20Sopenharmony_ci if (!empty && src_nritems <= 8) 32968c2ecf20Sopenharmony_ci return 1; 32978c2ecf20Sopenharmony_ci 32988c2ecf20Sopenharmony_ci if (push_items <= 0) 32998c2ecf20Sopenharmony_ci return 1; 33008c2ecf20Sopenharmony_ci 33018c2ecf20Sopenharmony_ci if (empty) { 33028c2ecf20Sopenharmony_ci push_items = min(src_nritems, push_items); 33038c2ecf20Sopenharmony_ci if (push_items < src_nritems) { 33048c2ecf20Sopenharmony_ci /* leave at least 8 pointers in the node if 33058c2ecf20Sopenharmony_ci * we aren't going to empty it 33068c2ecf20Sopenharmony_ci */ 33078c2ecf20Sopenharmony_ci if (src_nritems - push_items < 8) { 33088c2ecf20Sopenharmony_ci if (push_items <= 8) 33098c2ecf20Sopenharmony_ci return 1; 33108c2ecf20Sopenharmony_ci push_items -= 8; 33118c2ecf20Sopenharmony_ci } 33128c2ecf20Sopenharmony_ci } 33138c2ecf20Sopenharmony_ci } else 33148c2ecf20Sopenharmony_ci push_items = min(src_nritems - 8, push_items); 33158c2ecf20Sopenharmony_ci 33168c2ecf20Sopenharmony_ci /* dst is the left eb, src is the middle eb */ 33178c2ecf20Sopenharmony_ci if (check_sibling_keys(dst, src)) { 33188c2ecf20Sopenharmony_ci ret = -EUCLEAN; 33198c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 33208c2ecf20Sopenharmony_ci return ret; 33218c2ecf20Sopenharmony_ci } 33228c2ecf20Sopenharmony_ci ret = tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items); 33238c2ecf20Sopenharmony_ci if (ret) { 33248c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 33258c2ecf20Sopenharmony_ci return ret; 33268c2ecf20Sopenharmony_ci } 33278c2ecf20Sopenharmony_ci copy_extent_buffer(dst, src, 33288c2ecf20Sopenharmony_ci btrfs_node_key_ptr_offset(dst_nritems), 33298c2ecf20Sopenharmony_ci btrfs_node_key_ptr_offset(0), 33308c2ecf20Sopenharmony_ci push_items * sizeof(struct btrfs_key_ptr)); 33318c2ecf20Sopenharmony_ci 33328c2ecf20Sopenharmony_ci if (push_items < src_nritems) { 33338c2ecf20Sopenharmony_ci /* 33348c2ecf20Sopenharmony_ci * Don't call tree_mod_log_insert_move here, key removal was 33358c2ecf20Sopenharmony_ci * already fully logged by tree_mod_log_eb_copy above. 33368c2ecf20Sopenharmony_ci */ 33378c2ecf20Sopenharmony_ci memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), 33388c2ecf20Sopenharmony_ci btrfs_node_key_ptr_offset(push_items), 33398c2ecf20Sopenharmony_ci (src_nritems - push_items) * 33408c2ecf20Sopenharmony_ci sizeof(struct btrfs_key_ptr)); 33418c2ecf20Sopenharmony_ci } 33428c2ecf20Sopenharmony_ci btrfs_set_header_nritems(src, src_nritems - push_items); 33438c2ecf20Sopenharmony_ci btrfs_set_header_nritems(dst, dst_nritems + push_items); 33448c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(src); 33458c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(dst); 33468c2ecf20Sopenharmony_ci 33478c2ecf20Sopenharmony_ci return ret; 33488c2ecf20Sopenharmony_ci} 33498c2ecf20Sopenharmony_ci 33508c2ecf20Sopenharmony_ci/* 33518c2ecf20Sopenharmony_ci * try to push data from one node into the next node right in the 33528c2ecf20Sopenharmony_ci * tree. 33538c2ecf20Sopenharmony_ci * 33548c2ecf20Sopenharmony_ci * returns 0 if some ptrs were pushed, < 0 if there was some horrible 33558c2ecf20Sopenharmony_ci * error, and > 0 if there was no room in the right hand block. 33568c2ecf20Sopenharmony_ci * 33578c2ecf20Sopenharmony_ci * this will only push up to 1/2 the contents of the left node over 33588c2ecf20Sopenharmony_ci */ 33598c2ecf20Sopenharmony_cistatic int balance_node_right(struct btrfs_trans_handle *trans, 33608c2ecf20Sopenharmony_ci struct extent_buffer *dst, 33618c2ecf20Sopenharmony_ci struct extent_buffer *src) 33628c2ecf20Sopenharmony_ci{ 33638c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = trans->fs_info; 33648c2ecf20Sopenharmony_ci int push_items = 0; 33658c2ecf20Sopenharmony_ci int max_push; 33668c2ecf20Sopenharmony_ci int src_nritems; 33678c2ecf20Sopenharmony_ci int dst_nritems; 33688c2ecf20Sopenharmony_ci int ret = 0; 33698c2ecf20Sopenharmony_ci 33708c2ecf20Sopenharmony_ci WARN_ON(btrfs_header_generation(src) != trans->transid); 33718c2ecf20Sopenharmony_ci WARN_ON(btrfs_header_generation(dst) != trans->transid); 33728c2ecf20Sopenharmony_ci 33738c2ecf20Sopenharmony_ci src_nritems = btrfs_header_nritems(src); 33748c2ecf20Sopenharmony_ci dst_nritems = btrfs_header_nritems(dst); 33758c2ecf20Sopenharmony_ci push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems; 33768c2ecf20Sopenharmony_ci if (push_items <= 0) 33778c2ecf20Sopenharmony_ci return 1; 33788c2ecf20Sopenharmony_ci 33798c2ecf20Sopenharmony_ci if (src_nritems < 4) 33808c2ecf20Sopenharmony_ci return 1; 33818c2ecf20Sopenharmony_ci 33828c2ecf20Sopenharmony_ci max_push = src_nritems / 2 + 1; 33838c2ecf20Sopenharmony_ci /* don't try to empty the node */ 33848c2ecf20Sopenharmony_ci if (max_push >= src_nritems) 33858c2ecf20Sopenharmony_ci return 1; 33868c2ecf20Sopenharmony_ci 33878c2ecf20Sopenharmony_ci if (max_push < push_items) 33888c2ecf20Sopenharmony_ci push_items = max_push; 33898c2ecf20Sopenharmony_ci 33908c2ecf20Sopenharmony_ci /* dst is the right eb, src is the middle eb */ 33918c2ecf20Sopenharmony_ci if (check_sibling_keys(src, dst)) { 33928c2ecf20Sopenharmony_ci ret = -EUCLEAN; 33938c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 33948c2ecf20Sopenharmony_ci return ret; 33958c2ecf20Sopenharmony_ci } 33968c2ecf20Sopenharmony_ci ret = tree_mod_log_insert_move(dst, push_items, 0, dst_nritems); 33978c2ecf20Sopenharmony_ci BUG_ON(ret < 0); 33988c2ecf20Sopenharmony_ci memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), 33998c2ecf20Sopenharmony_ci btrfs_node_key_ptr_offset(0), 34008c2ecf20Sopenharmony_ci (dst_nritems) * 34018c2ecf20Sopenharmony_ci sizeof(struct btrfs_key_ptr)); 34028c2ecf20Sopenharmony_ci 34038c2ecf20Sopenharmony_ci ret = tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items, 34048c2ecf20Sopenharmony_ci push_items); 34058c2ecf20Sopenharmony_ci if (ret) { 34068c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 34078c2ecf20Sopenharmony_ci return ret; 34088c2ecf20Sopenharmony_ci } 34098c2ecf20Sopenharmony_ci copy_extent_buffer(dst, src, 34108c2ecf20Sopenharmony_ci btrfs_node_key_ptr_offset(0), 34118c2ecf20Sopenharmony_ci btrfs_node_key_ptr_offset(src_nritems - push_items), 34128c2ecf20Sopenharmony_ci push_items * sizeof(struct btrfs_key_ptr)); 34138c2ecf20Sopenharmony_ci 34148c2ecf20Sopenharmony_ci btrfs_set_header_nritems(src, src_nritems - push_items); 34158c2ecf20Sopenharmony_ci btrfs_set_header_nritems(dst, dst_nritems + push_items); 34168c2ecf20Sopenharmony_ci 34178c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(src); 34188c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(dst); 34198c2ecf20Sopenharmony_ci 34208c2ecf20Sopenharmony_ci return ret; 34218c2ecf20Sopenharmony_ci} 34228c2ecf20Sopenharmony_ci 34238c2ecf20Sopenharmony_ci/* 34248c2ecf20Sopenharmony_ci * helper function to insert a new root level in the tree. 34258c2ecf20Sopenharmony_ci * A new node is allocated, and a single item is inserted to 34268c2ecf20Sopenharmony_ci * point to the existing root 34278c2ecf20Sopenharmony_ci * 34288c2ecf20Sopenharmony_ci * returns zero on success or < 0 on failure. 34298c2ecf20Sopenharmony_ci */ 34308c2ecf20Sopenharmony_cistatic noinline int insert_new_root(struct btrfs_trans_handle *trans, 34318c2ecf20Sopenharmony_ci struct btrfs_root *root, 34328c2ecf20Sopenharmony_ci struct btrfs_path *path, int level) 34338c2ecf20Sopenharmony_ci{ 34348c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 34358c2ecf20Sopenharmony_ci u64 lower_gen; 34368c2ecf20Sopenharmony_ci struct extent_buffer *lower; 34378c2ecf20Sopenharmony_ci struct extent_buffer *c; 34388c2ecf20Sopenharmony_ci struct extent_buffer *old; 34398c2ecf20Sopenharmony_ci struct btrfs_disk_key lower_key; 34408c2ecf20Sopenharmony_ci int ret; 34418c2ecf20Sopenharmony_ci 34428c2ecf20Sopenharmony_ci BUG_ON(path->nodes[level]); 34438c2ecf20Sopenharmony_ci BUG_ON(path->nodes[level-1] != root->node); 34448c2ecf20Sopenharmony_ci 34458c2ecf20Sopenharmony_ci lower = path->nodes[level-1]; 34468c2ecf20Sopenharmony_ci if (level == 1) 34478c2ecf20Sopenharmony_ci btrfs_item_key(lower, &lower_key, 0); 34488c2ecf20Sopenharmony_ci else 34498c2ecf20Sopenharmony_ci btrfs_node_key(lower, &lower_key, 0); 34508c2ecf20Sopenharmony_ci 34518c2ecf20Sopenharmony_ci c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level, 34528c2ecf20Sopenharmony_ci root->node->start, 0, 34538c2ecf20Sopenharmony_ci BTRFS_NESTING_NEW_ROOT); 34548c2ecf20Sopenharmony_ci if (IS_ERR(c)) 34558c2ecf20Sopenharmony_ci return PTR_ERR(c); 34568c2ecf20Sopenharmony_ci 34578c2ecf20Sopenharmony_ci root_add_used(root, fs_info->nodesize); 34588c2ecf20Sopenharmony_ci 34598c2ecf20Sopenharmony_ci btrfs_set_header_nritems(c, 1); 34608c2ecf20Sopenharmony_ci btrfs_set_node_key(c, &lower_key, 0); 34618c2ecf20Sopenharmony_ci btrfs_set_node_blockptr(c, 0, lower->start); 34628c2ecf20Sopenharmony_ci lower_gen = btrfs_header_generation(lower); 34638c2ecf20Sopenharmony_ci WARN_ON(lower_gen != trans->transid); 34648c2ecf20Sopenharmony_ci 34658c2ecf20Sopenharmony_ci btrfs_set_node_ptr_generation(c, 0, lower_gen); 34668c2ecf20Sopenharmony_ci 34678c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(c); 34688c2ecf20Sopenharmony_ci 34698c2ecf20Sopenharmony_ci old = root->node; 34708c2ecf20Sopenharmony_ci ret = tree_mod_log_insert_root(root->node, c, 0); 34718c2ecf20Sopenharmony_ci BUG_ON(ret < 0); 34728c2ecf20Sopenharmony_ci rcu_assign_pointer(root->node, c); 34738c2ecf20Sopenharmony_ci 34748c2ecf20Sopenharmony_ci /* the super has an extra ref to root->node */ 34758c2ecf20Sopenharmony_ci free_extent_buffer(old); 34768c2ecf20Sopenharmony_ci 34778c2ecf20Sopenharmony_ci add_root_to_dirty_list(root); 34788c2ecf20Sopenharmony_ci atomic_inc(&c->refs); 34798c2ecf20Sopenharmony_ci path->nodes[level] = c; 34808c2ecf20Sopenharmony_ci path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 34818c2ecf20Sopenharmony_ci path->slots[level] = 0; 34828c2ecf20Sopenharmony_ci return 0; 34838c2ecf20Sopenharmony_ci} 34848c2ecf20Sopenharmony_ci 34858c2ecf20Sopenharmony_ci/* 34868c2ecf20Sopenharmony_ci * worker function to insert a single pointer in a node. 34878c2ecf20Sopenharmony_ci * the node should have enough room for the pointer already 34888c2ecf20Sopenharmony_ci * 34898c2ecf20Sopenharmony_ci * slot and level indicate where you want the key to go, and 34908c2ecf20Sopenharmony_ci * blocknr is the block the key points to. 34918c2ecf20Sopenharmony_ci */ 34928c2ecf20Sopenharmony_cistatic void insert_ptr(struct btrfs_trans_handle *trans, 34938c2ecf20Sopenharmony_ci struct btrfs_path *path, 34948c2ecf20Sopenharmony_ci struct btrfs_disk_key *key, u64 bytenr, 34958c2ecf20Sopenharmony_ci int slot, int level) 34968c2ecf20Sopenharmony_ci{ 34978c2ecf20Sopenharmony_ci struct extent_buffer *lower; 34988c2ecf20Sopenharmony_ci int nritems; 34998c2ecf20Sopenharmony_ci int ret; 35008c2ecf20Sopenharmony_ci 35018c2ecf20Sopenharmony_ci BUG_ON(!path->nodes[level]); 35028c2ecf20Sopenharmony_ci btrfs_assert_tree_locked(path->nodes[level]); 35038c2ecf20Sopenharmony_ci lower = path->nodes[level]; 35048c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(lower); 35058c2ecf20Sopenharmony_ci BUG_ON(slot > nritems); 35068c2ecf20Sopenharmony_ci BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info)); 35078c2ecf20Sopenharmony_ci if (slot != nritems) { 35088c2ecf20Sopenharmony_ci if (level) { 35098c2ecf20Sopenharmony_ci ret = tree_mod_log_insert_move(lower, slot + 1, slot, 35108c2ecf20Sopenharmony_ci nritems - slot); 35118c2ecf20Sopenharmony_ci BUG_ON(ret < 0); 35128c2ecf20Sopenharmony_ci } 35138c2ecf20Sopenharmony_ci memmove_extent_buffer(lower, 35148c2ecf20Sopenharmony_ci btrfs_node_key_ptr_offset(slot + 1), 35158c2ecf20Sopenharmony_ci btrfs_node_key_ptr_offset(slot), 35168c2ecf20Sopenharmony_ci (nritems - slot) * sizeof(struct btrfs_key_ptr)); 35178c2ecf20Sopenharmony_ci } 35188c2ecf20Sopenharmony_ci if (level) { 35198c2ecf20Sopenharmony_ci ret = tree_mod_log_insert_key(lower, slot, MOD_LOG_KEY_ADD, 35208c2ecf20Sopenharmony_ci GFP_NOFS); 35218c2ecf20Sopenharmony_ci BUG_ON(ret < 0); 35228c2ecf20Sopenharmony_ci } 35238c2ecf20Sopenharmony_ci btrfs_set_node_key(lower, key, slot); 35248c2ecf20Sopenharmony_ci btrfs_set_node_blockptr(lower, slot, bytenr); 35258c2ecf20Sopenharmony_ci WARN_ON(trans->transid == 0); 35268c2ecf20Sopenharmony_ci btrfs_set_node_ptr_generation(lower, slot, trans->transid); 35278c2ecf20Sopenharmony_ci btrfs_set_header_nritems(lower, nritems + 1); 35288c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(lower); 35298c2ecf20Sopenharmony_ci} 35308c2ecf20Sopenharmony_ci 35318c2ecf20Sopenharmony_ci/* 35328c2ecf20Sopenharmony_ci * split the node at the specified level in path in two. 35338c2ecf20Sopenharmony_ci * The path is corrected to point to the appropriate node after the split 35348c2ecf20Sopenharmony_ci * 35358c2ecf20Sopenharmony_ci * Before splitting this tries to make some room in the node by pushing 35368c2ecf20Sopenharmony_ci * left and right, if either one works, it returns right away. 35378c2ecf20Sopenharmony_ci * 35388c2ecf20Sopenharmony_ci * returns 0 on success and < 0 on failure 35398c2ecf20Sopenharmony_ci */ 35408c2ecf20Sopenharmony_cistatic noinline int split_node(struct btrfs_trans_handle *trans, 35418c2ecf20Sopenharmony_ci struct btrfs_root *root, 35428c2ecf20Sopenharmony_ci struct btrfs_path *path, int level) 35438c2ecf20Sopenharmony_ci{ 35448c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 35458c2ecf20Sopenharmony_ci struct extent_buffer *c; 35468c2ecf20Sopenharmony_ci struct extent_buffer *split; 35478c2ecf20Sopenharmony_ci struct btrfs_disk_key disk_key; 35488c2ecf20Sopenharmony_ci int mid; 35498c2ecf20Sopenharmony_ci int ret; 35508c2ecf20Sopenharmony_ci u32 c_nritems; 35518c2ecf20Sopenharmony_ci 35528c2ecf20Sopenharmony_ci c = path->nodes[level]; 35538c2ecf20Sopenharmony_ci WARN_ON(btrfs_header_generation(c) != trans->transid); 35548c2ecf20Sopenharmony_ci if (c == root->node) { 35558c2ecf20Sopenharmony_ci /* 35568c2ecf20Sopenharmony_ci * trying to split the root, lets make a new one 35578c2ecf20Sopenharmony_ci * 35588c2ecf20Sopenharmony_ci * tree mod log: We don't log_removal old root in 35598c2ecf20Sopenharmony_ci * insert_new_root, because that root buffer will be kept as a 35608c2ecf20Sopenharmony_ci * normal node. We are going to log removal of half of the 35618c2ecf20Sopenharmony_ci * elements below with tree_mod_log_eb_copy. We're holding a 35628c2ecf20Sopenharmony_ci * tree lock on the buffer, which is why we cannot race with 35638c2ecf20Sopenharmony_ci * other tree_mod_log users. 35648c2ecf20Sopenharmony_ci */ 35658c2ecf20Sopenharmony_ci ret = insert_new_root(trans, root, path, level + 1); 35668c2ecf20Sopenharmony_ci if (ret) 35678c2ecf20Sopenharmony_ci return ret; 35688c2ecf20Sopenharmony_ci } else { 35698c2ecf20Sopenharmony_ci ret = push_nodes_for_insert(trans, root, path, level); 35708c2ecf20Sopenharmony_ci c = path->nodes[level]; 35718c2ecf20Sopenharmony_ci if (!ret && btrfs_header_nritems(c) < 35728c2ecf20Sopenharmony_ci BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) 35738c2ecf20Sopenharmony_ci return 0; 35748c2ecf20Sopenharmony_ci if (ret < 0) 35758c2ecf20Sopenharmony_ci return ret; 35768c2ecf20Sopenharmony_ci } 35778c2ecf20Sopenharmony_ci 35788c2ecf20Sopenharmony_ci c_nritems = btrfs_header_nritems(c); 35798c2ecf20Sopenharmony_ci mid = (c_nritems + 1) / 2; 35808c2ecf20Sopenharmony_ci btrfs_node_key(c, &disk_key, mid); 35818c2ecf20Sopenharmony_ci 35828c2ecf20Sopenharmony_ci split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level, 35838c2ecf20Sopenharmony_ci c->start, 0, BTRFS_NESTING_SPLIT); 35848c2ecf20Sopenharmony_ci if (IS_ERR(split)) 35858c2ecf20Sopenharmony_ci return PTR_ERR(split); 35868c2ecf20Sopenharmony_ci 35878c2ecf20Sopenharmony_ci root_add_used(root, fs_info->nodesize); 35888c2ecf20Sopenharmony_ci ASSERT(btrfs_header_level(c) == level); 35898c2ecf20Sopenharmony_ci 35908c2ecf20Sopenharmony_ci ret = tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid); 35918c2ecf20Sopenharmony_ci if (ret) { 35928c2ecf20Sopenharmony_ci btrfs_tree_unlock(split); 35938c2ecf20Sopenharmony_ci free_extent_buffer(split); 35948c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 35958c2ecf20Sopenharmony_ci return ret; 35968c2ecf20Sopenharmony_ci } 35978c2ecf20Sopenharmony_ci copy_extent_buffer(split, c, 35988c2ecf20Sopenharmony_ci btrfs_node_key_ptr_offset(0), 35998c2ecf20Sopenharmony_ci btrfs_node_key_ptr_offset(mid), 36008c2ecf20Sopenharmony_ci (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); 36018c2ecf20Sopenharmony_ci btrfs_set_header_nritems(split, c_nritems - mid); 36028c2ecf20Sopenharmony_ci btrfs_set_header_nritems(c, mid); 36038c2ecf20Sopenharmony_ci ret = 0; 36048c2ecf20Sopenharmony_ci 36058c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(c); 36068c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(split); 36078c2ecf20Sopenharmony_ci 36088c2ecf20Sopenharmony_ci insert_ptr(trans, path, &disk_key, split->start, 36098c2ecf20Sopenharmony_ci path->slots[level + 1] + 1, level + 1); 36108c2ecf20Sopenharmony_ci 36118c2ecf20Sopenharmony_ci if (path->slots[level] >= mid) { 36128c2ecf20Sopenharmony_ci path->slots[level] -= mid; 36138c2ecf20Sopenharmony_ci btrfs_tree_unlock(c); 36148c2ecf20Sopenharmony_ci free_extent_buffer(c); 36158c2ecf20Sopenharmony_ci path->nodes[level] = split; 36168c2ecf20Sopenharmony_ci path->slots[level + 1] += 1; 36178c2ecf20Sopenharmony_ci } else { 36188c2ecf20Sopenharmony_ci btrfs_tree_unlock(split); 36198c2ecf20Sopenharmony_ci free_extent_buffer(split); 36208c2ecf20Sopenharmony_ci } 36218c2ecf20Sopenharmony_ci return ret; 36228c2ecf20Sopenharmony_ci} 36238c2ecf20Sopenharmony_ci 36248c2ecf20Sopenharmony_ci/* 36258c2ecf20Sopenharmony_ci * how many bytes are required to store the items in a leaf. start 36268c2ecf20Sopenharmony_ci * and nr indicate which items in the leaf to check. This totals up the 36278c2ecf20Sopenharmony_ci * space used both by the item structs and the item data 36288c2ecf20Sopenharmony_ci */ 36298c2ecf20Sopenharmony_cistatic int leaf_space_used(struct extent_buffer *l, int start, int nr) 36308c2ecf20Sopenharmony_ci{ 36318c2ecf20Sopenharmony_ci struct btrfs_item *start_item; 36328c2ecf20Sopenharmony_ci struct btrfs_item *end_item; 36338c2ecf20Sopenharmony_ci int data_len; 36348c2ecf20Sopenharmony_ci int nritems = btrfs_header_nritems(l); 36358c2ecf20Sopenharmony_ci int end = min(nritems, start + nr) - 1; 36368c2ecf20Sopenharmony_ci 36378c2ecf20Sopenharmony_ci if (!nr) 36388c2ecf20Sopenharmony_ci return 0; 36398c2ecf20Sopenharmony_ci start_item = btrfs_item_nr(start); 36408c2ecf20Sopenharmony_ci end_item = btrfs_item_nr(end); 36418c2ecf20Sopenharmony_ci data_len = btrfs_item_offset(l, start_item) + 36428c2ecf20Sopenharmony_ci btrfs_item_size(l, start_item); 36438c2ecf20Sopenharmony_ci data_len = data_len - btrfs_item_offset(l, end_item); 36448c2ecf20Sopenharmony_ci data_len += sizeof(struct btrfs_item) * nr; 36458c2ecf20Sopenharmony_ci WARN_ON(data_len < 0); 36468c2ecf20Sopenharmony_ci return data_len; 36478c2ecf20Sopenharmony_ci} 36488c2ecf20Sopenharmony_ci 36498c2ecf20Sopenharmony_ci/* 36508c2ecf20Sopenharmony_ci * The space between the end of the leaf items and 36518c2ecf20Sopenharmony_ci * the start of the leaf data. IOW, how much room 36528c2ecf20Sopenharmony_ci * the leaf has left for both items and data 36538c2ecf20Sopenharmony_ci */ 36548c2ecf20Sopenharmony_cinoinline int btrfs_leaf_free_space(struct extent_buffer *leaf) 36558c2ecf20Sopenharmony_ci{ 36568c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = leaf->fs_info; 36578c2ecf20Sopenharmony_ci int nritems = btrfs_header_nritems(leaf); 36588c2ecf20Sopenharmony_ci int ret; 36598c2ecf20Sopenharmony_ci 36608c2ecf20Sopenharmony_ci ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems); 36618c2ecf20Sopenharmony_ci if (ret < 0) { 36628c2ecf20Sopenharmony_ci btrfs_crit(fs_info, 36638c2ecf20Sopenharmony_ci "leaf free space ret %d, leaf data size %lu, used %d nritems %d", 36648c2ecf20Sopenharmony_ci ret, 36658c2ecf20Sopenharmony_ci (unsigned long) BTRFS_LEAF_DATA_SIZE(fs_info), 36668c2ecf20Sopenharmony_ci leaf_space_used(leaf, 0, nritems), nritems); 36678c2ecf20Sopenharmony_ci } 36688c2ecf20Sopenharmony_ci return ret; 36698c2ecf20Sopenharmony_ci} 36708c2ecf20Sopenharmony_ci 36718c2ecf20Sopenharmony_ci/* 36728c2ecf20Sopenharmony_ci * min slot controls the lowest index we're willing to push to the 36738c2ecf20Sopenharmony_ci * right. We'll push up to and including min_slot, but no lower 36748c2ecf20Sopenharmony_ci */ 36758c2ecf20Sopenharmony_cistatic noinline int __push_leaf_right(struct btrfs_path *path, 36768c2ecf20Sopenharmony_ci int data_size, int empty, 36778c2ecf20Sopenharmony_ci struct extent_buffer *right, 36788c2ecf20Sopenharmony_ci int free_space, u32 left_nritems, 36798c2ecf20Sopenharmony_ci u32 min_slot) 36808c2ecf20Sopenharmony_ci{ 36818c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = right->fs_info; 36828c2ecf20Sopenharmony_ci struct extent_buffer *left = path->nodes[0]; 36838c2ecf20Sopenharmony_ci struct extent_buffer *upper = path->nodes[1]; 36848c2ecf20Sopenharmony_ci struct btrfs_map_token token; 36858c2ecf20Sopenharmony_ci struct btrfs_disk_key disk_key; 36868c2ecf20Sopenharmony_ci int slot; 36878c2ecf20Sopenharmony_ci u32 i; 36888c2ecf20Sopenharmony_ci int push_space = 0; 36898c2ecf20Sopenharmony_ci int push_items = 0; 36908c2ecf20Sopenharmony_ci struct btrfs_item *item; 36918c2ecf20Sopenharmony_ci u32 nr; 36928c2ecf20Sopenharmony_ci u32 right_nritems; 36938c2ecf20Sopenharmony_ci u32 data_end; 36948c2ecf20Sopenharmony_ci u32 this_item_size; 36958c2ecf20Sopenharmony_ci 36968c2ecf20Sopenharmony_ci if (empty) 36978c2ecf20Sopenharmony_ci nr = 0; 36988c2ecf20Sopenharmony_ci else 36998c2ecf20Sopenharmony_ci nr = max_t(u32, 1, min_slot); 37008c2ecf20Sopenharmony_ci 37018c2ecf20Sopenharmony_ci if (path->slots[0] >= left_nritems) 37028c2ecf20Sopenharmony_ci push_space += data_size; 37038c2ecf20Sopenharmony_ci 37048c2ecf20Sopenharmony_ci slot = path->slots[1]; 37058c2ecf20Sopenharmony_ci i = left_nritems - 1; 37068c2ecf20Sopenharmony_ci while (i >= nr) { 37078c2ecf20Sopenharmony_ci item = btrfs_item_nr(i); 37088c2ecf20Sopenharmony_ci 37098c2ecf20Sopenharmony_ci if (!empty && push_items > 0) { 37108c2ecf20Sopenharmony_ci if (path->slots[0] > i) 37118c2ecf20Sopenharmony_ci break; 37128c2ecf20Sopenharmony_ci if (path->slots[0] == i) { 37138c2ecf20Sopenharmony_ci int space = btrfs_leaf_free_space(left); 37148c2ecf20Sopenharmony_ci 37158c2ecf20Sopenharmony_ci if (space + push_space * 2 > free_space) 37168c2ecf20Sopenharmony_ci break; 37178c2ecf20Sopenharmony_ci } 37188c2ecf20Sopenharmony_ci } 37198c2ecf20Sopenharmony_ci 37208c2ecf20Sopenharmony_ci if (path->slots[0] == i) 37218c2ecf20Sopenharmony_ci push_space += data_size; 37228c2ecf20Sopenharmony_ci 37238c2ecf20Sopenharmony_ci this_item_size = btrfs_item_size(left, item); 37248c2ecf20Sopenharmony_ci if (this_item_size + sizeof(*item) + push_space > free_space) 37258c2ecf20Sopenharmony_ci break; 37268c2ecf20Sopenharmony_ci 37278c2ecf20Sopenharmony_ci push_items++; 37288c2ecf20Sopenharmony_ci push_space += this_item_size + sizeof(*item); 37298c2ecf20Sopenharmony_ci if (i == 0) 37308c2ecf20Sopenharmony_ci break; 37318c2ecf20Sopenharmony_ci i--; 37328c2ecf20Sopenharmony_ci } 37338c2ecf20Sopenharmony_ci 37348c2ecf20Sopenharmony_ci if (push_items == 0) 37358c2ecf20Sopenharmony_ci goto out_unlock; 37368c2ecf20Sopenharmony_ci 37378c2ecf20Sopenharmony_ci WARN_ON(!empty && push_items == left_nritems); 37388c2ecf20Sopenharmony_ci 37398c2ecf20Sopenharmony_ci /* push left to right */ 37408c2ecf20Sopenharmony_ci right_nritems = btrfs_header_nritems(right); 37418c2ecf20Sopenharmony_ci 37428c2ecf20Sopenharmony_ci push_space = btrfs_item_end_nr(left, left_nritems - push_items); 37438c2ecf20Sopenharmony_ci push_space -= leaf_data_end(left); 37448c2ecf20Sopenharmony_ci 37458c2ecf20Sopenharmony_ci /* make room in the right data area */ 37468c2ecf20Sopenharmony_ci data_end = leaf_data_end(right); 37478c2ecf20Sopenharmony_ci memmove_extent_buffer(right, 37488c2ecf20Sopenharmony_ci BTRFS_LEAF_DATA_OFFSET + data_end - push_space, 37498c2ecf20Sopenharmony_ci BTRFS_LEAF_DATA_OFFSET + data_end, 37508c2ecf20Sopenharmony_ci BTRFS_LEAF_DATA_SIZE(fs_info) - data_end); 37518c2ecf20Sopenharmony_ci 37528c2ecf20Sopenharmony_ci /* copy from the left data area */ 37538c2ecf20Sopenharmony_ci copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET + 37548c2ecf20Sopenharmony_ci BTRFS_LEAF_DATA_SIZE(fs_info) - push_space, 37558c2ecf20Sopenharmony_ci BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left), 37568c2ecf20Sopenharmony_ci push_space); 37578c2ecf20Sopenharmony_ci 37588c2ecf20Sopenharmony_ci memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), 37598c2ecf20Sopenharmony_ci btrfs_item_nr_offset(0), 37608c2ecf20Sopenharmony_ci right_nritems * sizeof(struct btrfs_item)); 37618c2ecf20Sopenharmony_ci 37628c2ecf20Sopenharmony_ci /* copy the items from left to right */ 37638c2ecf20Sopenharmony_ci copy_extent_buffer(right, left, btrfs_item_nr_offset(0), 37648c2ecf20Sopenharmony_ci btrfs_item_nr_offset(left_nritems - push_items), 37658c2ecf20Sopenharmony_ci push_items * sizeof(struct btrfs_item)); 37668c2ecf20Sopenharmony_ci 37678c2ecf20Sopenharmony_ci /* update the item pointers */ 37688c2ecf20Sopenharmony_ci btrfs_init_map_token(&token, right); 37698c2ecf20Sopenharmony_ci right_nritems += push_items; 37708c2ecf20Sopenharmony_ci btrfs_set_header_nritems(right, right_nritems); 37718c2ecf20Sopenharmony_ci push_space = BTRFS_LEAF_DATA_SIZE(fs_info); 37728c2ecf20Sopenharmony_ci for (i = 0; i < right_nritems; i++) { 37738c2ecf20Sopenharmony_ci item = btrfs_item_nr(i); 37748c2ecf20Sopenharmony_ci push_space -= btrfs_token_item_size(&token, item); 37758c2ecf20Sopenharmony_ci btrfs_set_token_item_offset(&token, item, push_space); 37768c2ecf20Sopenharmony_ci } 37778c2ecf20Sopenharmony_ci 37788c2ecf20Sopenharmony_ci left_nritems -= push_items; 37798c2ecf20Sopenharmony_ci btrfs_set_header_nritems(left, left_nritems); 37808c2ecf20Sopenharmony_ci 37818c2ecf20Sopenharmony_ci if (left_nritems) 37828c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(left); 37838c2ecf20Sopenharmony_ci else 37848c2ecf20Sopenharmony_ci btrfs_clean_tree_block(left); 37858c2ecf20Sopenharmony_ci 37868c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(right); 37878c2ecf20Sopenharmony_ci 37888c2ecf20Sopenharmony_ci btrfs_item_key(right, &disk_key, 0); 37898c2ecf20Sopenharmony_ci btrfs_set_node_key(upper, &disk_key, slot + 1); 37908c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(upper); 37918c2ecf20Sopenharmony_ci 37928c2ecf20Sopenharmony_ci /* then fixup the leaf pointer in the path */ 37938c2ecf20Sopenharmony_ci if (path->slots[0] >= left_nritems) { 37948c2ecf20Sopenharmony_ci path->slots[0] -= left_nritems; 37958c2ecf20Sopenharmony_ci if (btrfs_header_nritems(path->nodes[0]) == 0) 37968c2ecf20Sopenharmony_ci btrfs_clean_tree_block(path->nodes[0]); 37978c2ecf20Sopenharmony_ci btrfs_tree_unlock(path->nodes[0]); 37988c2ecf20Sopenharmony_ci free_extent_buffer(path->nodes[0]); 37998c2ecf20Sopenharmony_ci path->nodes[0] = right; 38008c2ecf20Sopenharmony_ci path->slots[1] += 1; 38018c2ecf20Sopenharmony_ci } else { 38028c2ecf20Sopenharmony_ci btrfs_tree_unlock(right); 38038c2ecf20Sopenharmony_ci free_extent_buffer(right); 38048c2ecf20Sopenharmony_ci } 38058c2ecf20Sopenharmony_ci return 0; 38068c2ecf20Sopenharmony_ci 38078c2ecf20Sopenharmony_ciout_unlock: 38088c2ecf20Sopenharmony_ci btrfs_tree_unlock(right); 38098c2ecf20Sopenharmony_ci free_extent_buffer(right); 38108c2ecf20Sopenharmony_ci return 1; 38118c2ecf20Sopenharmony_ci} 38128c2ecf20Sopenharmony_ci 38138c2ecf20Sopenharmony_ci/* 38148c2ecf20Sopenharmony_ci * push some data in the path leaf to the right, trying to free up at 38158c2ecf20Sopenharmony_ci * least data_size bytes. returns zero if the push worked, nonzero otherwise 38168c2ecf20Sopenharmony_ci * 38178c2ecf20Sopenharmony_ci * returns 1 if the push failed because the other node didn't have enough 38188c2ecf20Sopenharmony_ci * room, 0 if everything worked out and < 0 if there were major errors. 38198c2ecf20Sopenharmony_ci * 38208c2ecf20Sopenharmony_ci * this will push starting from min_slot to the end of the leaf. It won't 38218c2ecf20Sopenharmony_ci * push any slot lower than min_slot 38228c2ecf20Sopenharmony_ci */ 38238c2ecf20Sopenharmony_cistatic int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root 38248c2ecf20Sopenharmony_ci *root, struct btrfs_path *path, 38258c2ecf20Sopenharmony_ci int min_data_size, int data_size, 38268c2ecf20Sopenharmony_ci int empty, u32 min_slot) 38278c2ecf20Sopenharmony_ci{ 38288c2ecf20Sopenharmony_ci struct extent_buffer *left = path->nodes[0]; 38298c2ecf20Sopenharmony_ci struct extent_buffer *right; 38308c2ecf20Sopenharmony_ci struct extent_buffer *upper; 38318c2ecf20Sopenharmony_ci int slot; 38328c2ecf20Sopenharmony_ci int free_space; 38338c2ecf20Sopenharmony_ci u32 left_nritems; 38348c2ecf20Sopenharmony_ci int ret; 38358c2ecf20Sopenharmony_ci 38368c2ecf20Sopenharmony_ci if (!path->nodes[1]) 38378c2ecf20Sopenharmony_ci return 1; 38388c2ecf20Sopenharmony_ci 38398c2ecf20Sopenharmony_ci slot = path->slots[1]; 38408c2ecf20Sopenharmony_ci upper = path->nodes[1]; 38418c2ecf20Sopenharmony_ci if (slot >= btrfs_header_nritems(upper) - 1) 38428c2ecf20Sopenharmony_ci return 1; 38438c2ecf20Sopenharmony_ci 38448c2ecf20Sopenharmony_ci btrfs_assert_tree_locked(path->nodes[1]); 38458c2ecf20Sopenharmony_ci 38468c2ecf20Sopenharmony_ci right = btrfs_read_node_slot(upper, slot + 1); 38478c2ecf20Sopenharmony_ci /* 38488c2ecf20Sopenharmony_ci * slot + 1 is not valid or we fail to read the right node, 38498c2ecf20Sopenharmony_ci * no big deal, just return. 38508c2ecf20Sopenharmony_ci */ 38518c2ecf20Sopenharmony_ci if (IS_ERR(right)) 38528c2ecf20Sopenharmony_ci return 1; 38538c2ecf20Sopenharmony_ci 38548c2ecf20Sopenharmony_ci __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT); 38558c2ecf20Sopenharmony_ci btrfs_set_lock_blocking_write(right); 38568c2ecf20Sopenharmony_ci 38578c2ecf20Sopenharmony_ci free_space = btrfs_leaf_free_space(right); 38588c2ecf20Sopenharmony_ci if (free_space < data_size) 38598c2ecf20Sopenharmony_ci goto out_unlock; 38608c2ecf20Sopenharmony_ci 38618c2ecf20Sopenharmony_ci /* cow and double check */ 38628c2ecf20Sopenharmony_ci ret = btrfs_cow_block(trans, root, right, upper, 38638c2ecf20Sopenharmony_ci slot + 1, &right, BTRFS_NESTING_RIGHT_COW); 38648c2ecf20Sopenharmony_ci if (ret) 38658c2ecf20Sopenharmony_ci goto out_unlock; 38668c2ecf20Sopenharmony_ci 38678c2ecf20Sopenharmony_ci free_space = btrfs_leaf_free_space(right); 38688c2ecf20Sopenharmony_ci if (free_space < data_size) 38698c2ecf20Sopenharmony_ci goto out_unlock; 38708c2ecf20Sopenharmony_ci 38718c2ecf20Sopenharmony_ci left_nritems = btrfs_header_nritems(left); 38728c2ecf20Sopenharmony_ci if (left_nritems == 0) 38738c2ecf20Sopenharmony_ci goto out_unlock; 38748c2ecf20Sopenharmony_ci 38758c2ecf20Sopenharmony_ci if (check_sibling_keys(left, right)) { 38768c2ecf20Sopenharmony_ci ret = -EUCLEAN; 38778c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 38788c2ecf20Sopenharmony_ci btrfs_tree_unlock(right); 38798c2ecf20Sopenharmony_ci free_extent_buffer(right); 38808c2ecf20Sopenharmony_ci return ret; 38818c2ecf20Sopenharmony_ci } 38828c2ecf20Sopenharmony_ci if (path->slots[0] == left_nritems && !empty) { 38838c2ecf20Sopenharmony_ci /* Key greater than all keys in the leaf, right neighbor has 38848c2ecf20Sopenharmony_ci * enough room for it and we're not emptying our leaf to delete 38858c2ecf20Sopenharmony_ci * it, therefore use right neighbor to insert the new item and 38868c2ecf20Sopenharmony_ci * no need to touch/dirty our left leaf. */ 38878c2ecf20Sopenharmony_ci btrfs_tree_unlock(left); 38888c2ecf20Sopenharmony_ci free_extent_buffer(left); 38898c2ecf20Sopenharmony_ci path->nodes[0] = right; 38908c2ecf20Sopenharmony_ci path->slots[0] = 0; 38918c2ecf20Sopenharmony_ci path->slots[1]++; 38928c2ecf20Sopenharmony_ci return 0; 38938c2ecf20Sopenharmony_ci } 38948c2ecf20Sopenharmony_ci 38958c2ecf20Sopenharmony_ci return __push_leaf_right(path, min_data_size, empty, 38968c2ecf20Sopenharmony_ci right, free_space, left_nritems, min_slot); 38978c2ecf20Sopenharmony_ciout_unlock: 38988c2ecf20Sopenharmony_ci btrfs_tree_unlock(right); 38998c2ecf20Sopenharmony_ci free_extent_buffer(right); 39008c2ecf20Sopenharmony_ci return 1; 39018c2ecf20Sopenharmony_ci} 39028c2ecf20Sopenharmony_ci 39038c2ecf20Sopenharmony_ci/* 39048c2ecf20Sopenharmony_ci * push some data in the path leaf to the left, trying to free up at 39058c2ecf20Sopenharmony_ci * least data_size bytes. returns zero if the push worked, nonzero otherwise 39068c2ecf20Sopenharmony_ci * 39078c2ecf20Sopenharmony_ci * max_slot can put a limit on how far into the leaf we'll push items. The 39088c2ecf20Sopenharmony_ci * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the 39098c2ecf20Sopenharmony_ci * items 39108c2ecf20Sopenharmony_ci */ 39118c2ecf20Sopenharmony_cistatic noinline int __push_leaf_left(struct btrfs_path *path, int data_size, 39128c2ecf20Sopenharmony_ci int empty, struct extent_buffer *left, 39138c2ecf20Sopenharmony_ci int free_space, u32 right_nritems, 39148c2ecf20Sopenharmony_ci u32 max_slot) 39158c2ecf20Sopenharmony_ci{ 39168c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = left->fs_info; 39178c2ecf20Sopenharmony_ci struct btrfs_disk_key disk_key; 39188c2ecf20Sopenharmony_ci struct extent_buffer *right = path->nodes[0]; 39198c2ecf20Sopenharmony_ci int i; 39208c2ecf20Sopenharmony_ci int push_space = 0; 39218c2ecf20Sopenharmony_ci int push_items = 0; 39228c2ecf20Sopenharmony_ci struct btrfs_item *item; 39238c2ecf20Sopenharmony_ci u32 old_left_nritems; 39248c2ecf20Sopenharmony_ci u32 nr; 39258c2ecf20Sopenharmony_ci int ret = 0; 39268c2ecf20Sopenharmony_ci u32 this_item_size; 39278c2ecf20Sopenharmony_ci u32 old_left_item_size; 39288c2ecf20Sopenharmony_ci struct btrfs_map_token token; 39298c2ecf20Sopenharmony_ci 39308c2ecf20Sopenharmony_ci if (empty) 39318c2ecf20Sopenharmony_ci nr = min(right_nritems, max_slot); 39328c2ecf20Sopenharmony_ci else 39338c2ecf20Sopenharmony_ci nr = min(right_nritems - 1, max_slot); 39348c2ecf20Sopenharmony_ci 39358c2ecf20Sopenharmony_ci for (i = 0; i < nr; i++) { 39368c2ecf20Sopenharmony_ci item = btrfs_item_nr(i); 39378c2ecf20Sopenharmony_ci 39388c2ecf20Sopenharmony_ci if (!empty && push_items > 0) { 39398c2ecf20Sopenharmony_ci if (path->slots[0] < i) 39408c2ecf20Sopenharmony_ci break; 39418c2ecf20Sopenharmony_ci if (path->slots[0] == i) { 39428c2ecf20Sopenharmony_ci int space = btrfs_leaf_free_space(right); 39438c2ecf20Sopenharmony_ci 39448c2ecf20Sopenharmony_ci if (space + push_space * 2 > free_space) 39458c2ecf20Sopenharmony_ci break; 39468c2ecf20Sopenharmony_ci } 39478c2ecf20Sopenharmony_ci } 39488c2ecf20Sopenharmony_ci 39498c2ecf20Sopenharmony_ci if (path->slots[0] == i) 39508c2ecf20Sopenharmony_ci push_space += data_size; 39518c2ecf20Sopenharmony_ci 39528c2ecf20Sopenharmony_ci this_item_size = btrfs_item_size(right, item); 39538c2ecf20Sopenharmony_ci if (this_item_size + sizeof(*item) + push_space > free_space) 39548c2ecf20Sopenharmony_ci break; 39558c2ecf20Sopenharmony_ci 39568c2ecf20Sopenharmony_ci push_items++; 39578c2ecf20Sopenharmony_ci push_space += this_item_size + sizeof(*item); 39588c2ecf20Sopenharmony_ci } 39598c2ecf20Sopenharmony_ci 39608c2ecf20Sopenharmony_ci if (push_items == 0) { 39618c2ecf20Sopenharmony_ci ret = 1; 39628c2ecf20Sopenharmony_ci goto out; 39638c2ecf20Sopenharmony_ci } 39648c2ecf20Sopenharmony_ci WARN_ON(!empty && push_items == btrfs_header_nritems(right)); 39658c2ecf20Sopenharmony_ci 39668c2ecf20Sopenharmony_ci /* push data from right to left */ 39678c2ecf20Sopenharmony_ci copy_extent_buffer(left, right, 39688c2ecf20Sopenharmony_ci btrfs_item_nr_offset(btrfs_header_nritems(left)), 39698c2ecf20Sopenharmony_ci btrfs_item_nr_offset(0), 39708c2ecf20Sopenharmony_ci push_items * sizeof(struct btrfs_item)); 39718c2ecf20Sopenharmony_ci 39728c2ecf20Sopenharmony_ci push_space = BTRFS_LEAF_DATA_SIZE(fs_info) - 39738c2ecf20Sopenharmony_ci btrfs_item_offset_nr(right, push_items - 1); 39748c2ecf20Sopenharmony_ci 39758c2ecf20Sopenharmony_ci copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET + 39768c2ecf20Sopenharmony_ci leaf_data_end(left) - push_space, 39778c2ecf20Sopenharmony_ci BTRFS_LEAF_DATA_OFFSET + 39788c2ecf20Sopenharmony_ci btrfs_item_offset_nr(right, push_items - 1), 39798c2ecf20Sopenharmony_ci push_space); 39808c2ecf20Sopenharmony_ci old_left_nritems = btrfs_header_nritems(left); 39818c2ecf20Sopenharmony_ci BUG_ON(old_left_nritems <= 0); 39828c2ecf20Sopenharmony_ci 39838c2ecf20Sopenharmony_ci btrfs_init_map_token(&token, left); 39848c2ecf20Sopenharmony_ci old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1); 39858c2ecf20Sopenharmony_ci for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { 39868c2ecf20Sopenharmony_ci u32 ioff; 39878c2ecf20Sopenharmony_ci 39888c2ecf20Sopenharmony_ci item = btrfs_item_nr(i); 39898c2ecf20Sopenharmony_ci 39908c2ecf20Sopenharmony_ci ioff = btrfs_token_item_offset(&token, item); 39918c2ecf20Sopenharmony_ci btrfs_set_token_item_offset(&token, item, 39928c2ecf20Sopenharmony_ci ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size)); 39938c2ecf20Sopenharmony_ci } 39948c2ecf20Sopenharmony_ci btrfs_set_header_nritems(left, old_left_nritems + push_items); 39958c2ecf20Sopenharmony_ci 39968c2ecf20Sopenharmony_ci /* fixup right node */ 39978c2ecf20Sopenharmony_ci if (push_items > right_nritems) 39988c2ecf20Sopenharmony_ci WARN(1, KERN_CRIT "push items %d nr %u\n", push_items, 39998c2ecf20Sopenharmony_ci right_nritems); 40008c2ecf20Sopenharmony_ci 40018c2ecf20Sopenharmony_ci if (push_items < right_nritems) { 40028c2ecf20Sopenharmony_ci push_space = btrfs_item_offset_nr(right, push_items - 1) - 40038c2ecf20Sopenharmony_ci leaf_data_end(right); 40048c2ecf20Sopenharmony_ci memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET + 40058c2ecf20Sopenharmony_ci BTRFS_LEAF_DATA_SIZE(fs_info) - push_space, 40068c2ecf20Sopenharmony_ci BTRFS_LEAF_DATA_OFFSET + 40078c2ecf20Sopenharmony_ci leaf_data_end(right), push_space); 40088c2ecf20Sopenharmony_ci 40098c2ecf20Sopenharmony_ci memmove_extent_buffer(right, btrfs_item_nr_offset(0), 40108c2ecf20Sopenharmony_ci btrfs_item_nr_offset(push_items), 40118c2ecf20Sopenharmony_ci (btrfs_header_nritems(right) - push_items) * 40128c2ecf20Sopenharmony_ci sizeof(struct btrfs_item)); 40138c2ecf20Sopenharmony_ci } 40148c2ecf20Sopenharmony_ci 40158c2ecf20Sopenharmony_ci btrfs_init_map_token(&token, right); 40168c2ecf20Sopenharmony_ci right_nritems -= push_items; 40178c2ecf20Sopenharmony_ci btrfs_set_header_nritems(right, right_nritems); 40188c2ecf20Sopenharmony_ci push_space = BTRFS_LEAF_DATA_SIZE(fs_info); 40198c2ecf20Sopenharmony_ci for (i = 0; i < right_nritems; i++) { 40208c2ecf20Sopenharmony_ci item = btrfs_item_nr(i); 40218c2ecf20Sopenharmony_ci 40228c2ecf20Sopenharmony_ci push_space = push_space - btrfs_token_item_size(&token, item); 40238c2ecf20Sopenharmony_ci btrfs_set_token_item_offset(&token, item, push_space); 40248c2ecf20Sopenharmony_ci } 40258c2ecf20Sopenharmony_ci 40268c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(left); 40278c2ecf20Sopenharmony_ci if (right_nritems) 40288c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(right); 40298c2ecf20Sopenharmony_ci else 40308c2ecf20Sopenharmony_ci btrfs_clean_tree_block(right); 40318c2ecf20Sopenharmony_ci 40328c2ecf20Sopenharmony_ci btrfs_item_key(right, &disk_key, 0); 40338c2ecf20Sopenharmony_ci fixup_low_keys(path, &disk_key, 1); 40348c2ecf20Sopenharmony_ci 40358c2ecf20Sopenharmony_ci /* then fixup the leaf pointer in the path */ 40368c2ecf20Sopenharmony_ci if (path->slots[0] < push_items) { 40378c2ecf20Sopenharmony_ci path->slots[0] += old_left_nritems; 40388c2ecf20Sopenharmony_ci btrfs_tree_unlock(path->nodes[0]); 40398c2ecf20Sopenharmony_ci free_extent_buffer(path->nodes[0]); 40408c2ecf20Sopenharmony_ci path->nodes[0] = left; 40418c2ecf20Sopenharmony_ci path->slots[1] -= 1; 40428c2ecf20Sopenharmony_ci } else { 40438c2ecf20Sopenharmony_ci btrfs_tree_unlock(left); 40448c2ecf20Sopenharmony_ci free_extent_buffer(left); 40458c2ecf20Sopenharmony_ci path->slots[0] -= push_items; 40468c2ecf20Sopenharmony_ci } 40478c2ecf20Sopenharmony_ci BUG_ON(path->slots[0] < 0); 40488c2ecf20Sopenharmony_ci return ret; 40498c2ecf20Sopenharmony_ciout: 40508c2ecf20Sopenharmony_ci btrfs_tree_unlock(left); 40518c2ecf20Sopenharmony_ci free_extent_buffer(left); 40528c2ecf20Sopenharmony_ci return ret; 40538c2ecf20Sopenharmony_ci} 40548c2ecf20Sopenharmony_ci 40558c2ecf20Sopenharmony_ci/* 40568c2ecf20Sopenharmony_ci * push some data in the path leaf to the left, trying to free up at 40578c2ecf20Sopenharmony_ci * least data_size bytes. returns zero if the push worked, nonzero otherwise 40588c2ecf20Sopenharmony_ci * 40598c2ecf20Sopenharmony_ci * max_slot can put a limit on how far into the leaf we'll push items. The 40608c2ecf20Sopenharmony_ci * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the 40618c2ecf20Sopenharmony_ci * items 40628c2ecf20Sopenharmony_ci */ 40638c2ecf20Sopenharmony_cistatic int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root 40648c2ecf20Sopenharmony_ci *root, struct btrfs_path *path, int min_data_size, 40658c2ecf20Sopenharmony_ci int data_size, int empty, u32 max_slot) 40668c2ecf20Sopenharmony_ci{ 40678c2ecf20Sopenharmony_ci struct extent_buffer *right = path->nodes[0]; 40688c2ecf20Sopenharmony_ci struct extent_buffer *left; 40698c2ecf20Sopenharmony_ci int slot; 40708c2ecf20Sopenharmony_ci int free_space; 40718c2ecf20Sopenharmony_ci u32 right_nritems; 40728c2ecf20Sopenharmony_ci int ret = 0; 40738c2ecf20Sopenharmony_ci 40748c2ecf20Sopenharmony_ci slot = path->slots[1]; 40758c2ecf20Sopenharmony_ci if (slot == 0) 40768c2ecf20Sopenharmony_ci return 1; 40778c2ecf20Sopenharmony_ci if (!path->nodes[1]) 40788c2ecf20Sopenharmony_ci return 1; 40798c2ecf20Sopenharmony_ci 40808c2ecf20Sopenharmony_ci right_nritems = btrfs_header_nritems(right); 40818c2ecf20Sopenharmony_ci if (right_nritems == 0) 40828c2ecf20Sopenharmony_ci return 1; 40838c2ecf20Sopenharmony_ci 40848c2ecf20Sopenharmony_ci btrfs_assert_tree_locked(path->nodes[1]); 40858c2ecf20Sopenharmony_ci 40868c2ecf20Sopenharmony_ci left = btrfs_read_node_slot(path->nodes[1], slot - 1); 40878c2ecf20Sopenharmony_ci /* 40888c2ecf20Sopenharmony_ci * slot - 1 is not valid or we fail to read the left node, 40898c2ecf20Sopenharmony_ci * no big deal, just return. 40908c2ecf20Sopenharmony_ci */ 40918c2ecf20Sopenharmony_ci if (IS_ERR(left)) 40928c2ecf20Sopenharmony_ci return 1; 40938c2ecf20Sopenharmony_ci 40948c2ecf20Sopenharmony_ci __btrfs_tree_lock(left, BTRFS_NESTING_LEFT); 40958c2ecf20Sopenharmony_ci btrfs_set_lock_blocking_write(left); 40968c2ecf20Sopenharmony_ci 40978c2ecf20Sopenharmony_ci free_space = btrfs_leaf_free_space(left); 40988c2ecf20Sopenharmony_ci if (free_space < data_size) { 40998c2ecf20Sopenharmony_ci ret = 1; 41008c2ecf20Sopenharmony_ci goto out; 41018c2ecf20Sopenharmony_ci } 41028c2ecf20Sopenharmony_ci 41038c2ecf20Sopenharmony_ci /* cow and double check */ 41048c2ecf20Sopenharmony_ci ret = btrfs_cow_block(trans, root, left, 41058c2ecf20Sopenharmony_ci path->nodes[1], slot - 1, &left, 41068c2ecf20Sopenharmony_ci BTRFS_NESTING_LEFT_COW); 41078c2ecf20Sopenharmony_ci if (ret) { 41088c2ecf20Sopenharmony_ci /* we hit -ENOSPC, but it isn't fatal here */ 41098c2ecf20Sopenharmony_ci if (ret == -ENOSPC) 41108c2ecf20Sopenharmony_ci ret = 1; 41118c2ecf20Sopenharmony_ci goto out; 41128c2ecf20Sopenharmony_ci } 41138c2ecf20Sopenharmony_ci 41148c2ecf20Sopenharmony_ci free_space = btrfs_leaf_free_space(left); 41158c2ecf20Sopenharmony_ci if (free_space < data_size) { 41168c2ecf20Sopenharmony_ci ret = 1; 41178c2ecf20Sopenharmony_ci goto out; 41188c2ecf20Sopenharmony_ci } 41198c2ecf20Sopenharmony_ci 41208c2ecf20Sopenharmony_ci if (check_sibling_keys(left, right)) { 41218c2ecf20Sopenharmony_ci ret = -EUCLEAN; 41228c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 41238c2ecf20Sopenharmony_ci goto out; 41248c2ecf20Sopenharmony_ci } 41258c2ecf20Sopenharmony_ci return __push_leaf_left(path, min_data_size, 41268c2ecf20Sopenharmony_ci empty, left, free_space, right_nritems, 41278c2ecf20Sopenharmony_ci max_slot); 41288c2ecf20Sopenharmony_ciout: 41298c2ecf20Sopenharmony_ci btrfs_tree_unlock(left); 41308c2ecf20Sopenharmony_ci free_extent_buffer(left); 41318c2ecf20Sopenharmony_ci return ret; 41328c2ecf20Sopenharmony_ci} 41338c2ecf20Sopenharmony_ci 41348c2ecf20Sopenharmony_ci/* 41358c2ecf20Sopenharmony_ci * split the path's leaf in two, making sure there is at least data_size 41368c2ecf20Sopenharmony_ci * available for the resulting leaf level of the path. 41378c2ecf20Sopenharmony_ci */ 41388c2ecf20Sopenharmony_cistatic noinline void copy_for_split(struct btrfs_trans_handle *trans, 41398c2ecf20Sopenharmony_ci struct btrfs_path *path, 41408c2ecf20Sopenharmony_ci struct extent_buffer *l, 41418c2ecf20Sopenharmony_ci struct extent_buffer *right, 41428c2ecf20Sopenharmony_ci int slot, int mid, int nritems) 41438c2ecf20Sopenharmony_ci{ 41448c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = trans->fs_info; 41458c2ecf20Sopenharmony_ci int data_copy_size; 41468c2ecf20Sopenharmony_ci int rt_data_off; 41478c2ecf20Sopenharmony_ci int i; 41488c2ecf20Sopenharmony_ci struct btrfs_disk_key disk_key; 41498c2ecf20Sopenharmony_ci struct btrfs_map_token token; 41508c2ecf20Sopenharmony_ci 41518c2ecf20Sopenharmony_ci nritems = nritems - mid; 41528c2ecf20Sopenharmony_ci btrfs_set_header_nritems(right, nritems); 41538c2ecf20Sopenharmony_ci data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(l); 41548c2ecf20Sopenharmony_ci 41558c2ecf20Sopenharmony_ci copy_extent_buffer(right, l, btrfs_item_nr_offset(0), 41568c2ecf20Sopenharmony_ci btrfs_item_nr_offset(mid), 41578c2ecf20Sopenharmony_ci nritems * sizeof(struct btrfs_item)); 41588c2ecf20Sopenharmony_ci 41598c2ecf20Sopenharmony_ci copy_extent_buffer(right, l, 41608c2ecf20Sopenharmony_ci BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) - 41618c2ecf20Sopenharmony_ci data_copy_size, BTRFS_LEAF_DATA_OFFSET + 41628c2ecf20Sopenharmony_ci leaf_data_end(l), data_copy_size); 41638c2ecf20Sopenharmony_ci 41648c2ecf20Sopenharmony_ci rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid); 41658c2ecf20Sopenharmony_ci 41668c2ecf20Sopenharmony_ci btrfs_init_map_token(&token, right); 41678c2ecf20Sopenharmony_ci for (i = 0; i < nritems; i++) { 41688c2ecf20Sopenharmony_ci struct btrfs_item *item = btrfs_item_nr(i); 41698c2ecf20Sopenharmony_ci u32 ioff; 41708c2ecf20Sopenharmony_ci 41718c2ecf20Sopenharmony_ci ioff = btrfs_token_item_offset(&token, item); 41728c2ecf20Sopenharmony_ci btrfs_set_token_item_offset(&token, item, ioff + rt_data_off); 41738c2ecf20Sopenharmony_ci } 41748c2ecf20Sopenharmony_ci 41758c2ecf20Sopenharmony_ci btrfs_set_header_nritems(l, mid); 41768c2ecf20Sopenharmony_ci btrfs_item_key(right, &disk_key, 0); 41778c2ecf20Sopenharmony_ci insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1); 41788c2ecf20Sopenharmony_ci 41798c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(right); 41808c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(l); 41818c2ecf20Sopenharmony_ci BUG_ON(path->slots[0] != slot); 41828c2ecf20Sopenharmony_ci 41838c2ecf20Sopenharmony_ci if (mid <= slot) { 41848c2ecf20Sopenharmony_ci btrfs_tree_unlock(path->nodes[0]); 41858c2ecf20Sopenharmony_ci free_extent_buffer(path->nodes[0]); 41868c2ecf20Sopenharmony_ci path->nodes[0] = right; 41878c2ecf20Sopenharmony_ci path->slots[0] -= mid; 41888c2ecf20Sopenharmony_ci path->slots[1] += 1; 41898c2ecf20Sopenharmony_ci } else { 41908c2ecf20Sopenharmony_ci btrfs_tree_unlock(right); 41918c2ecf20Sopenharmony_ci free_extent_buffer(right); 41928c2ecf20Sopenharmony_ci } 41938c2ecf20Sopenharmony_ci 41948c2ecf20Sopenharmony_ci BUG_ON(path->slots[0] < 0); 41958c2ecf20Sopenharmony_ci} 41968c2ecf20Sopenharmony_ci 41978c2ecf20Sopenharmony_ci/* 41988c2ecf20Sopenharmony_ci * double splits happen when we need to insert a big item in the middle 41998c2ecf20Sopenharmony_ci * of a leaf. A double split can leave us with 3 mostly empty leaves: 42008c2ecf20Sopenharmony_ci * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ] 42018c2ecf20Sopenharmony_ci * A B C 42028c2ecf20Sopenharmony_ci * 42038c2ecf20Sopenharmony_ci * We avoid this by trying to push the items on either side of our target 42048c2ecf20Sopenharmony_ci * into the adjacent leaves. If all goes well we can avoid the double split 42058c2ecf20Sopenharmony_ci * completely. 42068c2ecf20Sopenharmony_ci */ 42078c2ecf20Sopenharmony_cistatic noinline int push_for_double_split(struct btrfs_trans_handle *trans, 42088c2ecf20Sopenharmony_ci struct btrfs_root *root, 42098c2ecf20Sopenharmony_ci struct btrfs_path *path, 42108c2ecf20Sopenharmony_ci int data_size) 42118c2ecf20Sopenharmony_ci{ 42128c2ecf20Sopenharmony_ci int ret; 42138c2ecf20Sopenharmony_ci int progress = 0; 42148c2ecf20Sopenharmony_ci int slot; 42158c2ecf20Sopenharmony_ci u32 nritems; 42168c2ecf20Sopenharmony_ci int space_needed = data_size; 42178c2ecf20Sopenharmony_ci 42188c2ecf20Sopenharmony_ci slot = path->slots[0]; 42198c2ecf20Sopenharmony_ci if (slot < btrfs_header_nritems(path->nodes[0])) 42208c2ecf20Sopenharmony_ci space_needed -= btrfs_leaf_free_space(path->nodes[0]); 42218c2ecf20Sopenharmony_ci 42228c2ecf20Sopenharmony_ci /* 42238c2ecf20Sopenharmony_ci * try to push all the items after our slot into the 42248c2ecf20Sopenharmony_ci * right leaf 42258c2ecf20Sopenharmony_ci */ 42268c2ecf20Sopenharmony_ci ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot); 42278c2ecf20Sopenharmony_ci if (ret < 0) 42288c2ecf20Sopenharmony_ci return ret; 42298c2ecf20Sopenharmony_ci 42308c2ecf20Sopenharmony_ci if (ret == 0) 42318c2ecf20Sopenharmony_ci progress++; 42328c2ecf20Sopenharmony_ci 42338c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(path->nodes[0]); 42348c2ecf20Sopenharmony_ci /* 42358c2ecf20Sopenharmony_ci * our goal is to get our slot at the start or end of a leaf. If 42368c2ecf20Sopenharmony_ci * we've done so we're done 42378c2ecf20Sopenharmony_ci */ 42388c2ecf20Sopenharmony_ci if (path->slots[0] == 0 || path->slots[0] == nritems) 42398c2ecf20Sopenharmony_ci return 0; 42408c2ecf20Sopenharmony_ci 42418c2ecf20Sopenharmony_ci if (btrfs_leaf_free_space(path->nodes[0]) >= data_size) 42428c2ecf20Sopenharmony_ci return 0; 42438c2ecf20Sopenharmony_ci 42448c2ecf20Sopenharmony_ci /* try to push all the items before our slot into the next leaf */ 42458c2ecf20Sopenharmony_ci slot = path->slots[0]; 42468c2ecf20Sopenharmony_ci space_needed = data_size; 42478c2ecf20Sopenharmony_ci if (slot > 0) 42488c2ecf20Sopenharmony_ci space_needed -= btrfs_leaf_free_space(path->nodes[0]); 42498c2ecf20Sopenharmony_ci ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot); 42508c2ecf20Sopenharmony_ci if (ret < 0) 42518c2ecf20Sopenharmony_ci return ret; 42528c2ecf20Sopenharmony_ci 42538c2ecf20Sopenharmony_ci if (ret == 0) 42548c2ecf20Sopenharmony_ci progress++; 42558c2ecf20Sopenharmony_ci 42568c2ecf20Sopenharmony_ci if (progress) 42578c2ecf20Sopenharmony_ci return 0; 42588c2ecf20Sopenharmony_ci return 1; 42598c2ecf20Sopenharmony_ci} 42608c2ecf20Sopenharmony_ci 42618c2ecf20Sopenharmony_ci/* 42628c2ecf20Sopenharmony_ci * split the path's leaf in two, making sure there is at least data_size 42638c2ecf20Sopenharmony_ci * available for the resulting leaf level of the path. 42648c2ecf20Sopenharmony_ci * 42658c2ecf20Sopenharmony_ci * returns 0 if all went well and < 0 on failure. 42668c2ecf20Sopenharmony_ci */ 42678c2ecf20Sopenharmony_cistatic noinline int split_leaf(struct btrfs_trans_handle *trans, 42688c2ecf20Sopenharmony_ci struct btrfs_root *root, 42698c2ecf20Sopenharmony_ci const struct btrfs_key *ins_key, 42708c2ecf20Sopenharmony_ci struct btrfs_path *path, int data_size, 42718c2ecf20Sopenharmony_ci int extend) 42728c2ecf20Sopenharmony_ci{ 42738c2ecf20Sopenharmony_ci struct btrfs_disk_key disk_key; 42748c2ecf20Sopenharmony_ci struct extent_buffer *l; 42758c2ecf20Sopenharmony_ci u32 nritems; 42768c2ecf20Sopenharmony_ci int mid; 42778c2ecf20Sopenharmony_ci int slot; 42788c2ecf20Sopenharmony_ci struct extent_buffer *right; 42798c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 42808c2ecf20Sopenharmony_ci int ret = 0; 42818c2ecf20Sopenharmony_ci int wret; 42828c2ecf20Sopenharmony_ci int split; 42838c2ecf20Sopenharmony_ci int num_doubles = 0; 42848c2ecf20Sopenharmony_ci int tried_avoid_double = 0; 42858c2ecf20Sopenharmony_ci 42868c2ecf20Sopenharmony_ci l = path->nodes[0]; 42878c2ecf20Sopenharmony_ci slot = path->slots[0]; 42888c2ecf20Sopenharmony_ci if (extend && data_size + btrfs_item_size_nr(l, slot) + 42898c2ecf20Sopenharmony_ci sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info)) 42908c2ecf20Sopenharmony_ci return -EOVERFLOW; 42918c2ecf20Sopenharmony_ci 42928c2ecf20Sopenharmony_ci /* first try to make some room by pushing left and right */ 42938c2ecf20Sopenharmony_ci if (data_size && path->nodes[1]) { 42948c2ecf20Sopenharmony_ci int space_needed = data_size; 42958c2ecf20Sopenharmony_ci 42968c2ecf20Sopenharmony_ci if (slot < btrfs_header_nritems(l)) 42978c2ecf20Sopenharmony_ci space_needed -= btrfs_leaf_free_space(l); 42988c2ecf20Sopenharmony_ci 42998c2ecf20Sopenharmony_ci wret = push_leaf_right(trans, root, path, space_needed, 43008c2ecf20Sopenharmony_ci space_needed, 0, 0); 43018c2ecf20Sopenharmony_ci if (wret < 0) 43028c2ecf20Sopenharmony_ci return wret; 43038c2ecf20Sopenharmony_ci if (wret) { 43048c2ecf20Sopenharmony_ci space_needed = data_size; 43058c2ecf20Sopenharmony_ci if (slot > 0) 43068c2ecf20Sopenharmony_ci space_needed -= btrfs_leaf_free_space(l); 43078c2ecf20Sopenharmony_ci wret = push_leaf_left(trans, root, path, space_needed, 43088c2ecf20Sopenharmony_ci space_needed, 0, (u32)-1); 43098c2ecf20Sopenharmony_ci if (wret < 0) 43108c2ecf20Sopenharmony_ci return wret; 43118c2ecf20Sopenharmony_ci } 43128c2ecf20Sopenharmony_ci l = path->nodes[0]; 43138c2ecf20Sopenharmony_ci 43148c2ecf20Sopenharmony_ci /* did the pushes work? */ 43158c2ecf20Sopenharmony_ci if (btrfs_leaf_free_space(l) >= data_size) 43168c2ecf20Sopenharmony_ci return 0; 43178c2ecf20Sopenharmony_ci } 43188c2ecf20Sopenharmony_ci 43198c2ecf20Sopenharmony_ci if (!path->nodes[1]) { 43208c2ecf20Sopenharmony_ci ret = insert_new_root(trans, root, path, 1); 43218c2ecf20Sopenharmony_ci if (ret) 43228c2ecf20Sopenharmony_ci return ret; 43238c2ecf20Sopenharmony_ci } 43248c2ecf20Sopenharmony_ciagain: 43258c2ecf20Sopenharmony_ci split = 1; 43268c2ecf20Sopenharmony_ci l = path->nodes[0]; 43278c2ecf20Sopenharmony_ci slot = path->slots[0]; 43288c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(l); 43298c2ecf20Sopenharmony_ci mid = (nritems + 1) / 2; 43308c2ecf20Sopenharmony_ci 43318c2ecf20Sopenharmony_ci if (mid <= slot) { 43328c2ecf20Sopenharmony_ci if (nritems == 1 || 43338c2ecf20Sopenharmony_ci leaf_space_used(l, mid, nritems - mid) + data_size > 43348c2ecf20Sopenharmony_ci BTRFS_LEAF_DATA_SIZE(fs_info)) { 43358c2ecf20Sopenharmony_ci if (slot >= nritems) { 43368c2ecf20Sopenharmony_ci split = 0; 43378c2ecf20Sopenharmony_ci } else { 43388c2ecf20Sopenharmony_ci mid = slot; 43398c2ecf20Sopenharmony_ci if (mid != nritems && 43408c2ecf20Sopenharmony_ci leaf_space_used(l, mid, nritems - mid) + 43418c2ecf20Sopenharmony_ci data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) { 43428c2ecf20Sopenharmony_ci if (data_size && !tried_avoid_double) 43438c2ecf20Sopenharmony_ci goto push_for_double; 43448c2ecf20Sopenharmony_ci split = 2; 43458c2ecf20Sopenharmony_ci } 43468c2ecf20Sopenharmony_ci } 43478c2ecf20Sopenharmony_ci } 43488c2ecf20Sopenharmony_ci } else { 43498c2ecf20Sopenharmony_ci if (leaf_space_used(l, 0, mid) + data_size > 43508c2ecf20Sopenharmony_ci BTRFS_LEAF_DATA_SIZE(fs_info)) { 43518c2ecf20Sopenharmony_ci if (!extend && data_size && slot == 0) { 43528c2ecf20Sopenharmony_ci split = 0; 43538c2ecf20Sopenharmony_ci } else if ((extend || !data_size) && slot == 0) { 43548c2ecf20Sopenharmony_ci mid = 1; 43558c2ecf20Sopenharmony_ci } else { 43568c2ecf20Sopenharmony_ci mid = slot; 43578c2ecf20Sopenharmony_ci if (mid != nritems && 43588c2ecf20Sopenharmony_ci leaf_space_used(l, mid, nritems - mid) + 43598c2ecf20Sopenharmony_ci data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) { 43608c2ecf20Sopenharmony_ci if (data_size && !tried_avoid_double) 43618c2ecf20Sopenharmony_ci goto push_for_double; 43628c2ecf20Sopenharmony_ci split = 2; 43638c2ecf20Sopenharmony_ci } 43648c2ecf20Sopenharmony_ci } 43658c2ecf20Sopenharmony_ci } 43668c2ecf20Sopenharmony_ci } 43678c2ecf20Sopenharmony_ci 43688c2ecf20Sopenharmony_ci if (split == 0) 43698c2ecf20Sopenharmony_ci btrfs_cpu_key_to_disk(&disk_key, ins_key); 43708c2ecf20Sopenharmony_ci else 43718c2ecf20Sopenharmony_ci btrfs_item_key(l, &disk_key, mid); 43728c2ecf20Sopenharmony_ci 43738c2ecf20Sopenharmony_ci /* 43748c2ecf20Sopenharmony_ci * We have to about BTRFS_NESTING_NEW_ROOT here if we've done a double 43758c2ecf20Sopenharmony_ci * split, because we're only allowed to have MAX_LOCKDEP_SUBCLASSES 43768c2ecf20Sopenharmony_ci * subclasses, which is 8 at the time of this patch, and we've maxed it 43778c2ecf20Sopenharmony_ci * out. In the future we could add a 43788c2ecf20Sopenharmony_ci * BTRFS_NESTING_SPLIT_THE_SPLITTENING if we need to, but for now just 43798c2ecf20Sopenharmony_ci * use BTRFS_NESTING_NEW_ROOT. 43808c2ecf20Sopenharmony_ci */ 43818c2ecf20Sopenharmony_ci right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0, 43828c2ecf20Sopenharmony_ci l->start, 0, num_doubles ? 43838c2ecf20Sopenharmony_ci BTRFS_NESTING_NEW_ROOT : 43848c2ecf20Sopenharmony_ci BTRFS_NESTING_SPLIT); 43858c2ecf20Sopenharmony_ci if (IS_ERR(right)) 43868c2ecf20Sopenharmony_ci return PTR_ERR(right); 43878c2ecf20Sopenharmony_ci 43888c2ecf20Sopenharmony_ci root_add_used(root, fs_info->nodesize); 43898c2ecf20Sopenharmony_ci 43908c2ecf20Sopenharmony_ci if (split == 0) { 43918c2ecf20Sopenharmony_ci if (mid <= slot) { 43928c2ecf20Sopenharmony_ci btrfs_set_header_nritems(right, 0); 43938c2ecf20Sopenharmony_ci insert_ptr(trans, path, &disk_key, 43948c2ecf20Sopenharmony_ci right->start, path->slots[1] + 1, 1); 43958c2ecf20Sopenharmony_ci btrfs_tree_unlock(path->nodes[0]); 43968c2ecf20Sopenharmony_ci free_extent_buffer(path->nodes[0]); 43978c2ecf20Sopenharmony_ci path->nodes[0] = right; 43988c2ecf20Sopenharmony_ci path->slots[0] = 0; 43998c2ecf20Sopenharmony_ci path->slots[1] += 1; 44008c2ecf20Sopenharmony_ci } else { 44018c2ecf20Sopenharmony_ci btrfs_set_header_nritems(right, 0); 44028c2ecf20Sopenharmony_ci insert_ptr(trans, path, &disk_key, 44038c2ecf20Sopenharmony_ci right->start, path->slots[1], 1); 44048c2ecf20Sopenharmony_ci btrfs_tree_unlock(path->nodes[0]); 44058c2ecf20Sopenharmony_ci free_extent_buffer(path->nodes[0]); 44068c2ecf20Sopenharmony_ci path->nodes[0] = right; 44078c2ecf20Sopenharmony_ci path->slots[0] = 0; 44088c2ecf20Sopenharmony_ci if (path->slots[1] == 0) 44098c2ecf20Sopenharmony_ci fixup_low_keys(path, &disk_key, 1); 44108c2ecf20Sopenharmony_ci } 44118c2ecf20Sopenharmony_ci /* 44128c2ecf20Sopenharmony_ci * We create a new leaf 'right' for the required ins_len and 44138c2ecf20Sopenharmony_ci * we'll do btrfs_mark_buffer_dirty() on this leaf after copying 44148c2ecf20Sopenharmony_ci * the content of ins_len to 'right'. 44158c2ecf20Sopenharmony_ci */ 44168c2ecf20Sopenharmony_ci return ret; 44178c2ecf20Sopenharmony_ci } 44188c2ecf20Sopenharmony_ci 44198c2ecf20Sopenharmony_ci copy_for_split(trans, path, l, right, slot, mid, nritems); 44208c2ecf20Sopenharmony_ci 44218c2ecf20Sopenharmony_ci if (split == 2) { 44228c2ecf20Sopenharmony_ci BUG_ON(num_doubles != 0); 44238c2ecf20Sopenharmony_ci num_doubles++; 44248c2ecf20Sopenharmony_ci goto again; 44258c2ecf20Sopenharmony_ci } 44268c2ecf20Sopenharmony_ci 44278c2ecf20Sopenharmony_ci return 0; 44288c2ecf20Sopenharmony_ci 44298c2ecf20Sopenharmony_cipush_for_double: 44308c2ecf20Sopenharmony_ci push_for_double_split(trans, root, path, data_size); 44318c2ecf20Sopenharmony_ci tried_avoid_double = 1; 44328c2ecf20Sopenharmony_ci if (btrfs_leaf_free_space(path->nodes[0]) >= data_size) 44338c2ecf20Sopenharmony_ci return 0; 44348c2ecf20Sopenharmony_ci goto again; 44358c2ecf20Sopenharmony_ci} 44368c2ecf20Sopenharmony_ci 44378c2ecf20Sopenharmony_cistatic noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, 44388c2ecf20Sopenharmony_ci struct btrfs_root *root, 44398c2ecf20Sopenharmony_ci struct btrfs_path *path, int ins_len) 44408c2ecf20Sopenharmony_ci{ 44418c2ecf20Sopenharmony_ci struct btrfs_key key; 44428c2ecf20Sopenharmony_ci struct extent_buffer *leaf; 44438c2ecf20Sopenharmony_ci struct btrfs_file_extent_item *fi; 44448c2ecf20Sopenharmony_ci u64 extent_len = 0; 44458c2ecf20Sopenharmony_ci u32 item_size; 44468c2ecf20Sopenharmony_ci int ret; 44478c2ecf20Sopenharmony_ci 44488c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 44498c2ecf20Sopenharmony_ci btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 44508c2ecf20Sopenharmony_ci 44518c2ecf20Sopenharmony_ci BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY && 44528c2ecf20Sopenharmony_ci key.type != BTRFS_EXTENT_CSUM_KEY); 44538c2ecf20Sopenharmony_ci 44548c2ecf20Sopenharmony_ci if (btrfs_leaf_free_space(leaf) >= ins_len) 44558c2ecf20Sopenharmony_ci return 0; 44568c2ecf20Sopenharmony_ci 44578c2ecf20Sopenharmony_ci item_size = btrfs_item_size_nr(leaf, path->slots[0]); 44588c2ecf20Sopenharmony_ci if (key.type == BTRFS_EXTENT_DATA_KEY) { 44598c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, path->slots[0], 44608c2ecf20Sopenharmony_ci struct btrfs_file_extent_item); 44618c2ecf20Sopenharmony_ci extent_len = btrfs_file_extent_num_bytes(leaf, fi); 44628c2ecf20Sopenharmony_ci } 44638c2ecf20Sopenharmony_ci btrfs_release_path(path); 44648c2ecf20Sopenharmony_ci 44658c2ecf20Sopenharmony_ci path->keep_locks = 1; 44668c2ecf20Sopenharmony_ci path->search_for_split = 1; 44678c2ecf20Sopenharmony_ci ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 44688c2ecf20Sopenharmony_ci path->search_for_split = 0; 44698c2ecf20Sopenharmony_ci if (ret > 0) 44708c2ecf20Sopenharmony_ci ret = -EAGAIN; 44718c2ecf20Sopenharmony_ci if (ret < 0) 44728c2ecf20Sopenharmony_ci goto err; 44738c2ecf20Sopenharmony_ci 44748c2ecf20Sopenharmony_ci ret = -EAGAIN; 44758c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 44768c2ecf20Sopenharmony_ci /* if our item isn't there, return now */ 44778c2ecf20Sopenharmony_ci if (item_size != btrfs_item_size_nr(leaf, path->slots[0])) 44788c2ecf20Sopenharmony_ci goto err; 44798c2ecf20Sopenharmony_ci 44808c2ecf20Sopenharmony_ci /* the leaf has changed, it now has room. return now */ 44818c2ecf20Sopenharmony_ci if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len) 44828c2ecf20Sopenharmony_ci goto err; 44838c2ecf20Sopenharmony_ci 44848c2ecf20Sopenharmony_ci if (key.type == BTRFS_EXTENT_DATA_KEY) { 44858c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, path->slots[0], 44868c2ecf20Sopenharmony_ci struct btrfs_file_extent_item); 44878c2ecf20Sopenharmony_ci if (extent_len != btrfs_file_extent_num_bytes(leaf, fi)) 44888c2ecf20Sopenharmony_ci goto err; 44898c2ecf20Sopenharmony_ci } 44908c2ecf20Sopenharmony_ci 44918c2ecf20Sopenharmony_ci btrfs_set_path_blocking(path); 44928c2ecf20Sopenharmony_ci ret = split_leaf(trans, root, &key, path, ins_len, 1); 44938c2ecf20Sopenharmony_ci if (ret) 44948c2ecf20Sopenharmony_ci goto err; 44958c2ecf20Sopenharmony_ci 44968c2ecf20Sopenharmony_ci path->keep_locks = 0; 44978c2ecf20Sopenharmony_ci btrfs_unlock_up_safe(path, 1); 44988c2ecf20Sopenharmony_ci return 0; 44998c2ecf20Sopenharmony_cierr: 45008c2ecf20Sopenharmony_ci path->keep_locks = 0; 45018c2ecf20Sopenharmony_ci return ret; 45028c2ecf20Sopenharmony_ci} 45038c2ecf20Sopenharmony_ci 45048c2ecf20Sopenharmony_cistatic noinline int split_item(struct btrfs_path *path, 45058c2ecf20Sopenharmony_ci const struct btrfs_key *new_key, 45068c2ecf20Sopenharmony_ci unsigned long split_offset) 45078c2ecf20Sopenharmony_ci{ 45088c2ecf20Sopenharmony_ci struct extent_buffer *leaf; 45098c2ecf20Sopenharmony_ci struct btrfs_item *item; 45108c2ecf20Sopenharmony_ci struct btrfs_item *new_item; 45118c2ecf20Sopenharmony_ci int slot; 45128c2ecf20Sopenharmony_ci char *buf; 45138c2ecf20Sopenharmony_ci u32 nritems; 45148c2ecf20Sopenharmony_ci u32 item_size; 45158c2ecf20Sopenharmony_ci u32 orig_offset; 45168c2ecf20Sopenharmony_ci struct btrfs_disk_key disk_key; 45178c2ecf20Sopenharmony_ci 45188c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 45198c2ecf20Sopenharmony_ci BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item)); 45208c2ecf20Sopenharmony_ci 45218c2ecf20Sopenharmony_ci btrfs_set_path_blocking(path); 45228c2ecf20Sopenharmony_ci 45238c2ecf20Sopenharmony_ci item = btrfs_item_nr(path->slots[0]); 45248c2ecf20Sopenharmony_ci orig_offset = btrfs_item_offset(leaf, item); 45258c2ecf20Sopenharmony_ci item_size = btrfs_item_size(leaf, item); 45268c2ecf20Sopenharmony_ci 45278c2ecf20Sopenharmony_ci buf = kmalloc(item_size, GFP_NOFS); 45288c2ecf20Sopenharmony_ci if (!buf) 45298c2ecf20Sopenharmony_ci return -ENOMEM; 45308c2ecf20Sopenharmony_ci 45318c2ecf20Sopenharmony_ci read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, 45328c2ecf20Sopenharmony_ci path->slots[0]), item_size); 45338c2ecf20Sopenharmony_ci 45348c2ecf20Sopenharmony_ci slot = path->slots[0] + 1; 45358c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(leaf); 45368c2ecf20Sopenharmony_ci if (slot != nritems) { 45378c2ecf20Sopenharmony_ci /* shift the items */ 45388c2ecf20Sopenharmony_ci memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), 45398c2ecf20Sopenharmony_ci btrfs_item_nr_offset(slot), 45408c2ecf20Sopenharmony_ci (nritems - slot) * sizeof(struct btrfs_item)); 45418c2ecf20Sopenharmony_ci } 45428c2ecf20Sopenharmony_ci 45438c2ecf20Sopenharmony_ci btrfs_cpu_key_to_disk(&disk_key, new_key); 45448c2ecf20Sopenharmony_ci btrfs_set_item_key(leaf, &disk_key, slot); 45458c2ecf20Sopenharmony_ci 45468c2ecf20Sopenharmony_ci new_item = btrfs_item_nr(slot); 45478c2ecf20Sopenharmony_ci 45488c2ecf20Sopenharmony_ci btrfs_set_item_offset(leaf, new_item, orig_offset); 45498c2ecf20Sopenharmony_ci btrfs_set_item_size(leaf, new_item, item_size - split_offset); 45508c2ecf20Sopenharmony_ci 45518c2ecf20Sopenharmony_ci btrfs_set_item_offset(leaf, item, 45528c2ecf20Sopenharmony_ci orig_offset + item_size - split_offset); 45538c2ecf20Sopenharmony_ci btrfs_set_item_size(leaf, item, split_offset); 45548c2ecf20Sopenharmony_ci 45558c2ecf20Sopenharmony_ci btrfs_set_header_nritems(leaf, nritems + 1); 45568c2ecf20Sopenharmony_ci 45578c2ecf20Sopenharmony_ci /* write the data for the start of the original item */ 45588c2ecf20Sopenharmony_ci write_extent_buffer(leaf, buf, 45598c2ecf20Sopenharmony_ci btrfs_item_ptr_offset(leaf, path->slots[0]), 45608c2ecf20Sopenharmony_ci split_offset); 45618c2ecf20Sopenharmony_ci 45628c2ecf20Sopenharmony_ci /* write the data for the new item */ 45638c2ecf20Sopenharmony_ci write_extent_buffer(leaf, buf + split_offset, 45648c2ecf20Sopenharmony_ci btrfs_item_ptr_offset(leaf, slot), 45658c2ecf20Sopenharmony_ci item_size - split_offset); 45668c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 45678c2ecf20Sopenharmony_ci 45688c2ecf20Sopenharmony_ci BUG_ON(btrfs_leaf_free_space(leaf) < 0); 45698c2ecf20Sopenharmony_ci kfree(buf); 45708c2ecf20Sopenharmony_ci return 0; 45718c2ecf20Sopenharmony_ci} 45728c2ecf20Sopenharmony_ci 45738c2ecf20Sopenharmony_ci/* 45748c2ecf20Sopenharmony_ci * This function splits a single item into two items, 45758c2ecf20Sopenharmony_ci * giving 'new_key' to the new item and splitting the 45768c2ecf20Sopenharmony_ci * old one at split_offset (from the start of the item). 45778c2ecf20Sopenharmony_ci * 45788c2ecf20Sopenharmony_ci * The path may be released by this operation. After 45798c2ecf20Sopenharmony_ci * the split, the path is pointing to the old item. The 45808c2ecf20Sopenharmony_ci * new item is going to be in the same node as the old one. 45818c2ecf20Sopenharmony_ci * 45828c2ecf20Sopenharmony_ci * Note, the item being split must be smaller enough to live alone on 45838c2ecf20Sopenharmony_ci * a tree block with room for one extra struct btrfs_item 45848c2ecf20Sopenharmony_ci * 45858c2ecf20Sopenharmony_ci * This allows us to split the item in place, keeping a lock on the 45868c2ecf20Sopenharmony_ci * leaf the entire time. 45878c2ecf20Sopenharmony_ci */ 45888c2ecf20Sopenharmony_ciint btrfs_split_item(struct btrfs_trans_handle *trans, 45898c2ecf20Sopenharmony_ci struct btrfs_root *root, 45908c2ecf20Sopenharmony_ci struct btrfs_path *path, 45918c2ecf20Sopenharmony_ci const struct btrfs_key *new_key, 45928c2ecf20Sopenharmony_ci unsigned long split_offset) 45938c2ecf20Sopenharmony_ci{ 45948c2ecf20Sopenharmony_ci int ret; 45958c2ecf20Sopenharmony_ci ret = setup_leaf_for_split(trans, root, path, 45968c2ecf20Sopenharmony_ci sizeof(struct btrfs_item)); 45978c2ecf20Sopenharmony_ci if (ret) 45988c2ecf20Sopenharmony_ci return ret; 45998c2ecf20Sopenharmony_ci 46008c2ecf20Sopenharmony_ci ret = split_item(path, new_key, split_offset); 46018c2ecf20Sopenharmony_ci return ret; 46028c2ecf20Sopenharmony_ci} 46038c2ecf20Sopenharmony_ci 46048c2ecf20Sopenharmony_ci/* 46058c2ecf20Sopenharmony_ci * This function duplicate a item, giving 'new_key' to the new item. 46068c2ecf20Sopenharmony_ci * It guarantees both items live in the same tree leaf and the new item 46078c2ecf20Sopenharmony_ci * is contiguous with the original item. 46088c2ecf20Sopenharmony_ci * 46098c2ecf20Sopenharmony_ci * This allows us to split file extent in place, keeping a lock on the 46108c2ecf20Sopenharmony_ci * leaf the entire time. 46118c2ecf20Sopenharmony_ci */ 46128c2ecf20Sopenharmony_ciint btrfs_duplicate_item(struct btrfs_trans_handle *trans, 46138c2ecf20Sopenharmony_ci struct btrfs_root *root, 46148c2ecf20Sopenharmony_ci struct btrfs_path *path, 46158c2ecf20Sopenharmony_ci const struct btrfs_key *new_key) 46168c2ecf20Sopenharmony_ci{ 46178c2ecf20Sopenharmony_ci struct extent_buffer *leaf; 46188c2ecf20Sopenharmony_ci int ret; 46198c2ecf20Sopenharmony_ci u32 item_size; 46208c2ecf20Sopenharmony_ci 46218c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 46228c2ecf20Sopenharmony_ci item_size = btrfs_item_size_nr(leaf, path->slots[0]); 46238c2ecf20Sopenharmony_ci ret = setup_leaf_for_split(trans, root, path, 46248c2ecf20Sopenharmony_ci item_size + sizeof(struct btrfs_item)); 46258c2ecf20Sopenharmony_ci if (ret) 46268c2ecf20Sopenharmony_ci return ret; 46278c2ecf20Sopenharmony_ci 46288c2ecf20Sopenharmony_ci path->slots[0]++; 46298c2ecf20Sopenharmony_ci setup_items_for_insert(root, path, new_key, &item_size, 1); 46308c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 46318c2ecf20Sopenharmony_ci memcpy_extent_buffer(leaf, 46328c2ecf20Sopenharmony_ci btrfs_item_ptr_offset(leaf, path->slots[0]), 46338c2ecf20Sopenharmony_ci btrfs_item_ptr_offset(leaf, path->slots[0] - 1), 46348c2ecf20Sopenharmony_ci item_size); 46358c2ecf20Sopenharmony_ci return 0; 46368c2ecf20Sopenharmony_ci} 46378c2ecf20Sopenharmony_ci 46388c2ecf20Sopenharmony_ci/* 46398c2ecf20Sopenharmony_ci * make the item pointed to by the path smaller. new_size indicates 46408c2ecf20Sopenharmony_ci * how small to make it, and from_end tells us if we just chop bytes 46418c2ecf20Sopenharmony_ci * off the end of the item or if we shift the item to chop bytes off 46428c2ecf20Sopenharmony_ci * the front. 46438c2ecf20Sopenharmony_ci */ 46448c2ecf20Sopenharmony_civoid btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end) 46458c2ecf20Sopenharmony_ci{ 46468c2ecf20Sopenharmony_ci int slot; 46478c2ecf20Sopenharmony_ci struct extent_buffer *leaf; 46488c2ecf20Sopenharmony_ci struct btrfs_item *item; 46498c2ecf20Sopenharmony_ci u32 nritems; 46508c2ecf20Sopenharmony_ci unsigned int data_end; 46518c2ecf20Sopenharmony_ci unsigned int old_data_start; 46528c2ecf20Sopenharmony_ci unsigned int old_size; 46538c2ecf20Sopenharmony_ci unsigned int size_diff; 46548c2ecf20Sopenharmony_ci int i; 46558c2ecf20Sopenharmony_ci struct btrfs_map_token token; 46568c2ecf20Sopenharmony_ci 46578c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 46588c2ecf20Sopenharmony_ci slot = path->slots[0]; 46598c2ecf20Sopenharmony_ci 46608c2ecf20Sopenharmony_ci old_size = btrfs_item_size_nr(leaf, slot); 46618c2ecf20Sopenharmony_ci if (old_size == new_size) 46628c2ecf20Sopenharmony_ci return; 46638c2ecf20Sopenharmony_ci 46648c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(leaf); 46658c2ecf20Sopenharmony_ci data_end = leaf_data_end(leaf); 46668c2ecf20Sopenharmony_ci 46678c2ecf20Sopenharmony_ci old_data_start = btrfs_item_offset_nr(leaf, slot); 46688c2ecf20Sopenharmony_ci 46698c2ecf20Sopenharmony_ci size_diff = old_size - new_size; 46708c2ecf20Sopenharmony_ci 46718c2ecf20Sopenharmony_ci BUG_ON(slot < 0); 46728c2ecf20Sopenharmony_ci BUG_ON(slot >= nritems); 46738c2ecf20Sopenharmony_ci 46748c2ecf20Sopenharmony_ci /* 46758c2ecf20Sopenharmony_ci * item0..itemN ... dataN.offset..dataN.size .. data0.size 46768c2ecf20Sopenharmony_ci */ 46778c2ecf20Sopenharmony_ci /* first correct the data pointers */ 46788c2ecf20Sopenharmony_ci btrfs_init_map_token(&token, leaf); 46798c2ecf20Sopenharmony_ci for (i = slot; i < nritems; i++) { 46808c2ecf20Sopenharmony_ci u32 ioff; 46818c2ecf20Sopenharmony_ci item = btrfs_item_nr(i); 46828c2ecf20Sopenharmony_ci 46838c2ecf20Sopenharmony_ci ioff = btrfs_token_item_offset(&token, item); 46848c2ecf20Sopenharmony_ci btrfs_set_token_item_offset(&token, item, ioff + size_diff); 46858c2ecf20Sopenharmony_ci } 46868c2ecf20Sopenharmony_ci 46878c2ecf20Sopenharmony_ci /* shift the data */ 46888c2ecf20Sopenharmony_ci if (from_end) { 46898c2ecf20Sopenharmony_ci memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + 46908c2ecf20Sopenharmony_ci data_end + size_diff, BTRFS_LEAF_DATA_OFFSET + 46918c2ecf20Sopenharmony_ci data_end, old_data_start + new_size - data_end); 46928c2ecf20Sopenharmony_ci } else { 46938c2ecf20Sopenharmony_ci struct btrfs_disk_key disk_key; 46948c2ecf20Sopenharmony_ci u64 offset; 46958c2ecf20Sopenharmony_ci 46968c2ecf20Sopenharmony_ci btrfs_item_key(leaf, &disk_key, slot); 46978c2ecf20Sopenharmony_ci 46988c2ecf20Sopenharmony_ci if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) { 46998c2ecf20Sopenharmony_ci unsigned long ptr; 47008c2ecf20Sopenharmony_ci struct btrfs_file_extent_item *fi; 47018c2ecf20Sopenharmony_ci 47028c2ecf20Sopenharmony_ci fi = btrfs_item_ptr(leaf, slot, 47038c2ecf20Sopenharmony_ci struct btrfs_file_extent_item); 47048c2ecf20Sopenharmony_ci fi = (struct btrfs_file_extent_item *)( 47058c2ecf20Sopenharmony_ci (unsigned long)fi - size_diff); 47068c2ecf20Sopenharmony_ci 47078c2ecf20Sopenharmony_ci if (btrfs_file_extent_type(leaf, fi) == 47088c2ecf20Sopenharmony_ci BTRFS_FILE_EXTENT_INLINE) { 47098c2ecf20Sopenharmony_ci ptr = btrfs_item_ptr_offset(leaf, slot); 47108c2ecf20Sopenharmony_ci memmove_extent_buffer(leaf, ptr, 47118c2ecf20Sopenharmony_ci (unsigned long)fi, 47128c2ecf20Sopenharmony_ci BTRFS_FILE_EXTENT_INLINE_DATA_START); 47138c2ecf20Sopenharmony_ci } 47148c2ecf20Sopenharmony_ci } 47158c2ecf20Sopenharmony_ci 47168c2ecf20Sopenharmony_ci memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + 47178c2ecf20Sopenharmony_ci data_end + size_diff, BTRFS_LEAF_DATA_OFFSET + 47188c2ecf20Sopenharmony_ci data_end, old_data_start - data_end); 47198c2ecf20Sopenharmony_ci 47208c2ecf20Sopenharmony_ci offset = btrfs_disk_key_offset(&disk_key); 47218c2ecf20Sopenharmony_ci btrfs_set_disk_key_offset(&disk_key, offset + size_diff); 47228c2ecf20Sopenharmony_ci btrfs_set_item_key(leaf, &disk_key, slot); 47238c2ecf20Sopenharmony_ci if (slot == 0) 47248c2ecf20Sopenharmony_ci fixup_low_keys(path, &disk_key, 1); 47258c2ecf20Sopenharmony_ci } 47268c2ecf20Sopenharmony_ci 47278c2ecf20Sopenharmony_ci item = btrfs_item_nr(slot); 47288c2ecf20Sopenharmony_ci btrfs_set_item_size(leaf, item, new_size); 47298c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 47308c2ecf20Sopenharmony_ci 47318c2ecf20Sopenharmony_ci if (btrfs_leaf_free_space(leaf) < 0) { 47328c2ecf20Sopenharmony_ci btrfs_print_leaf(leaf); 47338c2ecf20Sopenharmony_ci BUG(); 47348c2ecf20Sopenharmony_ci } 47358c2ecf20Sopenharmony_ci} 47368c2ecf20Sopenharmony_ci 47378c2ecf20Sopenharmony_ci/* 47388c2ecf20Sopenharmony_ci * make the item pointed to by the path bigger, data_size is the added size. 47398c2ecf20Sopenharmony_ci */ 47408c2ecf20Sopenharmony_civoid btrfs_extend_item(struct btrfs_path *path, u32 data_size) 47418c2ecf20Sopenharmony_ci{ 47428c2ecf20Sopenharmony_ci int slot; 47438c2ecf20Sopenharmony_ci struct extent_buffer *leaf; 47448c2ecf20Sopenharmony_ci struct btrfs_item *item; 47458c2ecf20Sopenharmony_ci u32 nritems; 47468c2ecf20Sopenharmony_ci unsigned int data_end; 47478c2ecf20Sopenharmony_ci unsigned int old_data; 47488c2ecf20Sopenharmony_ci unsigned int old_size; 47498c2ecf20Sopenharmony_ci int i; 47508c2ecf20Sopenharmony_ci struct btrfs_map_token token; 47518c2ecf20Sopenharmony_ci 47528c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 47538c2ecf20Sopenharmony_ci 47548c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(leaf); 47558c2ecf20Sopenharmony_ci data_end = leaf_data_end(leaf); 47568c2ecf20Sopenharmony_ci 47578c2ecf20Sopenharmony_ci if (btrfs_leaf_free_space(leaf) < data_size) { 47588c2ecf20Sopenharmony_ci btrfs_print_leaf(leaf); 47598c2ecf20Sopenharmony_ci BUG(); 47608c2ecf20Sopenharmony_ci } 47618c2ecf20Sopenharmony_ci slot = path->slots[0]; 47628c2ecf20Sopenharmony_ci old_data = btrfs_item_end_nr(leaf, slot); 47638c2ecf20Sopenharmony_ci 47648c2ecf20Sopenharmony_ci BUG_ON(slot < 0); 47658c2ecf20Sopenharmony_ci if (slot >= nritems) { 47668c2ecf20Sopenharmony_ci btrfs_print_leaf(leaf); 47678c2ecf20Sopenharmony_ci btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d", 47688c2ecf20Sopenharmony_ci slot, nritems); 47698c2ecf20Sopenharmony_ci BUG(); 47708c2ecf20Sopenharmony_ci } 47718c2ecf20Sopenharmony_ci 47728c2ecf20Sopenharmony_ci /* 47738c2ecf20Sopenharmony_ci * item0..itemN ... dataN.offset..dataN.size .. data0.size 47748c2ecf20Sopenharmony_ci */ 47758c2ecf20Sopenharmony_ci /* first correct the data pointers */ 47768c2ecf20Sopenharmony_ci btrfs_init_map_token(&token, leaf); 47778c2ecf20Sopenharmony_ci for (i = slot; i < nritems; i++) { 47788c2ecf20Sopenharmony_ci u32 ioff; 47798c2ecf20Sopenharmony_ci item = btrfs_item_nr(i); 47808c2ecf20Sopenharmony_ci 47818c2ecf20Sopenharmony_ci ioff = btrfs_token_item_offset(&token, item); 47828c2ecf20Sopenharmony_ci btrfs_set_token_item_offset(&token, item, ioff - data_size); 47838c2ecf20Sopenharmony_ci } 47848c2ecf20Sopenharmony_ci 47858c2ecf20Sopenharmony_ci /* shift the data */ 47868c2ecf20Sopenharmony_ci memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + 47878c2ecf20Sopenharmony_ci data_end - data_size, BTRFS_LEAF_DATA_OFFSET + 47888c2ecf20Sopenharmony_ci data_end, old_data - data_end); 47898c2ecf20Sopenharmony_ci 47908c2ecf20Sopenharmony_ci data_end = old_data; 47918c2ecf20Sopenharmony_ci old_size = btrfs_item_size_nr(leaf, slot); 47928c2ecf20Sopenharmony_ci item = btrfs_item_nr(slot); 47938c2ecf20Sopenharmony_ci btrfs_set_item_size(leaf, item, old_size + data_size); 47948c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 47958c2ecf20Sopenharmony_ci 47968c2ecf20Sopenharmony_ci if (btrfs_leaf_free_space(leaf) < 0) { 47978c2ecf20Sopenharmony_ci btrfs_print_leaf(leaf); 47988c2ecf20Sopenharmony_ci BUG(); 47998c2ecf20Sopenharmony_ci } 48008c2ecf20Sopenharmony_ci} 48018c2ecf20Sopenharmony_ci 48028c2ecf20Sopenharmony_ci/** 48038c2ecf20Sopenharmony_ci * setup_items_for_insert - Helper called before inserting one or more items 48048c2ecf20Sopenharmony_ci * to a leaf. Main purpose is to save stack depth by doing the bulk of the work 48058c2ecf20Sopenharmony_ci * in a function that doesn't call btrfs_search_slot 48068c2ecf20Sopenharmony_ci * 48078c2ecf20Sopenharmony_ci * @root: root we are inserting items to 48088c2ecf20Sopenharmony_ci * @path: points to the leaf/slot where we are going to insert new items 48098c2ecf20Sopenharmony_ci * @cpu_key: array of keys for items to be inserted 48108c2ecf20Sopenharmony_ci * @data_size: size of the body of each item we are going to insert 48118c2ecf20Sopenharmony_ci * @nr: size of @cpu_key/@data_size arrays 48128c2ecf20Sopenharmony_ci */ 48138c2ecf20Sopenharmony_civoid setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path, 48148c2ecf20Sopenharmony_ci const struct btrfs_key *cpu_key, u32 *data_size, 48158c2ecf20Sopenharmony_ci int nr) 48168c2ecf20Sopenharmony_ci{ 48178c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 48188c2ecf20Sopenharmony_ci struct btrfs_item *item; 48198c2ecf20Sopenharmony_ci int i; 48208c2ecf20Sopenharmony_ci u32 nritems; 48218c2ecf20Sopenharmony_ci unsigned int data_end; 48228c2ecf20Sopenharmony_ci struct btrfs_disk_key disk_key; 48238c2ecf20Sopenharmony_ci struct extent_buffer *leaf; 48248c2ecf20Sopenharmony_ci int slot; 48258c2ecf20Sopenharmony_ci struct btrfs_map_token token; 48268c2ecf20Sopenharmony_ci u32 total_size; 48278c2ecf20Sopenharmony_ci u32 total_data = 0; 48288c2ecf20Sopenharmony_ci 48298c2ecf20Sopenharmony_ci for (i = 0; i < nr; i++) 48308c2ecf20Sopenharmony_ci total_data += data_size[i]; 48318c2ecf20Sopenharmony_ci total_size = total_data + (nr * sizeof(struct btrfs_item)); 48328c2ecf20Sopenharmony_ci 48338c2ecf20Sopenharmony_ci if (path->slots[0] == 0) { 48348c2ecf20Sopenharmony_ci btrfs_cpu_key_to_disk(&disk_key, cpu_key); 48358c2ecf20Sopenharmony_ci fixup_low_keys(path, &disk_key, 1); 48368c2ecf20Sopenharmony_ci } 48378c2ecf20Sopenharmony_ci btrfs_unlock_up_safe(path, 1); 48388c2ecf20Sopenharmony_ci 48398c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 48408c2ecf20Sopenharmony_ci slot = path->slots[0]; 48418c2ecf20Sopenharmony_ci 48428c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(leaf); 48438c2ecf20Sopenharmony_ci data_end = leaf_data_end(leaf); 48448c2ecf20Sopenharmony_ci 48458c2ecf20Sopenharmony_ci if (btrfs_leaf_free_space(leaf) < total_size) { 48468c2ecf20Sopenharmony_ci btrfs_print_leaf(leaf); 48478c2ecf20Sopenharmony_ci btrfs_crit(fs_info, "not enough freespace need %u have %d", 48488c2ecf20Sopenharmony_ci total_size, btrfs_leaf_free_space(leaf)); 48498c2ecf20Sopenharmony_ci BUG(); 48508c2ecf20Sopenharmony_ci } 48518c2ecf20Sopenharmony_ci 48528c2ecf20Sopenharmony_ci btrfs_init_map_token(&token, leaf); 48538c2ecf20Sopenharmony_ci if (slot != nritems) { 48548c2ecf20Sopenharmony_ci unsigned int old_data = btrfs_item_end_nr(leaf, slot); 48558c2ecf20Sopenharmony_ci 48568c2ecf20Sopenharmony_ci if (old_data < data_end) { 48578c2ecf20Sopenharmony_ci btrfs_print_leaf(leaf); 48588c2ecf20Sopenharmony_ci btrfs_crit(fs_info, 48598c2ecf20Sopenharmony_ci "item at slot %d with data offset %u beyond data end of leaf %u", 48608c2ecf20Sopenharmony_ci slot, old_data, data_end); 48618c2ecf20Sopenharmony_ci BUG(); 48628c2ecf20Sopenharmony_ci } 48638c2ecf20Sopenharmony_ci /* 48648c2ecf20Sopenharmony_ci * item0..itemN ... dataN.offset..dataN.size .. data0.size 48658c2ecf20Sopenharmony_ci */ 48668c2ecf20Sopenharmony_ci /* first correct the data pointers */ 48678c2ecf20Sopenharmony_ci for (i = slot; i < nritems; i++) { 48688c2ecf20Sopenharmony_ci u32 ioff; 48698c2ecf20Sopenharmony_ci 48708c2ecf20Sopenharmony_ci item = btrfs_item_nr(i); 48718c2ecf20Sopenharmony_ci ioff = btrfs_token_item_offset(&token, item); 48728c2ecf20Sopenharmony_ci btrfs_set_token_item_offset(&token, item, 48738c2ecf20Sopenharmony_ci ioff - total_data); 48748c2ecf20Sopenharmony_ci } 48758c2ecf20Sopenharmony_ci /* shift the items */ 48768c2ecf20Sopenharmony_ci memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), 48778c2ecf20Sopenharmony_ci btrfs_item_nr_offset(slot), 48788c2ecf20Sopenharmony_ci (nritems - slot) * sizeof(struct btrfs_item)); 48798c2ecf20Sopenharmony_ci 48808c2ecf20Sopenharmony_ci /* shift the data */ 48818c2ecf20Sopenharmony_ci memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + 48828c2ecf20Sopenharmony_ci data_end - total_data, BTRFS_LEAF_DATA_OFFSET + 48838c2ecf20Sopenharmony_ci data_end, old_data - data_end); 48848c2ecf20Sopenharmony_ci data_end = old_data; 48858c2ecf20Sopenharmony_ci } 48868c2ecf20Sopenharmony_ci 48878c2ecf20Sopenharmony_ci /* setup the item for the new data */ 48888c2ecf20Sopenharmony_ci for (i = 0; i < nr; i++) { 48898c2ecf20Sopenharmony_ci btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); 48908c2ecf20Sopenharmony_ci btrfs_set_item_key(leaf, &disk_key, slot + i); 48918c2ecf20Sopenharmony_ci item = btrfs_item_nr(slot + i); 48928c2ecf20Sopenharmony_ci data_end -= data_size[i]; 48938c2ecf20Sopenharmony_ci btrfs_set_token_item_offset(&token, item, data_end); 48948c2ecf20Sopenharmony_ci btrfs_set_token_item_size(&token, item, data_size[i]); 48958c2ecf20Sopenharmony_ci } 48968c2ecf20Sopenharmony_ci 48978c2ecf20Sopenharmony_ci btrfs_set_header_nritems(leaf, nritems + nr); 48988c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 48998c2ecf20Sopenharmony_ci 49008c2ecf20Sopenharmony_ci if (btrfs_leaf_free_space(leaf) < 0) { 49018c2ecf20Sopenharmony_ci btrfs_print_leaf(leaf); 49028c2ecf20Sopenharmony_ci BUG(); 49038c2ecf20Sopenharmony_ci } 49048c2ecf20Sopenharmony_ci} 49058c2ecf20Sopenharmony_ci 49068c2ecf20Sopenharmony_ci/* 49078c2ecf20Sopenharmony_ci * Given a key and some data, insert items into the tree. 49088c2ecf20Sopenharmony_ci * This does all the path init required, making room in the tree if needed. 49098c2ecf20Sopenharmony_ci */ 49108c2ecf20Sopenharmony_ciint btrfs_insert_empty_items(struct btrfs_trans_handle *trans, 49118c2ecf20Sopenharmony_ci struct btrfs_root *root, 49128c2ecf20Sopenharmony_ci struct btrfs_path *path, 49138c2ecf20Sopenharmony_ci const struct btrfs_key *cpu_key, u32 *data_size, 49148c2ecf20Sopenharmony_ci int nr) 49158c2ecf20Sopenharmony_ci{ 49168c2ecf20Sopenharmony_ci int ret = 0; 49178c2ecf20Sopenharmony_ci int slot; 49188c2ecf20Sopenharmony_ci int i; 49198c2ecf20Sopenharmony_ci u32 total_size = 0; 49208c2ecf20Sopenharmony_ci u32 total_data = 0; 49218c2ecf20Sopenharmony_ci 49228c2ecf20Sopenharmony_ci for (i = 0; i < nr; i++) 49238c2ecf20Sopenharmony_ci total_data += data_size[i]; 49248c2ecf20Sopenharmony_ci 49258c2ecf20Sopenharmony_ci total_size = total_data + (nr * sizeof(struct btrfs_item)); 49268c2ecf20Sopenharmony_ci ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); 49278c2ecf20Sopenharmony_ci if (ret == 0) 49288c2ecf20Sopenharmony_ci return -EEXIST; 49298c2ecf20Sopenharmony_ci if (ret < 0) 49308c2ecf20Sopenharmony_ci return ret; 49318c2ecf20Sopenharmony_ci 49328c2ecf20Sopenharmony_ci slot = path->slots[0]; 49338c2ecf20Sopenharmony_ci BUG_ON(slot < 0); 49348c2ecf20Sopenharmony_ci 49358c2ecf20Sopenharmony_ci setup_items_for_insert(root, path, cpu_key, data_size, nr); 49368c2ecf20Sopenharmony_ci return 0; 49378c2ecf20Sopenharmony_ci} 49388c2ecf20Sopenharmony_ci 49398c2ecf20Sopenharmony_ci/* 49408c2ecf20Sopenharmony_ci * Given a key and some data, insert an item into the tree. 49418c2ecf20Sopenharmony_ci * This does all the path init required, making room in the tree if needed. 49428c2ecf20Sopenharmony_ci */ 49438c2ecf20Sopenharmony_ciint btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, 49448c2ecf20Sopenharmony_ci const struct btrfs_key *cpu_key, void *data, 49458c2ecf20Sopenharmony_ci u32 data_size) 49468c2ecf20Sopenharmony_ci{ 49478c2ecf20Sopenharmony_ci int ret = 0; 49488c2ecf20Sopenharmony_ci struct btrfs_path *path; 49498c2ecf20Sopenharmony_ci struct extent_buffer *leaf; 49508c2ecf20Sopenharmony_ci unsigned long ptr; 49518c2ecf20Sopenharmony_ci 49528c2ecf20Sopenharmony_ci path = btrfs_alloc_path(); 49538c2ecf20Sopenharmony_ci if (!path) 49548c2ecf20Sopenharmony_ci return -ENOMEM; 49558c2ecf20Sopenharmony_ci ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); 49568c2ecf20Sopenharmony_ci if (!ret) { 49578c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 49588c2ecf20Sopenharmony_ci ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 49598c2ecf20Sopenharmony_ci write_extent_buffer(leaf, data, ptr, data_size); 49608c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 49618c2ecf20Sopenharmony_ci } 49628c2ecf20Sopenharmony_ci btrfs_free_path(path); 49638c2ecf20Sopenharmony_ci return ret; 49648c2ecf20Sopenharmony_ci} 49658c2ecf20Sopenharmony_ci 49668c2ecf20Sopenharmony_ci/* 49678c2ecf20Sopenharmony_ci * delete the pointer from a given node. 49688c2ecf20Sopenharmony_ci * 49698c2ecf20Sopenharmony_ci * the tree should have been previously balanced so the deletion does not 49708c2ecf20Sopenharmony_ci * empty a node. 49718c2ecf20Sopenharmony_ci */ 49728c2ecf20Sopenharmony_cistatic void del_ptr(struct btrfs_root *root, struct btrfs_path *path, 49738c2ecf20Sopenharmony_ci int level, int slot) 49748c2ecf20Sopenharmony_ci{ 49758c2ecf20Sopenharmony_ci struct extent_buffer *parent = path->nodes[level]; 49768c2ecf20Sopenharmony_ci u32 nritems; 49778c2ecf20Sopenharmony_ci int ret; 49788c2ecf20Sopenharmony_ci 49798c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(parent); 49808c2ecf20Sopenharmony_ci if (slot != nritems - 1) { 49818c2ecf20Sopenharmony_ci if (level) { 49828c2ecf20Sopenharmony_ci ret = tree_mod_log_insert_move(parent, slot, slot + 1, 49838c2ecf20Sopenharmony_ci nritems - slot - 1); 49848c2ecf20Sopenharmony_ci BUG_ON(ret < 0); 49858c2ecf20Sopenharmony_ci } 49868c2ecf20Sopenharmony_ci memmove_extent_buffer(parent, 49878c2ecf20Sopenharmony_ci btrfs_node_key_ptr_offset(slot), 49888c2ecf20Sopenharmony_ci btrfs_node_key_ptr_offset(slot + 1), 49898c2ecf20Sopenharmony_ci sizeof(struct btrfs_key_ptr) * 49908c2ecf20Sopenharmony_ci (nritems - slot - 1)); 49918c2ecf20Sopenharmony_ci } else if (level) { 49928c2ecf20Sopenharmony_ci ret = tree_mod_log_insert_key(parent, slot, MOD_LOG_KEY_REMOVE, 49938c2ecf20Sopenharmony_ci GFP_NOFS); 49948c2ecf20Sopenharmony_ci BUG_ON(ret < 0); 49958c2ecf20Sopenharmony_ci } 49968c2ecf20Sopenharmony_ci 49978c2ecf20Sopenharmony_ci nritems--; 49988c2ecf20Sopenharmony_ci btrfs_set_header_nritems(parent, nritems); 49998c2ecf20Sopenharmony_ci if (nritems == 0 && parent == root->node) { 50008c2ecf20Sopenharmony_ci BUG_ON(btrfs_header_level(root->node) != 1); 50018c2ecf20Sopenharmony_ci /* just turn the root into a leaf and break */ 50028c2ecf20Sopenharmony_ci btrfs_set_header_level(root->node, 0); 50038c2ecf20Sopenharmony_ci } else if (slot == 0) { 50048c2ecf20Sopenharmony_ci struct btrfs_disk_key disk_key; 50058c2ecf20Sopenharmony_ci 50068c2ecf20Sopenharmony_ci btrfs_node_key(parent, &disk_key, 0); 50078c2ecf20Sopenharmony_ci fixup_low_keys(path, &disk_key, level + 1); 50088c2ecf20Sopenharmony_ci } 50098c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(parent); 50108c2ecf20Sopenharmony_ci} 50118c2ecf20Sopenharmony_ci 50128c2ecf20Sopenharmony_ci/* 50138c2ecf20Sopenharmony_ci * a helper function to delete the leaf pointed to by path->slots[1] and 50148c2ecf20Sopenharmony_ci * path->nodes[1]. 50158c2ecf20Sopenharmony_ci * 50168c2ecf20Sopenharmony_ci * This deletes the pointer in path->nodes[1] and frees the leaf 50178c2ecf20Sopenharmony_ci * block extent. zero is returned if it all worked out, < 0 otherwise. 50188c2ecf20Sopenharmony_ci * 50198c2ecf20Sopenharmony_ci * The path must have already been setup for deleting the leaf, including 50208c2ecf20Sopenharmony_ci * all the proper balancing. path->nodes[1] must be locked. 50218c2ecf20Sopenharmony_ci */ 50228c2ecf20Sopenharmony_cistatic noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, 50238c2ecf20Sopenharmony_ci struct btrfs_root *root, 50248c2ecf20Sopenharmony_ci struct btrfs_path *path, 50258c2ecf20Sopenharmony_ci struct extent_buffer *leaf) 50268c2ecf20Sopenharmony_ci{ 50278c2ecf20Sopenharmony_ci WARN_ON(btrfs_header_generation(leaf) != trans->transid); 50288c2ecf20Sopenharmony_ci del_ptr(root, path, 1, path->slots[1]); 50298c2ecf20Sopenharmony_ci 50308c2ecf20Sopenharmony_ci /* 50318c2ecf20Sopenharmony_ci * btrfs_free_extent is expensive, we want to make sure we 50328c2ecf20Sopenharmony_ci * aren't holding any locks when we call it 50338c2ecf20Sopenharmony_ci */ 50348c2ecf20Sopenharmony_ci btrfs_unlock_up_safe(path, 0); 50358c2ecf20Sopenharmony_ci 50368c2ecf20Sopenharmony_ci root_sub_used(root, leaf->len); 50378c2ecf20Sopenharmony_ci 50388c2ecf20Sopenharmony_ci atomic_inc(&leaf->refs); 50398c2ecf20Sopenharmony_ci btrfs_free_tree_block(trans, root, leaf, 0, 1); 50408c2ecf20Sopenharmony_ci free_extent_buffer_stale(leaf); 50418c2ecf20Sopenharmony_ci} 50428c2ecf20Sopenharmony_ci/* 50438c2ecf20Sopenharmony_ci * delete the item at the leaf level in path. If that empties 50448c2ecf20Sopenharmony_ci * the leaf, remove it from the tree 50458c2ecf20Sopenharmony_ci */ 50468c2ecf20Sopenharmony_ciint btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, 50478c2ecf20Sopenharmony_ci struct btrfs_path *path, int slot, int nr) 50488c2ecf20Sopenharmony_ci{ 50498c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 50508c2ecf20Sopenharmony_ci struct extent_buffer *leaf; 50518c2ecf20Sopenharmony_ci struct btrfs_item *item; 50528c2ecf20Sopenharmony_ci u32 last_off; 50538c2ecf20Sopenharmony_ci u32 dsize = 0; 50548c2ecf20Sopenharmony_ci int ret = 0; 50558c2ecf20Sopenharmony_ci int wret; 50568c2ecf20Sopenharmony_ci int i; 50578c2ecf20Sopenharmony_ci u32 nritems; 50588c2ecf20Sopenharmony_ci 50598c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 50608c2ecf20Sopenharmony_ci last_off = btrfs_item_offset_nr(leaf, slot + nr - 1); 50618c2ecf20Sopenharmony_ci 50628c2ecf20Sopenharmony_ci for (i = 0; i < nr; i++) 50638c2ecf20Sopenharmony_ci dsize += btrfs_item_size_nr(leaf, slot + i); 50648c2ecf20Sopenharmony_ci 50658c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(leaf); 50668c2ecf20Sopenharmony_ci 50678c2ecf20Sopenharmony_ci if (slot + nr != nritems) { 50688c2ecf20Sopenharmony_ci int data_end = leaf_data_end(leaf); 50698c2ecf20Sopenharmony_ci struct btrfs_map_token token; 50708c2ecf20Sopenharmony_ci 50718c2ecf20Sopenharmony_ci memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET + 50728c2ecf20Sopenharmony_ci data_end + dsize, 50738c2ecf20Sopenharmony_ci BTRFS_LEAF_DATA_OFFSET + data_end, 50748c2ecf20Sopenharmony_ci last_off - data_end); 50758c2ecf20Sopenharmony_ci 50768c2ecf20Sopenharmony_ci btrfs_init_map_token(&token, leaf); 50778c2ecf20Sopenharmony_ci for (i = slot + nr; i < nritems; i++) { 50788c2ecf20Sopenharmony_ci u32 ioff; 50798c2ecf20Sopenharmony_ci 50808c2ecf20Sopenharmony_ci item = btrfs_item_nr(i); 50818c2ecf20Sopenharmony_ci ioff = btrfs_token_item_offset(&token, item); 50828c2ecf20Sopenharmony_ci btrfs_set_token_item_offset(&token, item, ioff + dsize); 50838c2ecf20Sopenharmony_ci } 50848c2ecf20Sopenharmony_ci 50858c2ecf20Sopenharmony_ci memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), 50868c2ecf20Sopenharmony_ci btrfs_item_nr_offset(slot + nr), 50878c2ecf20Sopenharmony_ci sizeof(struct btrfs_item) * 50888c2ecf20Sopenharmony_ci (nritems - slot - nr)); 50898c2ecf20Sopenharmony_ci } 50908c2ecf20Sopenharmony_ci btrfs_set_header_nritems(leaf, nritems - nr); 50918c2ecf20Sopenharmony_ci nritems -= nr; 50928c2ecf20Sopenharmony_ci 50938c2ecf20Sopenharmony_ci /* delete the leaf if we've emptied it */ 50948c2ecf20Sopenharmony_ci if (nritems == 0) { 50958c2ecf20Sopenharmony_ci if (leaf == root->node) { 50968c2ecf20Sopenharmony_ci btrfs_set_header_level(leaf, 0); 50978c2ecf20Sopenharmony_ci } else { 50988c2ecf20Sopenharmony_ci btrfs_set_path_blocking(path); 50998c2ecf20Sopenharmony_ci btrfs_clean_tree_block(leaf); 51008c2ecf20Sopenharmony_ci btrfs_del_leaf(trans, root, path, leaf); 51018c2ecf20Sopenharmony_ci } 51028c2ecf20Sopenharmony_ci } else { 51038c2ecf20Sopenharmony_ci int used = leaf_space_used(leaf, 0, nritems); 51048c2ecf20Sopenharmony_ci if (slot == 0) { 51058c2ecf20Sopenharmony_ci struct btrfs_disk_key disk_key; 51068c2ecf20Sopenharmony_ci 51078c2ecf20Sopenharmony_ci btrfs_item_key(leaf, &disk_key, 0); 51088c2ecf20Sopenharmony_ci fixup_low_keys(path, &disk_key, 1); 51098c2ecf20Sopenharmony_ci } 51108c2ecf20Sopenharmony_ci 51118c2ecf20Sopenharmony_ci /* delete the leaf if it is mostly empty */ 51128c2ecf20Sopenharmony_ci if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) { 51138c2ecf20Sopenharmony_ci /* push_leaf_left fixes the path. 51148c2ecf20Sopenharmony_ci * make sure the path still points to our leaf 51158c2ecf20Sopenharmony_ci * for possible call to del_ptr below 51168c2ecf20Sopenharmony_ci */ 51178c2ecf20Sopenharmony_ci slot = path->slots[1]; 51188c2ecf20Sopenharmony_ci atomic_inc(&leaf->refs); 51198c2ecf20Sopenharmony_ci 51208c2ecf20Sopenharmony_ci btrfs_set_path_blocking(path); 51218c2ecf20Sopenharmony_ci wret = push_leaf_left(trans, root, path, 1, 1, 51228c2ecf20Sopenharmony_ci 1, (u32)-1); 51238c2ecf20Sopenharmony_ci if (wret < 0 && wret != -ENOSPC) 51248c2ecf20Sopenharmony_ci ret = wret; 51258c2ecf20Sopenharmony_ci 51268c2ecf20Sopenharmony_ci if (path->nodes[0] == leaf && 51278c2ecf20Sopenharmony_ci btrfs_header_nritems(leaf)) { 51288c2ecf20Sopenharmony_ci wret = push_leaf_right(trans, root, path, 1, 51298c2ecf20Sopenharmony_ci 1, 1, 0); 51308c2ecf20Sopenharmony_ci if (wret < 0 && wret != -ENOSPC) 51318c2ecf20Sopenharmony_ci ret = wret; 51328c2ecf20Sopenharmony_ci } 51338c2ecf20Sopenharmony_ci 51348c2ecf20Sopenharmony_ci if (btrfs_header_nritems(leaf) == 0) { 51358c2ecf20Sopenharmony_ci path->slots[1] = slot; 51368c2ecf20Sopenharmony_ci btrfs_del_leaf(trans, root, path, leaf); 51378c2ecf20Sopenharmony_ci free_extent_buffer(leaf); 51388c2ecf20Sopenharmony_ci ret = 0; 51398c2ecf20Sopenharmony_ci } else { 51408c2ecf20Sopenharmony_ci /* if we're still in the path, make sure 51418c2ecf20Sopenharmony_ci * we're dirty. Otherwise, one of the 51428c2ecf20Sopenharmony_ci * push_leaf functions must have already 51438c2ecf20Sopenharmony_ci * dirtied this buffer 51448c2ecf20Sopenharmony_ci */ 51458c2ecf20Sopenharmony_ci if (path->nodes[0] == leaf) 51468c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 51478c2ecf20Sopenharmony_ci free_extent_buffer(leaf); 51488c2ecf20Sopenharmony_ci } 51498c2ecf20Sopenharmony_ci } else { 51508c2ecf20Sopenharmony_ci btrfs_mark_buffer_dirty(leaf); 51518c2ecf20Sopenharmony_ci } 51528c2ecf20Sopenharmony_ci } 51538c2ecf20Sopenharmony_ci return ret; 51548c2ecf20Sopenharmony_ci} 51558c2ecf20Sopenharmony_ci 51568c2ecf20Sopenharmony_ci/* 51578c2ecf20Sopenharmony_ci * search the tree again to find a leaf with lesser keys 51588c2ecf20Sopenharmony_ci * returns 0 if it found something or 1 if there are no lesser leaves. 51598c2ecf20Sopenharmony_ci * returns < 0 on io errors. 51608c2ecf20Sopenharmony_ci * 51618c2ecf20Sopenharmony_ci * This may release the path, and so you may lose any locks held at the 51628c2ecf20Sopenharmony_ci * time you call it. 51638c2ecf20Sopenharmony_ci */ 51648c2ecf20Sopenharmony_ciint btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) 51658c2ecf20Sopenharmony_ci{ 51668c2ecf20Sopenharmony_ci struct btrfs_key key; 51678c2ecf20Sopenharmony_ci struct btrfs_key orig_key; 51688c2ecf20Sopenharmony_ci struct btrfs_disk_key found_key; 51698c2ecf20Sopenharmony_ci int ret; 51708c2ecf20Sopenharmony_ci 51718c2ecf20Sopenharmony_ci btrfs_item_key_to_cpu(path->nodes[0], &key, 0); 51728c2ecf20Sopenharmony_ci orig_key = key; 51738c2ecf20Sopenharmony_ci 51748c2ecf20Sopenharmony_ci if (key.offset > 0) { 51758c2ecf20Sopenharmony_ci key.offset--; 51768c2ecf20Sopenharmony_ci } else if (key.type > 0) { 51778c2ecf20Sopenharmony_ci key.type--; 51788c2ecf20Sopenharmony_ci key.offset = (u64)-1; 51798c2ecf20Sopenharmony_ci } else if (key.objectid > 0) { 51808c2ecf20Sopenharmony_ci key.objectid--; 51818c2ecf20Sopenharmony_ci key.type = (u8)-1; 51828c2ecf20Sopenharmony_ci key.offset = (u64)-1; 51838c2ecf20Sopenharmony_ci } else { 51848c2ecf20Sopenharmony_ci return 1; 51858c2ecf20Sopenharmony_ci } 51868c2ecf20Sopenharmony_ci 51878c2ecf20Sopenharmony_ci btrfs_release_path(path); 51888c2ecf20Sopenharmony_ci ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 51898c2ecf20Sopenharmony_ci if (ret <= 0) 51908c2ecf20Sopenharmony_ci return ret; 51918c2ecf20Sopenharmony_ci 51928c2ecf20Sopenharmony_ci /* 51938c2ecf20Sopenharmony_ci * Previous key not found. Even if we were at slot 0 of the leaf we had 51948c2ecf20Sopenharmony_ci * before releasing the path and calling btrfs_search_slot(), we now may 51958c2ecf20Sopenharmony_ci * be in a slot pointing to the same original key - this can happen if 51968c2ecf20Sopenharmony_ci * after we released the path, one of more items were moved from a 51978c2ecf20Sopenharmony_ci * sibling leaf into the front of the leaf we had due to an insertion 51988c2ecf20Sopenharmony_ci * (see push_leaf_right()). 51998c2ecf20Sopenharmony_ci * If we hit this case and our slot is > 0 and just decrement the slot 52008c2ecf20Sopenharmony_ci * so that the caller does not process the same key again, which may or 52018c2ecf20Sopenharmony_ci * may not break the caller, depending on its logic. 52028c2ecf20Sopenharmony_ci */ 52038c2ecf20Sopenharmony_ci if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) { 52048c2ecf20Sopenharmony_ci btrfs_item_key(path->nodes[0], &found_key, path->slots[0]); 52058c2ecf20Sopenharmony_ci ret = comp_keys(&found_key, &orig_key); 52068c2ecf20Sopenharmony_ci if (ret == 0) { 52078c2ecf20Sopenharmony_ci if (path->slots[0] > 0) { 52088c2ecf20Sopenharmony_ci path->slots[0]--; 52098c2ecf20Sopenharmony_ci return 0; 52108c2ecf20Sopenharmony_ci } 52118c2ecf20Sopenharmony_ci /* 52128c2ecf20Sopenharmony_ci * At slot 0, same key as before, it means orig_key is 52138c2ecf20Sopenharmony_ci * the lowest, leftmost, key in the tree. We're done. 52148c2ecf20Sopenharmony_ci */ 52158c2ecf20Sopenharmony_ci return 1; 52168c2ecf20Sopenharmony_ci } 52178c2ecf20Sopenharmony_ci } 52188c2ecf20Sopenharmony_ci 52198c2ecf20Sopenharmony_ci btrfs_item_key(path->nodes[0], &found_key, 0); 52208c2ecf20Sopenharmony_ci ret = comp_keys(&found_key, &key); 52218c2ecf20Sopenharmony_ci /* 52228c2ecf20Sopenharmony_ci * We might have had an item with the previous key in the tree right 52238c2ecf20Sopenharmony_ci * before we released our path. And after we released our path, that 52248c2ecf20Sopenharmony_ci * item might have been pushed to the first slot (0) of the leaf we 52258c2ecf20Sopenharmony_ci * were holding due to a tree balance. Alternatively, an item with the 52268c2ecf20Sopenharmony_ci * previous key can exist as the only element of a leaf (big fat item). 52278c2ecf20Sopenharmony_ci * Therefore account for these 2 cases, so that our callers (like 52288c2ecf20Sopenharmony_ci * btrfs_previous_item) don't miss an existing item with a key matching 52298c2ecf20Sopenharmony_ci * the previous key we computed above. 52308c2ecf20Sopenharmony_ci */ 52318c2ecf20Sopenharmony_ci if (ret <= 0) 52328c2ecf20Sopenharmony_ci return 0; 52338c2ecf20Sopenharmony_ci return 1; 52348c2ecf20Sopenharmony_ci} 52358c2ecf20Sopenharmony_ci 52368c2ecf20Sopenharmony_ci/* 52378c2ecf20Sopenharmony_ci * A helper function to walk down the tree starting at min_key, and looking 52388c2ecf20Sopenharmony_ci * for nodes or leaves that are have a minimum transaction id. 52398c2ecf20Sopenharmony_ci * This is used by the btree defrag code, and tree logging 52408c2ecf20Sopenharmony_ci * 52418c2ecf20Sopenharmony_ci * This does not cow, but it does stuff the starting key it finds back 52428c2ecf20Sopenharmony_ci * into min_key, so you can call btrfs_search_slot with cow=1 on the 52438c2ecf20Sopenharmony_ci * key and get a writable path. 52448c2ecf20Sopenharmony_ci * 52458c2ecf20Sopenharmony_ci * This honors path->lowest_level to prevent descent past a given level 52468c2ecf20Sopenharmony_ci * of the tree. 52478c2ecf20Sopenharmony_ci * 52488c2ecf20Sopenharmony_ci * min_trans indicates the oldest transaction that you are interested 52498c2ecf20Sopenharmony_ci * in walking through. Any nodes or leaves older than min_trans are 52508c2ecf20Sopenharmony_ci * skipped over (without reading them). 52518c2ecf20Sopenharmony_ci * 52528c2ecf20Sopenharmony_ci * returns zero if something useful was found, < 0 on error and 1 if there 52538c2ecf20Sopenharmony_ci * was nothing in the tree that matched the search criteria. 52548c2ecf20Sopenharmony_ci */ 52558c2ecf20Sopenharmony_ciint btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, 52568c2ecf20Sopenharmony_ci struct btrfs_path *path, 52578c2ecf20Sopenharmony_ci u64 min_trans) 52588c2ecf20Sopenharmony_ci{ 52598c2ecf20Sopenharmony_ci struct extent_buffer *cur; 52608c2ecf20Sopenharmony_ci struct btrfs_key found_key; 52618c2ecf20Sopenharmony_ci int slot; 52628c2ecf20Sopenharmony_ci int sret; 52638c2ecf20Sopenharmony_ci u32 nritems; 52648c2ecf20Sopenharmony_ci int level; 52658c2ecf20Sopenharmony_ci int ret = 1; 52668c2ecf20Sopenharmony_ci int keep_locks = path->keep_locks; 52678c2ecf20Sopenharmony_ci 52688c2ecf20Sopenharmony_ci path->keep_locks = 1; 52698c2ecf20Sopenharmony_ciagain: 52708c2ecf20Sopenharmony_ci cur = btrfs_read_lock_root_node(root); 52718c2ecf20Sopenharmony_ci level = btrfs_header_level(cur); 52728c2ecf20Sopenharmony_ci WARN_ON(path->nodes[level]); 52738c2ecf20Sopenharmony_ci path->nodes[level] = cur; 52748c2ecf20Sopenharmony_ci path->locks[level] = BTRFS_READ_LOCK; 52758c2ecf20Sopenharmony_ci 52768c2ecf20Sopenharmony_ci if (btrfs_header_generation(cur) < min_trans) { 52778c2ecf20Sopenharmony_ci ret = 1; 52788c2ecf20Sopenharmony_ci goto out; 52798c2ecf20Sopenharmony_ci } 52808c2ecf20Sopenharmony_ci while (1) { 52818c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(cur); 52828c2ecf20Sopenharmony_ci level = btrfs_header_level(cur); 52838c2ecf20Sopenharmony_ci sret = btrfs_bin_search(cur, min_key, &slot); 52848c2ecf20Sopenharmony_ci if (sret < 0) { 52858c2ecf20Sopenharmony_ci ret = sret; 52868c2ecf20Sopenharmony_ci goto out; 52878c2ecf20Sopenharmony_ci } 52888c2ecf20Sopenharmony_ci 52898c2ecf20Sopenharmony_ci /* at the lowest level, we're done, setup the path and exit */ 52908c2ecf20Sopenharmony_ci if (level == path->lowest_level) { 52918c2ecf20Sopenharmony_ci if (slot >= nritems) 52928c2ecf20Sopenharmony_ci goto find_next_key; 52938c2ecf20Sopenharmony_ci ret = 0; 52948c2ecf20Sopenharmony_ci path->slots[level] = slot; 52958c2ecf20Sopenharmony_ci btrfs_item_key_to_cpu(cur, &found_key, slot); 52968c2ecf20Sopenharmony_ci goto out; 52978c2ecf20Sopenharmony_ci } 52988c2ecf20Sopenharmony_ci if (sret && slot > 0) 52998c2ecf20Sopenharmony_ci slot--; 53008c2ecf20Sopenharmony_ci /* 53018c2ecf20Sopenharmony_ci * check this node pointer against the min_trans parameters. 53028c2ecf20Sopenharmony_ci * If it is too old, skip to the next one. 53038c2ecf20Sopenharmony_ci */ 53048c2ecf20Sopenharmony_ci while (slot < nritems) { 53058c2ecf20Sopenharmony_ci u64 gen; 53068c2ecf20Sopenharmony_ci 53078c2ecf20Sopenharmony_ci gen = btrfs_node_ptr_generation(cur, slot); 53088c2ecf20Sopenharmony_ci if (gen < min_trans) { 53098c2ecf20Sopenharmony_ci slot++; 53108c2ecf20Sopenharmony_ci continue; 53118c2ecf20Sopenharmony_ci } 53128c2ecf20Sopenharmony_ci break; 53138c2ecf20Sopenharmony_ci } 53148c2ecf20Sopenharmony_cifind_next_key: 53158c2ecf20Sopenharmony_ci /* 53168c2ecf20Sopenharmony_ci * we didn't find a candidate key in this node, walk forward 53178c2ecf20Sopenharmony_ci * and find another one 53188c2ecf20Sopenharmony_ci */ 53198c2ecf20Sopenharmony_ci if (slot >= nritems) { 53208c2ecf20Sopenharmony_ci path->slots[level] = slot; 53218c2ecf20Sopenharmony_ci btrfs_set_path_blocking(path); 53228c2ecf20Sopenharmony_ci sret = btrfs_find_next_key(root, path, min_key, level, 53238c2ecf20Sopenharmony_ci min_trans); 53248c2ecf20Sopenharmony_ci if (sret == 0) { 53258c2ecf20Sopenharmony_ci btrfs_release_path(path); 53268c2ecf20Sopenharmony_ci goto again; 53278c2ecf20Sopenharmony_ci } else { 53288c2ecf20Sopenharmony_ci goto out; 53298c2ecf20Sopenharmony_ci } 53308c2ecf20Sopenharmony_ci } 53318c2ecf20Sopenharmony_ci /* save our key for returning back */ 53328c2ecf20Sopenharmony_ci btrfs_node_key_to_cpu(cur, &found_key, slot); 53338c2ecf20Sopenharmony_ci path->slots[level] = slot; 53348c2ecf20Sopenharmony_ci if (level == path->lowest_level) { 53358c2ecf20Sopenharmony_ci ret = 0; 53368c2ecf20Sopenharmony_ci goto out; 53378c2ecf20Sopenharmony_ci } 53388c2ecf20Sopenharmony_ci btrfs_set_path_blocking(path); 53398c2ecf20Sopenharmony_ci cur = btrfs_read_node_slot(cur, slot); 53408c2ecf20Sopenharmony_ci if (IS_ERR(cur)) { 53418c2ecf20Sopenharmony_ci ret = PTR_ERR(cur); 53428c2ecf20Sopenharmony_ci goto out; 53438c2ecf20Sopenharmony_ci } 53448c2ecf20Sopenharmony_ci 53458c2ecf20Sopenharmony_ci btrfs_tree_read_lock(cur); 53468c2ecf20Sopenharmony_ci 53478c2ecf20Sopenharmony_ci path->locks[level - 1] = BTRFS_READ_LOCK; 53488c2ecf20Sopenharmony_ci path->nodes[level - 1] = cur; 53498c2ecf20Sopenharmony_ci unlock_up(path, level, 1, 0, NULL); 53508c2ecf20Sopenharmony_ci } 53518c2ecf20Sopenharmony_ciout: 53528c2ecf20Sopenharmony_ci path->keep_locks = keep_locks; 53538c2ecf20Sopenharmony_ci if (ret == 0) { 53548c2ecf20Sopenharmony_ci btrfs_unlock_up_safe(path, path->lowest_level + 1); 53558c2ecf20Sopenharmony_ci btrfs_set_path_blocking(path); 53568c2ecf20Sopenharmony_ci memcpy(min_key, &found_key, sizeof(found_key)); 53578c2ecf20Sopenharmony_ci } 53588c2ecf20Sopenharmony_ci return ret; 53598c2ecf20Sopenharmony_ci} 53608c2ecf20Sopenharmony_ci 53618c2ecf20Sopenharmony_ci/* 53628c2ecf20Sopenharmony_ci * this is similar to btrfs_next_leaf, but does not try to preserve 53638c2ecf20Sopenharmony_ci * and fixup the path. It looks for and returns the next key in the 53648c2ecf20Sopenharmony_ci * tree based on the current path and the min_trans parameters. 53658c2ecf20Sopenharmony_ci * 53668c2ecf20Sopenharmony_ci * 0 is returned if another key is found, < 0 if there are any errors 53678c2ecf20Sopenharmony_ci * and 1 is returned if there are no higher keys in the tree 53688c2ecf20Sopenharmony_ci * 53698c2ecf20Sopenharmony_ci * path->keep_locks should be set to 1 on the search made before 53708c2ecf20Sopenharmony_ci * calling this function. 53718c2ecf20Sopenharmony_ci */ 53728c2ecf20Sopenharmony_ciint btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, 53738c2ecf20Sopenharmony_ci struct btrfs_key *key, int level, u64 min_trans) 53748c2ecf20Sopenharmony_ci{ 53758c2ecf20Sopenharmony_ci int slot; 53768c2ecf20Sopenharmony_ci struct extent_buffer *c; 53778c2ecf20Sopenharmony_ci 53788c2ecf20Sopenharmony_ci WARN_ON(!path->keep_locks && !path->skip_locking); 53798c2ecf20Sopenharmony_ci while (level < BTRFS_MAX_LEVEL) { 53808c2ecf20Sopenharmony_ci if (!path->nodes[level]) 53818c2ecf20Sopenharmony_ci return 1; 53828c2ecf20Sopenharmony_ci 53838c2ecf20Sopenharmony_ci slot = path->slots[level] + 1; 53848c2ecf20Sopenharmony_ci c = path->nodes[level]; 53858c2ecf20Sopenharmony_cinext: 53868c2ecf20Sopenharmony_ci if (slot >= btrfs_header_nritems(c)) { 53878c2ecf20Sopenharmony_ci int ret; 53888c2ecf20Sopenharmony_ci int orig_lowest; 53898c2ecf20Sopenharmony_ci struct btrfs_key cur_key; 53908c2ecf20Sopenharmony_ci if (level + 1 >= BTRFS_MAX_LEVEL || 53918c2ecf20Sopenharmony_ci !path->nodes[level + 1]) 53928c2ecf20Sopenharmony_ci return 1; 53938c2ecf20Sopenharmony_ci 53948c2ecf20Sopenharmony_ci if (path->locks[level + 1] || path->skip_locking) { 53958c2ecf20Sopenharmony_ci level++; 53968c2ecf20Sopenharmony_ci continue; 53978c2ecf20Sopenharmony_ci } 53988c2ecf20Sopenharmony_ci 53998c2ecf20Sopenharmony_ci slot = btrfs_header_nritems(c) - 1; 54008c2ecf20Sopenharmony_ci if (level == 0) 54018c2ecf20Sopenharmony_ci btrfs_item_key_to_cpu(c, &cur_key, slot); 54028c2ecf20Sopenharmony_ci else 54038c2ecf20Sopenharmony_ci btrfs_node_key_to_cpu(c, &cur_key, slot); 54048c2ecf20Sopenharmony_ci 54058c2ecf20Sopenharmony_ci orig_lowest = path->lowest_level; 54068c2ecf20Sopenharmony_ci btrfs_release_path(path); 54078c2ecf20Sopenharmony_ci path->lowest_level = level; 54088c2ecf20Sopenharmony_ci ret = btrfs_search_slot(NULL, root, &cur_key, path, 54098c2ecf20Sopenharmony_ci 0, 0); 54108c2ecf20Sopenharmony_ci path->lowest_level = orig_lowest; 54118c2ecf20Sopenharmony_ci if (ret < 0) 54128c2ecf20Sopenharmony_ci return ret; 54138c2ecf20Sopenharmony_ci 54148c2ecf20Sopenharmony_ci c = path->nodes[level]; 54158c2ecf20Sopenharmony_ci slot = path->slots[level]; 54168c2ecf20Sopenharmony_ci if (ret == 0) 54178c2ecf20Sopenharmony_ci slot++; 54188c2ecf20Sopenharmony_ci goto next; 54198c2ecf20Sopenharmony_ci } 54208c2ecf20Sopenharmony_ci 54218c2ecf20Sopenharmony_ci if (level == 0) 54228c2ecf20Sopenharmony_ci btrfs_item_key_to_cpu(c, key, slot); 54238c2ecf20Sopenharmony_ci else { 54248c2ecf20Sopenharmony_ci u64 gen = btrfs_node_ptr_generation(c, slot); 54258c2ecf20Sopenharmony_ci 54268c2ecf20Sopenharmony_ci if (gen < min_trans) { 54278c2ecf20Sopenharmony_ci slot++; 54288c2ecf20Sopenharmony_ci goto next; 54298c2ecf20Sopenharmony_ci } 54308c2ecf20Sopenharmony_ci btrfs_node_key_to_cpu(c, key, slot); 54318c2ecf20Sopenharmony_ci } 54328c2ecf20Sopenharmony_ci return 0; 54338c2ecf20Sopenharmony_ci } 54348c2ecf20Sopenharmony_ci return 1; 54358c2ecf20Sopenharmony_ci} 54368c2ecf20Sopenharmony_ci 54378c2ecf20Sopenharmony_ci/* 54388c2ecf20Sopenharmony_ci * search the tree again to find a leaf with greater keys 54398c2ecf20Sopenharmony_ci * returns 0 if it found something or 1 if there are no greater leaves. 54408c2ecf20Sopenharmony_ci * returns < 0 on io errors. 54418c2ecf20Sopenharmony_ci */ 54428c2ecf20Sopenharmony_ciint btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) 54438c2ecf20Sopenharmony_ci{ 54448c2ecf20Sopenharmony_ci return btrfs_next_old_leaf(root, path, 0); 54458c2ecf20Sopenharmony_ci} 54468c2ecf20Sopenharmony_ci 54478c2ecf20Sopenharmony_ciint btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, 54488c2ecf20Sopenharmony_ci u64 time_seq) 54498c2ecf20Sopenharmony_ci{ 54508c2ecf20Sopenharmony_ci int slot; 54518c2ecf20Sopenharmony_ci int level; 54528c2ecf20Sopenharmony_ci struct extent_buffer *c; 54538c2ecf20Sopenharmony_ci struct extent_buffer *next; 54548c2ecf20Sopenharmony_ci struct btrfs_key key; 54558c2ecf20Sopenharmony_ci u32 nritems; 54568c2ecf20Sopenharmony_ci int ret; 54578c2ecf20Sopenharmony_ci int old_spinning = path->leave_spinning; 54588c2ecf20Sopenharmony_ci int next_rw_lock = 0; 54598c2ecf20Sopenharmony_ci 54608c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(path->nodes[0]); 54618c2ecf20Sopenharmony_ci if (nritems == 0) 54628c2ecf20Sopenharmony_ci return 1; 54638c2ecf20Sopenharmony_ci 54648c2ecf20Sopenharmony_ci btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); 54658c2ecf20Sopenharmony_ciagain: 54668c2ecf20Sopenharmony_ci level = 1; 54678c2ecf20Sopenharmony_ci next = NULL; 54688c2ecf20Sopenharmony_ci next_rw_lock = 0; 54698c2ecf20Sopenharmony_ci btrfs_release_path(path); 54708c2ecf20Sopenharmony_ci 54718c2ecf20Sopenharmony_ci path->keep_locks = 1; 54728c2ecf20Sopenharmony_ci path->leave_spinning = 1; 54738c2ecf20Sopenharmony_ci 54748c2ecf20Sopenharmony_ci if (time_seq) 54758c2ecf20Sopenharmony_ci ret = btrfs_search_old_slot(root, &key, path, time_seq); 54768c2ecf20Sopenharmony_ci else 54778c2ecf20Sopenharmony_ci ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 54788c2ecf20Sopenharmony_ci path->keep_locks = 0; 54798c2ecf20Sopenharmony_ci 54808c2ecf20Sopenharmony_ci if (ret < 0) 54818c2ecf20Sopenharmony_ci return ret; 54828c2ecf20Sopenharmony_ci 54838c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(path->nodes[0]); 54848c2ecf20Sopenharmony_ci /* 54858c2ecf20Sopenharmony_ci * by releasing the path above we dropped all our locks. A balance 54868c2ecf20Sopenharmony_ci * could have added more items next to the key that used to be 54878c2ecf20Sopenharmony_ci * at the very end of the block. So, check again here and 54888c2ecf20Sopenharmony_ci * advance the path if there are now more items available. 54898c2ecf20Sopenharmony_ci */ 54908c2ecf20Sopenharmony_ci if (nritems > 0 && path->slots[0] < nritems - 1) { 54918c2ecf20Sopenharmony_ci if (ret == 0) 54928c2ecf20Sopenharmony_ci path->slots[0]++; 54938c2ecf20Sopenharmony_ci ret = 0; 54948c2ecf20Sopenharmony_ci goto done; 54958c2ecf20Sopenharmony_ci } 54968c2ecf20Sopenharmony_ci /* 54978c2ecf20Sopenharmony_ci * So the above check misses one case: 54988c2ecf20Sopenharmony_ci * - after releasing the path above, someone has removed the item that 54998c2ecf20Sopenharmony_ci * used to be at the very end of the block, and balance between leafs 55008c2ecf20Sopenharmony_ci * gets another one with bigger key.offset to replace it. 55018c2ecf20Sopenharmony_ci * 55028c2ecf20Sopenharmony_ci * This one should be returned as well, or we can get leaf corruption 55038c2ecf20Sopenharmony_ci * later(esp. in __btrfs_drop_extents()). 55048c2ecf20Sopenharmony_ci * 55058c2ecf20Sopenharmony_ci * And a bit more explanation about this check, 55068c2ecf20Sopenharmony_ci * with ret > 0, the key isn't found, the path points to the slot 55078c2ecf20Sopenharmony_ci * where it should be inserted, so the path->slots[0] item must be the 55088c2ecf20Sopenharmony_ci * bigger one. 55098c2ecf20Sopenharmony_ci */ 55108c2ecf20Sopenharmony_ci if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) { 55118c2ecf20Sopenharmony_ci ret = 0; 55128c2ecf20Sopenharmony_ci goto done; 55138c2ecf20Sopenharmony_ci } 55148c2ecf20Sopenharmony_ci 55158c2ecf20Sopenharmony_ci while (level < BTRFS_MAX_LEVEL) { 55168c2ecf20Sopenharmony_ci if (!path->nodes[level]) { 55178c2ecf20Sopenharmony_ci ret = 1; 55188c2ecf20Sopenharmony_ci goto done; 55198c2ecf20Sopenharmony_ci } 55208c2ecf20Sopenharmony_ci 55218c2ecf20Sopenharmony_ci slot = path->slots[level] + 1; 55228c2ecf20Sopenharmony_ci c = path->nodes[level]; 55238c2ecf20Sopenharmony_ci if (slot >= btrfs_header_nritems(c)) { 55248c2ecf20Sopenharmony_ci level++; 55258c2ecf20Sopenharmony_ci if (level == BTRFS_MAX_LEVEL) { 55268c2ecf20Sopenharmony_ci ret = 1; 55278c2ecf20Sopenharmony_ci goto done; 55288c2ecf20Sopenharmony_ci } 55298c2ecf20Sopenharmony_ci continue; 55308c2ecf20Sopenharmony_ci } 55318c2ecf20Sopenharmony_ci 55328c2ecf20Sopenharmony_ci if (next) { 55338c2ecf20Sopenharmony_ci btrfs_tree_unlock_rw(next, next_rw_lock); 55348c2ecf20Sopenharmony_ci free_extent_buffer(next); 55358c2ecf20Sopenharmony_ci } 55368c2ecf20Sopenharmony_ci 55378c2ecf20Sopenharmony_ci next = c; 55388c2ecf20Sopenharmony_ci next_rw_lock = path->locks[level]; 55398c2ecf20Sopenharmony_ci ret = read_block_for_search(root, path, &next, level, 55408c2ecf20Sopenharmony_ci slot, &key); 55418c2ecf20Sopenharmony_ci if (ret == -EAGAIN) 55428c2ecf20Sopenharmony_ci goto again; 55438c2ecf20Sopenharmony_ci 55448c2ecf20Sopenharmony_ci if (ret < 0) { 55458c2ecf20Sopenharmony_ci btrfs_release_path(path); 55468c2ecf20Sopenharmony_ci goto done; 55478c2ecf20Sopenharmony_ci } 55488c2ecf20Sopenharmony_ci 55498c2ecf20Sopenharmony_ci if (!path->skip_locking) { 55508c2ecf20Sopenharmony_ci ret = btrfs_try_tree_read_lock(next); 55518c2ecf20Sopenharmony_ci if (!ret && time_seq) { 55528c2ecf20Sopenharmony_ci /* 55538c2ecf20Sopenharmony_ci * If we don't get the lock, we may be racing 55548c2ecf20Sopenharmony_ci * with push_leaf_left, holding that lock while 55558c2ecf20Sopenharmony_ci * itself waiting for the leaf we've currently 55568c2ecf20Sopenharmony_ci * locked. To solve this situation, we give up 55578c2ecf20Sopenharmony_ci * on our lock and cycle. 55588c2ecf20Sopenharmony_ci */ 55598c2ecf20Sopenharmony_ci free_extent_buffer(next); 55608c2ecf20Sopenharmony_ci btrfs_release_path(path); 55618c2ecf20Sopenharmony_ci cond_resched(); 55628c2ecf20Sopenharmony_ci goto again; 55638c2ecf20Sopenharmony_ci } 55648c2ecf20Sopenharmony_ci if (!ret) { 55658c2ecf20Sopenharmony_ci btrfs_set_path_blocking(path); 55668c2ecf20Sopenharmony_ci __btrfs_tree_read_lock(next, 55678c2ecf20Sopenharmony_ci BTRFS_NESTING_RIGHT, 55688c2ecf20Sopenharmony_ci path->recurse); 55698c2ecf20Sopenharmony_ci } 55708c2ecf20Sopenharmony_ci next_rw_lock = BTRFS_READ_LOCK; 55718c2ecf20Sopenharmony_ci } 55728c2ecf20Sopenharmony_ci break; 55738c2ecf20Sopenharmony_ci } 55748c2ecf20Sopenharmony_ci path->slots[level] = slot; 55758c2ecf20Sopenharmony_ci while (1) { 55768c2ecf20Sopenharmony_ci level--; 55778c2ecf20Sopenharmony_ci c = path->nodes[level]; 55788c2ecf20Sopenharmony_ci if (path->locks[level]) 55798c2ecf20Sopenharmony_ci btrfs_tree_unlock_rw(c, path->locks[level]); 55808c2ecf20Sopenharmony_ci 55818c2ecf20Sopenharmony_ci free_extent_buffer(c); 55828c2ecf20Sopenharmony_ci path->nodes[level] = next; 55838c2ecf20Sopenharmony_ci path->slots[level] = 0; 55848c2ecf20Sopenharmony_ci if (!path->skip_locking) 55858c2ecf20Sopenharmony_ci path->locks[level] = next_rw_lock; 55868c2ecf20Sopenharmony_ci if (!level) 55878c2ecf20Sopenharmony_ci break; 55888c2ecf20Sopenharmony_ci 55898c2ecf20Sopenharmony_ci ret = read_block_for_search(root, path, &next, level, 55908c2ecf20Sopenharmony_ci 0, &key); 55918c2ecf20Sopenharmony_ci if (ret == -EAGAIN) 55928c2ecf20Sopenharmony_ci goto again; 55938c2ecf20Sopenharmony_ci 55948c2ecf20Sopenharmony_ci if (ret < 0) { 55958c2ecf20Sopenharmony_ci btrfs_release_path(path); 55968c2ecf20Sopenharmony_ci goto done; 55978c2ecf20Sopenharmony_ci } 55988c2ecf20Sopenharmony_ci 55998c2ecf20Sopenharmony_ci if (!path->skip_locking) { 56008c2ecf20Sopenharmony_ci ret = btrfs_try_tree_read_lock(next); 56018c2ecf20Sopenharmony_ci if (!ret) { 56028c2ecf20Sopenharmony_ci btrfs_set_path_blocking(path); 56038c2ecf20Sopenharmony_ci __btrfs_tree_read_lock(next, 56048c2ecf20Sopenharmony_ci BTRFS_NESTING_RIGHT, 56058c2ecf20Sopenharmony_ci path->recurse); 56068c2ecf20Sopenharmony_ci } 56078c2ecf20Sopenharmony_ci next_rw_lock = BTRFS_READ_LOCK; 56088c2ecf20Sopenharmony_ci } 56098c2ecf20Sopenharmony_ci } 56108c2ecf20Sopenharmony_ci ret = 0; 56118c2ecf20Sopenharmony_cidone: 56128c2ecf20Sopenharmony_ci unlock_up(path, 0, 1, 0, NULL); 56138c2ecf20Sopenharmony_ci path->leave_spinning = old_spinning; 56148c2ecf20Sopenharmony_ci if (!old_spinning) 56158c2ecf20Sopenharmony_ci btrfs_set_path_blocking(path); 56168c2ecf20Sopenharmony_ci 56178c2ecf20Sopenharmony_ci return ret; 56188c2ecf20Sopenharmony_ci} 56198c2ecf20Sopenharmony_ci 56208c2ecf20Sopenharmony_ci/* 56218c2ecf20Sopenharmony_ci * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps 56228c2ecf20Sopenharmony_ci * searching until it gets past min_objectid or finds an item of 'type' 56238c2ecf20Sopenharmony_ci * 56248c2ecf20Sopenharmony_ci * returns 0 if something is found, 1 if nothing was found and < 0 on error 56258c2ecf20Sopenharmony_ci */ 56268c2ecf20Sopenharmony_ciint btrfs_previous_item(struct btrfs_root *root, 56278c2ecf20Sopenharmony_ci struct btrfs_path *path, u64 min_objectid, 56288c2ecf20Sopenharmony_ci int type) 56298c2ecf20Sopenharmony_ci{ 56308c2ecf20Sopenharmony_ci struct btrfs_key found_key; 56318c2ecf20Sopenharmony_ci struct extent_buffer *leaf; 56328c2ecf20Sopenharmony_ci u32 nritems; 56338c2ecf20Sopenharmony_ci int ret; 56348c2ecf20Sopenharmony_ci 56358c2ecf20Sopenharmony_ci while (1) { 56368c2ecf20Sopenharmony_ci if (path->slots[0] == 0) { 56378c2ecf20Sopenharmony_ci btrfs_set_path_blocking(path); 56388c2ecf20Sopenharmony_ci ret = btrfs_prev_leaf(root, path); 56398c2ecf20Sopenharmony_ci if (ret != 0) 56408c2ecf20Sopenharmony_ci return ret; 56418c2ecf20Sopenharmony_ci } else { 56428c2ecf20Sopenharmony_ci path->slots[0]--; 56438c2ecf20Sopenharmony_ci } 56448c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 56458c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(leaf); 56468c2ecf20Sopenharmony_ci if (nritems == 0) 56478c2ecf20Sopenharmony_ci return 1; 56488c2ecf20Sopenharmony_ci if (path->slots[0] == nritems) 56498c2ecf20Sopenharmony_ci path->slots[0]--; 56508c2ecf20Sopenharmony_ci 56518c2ecf20Sopenharmony_ci btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 56528c2ecf20Sopenharmony_ci if (found_key.objectid < min_objectid) 56538c2ecf20Sopenharmony_ci break; 56548c2ecf20Sopenharmony_ci if (found_key.type == type) 56558c2ecf20Sopenharmony_ci return 0; 56568c2ecf20Sopenharmony_ci if (found_key.objectid == min_objectid && 56578c2ecf20Sopenharmony_ci found_key.type < type) 56588c2ecf20Sopenharmony_ci break; 56598c2ecf20Sopenharmony_ci } 56608c2ecf20Sopenharmony_ci return 1; 56618c2ecf20Sopenharmony_ci} 56628c2ecf20Sopenharmony_ci 56638c2ecf20Sopenharmony_ci/* 56648c2ecf20Sopenharmony_ci * search in extent tree to find a previous Metadata/Data extent item with 56658c2ecf20Sopenharmony_ci * min objecitd. 56668c2ecf20Sopenharmony_ci * 56678c2ecf20Sopenharmony_ci * returns 0 if something is found, 1 if nothing was found and < 0 on error 56688c2ecf20Sopenharmony_ci */ 56698c2ecf20Sopenharmony_ciint btrfs_previous_extent_item(struct btrfs_root *root, 56708c2ecf20Sopenharmony_ci struct btrfs_path *path, u64 min_objectid) 56718c2ecf20Sopenharmony_ci{ 56728c2ecf20Sopenharmony_ci struct btrfs_key found_key; 56738c2ecf20Sopenharmony_ci struct extent_buffer *leaf; 56748c2ecf20Sopenharmony_ci u32 nritems; 56758c2ecf20Sopenharmony_ci int ret; 56768c2ecf20Sopenharmony_ci 56778c2ecf20Sopenharmony_ci while (1) { 56788c2ecf20Sopenharmony_ci if (path->slots[0] == 0) { 56798c2ecf20Sopenharmony_ci btrfs_set_path_blocking(path); 56808c2ecf20Sopenharmony_ci ret = btrfs_prev_leaf(root, path); 56818c2ecf20Sopenharmony_ci if (ret != 0) 56828c2ecf20Sopenharmony_ci return ret; 56838c2ecf20Sopenharmony_ci } else { 56848c2ecf20Sopenharmony_ci path->slots[0]--; 56858c2ecf20Sopenharmony_ci } 56868c2ecf20Sopenharmony_ci leaf = path->nodes[0]; 56878c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(leaf); 56888c2ecf20Sopenharmony_ci if (nritems == 0) 56898c2ecf20Sopenharmony_ci return 1; 56908c2ecf20Sopenharmony_ci if (path->slots[0] == nritems) 56918c2ecf20Sopenharmony_ci path->slots[0]--; 56928c2ecf20Sopenharmony_ci 56938c2ecf20Sopenharmony_ci btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 56948c2ecf20Sopenharmony_ci if (found_key.objectid < min_objectid) 56958c2ecf20Sopenharmony_ci break; 56968c2ecf20Sopenharmony_ci if (found_key.type == BTRFS_EXTENT_ITEM_KEY || 56978c2ecf20Sopenharmony_ci found_key.type == BTRFS_METADATA_ITEM_KEY) 56988c2ecf20Sopenharmony_ci return 0; 56998c2ecf20Sopenharmony_ci if (found_key.objectid == min_objectid && 57008c2ecf20Sopenharmony_ci found_key.type < BTRFS_EXTENT_ITEM_KEY) 57018c2ecf20Sopenharmony_ci break; 57028c2ecf20Sopenharmony_ci } 57038c2ecf20Sopenharmony_ci return 1; 57048c2ecf20Sopenharmony_ci} 5705