18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (C) 2007 Oracle. All rights reserved. 48c2ecf20Sopenharmony_ci */ 58c2ecf20Sopenharmony_ci 68c2ecf20Sopenharmony_ci#include <linux/sched.h> 78c2ecf20Sopenharmony_ci#include "ctree.h" 88c2ecf20Sopenharmony_ci#include "disk-io.h" 98c2ecf20Sopenharmony_ci#include "print-tree.h" 108c2ecf20Sopenharmony_ci#include "transaction.h" 118c2ecf20Sopenharmony_ci#include "locking.h" 128c2ecf20Sopenharmony_ci 138c2ecf20Sopenharmony_ci/* 148c2ecf20Sopenharmony_ci * Defrag all the leaves in a given btree. 158c2ecf20Sopenharmony_ci * Read all the leaves and try to get key order to 168c2ecf20Sopenharmony_ci * better reflect disk order 178c2ecf20Sopenharmony_ci */ 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ciint btrfs_defrag_leaves(struct btrfs_trans_handle *trans, 208c2ecf20Sopenharmony_ci struct btrfs_root *root) 218c2ecf20Sopenharmony_ci{ 228c2ecf20Sopenharmony_ci struct btrfs_path *path = NULL; 238c2ecf20Sopenharmony_ci struct btrfs_key key; 248c2ecf20Sopenharmony_ci int ret = 0; 258c2ecf20Sopenharmony_ci int wret; 268c2ecf20Sopenharmony_ci int level; 278c2ecf20Sopenharmony_ci int next_key_ret = 0; 288c2ecf20Sopenharmony_ci u64 last_ret = 0; 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_ci if (root->fs_info->extent_root == root) { 318c2ecf20Sopenharmony_ci /* 328c2ecf20Sopenharmony_ci * there's recursion here right now in the tree locking, 338c2ecf20Sopenharmony_ci * we can't defrag the extent root without deadlock 348c2ecf20Sopenharmony_ci */ 358c2ecf20Sopenharmony_ci goto out; 368c2ecf20Sopenharmony_ci } 378c2ecf20Sopenharmony_ci 388c2ecf20Sopenharmony_ci if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) 398c2ecf20Sopenharmony_ci goto out; 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_ci path = btrfs_alloc_path(); 428c2ecf20Sopenharmony_ci if (!path) 438c2ecf20Sopenharmony_ci return -ENOMEM; 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci level = btrfs_header_level(root->node); 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_ci if (level == 0) 488c2ecf20Sopenharmony_ci goto out; 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci if (root->defrag_progress.objectid == 0) { 518c2ecf20Sopenharmony_ci struct extent_buffer *root_node; 528c2ecf20Sopenharmony_ci u32 nritems; 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci root_node = btrfs_lock_root_node(root); 558c2ecf20Sopenharmony_ci btrfs_set_lock_blocking_write(root_node); 568c2ecf20Sopenharmony_ci nritems = btrfs_header_nritems(root_node); 578c2ecf20Sopenharmony_ci root->defrag_max.objectid = 0; 588c2ecf20Sopenharmony_ci /* from above we know this is not a leaf */ 598c2ecf20Sopenharmony_ci btrfs_node_key_to_cpu(root_node, &root->defrag_max, 608c2ecf20Sopenharmony_ci nritems - 1); 618c2ecf20Sopenharmony_ci btrfs_tree_unlock(root_node); 628c2ecf20Sopenharmony_ci free_extent_buffer(root_node); 638c2ecf20Sopenharmony_ci memset(&key, 0, sizeof(key)); 648c2ecf20Sopenharmony_ci } else { 658c2ecf20Sopenharmony_ci memcpy(&key, &root->defrag_progress, sizeof(key)); 668c2ecf20Sopenharmony_ci } 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_ci path->keep_locks = 1; 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_ci ret = btrfs_search_forward(root, &key, path, BTRFS_OLDEST_GENERATION); 718c2ecf20Sopenharmony_ci if (ret < 0) 728c2ecf20Sopenharmony_ci goto out; 738c2ecf20Sopenharmony_ci if (ret > 0) { 748c2ecf20Sopenharmony_ci ret = 0; 758c2ecf20Sopenharmony_ci goto out; 768c2ecf20Sopenharmony_ci } 778c2ecf20Sopenharmony_ci btrfs_release_path(path); 788c2ecf20Sopenharmony_ci /* 798c2ecf20Sopenharmony_ci * We don't need a lock on a leaf. btrfs_realloc_node() will lock all 808c2ecf20Sopenharmony_ci * leafs from path->nodes[1], so set lowest_level to 1 to avoid later 818c2ecf20Sopenharmony_ci * a deadlock (attempting to write lock an already write locked leaf). 828c2ecf20Sopenharmony_ci */ 838c2ecf20Sopenharmony_ci path->lowest_level = 1; 848c2ecf20Sopenharmony_ci wret = btrfs_search_slot(trans, root, &key, path, 0, 1); 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_ci if (wret < 0) { 878c2ecf20Sopenharmony_ci ret = wret; 888c2ecf20Sopenharmony_ci goto out; 898c2ecf20Sopenharmony_ci } 908c2ecf20Sopenharmony_ci if (!path->nodes[1]) { 918c2ecf20Sopenharmony_ci ret = 0; 928c2ecf20Sopenharmony_ci goto out; 938c2ecf20Sopenharmony_ci } 948c2ecf20Sopenharmony_ci /* 958c2ecf20Sopenharmony_ci * The node at level 1 must always be locked when our path has 968c2ecf20Sopenharmony_ci * keep_locks set and lowest_level is 1, regardless of the value of 978c2ecf20Sopenharmony_ci * path->slots[1]. 988c2ecf20Sopenharmony_ci */ 998c2ecf20Sopenharmony_ci BUG_ON(path->locks[1] == 0); 1008c2ecf20Sopenharmony_ci ret = btrfs_realloc_node(trans, root, 1018c2ecf20Sopenharmony_ci path->nodes[1], 0, 1028c2ecf20Sopenharmony_ci &last_ret, 1038c2ecf20Sopenharmony_ci &root->defrag_progress); 1048c2ecf20Sopenharmony_ci if (ret) { 1058c2ecf20Sopenharmony_ci WARN_ON(ret == -EAGAIN); 1068c2ecf20Sopenharmony_ci goto out; 1078c2ecf20Sopenharmony_ci } 1088c2ecf20Sopenharmony_ci /* 1098c2ecf20Sopenharmony_ci * Now that we reallocated the node we can find the next key. Note that 1108c2ecf20Sopenharmony_ci * btrfs_find_next_key() can release our path and do another search 1118c2ecf20Sopenharmony_ci * without COWing, this is because even with path->keep_locks = 1, 1128c2ecf20Sopenharmony_ci * btrfs_search_slot() / ctree.c:unlock_up() does not keeps a lock on a 1138c2ecf20Sopenharmony_ci * node when path->slots[node_level - 1] does not point to the last 1148c2ecf20Sopenharmony_ci * item or a slot beyond the last item (ctree.c:unlock_up()). Therefore 1158c2ecf20Sopenharmony_ci * we search for the next key after reallocating our node. 1168c2ecf20Sopenharmony_ci */ 1178c2ecf20Sopenharmony_ci path->slots[1] = btrfs_header_nritems(path->nodes[1]); 1188c2ecf20Sopenharmony_ci next_key_ret = btrfs_find_next_key(root, path, &key, 1, 1198c2ecf20Sopenharmony_ci BTRFS_OLDEST_GENERATION); 1208c2ecf20Sopenharmony_ci if (next_key_ret == 0) { 1218c2ecf20Sopenharmony_ci memcpy(&root->defrag_progress, &key, sizeof(key)); 1228c2ecf20Sopenharmony_ci ret = -EAGAIN; 1238c2ecf20Sopenharmony_ci } 1248c2ecf20Sopenharmony_ciout: 1258c2ecf20Sopenharmony_ci btrfs_free_path(path); 1268c2ecf20Sopenharmony_ci if (ret == -EAGAIN) { 1278c2ecf20Sopenharmony_ci if (root->defrag_max.objectid > root->defrag_progress.objectid) 1288c2ecf20Sopenharmony_ci goto done; 1298c2ecf20Sopenharmony_ci if (root->defrag_max.type > root->defrag_progress.type) 1308c2ecf20Sopenharmony_ci goto done; 1318c2ecf20Sopenharmony_ci if (root->defrag_max.offset > root->defrag_progress.offset) 1328c2ecf20Sopenharmony_ci goto done; 1338c2ecf20Sopenharmony_ci ret = 0; 1348c2ecf20Sopenharmony_ci } 1358c2ecf20Sopenharmony_cidone: 1368c2ecf20Sopenharmony_ci if (ret != -EAGAIN) 1378c2ecf20Sopenharmony_ci memset(&root->defrag_progress, 0, 1388c2ecf20Sopenharmony_ci sizeof(root->defrag_progress)); 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci return ret; 1418c2ecf20Sopenharmony_ci} 142