18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright (C) 2007 Oracle.  All rights reserved.
48c2ecf20Sopenharmony_ci */
58c2ecf20Sopenharmony_ci
68c2ecf20Sopenharmony_ci#include <linux/sched.h>
78c2ecf20Sopenharmony_ci#include "ctree.h"
88c2ecf20Sopenharmony_ci#include "disk-io.h"
98c2ecf20Sopenharmony_ci#include "print-tree.h"
108c2ecf20Sopenharmony_ci#include "transaction.h"
118c2ecf20Sopenharmony_ci#include "locking.h"
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ci/*
148c2ecf20Sopenharmony_ci * Defrag all the leaves in a given btree.
158c2ecf20Sopenharmony_ci * Read all the leaves and try to get key order to
168c2ecf20Sopenharmony_ci * better reflect disk order
178c2ecf20Sopenharmony_ci */
188c2ecf20Sopenharmony_ci
198c2ecf20Sopenharmony_ciint btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
208c2ecf20Sopenharmony_ci			struct btrfs_root *root)
218c2ecf20Sopenharmony_ci{
228c2ecf20Sopenharmony_ci	struct btrfs_path *path = NULL;
238c2ecf20Sopenharmony_ci	struct btrfs_key key;
248c2ecf20Sopenharmony_ci	int ret = 0;
258c2ecf20Sopenharmony_ci	int wret;
268c2ecf20Sopenharmony_ci	int level;
278c2ecf20Sopenharmony_ci	int next_key_ret = 0;
288c2ecf20Sopenharmony_ci	u64 last_ret = 0;
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_ci	if (root->fs_info->extent_root == root) {
318c2ecf20Sopenharmony_ci		/*
328c2ecf20Sopenharmony_ci		 * there's recursion here right now in the tree locking,
338c2ecf20Sopenharmony_ci		 * we can't defrag the extent root without deadlock
348c2ecf20Sopenharmony_ci		 */
358c2ecf20Sopenharmony_ci		goto out;
368c2ecf20Sopenharmony_ci	}
378c2ecf20Sopenharmony_ci
388c2ecf20Sopenharmony_ci	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
398c2ecf20Sopenharmony_ci		goto out;
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_ci	path = btrfs_alloc_path();
428c2ecf20Sopenharmony_ci	if (!path)
438c2ecf20Sopenharmony_ci		return -ENOMEM;
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci	level = btrfs_header_level(root->node);
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ci	if (level == 0)
488c2ecf20Sopenharmony_ci		goto out;
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ci	if (root->defrag_progress.objectid == 0) {
518c2ecf20Sopenharmony_ci		struct extent_buffer *root_node;
528c2ecf20Sopenharmony_ci		u32 nritems;
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ci		root_node = btrfs_lock_root_node(root);
558c2ecf20Sopenharmony_ci		btrfs_set_lock_blocking_write(root_node);
568c2ecf20Sopenharmony_ci		nritems = btrfs_header_nritems(root_node);
578c2ecf20Sopenharmony_ci		root->defrag_max.objectid = 0;
588c2ecf20Sopenharmony_ci		/* from above we know this is not a leaf */
598c2ecf20Sopenharmony_ci		btrfs_node_key_to_cpu(root_node, &root->defrag_max,
608c2ecf20Sopenharmony_ci				      nritems - 1);
618c2ecf20Sopenharmony_ci		btrfs_tree_unlock(root_node);
628c2ecf20Sopenharmony_ci		free_extent_buffer(root_node);
638c2ecf20Sopenharmony_ci		memset(&key, 0, sizeof(key));
648c2ecf20Sopenharmony_ci	} else {
658c2ecf20Sopenharmony_ci		memcpy(&key, &root->defrag_progress, sizeof(key));
668c2ecf20Sopenharmony_ci	}
678c2ecf20Sopenharmony_ci
688c2ecf20Sopenharmony_ci	path->keep_locks = 1;
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci	ret = btrfs_search_forward(root, &key, path, BTRFS_OLDEST_GENERATION);
718c2ecf20Sopenharmony_ci	if (ret < 0)
728c2ecf20Sopenharmony_ci		goto out;
738c2ecf20Sopenharmony_ci	if (ret > 0) {
748c2ecf20Sopenharmony_ci		ret = 0;
758c2ecf20Sopenharmony_ci		goto out;
768c2ecf20Sopenharmony_ci	}
778c2ecf20Sopenharmony_ci	btrfs_release_path(path);
788c2ecf20Sopenharmony_ci	/*
798c2ecf20Sopenharmony_ci	 * We don't need a lock on a leaf. btrfs_realloc_node() will lock all
808c2ecf20Sopenharmony_ci	 * leafs from path->nodes[1], so set lowest_level to 1 to avoid later
818c2ecf20Sopenharmony_ci	 * a deadlock (attempting to write lock an already write locked leaf).
828c2ecf20Sopenharmony_ci	 */
838c2ecf20Sopenharmony_ci	path->lowest_level = 1;
848c2ecf20Sopenharmony_ci	wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_ci	if (wret < 0) {
878c2ecf20Sopenharmony_ci		ret = wret;
888c2ecf20Sopenharmony_ci		goto out;
898c2ecf20Sopenharmony_ci	}
908c2ecf20Sopenharmony_ci	if (!path->nodes[1]) {
918c2ecf20Sopenharmony_ci		ret = 0;
928c2ecf20Sopenharmony_ci		goto out;
938c2ecf20Sopenharmony_ci	}
948c2ecf20Sopenharmony_ci	/*
958c2ecf20Sopenharmony_ci	 * The node at level 1 must always be locked when our path has
968c2ecf20Sopenharmony_ci	 * keep_locks set and lowest_level is 1, regardless of the value of
978c2ecf20Sopenharmony_ci	 * path->slots[1].
988c2ecf20Sopenharmony_ci	 */
998c2ecf20Sopenharmony_ci	BUG_ON(path->locks[1] == 0);
1008c2ecf20Sopenharmony_ci	ret = btrfs_realloc_node(trans, root,
1018c2ecf20Sopenharmony_ci				 path->nodes[1], 0,
1028c2ecf20Sopenharmony_ci				 &last_ret,
1038c2ecf20Sopenharmony_ci				 &root->defrag_progress);
1048c2ecf20Sopenharmony_ci	if (ret) {
1058c2ecf20Sopenharmony_ci		WARN_ON(ret == -EAGAIN);
1068c2ecf20Sopenharmony_ci		goto out;
1078c2ecf20Sopenharmony_ci	}
1088c2ecf20Sopenharmony_ci	/*
1098c2ecf20Sopenharmony_ci	 * Now that we reallocated the node we can find the next key. Note that
1108c2ecf20Sopenharmony_ci	 * btrfs_find_next_key() can release our path and do another search
1118c2ecf20Sopenharmony_ci	 * without COWing, this is because even with path->keep_locks = 1,
1128c2ecf20Sopenharmony_ci	 * btrfs_search_slot() / ctree.c:unlock_up() does not keeps a lock on a
1138c2ecf20Sopenharmony_ci	 * node when path->slots[node_level - 1] does not point to the last
1148c2ecf20Sopenharmony_ci	 * item or a slot beyond the last item (ctree.c:unlock_up()). Therefore
1158c2ecf20Sopenharmony_ci	 * we search for the next key after reallocating our node.
1168c2ecf20Sopenharmony_ci	 */
1178c2ecf20Sopenharmony_ci	path->slots[1] = btrfs_header_nritems(path->nodes[1]);
1188c2ecf20Sopenharmony_ci	next_key_ret = btrfs_find_next_key(root, path, &key, 1,
1198c2ecf20Sopenharmony_ci					   BTRFS_OLDEST_GENERATION);
1208c2ecf20Sopenharmony_ci	if (next_key_ret == 0) {
1218c2ecf20Sopenharmony_ci		memcpy(&root->defrag_progress, &key, sizeof(key));
1228c2ecf20Sopenharmony_ci		ret = -EAGAIN;
1238c2ecf20Sopenharmony_ci	}
1248c2ecf20Sopenharmony_ciout:
1258c2ecf20Sopenharmony_ci	btrfs_free_path(path);
1268c2ecf20Sopenharmony_ci	if (ret == -EAGAIN) {
1278c2ecf20Sopenharmony_ci		if (root->defrag_max.objectid > root->defrag_progress.objectid)
1288c2ecf20Sopenharmony_ci			goto done;
1298c2ecf20Sopenharmony_ci		if (root->defrag_max.type > root->defrag_progress.type)
1308c2ecf20Sopenharmony_ci			goto done;
1318c2ecf20Sopenharmony_ci		if (root->defrag_max.offset > root->defrag_progress.offset)
1328c2ecf20Sopenharmony_ci			goto done;
1338c2ecf20Sopenharmony_ci		ret = 0;
1348c2ecf20Sopenharmony_ci	}
1358c2ecf20Sopenharmony_cidone:
1368c2ecf20Sopenharmony_ci	if (ret != -EAGAIN)
1378c2ecf20Sopenharmony_ci		memset(&root->defrag_progress, 0,
1388c2ecf20Sopenharmony_ci		       sizeof(root->defrag_progress));
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_ci	return ret;
1418c2ecf20Sopenharmony_ci}
142