// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 Facebook.  All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include "messages.h"
#include "ctree.h"
#include "disk-io.h"
#include "locking.h"
#include "free-space-tree.h"
#include "transaction.h"
#include "block-group.h"
#include "fs.h"
#include "accessors.h"
#include "extent-tree.h"
#include "root-tree.h"

static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group,
					struct btrfs_path *path);

static struct btrfs_root *btrfs_free_space_root(
				struct btrfs_block_group *block_group)
{
	struct btrfs_key key = {
		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};

	if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
		key.offset = block_group->global_root_id;
	return btrfs_global_root(block_group->fs_info, &key);
}

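/*
 * Free space for a block group is stored either as individual extent items or
 * as bitmap items. These thresholds are measured in numbers of extent items
 * and decide when we switch between the two representations; see
 * update_free_space_extent_count().
 */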
void set_free_space_tree_thresholds(struct btrfs_block_group *cache)
{
	u32 bitmap_range;
	size_t bitmap_size;
	u64 num_bitmaps, total_bitmap_size;

	if (WARN_ON(cache->length == 0))
		btrfs_warn(cache->fs_info, "block group %llu length is zero",
			   cache->start);

	/*
	 * We convert to bitmaps when the disk space required for using extents
	 * exceeds that required for using bitmaps.
	 */
	bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
	num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
	bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
	total_bitmap_size = num_bitmaps * bitmap_size;
	cache->bitmap_high_thresh = div_u64(total_bitmap_size,
					    sizeof(struct btrfs_item));

	/*
	 * We allow for a small buffer between the high threshold and low
	 * threshold to avoid thrashing back and forth between the two formats.
	 */
	if (cache->bitmap_high_thresh > 100)
		cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
	else
		cache->bitmap_low_thresh = 0;
}

static int add_new_free_space_info(struct btrfs_trans_handle *trans,
				   struct btrfs_block_group *block_group,
				   struct btrfs_path *path)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_free_space_info *info;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int ret;

	key.objectid = block_group->start;
	key.type = BTRFS_FREE_SPACE_INFO_KEY;
	key.offset = block_group->length;

	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	info = btrfs_item_ptr(leaf, path->slots[0],
			      struct btrfs_free_space_info);
	btrfs_set_free_space_extent_count(leaf, info, 0);
	btrfs_set_free_space_flags(leaf, info, 0);
	btrfs_mark_buffer_dirty(trans, leaf);

	ret = 0;
out:
	btrfs_release_path(path);
	return ret;
}

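/*
 * Look up the FREE_SPACE_INFO item for a block group. On success the path is
 * left pointing at the item and a pointer into the leaf is returned; pass
 * cow != 0 when the caller intends to modify the item.
 */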
EXPORT_FOR_TESTS
struct btrfs_free_space_info *search_free_space_info(
		struct btrfs_trans_handle *trans,
		struct btrfs_block_group *block_group,
		struct btrfs_path *path, int cow)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key;
	int ret;

	key.objectid = block_group->start;
	key.type = BTRFS_FREE_SPACE_INFO_KEY;
	key.offset = block_group->length;

	ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret != 0) {
		btrfs_warn(fs_info, "missing free space info for %llu",
			   block_group->start);
		ASSERT(0);
		return ERR_PTR(-ENOENT);
	}

	return btrfs_item_ptr(path->nodes[0], path->slots[0],
			      struct btrfs_free_space_info);
}

/*
 * btrfs_search_slot() but we're looking for the greatest key less than the
 * passed key.
 */
static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_key *key, struct btrfs_path *p,
				  int ins_len, int cow)
{
	int ret;

	ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
	if (ret < 0)
		return ret;

	if (ret == 0) {
		ASSERT(0);
		return -EIO;
	}

	if (p->slots[0] == 0) {
		ASSERT(0);
		return -EIO;
	}
	p->slots[0]--;

	return 0;
}

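/* Number of bytes needed for a bitmap covering "size" bytes of a block group. */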
static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
					 u64 size)
{
	return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
}

static unsigned long *alloc_bitmap(u32 bitmap_size)
{
	unsigned long *ret;
	unsigned int nofs_flag;
	u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));

	/*
	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
	 * into the filesystem as the free space bitmap can be modified in the
	 * critical section of a transaction commit.
	 *
	 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
	 * know that recursion is unsafe.
	 */
	nofs_flag = memalloc_nofs_save();
	ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);
	return ret;
}

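/*
 * Set "len" bits starting at "start" in a little-endian bitmap. This works on
 * bytes rather than native words, matching the byte-oriented on-disk bitmap
 * format independently of host endianness.
 */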
static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
{
	u8 *p = ((u8 *)map) + BIT_BYTE(start);
	const unsigned int size = start + len;
	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);

	while (len - bits_to_set >= 0) {
		*p |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = BITS_PER_BYTE;
		mask_to_set = ~0;
		p++;
	}
	if (len) {
		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
		*p |= mask_to_set;
	}
}

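/*
 * Convert a block group's free space from extent items to bitmap items: walk
 * the FREE_SPACE_EXTENT items backwards from the end of the block group,
 * record them in an in-memory bitmap and delete them, set the
 * BTRFS_FREE_SPACE_USING_BITMAPS flag, then insert FREE_SPACE_BITMAP items
 * covering the block group.
 */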
EXPORT_FOR_TESTS
int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
				  struct btrfs_block_group *block_group,
				  struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_free_space_info *info;
	struct btrfs_key key, found_key;
	struct extent_buffer *leaf;
	unsigned long *bitmap;
	char *bitmap_cursor;
	u64 start, end;
	u64 bitmap_range, i;
	u32 bitmap_size, flags, expected_extent_count;
	u32 extent_count = 0;
	int done = 0, nr;
	int ret;

	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
	bitmap = alloc_bitmap(bitmap_size);
	if (!bitmap) {
		ret = -ENOMEM;
		goto out;
	}

	start = block_group->start;
	end = block_group->start + block_group->length;

	key.objectid = end - 1;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	while (!done) {
		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
		if (ret)
			goto out;

		leaf = path->nodes[0];
		nr = 0;
		path->slots[0]++;
		while (path->slots[0] > 0) {
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);

			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
				ASSERT(found_key.objectid == block_group->start);
				ASSERT(found_key.offset == block_group->length);
				done = 1;
				break;
			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
				u64 first, last;

				ASSERT(found_key.objectid >= start);
				ASSERT(found_key.objectid < end);
				ASSERT(found_key.objectid + found_key.offset <= end);

				first = div_u64(found_key.objectid - start,
						fs_info->sectorsize);
				last = div_u64(found_key.objectid + found_key.offset - start,
					       fs_info->sectorsize);
				le_bitmap_set(bitmap, first, last - first);

				extent_count++;
				nr++;
				path->slots[0]--;
			} else {
				ASSERT(0);
			}
		}

		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
		if (ret)
			goto out;
		btrfs_release_path(path);
	}

	info = search_free_space_info(trans, block_group, path, 1);
	if (IS_ERR(info)) {
		ret = PTR_ERR(info);
		goto out;
	}
	leaf = path->nodes[0];
	flags = btrfs_free_space_flags(leaf, info);
	flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
	btrfs_set_free_space_flags(leaf, info, flags);
	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
	btrfs_mark_buffer_dirty(trans, leaf);
	btrfs_release_path(path);

	if (extent_count != expected_extent_count) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  block_group->start, extent_count,
			  expected_extent_count);
		ASSERT(0);
		ret = -EIO;
		goto out;
	}

	bitmap_cursor = (char *)bitmap;
	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
	i = start;
	while (i < end) {
		unsigned long ptr;
		u64 extent_size;
		u32 data_size;

		extent_size = min(end - i, bitmap_range);
		data_size = free_space_bitmap_size(fs_info, extent_size);

		key.objectid = i;
		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
		key.offset = extent_size;

		ret = btrfs_insert_empty_item(trans, root, path, &key,
					      data_size);
		if (ret)
			goto out;

		leaf = path->nodes[0];
		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
		write_extent_buffer(leaf, bitmap_cursor, ptr,
				    data_size);
		btrfs_mark_buffer_dirty(trans, leaf);
		btrfs_release_path(path);

		i += extent_size;
		bitmap_cursor += data_size;
	}

	ret = 0;
out:
	kvfree(bitmap);
	if (ret)
		btrfs_abort_transaction(trans, ret);
	return ret;
}

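/*
 * The inverse of convert_free_space_to_bitmaps(): read all FREE_SPACE_BITMAP
 * items into an in-memory bitmap and delete them, clear the
 * BTRFS_FREE_SPACE_USING_BITMAPS flag, then insert one FREE_SPACE_EXTENT item
 * per run of set bits.
 */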
EXPORT_FOR_TESTS
int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
				  struct btrfs_block_group *block_group,
				  struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_free_space_info *info;
	struct btrfs_key key, found_key;
	struct extent_buffer *leaf;
	unsigned long *bitmap;
	u64 start, end;
	u32 bitmap_size, flags, expected_extent_count;
	unsigned long nrbits, start_bit, end_bit;
	u32 extent_count = 0;
	int done = 0, nr;
	int ret;

	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
	bitmap = alloc_bitmap(bitmap_size);
	if (!bitmap) {
		ret = -ENOMEM;
		goto out;
	}

	start = block_group->start;
	end = block_group->start + block_group->length;

	key.objectid = end - 1;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	while (!done) {
		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
		if (ret)
			goto out;

		leaf = path->nodes[0];
		nr = 0;
		path->slots[0]++;
		while (path->slots[0] > 0) {
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);

			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
				ASSERT(found_key.objectid == block_group->start);
				ASSERT(found_key.offset == block_group->length);
				done = 1;
				break;
			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
				unsigned long ptr;
				char *bitmap_cursor;
				u32 bitmap_pos, data_size;

				ASSERT(found_key.objectid >= start);
				ASSERT(found_key.objectid < end);
				ASSERT(found_key.objectid + found_key.offset <= end);

				bitmap_pos = div_u64(found_key.objectid - start,
						     fs_info->sectorsize *
						     BITS_PER_BYTE);
				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
				data_size = free_space_bitmap_size(fs_info,
								found_key.offset);

				ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
				read_extent_buffer(leaf, bitmap_cursor, ptr,
						   data_size);

				nr++;
				path->slots[0]--;
			} else {
				ASSERT(0);
			}
		}

		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
		if (ret)
			goto out;
		btrfs_release_path(path);
	}

	info = search_free_space_info(trans, block_group, path, 1);
	if (IS_ERR(info)) {
		ret = PTR_ERR(info);
		goto out;
	}
	leaf = path->nodes[0];
	flags = btrfs_free_space_flags(leaf, info);
	flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
	btrfs_set_free_space_flags(leaf, info, flags);
	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
	btrfs_mark_buffer_dirty(trans, leaf);
	btrfs_release_path(path);

	nrbits = block_group->length >> block_group->fs_info->sectorsize_bits;
	start_bit = find_next_bit_le(bitmap, nrbits, 0);

	while (start_bit < nrbits) {
		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
		ASSERT(start_bit < end_bit);

		key.objectid = start + start_bit * block_group->fs_info->sectorsize;
		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
		key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;

		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
		if (ret)
			goto out;
		btrfs_release_path(path);

		extent_count++;

		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
	}

	if (extent_count != expected_extent_count) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  block_group->start, extent_count,
			  expected_extent_count);
		ASSERT(0);
		ret = -EIO;
		goto out;
	}

	ret = 0;
out:
	kvfree(bitmap);
	if (ret)
		btrfs_abort_transaction(trans, ret);
	return ret;
}

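/*
 * Apply a delta to the extent count stored in the FREE_SPACE_INFO item and
 * convert the block group between the extent and bitmap formats if the new
 * count crosses the thresholds set up in set_free_space_tree_thresholds().
 */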
static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
					  struct btrfs_block_group *block_group,
					  struct btrfs_path *path,
					  int new_extents)
{
	struct btrfs_free_space_info *info;
	u32 flags;
	u32 extent_count;
	int ret = 0;

	if (new_extents == 0)
		return 0;

	info = search_free_space_info(trans, block_group, path, 1);
	if (IS_ERR(info)) {
		ret = PTR_ERR(info);
		goto out;
	}
	flags = btrfs_free_space_flags(path->nodes[0], info);
	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);

	extent_count += new_extents;
	btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
	btrfs_mark_buffer_dirty(trans, path->nodes[0]);
	btrfs_release_path(path);

	if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
	    extent_count > block_group->bitmap_high_thresh) {
		ret = convert_free_space_to_bitmaps(trans, block_group, path);
	} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
		   extent_count < block_group->bitmap_low_thresh) {
		ret = convert_free_space_to_extents(trans, block_group, path);
	}

out:
	return ret;
}

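/*
 * Test the bit for a given byte offset. The path must already point at the
 * FREE_SPACE_BITMAP item that covers "offset".
 */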
EXPORT_FOR_TESTS
int free_space_test_bit(struct btrfs_block_group *block_group,
			struct btrfs_path *path, u64 offset)
{
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 found_start, found_end;
	unsigned long ptr, i;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(offset >= found_start && offset < found_end);

	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
	i = div_u64(offset - found_start,
		    block_group->fs_info->sectorsize);
	return !!extent_buffer_test_bit(leaf, ptr, i);
}

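/*
 * Set or clear the bits for [*start, *start + *size) within the bitmap item
 * the path points at, clamped to the end of that bitmap. *start and *size are
 * advanced past the range handled so the caller can continue in the next
 * bitmap item.
 */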
static void free_space_set_bits(struct btrfs_trans_handle *trans,
				struct btrfs_block_group *block_group,
				struct btrfs_path *path, u64 *start, u64 *size,
				int bit)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 end = *start + *size;
	u64 found_start, found_end;
	unsigned long ptr, first, last;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(*start >= found_start && *start < found_end);
	ASSERT(end > found_start);

	if (end > found_end)
		end = found_end;

	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
	first = (*start - found_start) >> fs_info->sectorsize_bits;
	last = (end - found_start) >> fs_info->sectorsize_bits;
	if (bit)
		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
	else
		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
	btrfs_mark_buffer_dirty(trans, leaf);

	*size -= end - *start;
	*start = end;
}

/*
 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
 * looking for.
 */
static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root, struct btrfs_path *p)
{
	struct btrfs_key key;

	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
		p->slots[0]++;
		return 0;
	}

	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
	btrfs_release_path(p);

	key.objectid += key.offset;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
}

/*
 * If remove is 1, then we are removing free space, thus clearing bits in the
 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
 * the bitmap.
 */
static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct btrfs_path *path,
				    u64 start, u64 size, int remove)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key;
	u64 end = start + size;
	u64 cur_start, cur_size;
	int prev_bit, next_bit;
	int new_extents;
	int ret;

	/*
	 * Read the bit for the block immediately before the extent of space if
	 * that block is within the block group.
	 */
	if (start > block_group->start) {
		u64 prev_block = start - block_group->fs_info->sectorsize;

		key.objectid = prev_block;
		key.type = (u8)-1;
		key.offset = (u64)-1;

		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
		if (ret)
			goto out;

		prev_bit = free_space_test_bit(block_group, path, prev_block);

		/* The previous block may have been in the previous bitmap. */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (start >= key.objectid + key.offset) {
			ret = free_space_next_bitmap(trans, root, path);
			if (ret)
				goto out;
		}
	} else {
		key.objectid = start;
		key.type = (u8)-1;
		key.offset = (u64)-1;

		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
		if (ret)
			goto out;

		prev_bit = -1;
	}

	/*
	 * Iterate over all of the bitmaps overlapped by the extent of space,
	 * clearing/setting bits as required.
	 */
	cur_start = start;
	cur_size = size;
	while (1) {
		free_space_set_bits(trans, block_group, path, &cur_start, &cur_size,
				    !remove);
		if (cur_size == 0)
			break;
		ret = free_space_next_bitmap(trans, root, path);
		if (ret)
			goto out;
	}

	/*
	 * Read the bit for the block immediately after the extent of space if
	 * that block is within the block group.
	 */
	if (end < block_group->start + block_group->length) {
		/* The next block may be in the next bitmap. */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (end >= key.objectid + key.offset) {
			ret = free_space_next_bitmap(trans, root, path);
			if (ret)
				goto out;
		}

		next_bit = free_space_test_bit(block_group, path, end);
	} else {
		next_bit = -1;
	}

	if (remove) {
		new_extents = -1;
		if (prev_bit == 1) {
			/* Leftover on the left. */
			new_extents++;
		}
		if (next_bit == 1) {
			/* Leftover on the right. */
			new_extents++;
		}
	} else {
		new_extents = 1;
		if (prev_bit == 1) {
			/* Merging with neighbor on the left. */
			new_extents--;
		}
		if (next_bit == 1) {
			/* Merging with neighbor on the right. */
			new_extents--;
		}
	}

	btrfs_release_path(path);
	ret = update_free_space_extent_count(trans, block_group, path,
					     new_extents);

out:
	return ret;
}

static int remove_free_space_extent(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct btrfs_path *path,
				    u64 start, u64 size)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key;
	u64 found_start, found_end;
	u64 end = start + size;
	int new_extents = -1;
	int ret;

	key.objectid = start;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		goto out;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(start >= found_start && end <= found_end);

	/*
	 * Okay, now that we've found the free space extent which contains the
	 * free space that we are removing, there are four cases:
	 *
	 * 1. We're using the whole extent: delete the key we found and
	 * decrement the free space extent count.
	 * 2. We are using part of the extent starting at the beginning: delete
	 * the key we found and insert a new key representing the leftover at
	 * the end. There is no net change in the number of extents.
	 * 3. We are using part of the extent ending at the end: delete the key
	 * we found and insert a new key representing the leftover at the
	 * beginning. There is no net change in the number of extents.
	 * 4. We are using part of the extent in the middle: delete the key we
	 * found and insert two new keys representing the leftovers on each
	 * side. Where we used to have one extent, we now have two, so increment
	 * the extent count. We may need to convert the block group to bitmaps
	 * as a result.
	 */

	/* Delete the existing key (cases 1-4). */
	ret = btrfs_del_item(trans, root, path);
	if (ret)
		goto out;

	/* Add a key for leftovers at the beginning (cases 3 and 4). */
	if (start > found_start) {
		key.objectid = found_start;
		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
		key.offset = start - found_start;

		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
		if (ret)
			goto out;
		new_extents++;
	}

	/* Add a key for leftovers at the end (cases 2 and 4). */
	if (end < found_end) {
		key.objectid = end;
		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
		key.offset = found_end - end;

		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
		if (ret)
			goto out;
		new_extents++;
	}

	btrfs_release_path(path);
	ret = update_free_space_extent_count(trans, block_group, path,
					     new_extents);

out:
	return ret;
}

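/*
 * Remove [start, start + size) from the free space tree, creating the block
 * group's free space info first if it has not been added yet, and dispatching
 * to the bitmap or extent implementation based on the current format.
 */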
EXPORT_FOR_TESTS
int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
				  struct btrfs_block_group *block_group,
				  struct btrfs_path *path, u64 start, u64 size)
{
	struct btrfs_free_space_info *info;
	u32 flags;
	int ret;

	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
		ret = __add_block_group_free_space(trans, block_group, path);
		if (ret)
			return ret;
	}

	info = search_free_space_info(NULL, block_group, path, 0);
	if (IS_ERR(info))
		return PTR_ERR(info);
	flags = btrfs_free_space_flags(path->nodes[0], info);
	btrfs_release_path(path);

	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
		return modify_free_space_bitmap(trans, block_group, path,
						start, size, 1);
	} else {
		return remove_free_space_extent(trans, block_group, path,
						start, size);
	}
}

int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
				u64 start, u64 size)
{
	struct btrfs_block_group *block_group;
	struct btrfs_path *path;
	int ret;

	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
		return 0;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	block_group = btrfs_lookup_block_group(trans->fs_info, start);
	if (!block_group) {
		ASSERT(0);
		ret = -ENOENT;
		goto out;
	}

	mutex_lock(&block_group->free_space_lock);
	ret = __remove_from_free_space_tree(trans, block_group, path, start,
					    size);
	mutex_unlock(&block_group->free_space_lock);

	btrfs_put_block_group(block_group);
out:
	btrfs_free_path(path);
	if (ret)
		btrfs_abort_transaction(trans, ret);
	return ret;
}

static int add_free_space_extent(struct btrfs_trans_handle *trans,
				 struct btrfs_block_group *block_group,
				 struct btrfs_path *path,
				 u64 start, u64 size)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key, new_key;
	u64 found_start, found_end;
	u64 end = start + size;
	int new_extents = 1;
	int ret;

	/*
	 * We are adding a new extent of free space, but we need to merge
	 * extents. There are four cases here:
	 *
	 * 1. The new extent does not have any immediate neighbors to merge
	 * with: add the new key and increment the free space extent count. We
	 * may need to convert the block group to bitmaps as a result.
	 * 2. The new extent has an immediate neighbor before it: remove the
	 * previous key and insert a new key combining both of them. There is no
	 * net change in the number of extents.
	 * 3. The new extent has an immediate neighbor after it: remove the next
	 * key and insert a new key combining both of them. There is no net
	 * change in the number of extents.
	 * 4. The new extent has immediate neighbors on both sides: remove both
	 * of the keys and insert a new key combining all of them. Where we used
	 * to have two extents, we now have one, so decrement the extent count.
	 */

	new_key.objectid = start;
	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
	new_key.offset = size;

	/* Search for a neighbor on the left. */
	if (start == block_group->start)
		goto right;
	key.objectid = start - 1;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		goto out;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
		btrfs_release_path(path);
		goto right;
	}

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(found_start >= block_group->start &&
	       found_end > block_group->start);
	ASSERT(found_start < start && found_end <= start);

	/*
	 * Delete the neighbor on the left and absorb it into the new key (cases
	 * 2 and 4).
	 */
	if (found_end == start) {
		ret = btrfs_del_item(trans, root, path);
		if (ret)
			goto out;
		new_key.objectid = found_start;
		new_key.offset += key.offset;
		new_extents--;
	}
	btrfs_release_path(path);

right:
	/* Search for a neighbor on the right. */
	if (end == block_group->start + block_group->length)
		goto insert;
	key.objectid = end;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		goto out;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
		btrfs_release_path(path);
		goto insert;
	}

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(found_start >= block_group->start &&
	       found_end > block_group->start);
	ASSERT((found_start < start && found_end <= start) ||
	       (found_start >= end && found_end > end));

	/*
	 * Delete the neighbor on the right and absorb it into the new key
	 * (cases 3 and 4).
	 */
	if (found_start == end) {
		ret = btrfs_del_item(trans, root, path);
		if (ret)
			goto out;
		new_key.offset += key.offset;
		new_extents--;
	}
	btrfs_release_path(path);

insert:
	/* Insert the new key (cases 1-4). */
	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
	if (ret)
		goto out;

	btrfs_release_path(path);
	ret = update_free_space_extent_count(trans, block_group, path,
					     new_extents);

out:
	return ret;
}

EXPORT_FOR_TESTS
int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
			     struct btrfs_block_group *block_group,
			     struct btrfs_path *path, u64 start, u64 size)
{
	struct btrfs_free_space_info *info;
	u32 flags;
	int ret;

	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
		ret = __add_block_group_free_space(trans, block_group, path);
		if (ret)
			return ret;
	}

	info = search_free_space_info(NULL, block_group, path, 0);
	if (IS_ERR(info))
		return PTR_ERR(info);
	flags = btrfs_free_space_flags(path->nodes[0], info);
	btrfs_release_path(path);

	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
		return modify_free_space_bitmap(trans, block_group, path,
						start, size, 0);
	} else {
		return add_free_space_extent(trans, block_group, path, start,
					     size);
	}
}

int add_to_free_space_tree(struct btrfs_trans_handle *trans,
			   u64 start, u64 size)
{
	struct btrfs_block_group *block_group;
	struct btrfs_path *path;
	int ret;

	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
		return 0;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	block_group = btrfs_lookup_block_group(trans->fs_info, start);
	if (!block_group) {
		ASSERT(0);
		ret = -ENOENT;
		goto out;
	}

	mutex_lock(&block_group->free_space_lock);
	ret = __add_to_free_space_tree(trans, block_group, path, start, size);
	mutex_unlock(&block_group->free_space_lock);

	btrfs_put_block_group(block_group);
out:
	btrfs_free_path(path);
	if (ret)
		btrfs_abort_transaction(trans, ret);
	return ret;
}

/*
 * Populate the free space tree by walking the extent tree. Operations on the
 * extent tree that happen as a result of writes to the free space tree will go
 * through the normal add/remove hooks.
 */
static int populate_free_space_tree(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group)
{
	struct btrfs_root *extent_root;
	struct btrfs_path *path, *path2;
	struct btrfs_key key;
	u64 start, end;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->reada = READA_FORWARD;

	path2 = btrfs_alloc_path();
	if (!path2) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	ret = add_new_free_space_info(trans, block_group, path2);
	if (ret)
		goto out;

	mutex_lock(&block_group->free_space_lock);

	/*
	 * Iterate through all of the extent and metadata items in this block
	 * group, adding the free space between them and the free space at the
	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
	 * contained in.
	 */
	key.objectid = block_group->start;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;

	extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
	if (ret < 0)
		goto out_locked;
	ASSERT(ret == 0);

	start = block_group->start;
	end = block_group->start + block_group->length;
	while (1) {
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
		    key.type == BTRFS_METADATA_ITEM_KEY) {
			if (key.objectid >= end)
				break;

			if (start < key.objectid) {
				ret = __add_to_free_space_tree(trans,
							       block_group,
							       path2, start,
							       key.objectid -
							       start);
				if (ret)
					goto out_locked;
			}
			start = key.objectid;
			if (key.type == BTRFS_METADATA_ITEM_KEY)
				start += trans->fs_info->nodesize;
			else
				start += key.offset;
		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
			if (key.objectid != block_group->start)
				break;
		}

		ret = btrfs_next_item(extent_root, path);
		if (ret < 0)
			goto out_locked;
		if (ret)
			break;
	}
	if (start < end) {
		ret = __add_to_free_space_tree(trans, block_group, path2,
					       start, end - start);
		if (ret)
			goto out_locked;
	}

	ret = 0;
out_locked:
	mutex_unlock(&block_group->free_space_lock);
out:
	btrfs_free_path(path2);
	btrfs_free_path(path);
	return ret;
}

int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *free_space_root;
	struct btrfs_block_group *block_group;
	struct rb_node *node;
	int ret;

	trans = btrfs_start_transaction(tree_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	free_space_root = btrfs_create_tree(trans,
					    BTRFS_FREE_SPACE_TREE_OBJECTID);
	if (IS_ERR(free_space_root)) {
		ret = PTR_ERR(free_space_root);
		goto abort;
	}
	ret = btrfs_global_root_insert(free_space_root);
	if (ret) {
		btrfs_put_root(free_space_root);
		goto abort;
	}

	node = rb_first_cached(&fs_info->block_group_cache_tree);
	while (node) {
		block_group = rb_entry(node, struct btrfs_block_group,
				       cache_node);
		ret = populate_free_space_tree(trans, block_group);
		if (ret)
			goto abort;
		node = rb_next(node);
	}

	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	ret = btrfs_commit_transaction(trans);

	/*
	 * Now that we've committed the transaction any reading of our commit
	 * root will be safe, so we can cache from the free space tree now.
	 */
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	return ret;

abort:
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	btrfs_abort_transaction(trans, ret);
	btrfs_end_transaction(trans);
	return ret;
}

static int clear_free_space_tree(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	int nr;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = 0;
	key.offset = 0;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0)
			goto out;

		nr = btrfs_header_nritems(path->nodes[0]);
		if (!nr)
			break;

		path->slots[0] = 0;
		ret = btrfs_del_items(trans, root, path, 0, nr);
		if (ret)
			goto out;

		btrfs_release_path(path);
	}

	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_key key = {
		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};
	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
	int ret;

	trans = btrfs_start_transaction(tree_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);

	ret = clear_free_space_tree(trans, free_space_root);
	if (ret)
		goto abort;

	ret = btrfs_del_root(trans, &free_space_root->root_key);
	if (ret)
		goto abort;

	btrfs_global_root_delete(free_space_root);

	spin_lock(&fs_info->trans_lock);
	list_del(&free_space_root->dirty_list);
	spin_unlock(&fs_info->trans_lock);

	btrfs_tree_lock(free_space_root->node);
	btrfs_clear_buffer_dirty(trans, free_space_root->node);
	btrfs_tree_unlock(free_space_root->node);
	btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
			      free_space_root->node, 0, 1);

	btrfs_put_root(free_space_root);

	return btrfs_commit_transaction(trans);

abort:
	btrfs_abort_transaction(trans, ret);
	btrfs_end_transaction(trans);
	return ret;
}

int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_key key = {
		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};
	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
	struct rb_node *node;
	int ret;

	trans = btrfs_start_transaction(free_space_root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);

	ret = clear_free_space_tree(trans, free_space_root);
	if (ret)
		goto abort;

	node = rb_first_cached(&fs_info->block_group_cache_tree);
	while (node) {
		struct btrfs_block_group *block_group;

		block_group = rb_entry(node, struct btrfs_block_group,
				       cache_node);
		ret = populate_free_space_tree(trans, block_group);
		if (ret)
			goto abort;
		node = rb_next(node);
	}

	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);

	ret = btrfs_commit_transaction(trans);
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	return ret;
abort:
	btrfs_abort_transaction(trans, ret);
	btrfs_end_transaction(trans);
	return ret;
}

static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group,
					struct btrfs_path *path)
{
	int ret;

	clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags);

	ret = add_new_free_space_info(trans, block_group, path);
	if (ret)
		return ret;

	return __add_to_free_space_tree(trans, block_group, path,
					block_group->start,
					block_group->length);
}

int add_block_group_free_space(struct btrfs_trans_handle *trans,
			       struct btrfs_block_group *block_group)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_path *path = NULL;
	int ret = 0;

	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
		return 0;

	mutex_lock(&block_group->free_space_lock);
	if (!test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags))
		goto out;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	ret = __add_block_group_free_space(trans, block_group, path);

out:
	btrfs_free_path(path);
	mutex_unlock(&block_group->free_space_lock);
	if (ret)
		btrfs_abort_transaction(trans, ret);
	return ret;
}

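/*
 * Delete everything the free space tree tracks for a block group that is
 * being removed: the FREE_SPACE_INFO item plus all of its extent and bitmap
 * items.
 */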
int remove_block_group_free_space(struct btrfs_trans_handle *trans,
				  struct btrfs_block_group *block_group)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_path *path;
	struct btrfs_key key, found_key;
	struct extent_buffer *leaf;
	u64 start, end;
	int done = 0, nr;
	int ret;

	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
		return 0;

	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
		/* We never added this block group to the free space tree. */
		return 0;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	start = block_group->start;
	end = block_group->start + block_group->length;

	key.objectid = end - 1;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	while (!done) {
		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
		if (ret)
			goto out;

		leaf = path->nodes[0];
		nr = 0;
		path->slots[0]++;
		while (path->slots[0] > 0) {
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);

			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
				ASSERT(found_key.objectid == block_group->start);
				ASSERT(found_key.offset == block_group->length);
				done = 1;
				nr++;
				path->slots[0]--;
				break;
			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
				ASSERT(found_key.objectid >= start);
				ASSERT(found_key.objectid < end);
				ASSERT(found_key.objectid + found_key.offset <= end);
				nr++;
				path->slots[0]--;
			} else {
				ASSERT(0);
			}
		}

		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
		if (ret)
			goto out;
		btrfs_release_path(path);
	}

	ret = 0;
out:
	btrfs_free_path(path);
	if (ret)
		btrfs_abort_transaction(trans, ret);
	return ret;
}

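/*
 * Used while caching a block group: walk the FREE_SPACE_BITMAP items that
 * follow the info item and feed each run of set bits into
 * btrfs_add_new_free_space(), waking up waiters once enough space has been
 * found.
 */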
static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
				   struct btrfs_path *path,
				   u32 expected_extent_count)
{
	struct btrfs_block_group *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_root *root;
	struct btrfs_key key;
	int prev_bit = 0, bit;
	/* Initialize to silence GCC. */
	u64 extent_start = 0;
	u64 end, offset;
	u64 total_found = 0;
	u32 extent_count = 0;
	int ret;

	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;
	root = btrfs_free_space_root(block_group);

	end = block_group->start + block_group->length;

	while (1) {
		ret = btrfs_next_item(root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
			break;

		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
		ASSERT(key.objectid < end && key.objectid + key.offset <= end);

		offset = key.objectid;
		while (offset < key.objectid + key.offset) {
			bit = free_space_test_bit(block_group, path, offset);
			if (prev_bit == 0 && bit == 1) {
				extent_start = offset;
			} else if (prev_bit == 1 && bit == 0) {
				u64 space_added;

				ret = btrfs_add_new_free_space(block_group,
							       extent_start,
							       offset,
							       &space_added);
				if (ret)
					goto out;
				total_found += space_added;
				if (total_found > CACHING_CTL_WAKE_UP) {
					total_found = 0;
					wake_up(&caching_ctl->wait);
				}
				extent_count++;
			}
			prev_bit = bit;
			offset += fs_info->sectorsize;
		}
	}
	if (prev_bit == 1) {
		ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
		if (ret)
			goto out;
		extent_count++;
	}

	if (extent_count != expected_extent_count) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  block_group->start, extent_count,
			  expected_extent_count);
		ASSERT(0);
		ret = -EIO;
		goto out;
	}

	ret = 0;
out:
	return ret;
}

static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
				   struct btrfs_path *path,
				   u32 expected_extent_count)
{
	struct btrfs_block_group *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_root *root;
	struct btrfs_key key;
	u64 end;
	u64 total_found = 0;
	u32 extent_count = 0;
	int ret;

	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;
	root = btrfs_free_space_root(block_group);

	end = block_group->start + block_group->length;

	while (1) {
		u64 space_added;

		ret = btrfs_next_item(root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
			break;

		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
		ASSERT(key.objectid < end && key.objectid + key.offset <= end);

		ret = btrfs_add_new_free_space(block_group, key.objectid,
					       key.objectid + key.offset,
					       &space_added);
		if (ret)
			goto out;
		total_found += space_added;
		if (total_found > CACHING_CTL_WAKE_UP) {
			total_found = 0;
			wake_up(&caching_ctl->wait);
		}
		extent_count++;
	}

	if (extent_count != expected_extent_count) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  block_group->start, extent_count,
			  expected_extent_count);
		ASSERT(0);
		ret = -EIO;
		goto out;
	}

	ret = 0;
out:
	return ret;
}

int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
{
	struct btrfs_block_group *block_group;
	struct btrfs_free_space_info *info;
	struct btrfs_path *path;
	u32 extent_count, flags;
	int ret;

	block_group = caching_ctl->block_group;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * Just like caching_thread() doesn't want to deadlock on the extent
	 * tree, we don't want to deadlock on the free space tree.
	 */
	path->skip_locking = 1;
	path->search_commit_root = 1;
	path->reada = READA_FORWARD;

	info = search_free_space_info(NULL, block_group, path, 0);
	if (IS_ERR(info)) {
		ret = PTR_ERR(info);
		goto out;
	}
	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
	flags = btrfs_free_space_flags(path->nodes[0], info);

	/*
	 * We left path pointing to the free space info item, so now
	 * load_free_space_foo can just iterate through the free space tree from
	 * there.
	 */
	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
		ret = load_free_space_bitmaps(caching_ctl, path, extent_count);
	else
		ret = load_free_space_extents(caching_ctl, path, extent_count);

out:
	btrfs_free_path(path);
	return ret;
}