xref: /kernel/linux/linux-6.6/fs/btrfs/extent_map.c (revision 62306a36)
162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci
362306a36Sopenharmony_ci#include <linux/err.h>
462306a36Sopenharmony_ci#include <linux/slab.h>
562306a36Sopenharmony_ci#include <linux/spinlock.h>
662306a36Sopenharmony_ci#include "messages.h"
762306a36Sopenharmony_ci#include "ctree.h"
862306a36Sopenharmony_ci#include "volumes.h"
962306a36Sopenharmony_ci#include "extent_map.h"
1062306a36Sopenharmony_ci#include "compression.h"
1162306a36Sopenharmony_ci#include "btrfs_inode.h"
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_cistatic struct kmem_cache *extent_map_cache;
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_ciint __init extent_map_init(void)
1762306a36Sopenharmony_ci{
1862306a36Sopenharmony_ci	extent_map_cache = kmem_cache_create("btrfs_extent_map",
1962306a36Sopenharmony_ci			sizeof(struct extent_map), 0,
2062306a36Sopenharmony_ci			SLAB_MEM_SPREAD, NULL);
2162306a36Sopenharmony_ci	if (!extent_map_cache)
2262306a36Sopenharmony_ci		return -ENOMEM;
2362306a36Sopenharmony_ci	return 0;
2462306a36Sopenharmony_ci}
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_civoid __cold extent_map_exit(void)
2762306a36Sopenharmony_ci{
2862306a36Sopenharmony_ci	kmem_cache_destroy(extent_map_cache);
2962306a36Sopenharmony_ci}
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_ci/*
3262306a36Sopenharmony_ci * Initialize the extent tree @tree.  Should be called for each new inode or
3362306a36Sopenharmony_ci * other user of the extent_map interface.
3462306a36Sopenharmony_ci */
3562306a36Sopenharmony_civoid extent_map_tree_init(struct extent_map_tree *tree)
3662306a36Sopenharmony_ci{
3762306a36Sopenharmony_ci	tree->map = RB_ROOT_CACHED;
3862306a36Sopenharmony_ci	INIT_LIST_HEAD(&tree->modified_extents);
3962306a36Sopenharmony_ci	rwlock_init(&tree->lock);
4062306a36Sopenharmony_ci}
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci/*
4362306a36Sopenharmony_ci * Allocate a new extent_map structure.  The new structure is returned with a
4462306a36Sopenharmony_ci * reference count of one and needs to be freed using free_extent_map()
4562306a36Sopenharmony_ci */
4662306a36Sopenharmony_cistruct extent_map *alloc_extent_map(void)
4762306a36Sopenharmony_ci{
4862306a36Sopenharmony_ci	struct extent_map *em;
4962306a36Sopenharmony_ci	em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS);
5062306a36Sopenharmony_ci	if (!em)
5162306a36Sopenharmony_ci		return NULL;
5262306a36Sopenharmony_ci	RB_CLEAR_NODE(&em->rb_node);
5362306a36Sopenharmony_ci	em->compress_type = BTRFS_COMPRESS_NONE;
5462306a36Sopenharmony_ci	refcount_set(&em->refs, 1);
5562306a36Sopenharmony_ci	INIT_LIST_HEAD(&em->list);
5662306a36Sopenharmony_ci	return em;
5762306a36Sopenharmony_ci}
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_ci/*
6062306a36Sopenharmony_ci * Drop the reference out on @em by one and free the structure if the reference
6162306a36Sopenharmony_ci * count hits zero.
6262306a36Sopenharmony_ci */
6362306a36Sopenharmony_civoid free_extent_map(struct extent_map *em)
6462306a36Sopenharmony_ci{
6562306a36Sopenharmony_ci	if (!em)
6662306a36Sopenharmony_ci		return;
6762306a36Sopenharmony_ci	if (refcount_dec_and_test(&em->refs)) {
6862306a36Sopenharmony_ci		WARN_ON(extent_map_in_tree(em));
6962306a36Sopenharmony_ci		WARN_ON(!list_empty(&em->list));
7062306a36Sopenharmony_ci		if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
7162306a36Sopenharmony_ci			kfree(em->map_lookup);
7262306a36Sopenharmony_ci		kmem_cache_free(extent_map_cache, em);
7362306a36Sopenharmony_ci	}
7462306a36Sopenharmony_ci}
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci/* Do the math around the end of an extent, handling wrapping. */
7762306a36Sopenharmony_cistatic u64 range_end(u64 start, u64 len)
7862306a36Sopenharmony_ci{
7962306a36Sopenharmony_ci	if (start + len < start)
8062306a36Sopenharmony_ci		return (u64)-1;
8162306a36Sopenharmony_ci	return start + len;
8262306a36Sopenharmony_ci}
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_cistatic int tree_insert(struct rb_root_cached *root, struct extent_map *em)
8562306a36Sopenharmony_ci{
8662306a36Sopenharmony_ci	struct rb_node **p = &root->rb_root.rb_node;
8762306a36Sopenharmony_ci	struct rb_node *parent = NULL;
8862306a36Sopenharmony_ci	struct extent_map *entry = NULL;
8962306a36Sopenharmony_ci	struct rb_node *orig_parent = NULL;
9062306a36Sopenharmony_ci	u64 end = range_end(em->start, em->len);
9162306a36Sopenharmony_ci	bool leftmost = true;
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci	while (*p) {
9462306a36Sopenharmony_ci		parent = *p;
9562306a36Sopenharmony_ci		entry = rb_entry(parent, struct extent_map, rb_node);
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci		if (em->start < entry->start) {
9862306a36Sopenharmony_ci			p = &(*p)->rb_left;
9962306a36Sopenharmony_ci		} else if (em->start >= extent_map_end(entry)) {
10062306a36Sopenharmony_ci			p = &(*p)->rb_right;
10162306a36Sopenharmony_ci			leftmost = false;
10262306a36Sopenharmony_ci		} else {
10362306a36Sopenharmony_ci			return -EEXIST;
10462306a36Sopenharmony_ci		}
10562306a36Sopenharmony_ci	}
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci	orig_parent = parent;
10862306a36Sopenharmony_ci	while (parent && em->start >= extent_map_end(entry)) {
10962306a36Sopenharmony_ci		parent = rb_next(parent);
11062306a36Sopenharmony_ci		entry = rb_entry(parent, struct extent_map, rb_node);
11162306a36Sopenharmony_ci	}
11262306a36Sopenharmony_ci	if (parent)
11362306a36Sopenharmony_ci		if (end > entry->start && em->start < extent_map_end(entry))
11462306a36Sopenharmony_ci			return -EEXIST;
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci	parent = orig_parent;
11762306a36Sopenharmony_ci	entry = rb_entry(parent, struct extent_map, rb_node);
11862306a36Sopenharmony_ci	while (parent && em->start < entry->start) {
11962306a36Sopenharmony_ci		parent = rb_prev(parent);
12062306a36Sopenharmony_ci		entry = rb_entry(parent, struct extent_map, rb_node);
12162306a36Sopenharmony_ci	}
12262306a36Sopenharmony_ci	if (parent)
12362306a36Sopenharmony_ci		if (end > entry->start && em->start < extent_map_end(entry))
12462306a36Sopenharmony_ci			return -EEXIST;
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci	rb_link_node(&em->rb_node, orig_parent, p);
12762306a36Sopenharmony_ci	rb_insert_color_cached(&em->rb_node, root, leftmost);
12862306a36Sopenharmony_ci	return 0;
12962306a36Sopenharmony_ci}
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ci/*
13262306a36Sopenharmony_ci * Search through the tree for an extent_map with a given offset.  If it can't
13362306a36Sopenharmony_ci * be found, try to find some neighboring extents
13462306a36Sopenharmony_ci */
13562306a36Sopenharmony_cistatic struct rb_node *__tree_search(struct rb_root *root, u64 offset,
13662306a36Sopenharmony_ci				     struct rb_node **prev_or_next_ret)
13762306a36Sopenharmony_ci{
13862306a36Sopenharmony_ci	struct rb_node *n = root->rb_node;
13962306a36Sopenharmony_ci	struct rb_node *prev = NULL;
14062306a36Sopenharmony_ci	struct rb_node *orig_prev = NULL;
14162306a36Sopenharmony_ci	struct extent_map *entry;
14262306a36Sopenharmony_ci	struct extent_map *prev_entry = NULL;
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci	ASSERT(prev_or_next_ret);
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ci	while (n) {
14762306a36Sopenharmony_ci		entry = rb_entry(n, struct extent_map, rb_node);
14862306a36Sopenharmony_ci		prev = n;
14962306a36Sopenharmony_ci		prev_entry = entry;
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci		if (offset < entry->start)
15262306a36Sopenharmony_ci			n = n->rb_left;
15362306a36Sopenharmony_ci		else if (offset >= extent_map_end(entry))
15462306a36Sopenharmony_ci			n = n->rb_right;
15562306a36Sopenharmony_ci		else
15662306a36Sopenharmony_ci			return n;
15762306a36Sopenharmony_ci	}
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci	orig_prev = prev;
16062306a36Sopenharmony_ci	while (prev && offset >= extent_map_end(prev_entry)) {
16162306a36Sopenharmony_ci		prev = rb_next(prev);
16262306a36Sopenharmony_ci		prev_entry = rb_entry(prev, struct extent_map, rb_node);
16362306a36Sopenharmony_ci	}
16462306a36Sopenharmony_ci
16562306a36Sopenharmony_ci	/*
16662306a36Sopenharmony_ci	 * Previous extent map found, return as in this case the caller does not
16762306a36Sopenharmony_ci	 * care about the next one.
16862306a36Sopenharmony_ci	 */
16962306a36Sopenharmony_ci	if (prev) {
17062306a36Sopenharmony_ci		*prev_or_next_ret = prev;
17162306a36Sopenharmony_ci		return NULL;
17262306a36Sopenharmony_ci	}
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_ci	prev = orig_prev;
17562306a36Sopenharmony_ci	prev_entry = rb_entry(prev, struct extent_map, rb_node);
17662306a36Sopenharmony_ci	while (prev && offset < prev_entry->start) {
17762306a36Sopenharmony_ci		prev = rb_prev(prev);
17862306a36Sopenharmony_ci		prev_entry = rb_entry(prev, struct extent_map, rb_node);
17962306a36Sopenharmony_ci	}
18062306a36Sopenharmony_ci	*prev_or_next_ret = prev;
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	return NULL;
18362306a36Sopenharmony_ci}
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci/* Check to see if two extent_map structs are adjacent and safe to merge. */
18662306a36Sopenharmony_cistatic int mergable_maps(struct extent_map *prev, struct extent_map *next)
18762306a36Sopenharmony_ci{
18862306a36Sopenharmony_ci	if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
18962306a36Sopenharmony_ci		return 0;
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ci	/*
19262306a36Sopenharmony_ci	 * don't merge compressed extents, we need to know their
19362306a36Sopenharmony_ci	 * actual size
19462306a36Sopenharmony_ci	 */
19562306a36Sopenharmony_ci	if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
19662306a36Sopenharmony_ci		return 0;
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) ||
19962306a36Sopenharmony_ci	    test_bit(EXTENT_FLAG_LOGGING, &next->flags))
20062306a36Sopenharmony_ci		return 0;
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci	/*
20362306a36Sopenharmony_ci	 * We don't want to merge stuff that hasn't been written to the log yet
20462306a36Sopenharmony_ci	 * since it may not reflect exactly what is on disk, and that would be
20562306a36Sopenharmony_ci	 * bad.
20662306a36Sopenharmony_ci	 */
20762306a36Sopenharmony_ci	if (!list_empty(&prev->list) || !list_empty(&next->list))
20862306a36Sopenharmony_ci		return 0;
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_ci	ASSERT(next->block_start != EXTENT_MAP_DELALLOC &&
21162306a36Sopenharmony_ci	       prev->block_start != EXTENT_MAP_DELALLOC);
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci	if (prev->map_lookup || next->map_lookup)
21462306a36Sopenharmony_ci		ASSERT(test_bit(EXTENT_FLAG_FS_MAPPING, &prev->flags) &&
21562306a36Sopenharmony_ci		       test_bit(EXTENT_FLAG_FS_MAPPING, &next->flags));
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci	if (extent_map_end(prev) == next->start &&
21862306a36Sopenharmony_ci	    prev->flags == next->flags &&
21962306a36Sopenharmony_ci	    prev->map_lookup == next->map_lookup &&
22062306a36Sopenharmony_ci	    ((next->block_start == EXTENT_MAP_HOLE &&
22162306a36Sopenharmony_ci	      prev->block_start == EXTENT_MAP_HOLE) ||
22262306a36Sopenharmony_ci	     (next->block_start == EXTENT_MAP_INLINE &&
22362306a36Sopenharmony_ci	      prev->block_start == EXTENT_MAP_INLINE) ||
22462306a36Sopenharmony_ci	     (next->block_start < EXTENT_MAP_LAST_BYTE - 1 &&
22562306a36Sopenharmony_ci	      next->block_start == extent_map_block_end(prev)))) {
22662306a36Sopenharmony_ci		return 1;
22762306a36Sopenharmony_ci	}
22862306a36Sopenharmony_ci	return 0;
22962306a36Sopenharmony_ci}
23062306a36Sopenharmony_ci
23162306a36Sopenharmony_cistatic void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
23262306a36Sopenharmony_ci{
23362306a36Sopenharmony_ci	struct extent_map *merge = NULL;
23462306a36Sopenharmony_ci	struct rb_node *rb;
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci	/*
23762306a36Sopenharmony_ci	 * We can't modify an extent map that is in the tree and that is being
23862306a36Sopenharmony_ci	 * used by another task, as it can cause that other task to see it in
23962306a36Sopenharmony_ci	 * inconsistent state during the merging. We always have 1 reference for
24062306a36Sopenharmony_ci	 * the tree and 1 for this task (which is unpinning the extent map or
24162306a36Sopenharmony_ci	 * clearing the logging flag), so anything > 2 means it's being used by
24262306a36Sopenharmony_ci	 * other tasks too.
24362306a36Sopenharmony_ci	 */
24462306a36Sopenharmony_ci	if (refcount_read(&em->refs) > 2)
24562306a36Sopenharmony_ci		return;
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci	if (em->start != 0) {
24862306a36Sopenharmony_ci		rb = rb_prev(&em->rb_node);
24962306a36Sopenharmony_ci		if (rb)
25062306a36Sopenharmony_ci			merge = rb_entry(rb, struct extent_map, rb_node);
25162306a36Sopenharmony_ci		if (rb && mergable_maps(merge, em)) {
25262306a36Sopenharmony_ci			em->start = merge->start;
25362306a36Sopenharmony_ci			em->orig_start = merge->orig_start;
25462306a36Sopenharmony_ci			em->len += merge->len;
25562306a36Sopenharmony_ci			em->block_len += merge->block_len;
25662306a36Sopenharmony_ci			em->block_start = merge->block_start;
25762306a36Sopenharmony_ci			em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
25862306a36Sopenharmony_ci			em->mod_start = merge->mod_start;
25962306a36Sopenharmony_ci			em->generation = max(em->generation, merge->generation);
26062306a36Sopenharmony_ci			set_bit(EXTENT_FLAG_MERGED, &em->flags);
26162306a36Sopenharmony_ci
26262306a36Sopenharmony_ci			rb_erase_cached(&merge->rb_node, &tree->map);
26362306a36Sopenharmony_ci			RB_CLEAR_NODE(&merge->rb_node);
26462306a36Sopenharmony_ci			free_extent_map(merge);
26562306a36Sopenharmony_ci		}
26662306a36Sopenharmony_ci	}
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ci	rb = rb_next(&em->rb_node);
26962306a36Sopenharmony_ci	if (rb)
27062306a36Sopenharmony_ci		merge = rb_entry(rb, struct extent_map, rb_node);
27162306a36Sopenharmony_ci	if (rb && mergable_maps(em, merge)) {
27262306a36Sopenharmony_ci		em->len += merge->len;
27362306a36Sopenharmony_ci		em->block_len += merge->block_len;
27462306a36Sopenharmony_ci		rb_erase_cached(&merge->rb_node, &tree->map);
27562306a36Sopenharmony_ci		RB_CLEAR_NODE(&merge->rb_node);
27662306a36Sopenharmony_ci		em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
27762306a36Sopenharmony_ci		em->generation = max(em->generation, merge->generation);
27862306a36Sopenharmony_ci		set_bit(EXTENT_FLAG_MERGED, &em->flags);
27962306a36Sopenharmony_ci		free_extent_map(merge);
28062306a36Sopenharmony_ci	}
28162306a36Sopenharmony_ci}
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci/*
28462306a36Sopenharmony_ci * Unpin an extent from the cache.
28562306a36Sopenharmony_ci *
28662306a36Sopenharmony_ci * @tree:	tree to unpin the extent in
28762306a36Sopenharmony_ci * @start:	logical offset in the file
28862306a36Sopenharmony_ci * @len:	length of the extent
28962306a36Sopenharmony_ci * @gen:	generation that this extent has been modified in
29062306a36Sopenharmony_ci *
29162306a36Sopenharmony_ci * Called after an extent has been written to disk properly.  Set the generation
29262306a36Sopenharmony_ci * to the generation that actually added the file item to the inode so we know
29362306a36Sopenharmony_ci * we need to sync this extent when we call fsync().
29462306a36Sopenharmony_ci */
29562306a36Sopenharmony_ciint unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
29662306a36Sopenharmony_ci		       u64 gen)
29762306a36Sopenharmony_ci{
29862306a36Sopenharmony_ci	int ret = 0;
29962306a36Sopenharmony_ci	struct extent_map *em;
30062306a36Sopenharmony_ci	bool prealloc = false;
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ci	write_lock(&tree->lock);
30362306a36Sopenharmony_ci	em = lookup_extent_mapping(tree, start, len);
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_ci	WARN_ON(!em || em->start != start);
30662306a36Sopenharmony_ci
30762306a36Sopenharmony_ci	if (!em)
30862306a36Sopenharmony_ci		goto out;
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci	em->generation = gen;
31162306a36Sopenharmony_ci	clear_bit(EXTENT_FLAG_PINNED, &em->flags);
31262306a36Sopenharmony_ci	em->mod_start = em->start;
31362306a36Sopenharmony_ci	em->mod_len = em->len;
31462306a36Sopenharmony_ci
31562306a36Sopenharmony_ci	if (test_bit(EXTENT_FLAG_FILLING, &em->flags)) {
31662306a36Sopenharmony_ci		prealloc = true;
31762306a36Sopenharmony_ci		clear_bit(EXTENT_FLAG_FILLING, &em->flags);
31862306a36Sopenharmony_ci	}
31962306a36Sopenharmony_ci
32062306a36Sopenharmony_ci	try_merge_map(tree, em);
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_ci	if (prealloc) {
32362306a36Sopenharmony_ci		em->mod_start = em->start;
32462306a36Sopenharmony_ci		em->mod_len = em->len;
32562306a36Sopenharmony_ci	}
32662306a36Sopenharmony_ci
32762306a36Sopenharmony_ci	free_extent_map(em);
32862306a36Sopenharmony_ciout:
32962306a36Sopenharmony_ci	write_unlock(&tree->lock);
33062306a36Sopenharmony_ci	return ret;
33162306a36Sopenharmony_ci
33262306a36Sopenharmony_ci}
33362306a36Sopenharmony_ci
33462306a36Sopenharmony_civoid clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
33562306a36Sopenharmony_ci{
33662306a36Sopenharmony_ci	lockdep_assert_held_write(&tree->lock);
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci	clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
33962306a36Sopenharmony_ci	if (extent_map_in_tree(em))
34062306a36Sopenharmony_ci		try_merge_map(tree, em);
34162306a36Sopenharmony_ci}
34262306a36Sopenharmony_ci
34362306a36Sopenharmony_cistatic inline void setup_extent_mapping(struct extent_map_tree *tree,
34462306a36Sopenharmony_ci					struct extent_map *em,
34562306a36Sopenharmony_ci					int modified)
34662306a36Sopenharmony_ci{
34762306a36Sopenharmony_ci	refcount_inc(&em->refs);
34862306a36Sopenharmony_ci	em->mod_start = em->start;
34962306a36Sopenharmony_ci	em->mod_len = em->len;
35062306a36Sopenharmony_ci
35162306a36Sopenharmony_ci	if (modified)
35262306a36Sopenharmony_ci		list_move(&em->list, &tree->modified_extents);
35362306a36Sopenharmony_ci	else
35462306a36Sopenharmony_ci		try_merge_map(tree, em);
35562306a36Sopenharmony_ci}
35662306a36Sopenharmony_ci
35762306a36Sopenharmony_cistatic void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
35862306a36Sopenharmony_ci{
35962306a36Sopenharmony_ci	struct map_lookup *map = em->map_lookup;
36062306a36Sopenharmony_ci	u64 stripe_size = em->orig_block_len;
36162306a36Sopenharmony_ci	int i;
36262306a36Sopenharmony_ci
36362306a36Sopenharmony_ci	for (i = 0; i < map->num_stripes; i++) {
36462306a36Sopenharmony_ci		struct btrfs_io_stripe *stripe = &map->stripes[i];
36562306a36Sopenharmony_ci		struct btrfs_device *device = stripe->dev;
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_ci		set_extent_bit(&device->alloc_state, stripe->physical,
36862306a36Sopenharmony_ci			       stripe->physical + stripe_size - 1,
36962306a36Sopenharmony_ci			       bits | EXTENT_NOWAIT, NULL);
37062306a36Sopenharmony_ci	}
37162306a36Sopenharmony_ci}
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_cistatic void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
37462306a36Sopenharmony_ci{
37562306a36Sopenharmony_ci	struct map_lookup *map = em->map_lookup;
37662306a36Sopenharmony_ci	u64 stripe_size = em->orig_block_len;
37762306a36Sopenharmony_ci	int i;
37862306a36Sopenharmony_ci
37962306a36Sopenharmony_ci	for (i = 0; i < map->num_stripes; i++) {
38062306a36Sopenharmony_ci		struct btrfs_io_stripe *stripe = &map->stripes[i];
38162306a36Sopenharmony_ci		struct btrfs_device *device = stripe->dev;
38262306a36Sopenharmony_ci
38362306a36Sopenharmony_ci		__clear_extent_bit(&device->alloc_state, stripe->physical,
38462306a36Sopenharmony_ci				   stripe->physical + stripe_size - 1,
38562306a36Sopenharmony_ci				   bits | EXTENT_NOWAIT,
38662306a36Sopenharmony_ci				   NULL, NULL);
38762306a36Sopenharmony_ci	}
38862306a36Sopenharmony_ci}
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_ci/*
39162306a36Sopenharmony_ci * Add new extent map to the extent tree
39262306a36Sopenharmony_ci *
39362306a36Sopenharmony_ci * @tree:	tree to insert new map in
39462306a36Sopenharmony_ci * @em:		map to insert
39562306a36Sopenharmony_ci * @modified:	indicate whether the given @em should be added to the
39662306a36Sopenharmony_ci *	        modified list, which indicates the extent needs to be logged
39762306a36Sopenharmony_ci *
39862306a36Sopenharmony_ci * Insert @em into @tree or perform a simple forward/backward merge with
39962306a36Sopenharmony_ci * existing mappings.  The extent_map struct passed in will be inserted
40062306a36Sopenharmony_ci * into the tree directly, with an additional reference taken, or a
40162306a36Sopenharmony_ci * reference dropped if the merge attempt was successful.
40262306a36Sopenharmony_ci */
40362306a36Sopenharmony_ciint add_extent_mapping(struct extent_map_tree *tree,
40462306a36Sopenharmony_ci		       struct extent_map *em, int modified)
40562306a36Sopenharmony_ci{
40662306a36Sopenharmony_ci	int ret = 0;
40762306a36Sopenharmony_ci
40862306a36Sopenharmony_ci	lockdep_assert_held_write(&tree->lock);
40962306a36Sopenharmony_ci
41062306a36Sopenharmony_ci	ret = tree_insert(&tree->map, em);
41162306a36Sopenharmony_ci	if (ret)
41262306a36Sopenharmony_ci		goto out;
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_ci	setup_extent_mapping(tree, em, modified);
41562306a36Sopenharmony_ci	if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) {
41662306a36Sopenharmony_ci		extent_map_device_set_bits(em, CHUNK_ALLOCATED);
41762306a36Sopenharmony_ci		extent_map_device_clear_bits(em, CHUNK_TRIMMED);
41862306a36Sopenharmony_ci	}
41962306a36Sopenharmony_ciout:
42062306a36Sopenharmony_ci	return ret;
42162306a36Sopenharmony_ci}
42262306a36Sopenharmony_ci
42362306a36Sopenharmony_cistatic struct extent_map *
42462306a36Sopenharmony_ci__lookup_extent_mapping(struct extent_map_tree *tree,
42562306a36Sopenharmony_ci			u64 start, u64 len, int strict)
42662306a36Sopenharmony_ci{
42762306a36Sopenharmony_ci	struct extent_map *em;
42862306a36Sopenharmony_ci	struct rb_node *rb_node;
42962306a36Sopenharmony_ci	struct rb_node *prev_or_next = NULL;
43062306a36Sopenharmony_ci	u64 end = range_end(start, len);
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	rb_node = __tree_search(&tree->map.rb_root, start, &prev_or_next);
43362306a36Sopenharmony_ci	if (!rb_node) {
43462306a36Sopenharmony_ci		if (prev_or_next)
43562306a36Sopenharmony_ci			rb_node = prev_or_next;
43662306a36Sopenharmony_ci		else
43762306a36Sopenharmony_ci			return NULL;
43862306a36Sopenharmony_ci	}
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci	em = rb_entry(rb_node, struct extent_map, rb_node);
44162306a36Sopenharmony_ci
44262306a36Sopenharmony_ci	if (strict && !(end > em->start && start < extent_map_end(em)))
44362306a36Sopenharmony_ci		return NULL;
44462306a36Sopenharmony_ci
44562306a36Sopenharmony_ci	refcount_inc(&em->refs);
44662306a36Sopenharmony_ci	return em;
44762306a36Sopenharmony_ci}
44862306a36Sopenharmony_ci
44962306a36Sopenharmony_ci/*
45062306a36Sopenharmony_ci * Lookup extent_map that intersects @start + @len range.
45162306a36Sopenharmony_ci *
45262306a36Sopenharmony_ci * @tree:	tree to lookup in
45362306a36Sopenharmony_ci * @start:	byte offset to start the search
45462306a36Sopenharmony_ci * @len:	length of the lookup range
45562306a36Sopenharmony_ci *
45662306a36Sopenharmony_ci * Find and return the first extent_map struct in @tree that intersects the
45762306a36Sopenharmony_ci * [start, len] range.  There may be additional objects in the tree that
45862306a36Sopenharmony_ci * intersect, so check the object returned carefully to make sure that no
45962306a36Sopenharmony_ci * additional lookups are needed.
46062306a36Sopenharmony_ci */
46162306a36Sopenharmony_cistruct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
46262306a36Sopenharmony_ci					 u64 start, u64 len)
46362306a36Sopenharmony_ci{
46462306a36Sopenharmony_ci	return __lookup_extent_mapping(tree, start, len, 1);
46562306a36Sopenharmony_ci}
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_ci/*
46862306a36Sopenharmony_ci * Find a nearby extent map intersecting @start + @len (not an exact search).
46962306a36Sopenharmony_ci *
47062306a36Sopenharmony_ci * @tree:	tree to lookup in
47162306a36Sopenharmony_ci * @start:	byte offset to start the search
47262306a36Sopenharmony_ci * @len:	length of the lookup range
47362306a36Sopenharmony_ci *
47462306a36Sopenharmony_ci * Find and return the first extent_map struct in @tree that intersects the
47562306a36Sopenharmony_ci * [start, len] range.
47662306a36Sopenharmony_ci *
47762306a36Sopenharmony_ci * If one can't be found, any nearby extent may be returned
47862306a36Sopenharmony_ci */
47962306a36Sopenharmony_cistruct extent_map *search_extent_mapping(struct extent_map_tree *tree,
48062306a36Sopenharmony_ci					 u64 start, u64 len)
48162306a36Sopenharmony_ci{
48262306a36Sopenharmony_ci	return __lookup_extent_mapping(tree, start, len, 0);
48362306a36Sopenharmony_ci}
48462306a36Sopenharmony_ci
48562306a36Sopenharmony_ci/*
48662306a36Sopenharmony_ci * Remove an extent_map from the extent tree.
48762306a36Sopenharmony_ci *
48862306a36Sopenharmony_ci * @tree:	extent tree to remove from
48962306a36Sopenharmony_ci * @em:		extent map being removed
49062306a36Sopenharmony_ci *
49162306a36Sopenharmony_ci * Remove @em from @tree.  No reference counts are dropped, and no checks
49262306a36Sopenharmony_ci * are done to see if the range is in use.
49362306a36Sopenharmony_ci */
49462306a36Sopenharmony_civoid remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
49562306a36Sopenharmony_ci{
49662306a36Sopenharmony_ci	lockdep_assert_held_write(&tree->lock);
49762306a36Sopenharmony_ci
49862306a36Sopenharmony_ci	WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
49962306a36Sopenharmony_ci	rb_erase_cached(&em->rb_node, &tree->map);
50062306a36Sopenharmony_ci	if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
50162306a36Sopenharmony_ci		list_del_init(&em->list);
50262306a36Sopenharmony_ci	if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags))
50362306a36Sopenharmony_ci		extent_map_device_clear_bits(em, CHUNK_ALLOCATED);
50462306a36Sopenharmony_ci	RB_CLEAR_NODE(&em->rb_node);
50562306a36Sopenharmony_ci}
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_cistatic void replace_extent_mapping(struct extent_map_tree *tree,
50862306a36Sopenharmony_ci				   struct extent_map *cur,
50962306a36Sopenharmony_ci				   struct extent_map *new,
51062306a36Sopenharmony_ci				   int modified)
51162306a36Sopenharmony_ci{
51262306a36Sopenharmony_ci	lockdep_assert_held_write(&tree->lock);
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_ci	WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
51562306a36Sopenharmony_ci	ASSERT(extent_map_in_tree(cur));
51662306a36Sopenharmony_ci	if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))
51762306a36Sopenharmony_ci		list_del_init(&cur->list);
51862306a36Sopenharmony_ci	rb_replace_node_cached(&cur->rb_node, &new->rb_node, &tree->map);
51962306a36Sopenharmony_ci	RB_CLEAR_NODE(&cur->rb_node);
52062306a36Sopenharmony_ci
52162306a36Sopenharmony_ci	setup_extent_mapping(tree, new, modified);
52262306a36Sopenharmony_ci}
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_cistatic struct extent_map *next_extent_map(const struct extent_map *em)
52562306a36Sopenharmony_ci{
52662306a36Sopenharmony_ci	struct rb_node *next;
52762306a36Sopenharmony_ci
52862306a36Sopenharmony_ci	next = rb_next(&em->rb_node);
52962306a36Sopenharmony_ci	if (!next)
53062306a36Sopenharmony_ci		return NULL;
53162306a36Sopenharmony_ci	return container_of(next, struct extent_map, rb_node);
53262306a36Sopenharmony_ci}
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_cistatic struct extent_map *prev_extent_map(struct extent_map *em)
53562306a36Sopenharmony_ci{
53662306a36Sopenharmony_ci	struct rb_node *prev;
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci	prev = rb_prev(&em->rb_node);
53962306a36Sopenharmony_ci	if (!prev)
54062306a36Sopenharmony_ci		return NULL;
54162306a36Sopenharmony_ci	return container_of(prev, struct extent_map, rb_node);
54262306a36Sopenharmony_ci}
54362306a36Sopenharmony_ci
54462306a36Sopenharmony_ci/*
54562306a36Sopenharmony_ci * Helper for btrfs_get_extent.  Given an existing extent in the tree,
54662306a36Sopenharmony_ci * the existing extent is the nearest extent to map_start,
54762306a36Sopenharmony_ci * and an extent that you want to insert, deal with overlap and insert
54862306a36Sopenharmony_ci * the best fitted new extent into the tree.
54962306a36Sopenharmony_ci */
55062306a36Sopenharmony_cistatic noinline int merge_extent_mapping(struct extent_map_tree *em_tree,
55162306a36Sopenharmony_ci					 struct extent_map *existing,
55262306a36Sopenharmony_ci					 struct extent_map *em,
55362306a36Sopenharmony_ci					 u64 map_start)
55462306a36Sopenharmony_ci{
55562306a36Sopenharmony_ci	struct extent_map *prev;
55662306a36Sopenharmony_ci	struct extent_map *next;
55762306a36Sopenharmony_ci	u64 start;
55862306a36Sopenharmony_ci	u64 end;
55962306a36Sopenharmony_ci	u64 start_diff;
56062306a36Sopenharmony_ci
56162306a36Sopenharmony_ci	BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ci	if (existing->start > map_start) {
56462306a36Sopenharmony_ci		next = existing;
56562306a36Sopenharmony_ci		prev = prev_extent_map(next);
56662306a36Sopenharmony_ci	} else {
56762306a36Sopenharmony_ci		prev = existing;
56862306a36Sopenharmony_ci		next = next_extent_map(prev);
56962306a36Sopenharmony_ci	}
57062306a36Sopenharmony_ci
57162306a36Sopenharmony_ci	start = prev ? extent_map_end(prev) : em->start;
57262306a36Sopenharmony_ci	start = max_t(u64, start, em->start);
57362306a36Sopenharmony_ci	end = next ? next->start : extent_map_end(em);
57462306a36Sopenharmony_ci	end = min_t(u64, end, extent_map_end(em));
57562306a36Sopenharmony_ci	start_diff = start - em->start;
57662306a36Sopenharmony_ci	em->start = start;
57762306a36Sopenharmony_ci	em->len = end - start;
57862306a36Sopenharmony_ci	if (em->block_start < EXTENT_MAP_LAST_BYTE &&
57962306a36Sopenharmony_ci	    !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
58062306a36Sopenharmony_ci		em->block_start += start_diff;
58162306a36Sopenharmony_ci		em->block_len = em->len;
58262306a36Sopenharmony_ci	}
58362306a36Sopenharmony_ci	return add_extent_mapping(em_tree, em, 0);
58462306a36Sopenharmony_ci}
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_ci/*
58762306a36Sopenharmony_ci * Add extent mapping into em_tree.
58862306a36Sopenharmony_ci *
58962306a36Sopenharmony_ci * @fs_info:  the filesystem
59062306a36Sopenharmony_ci * @em_tree:  extent tree into which we want to insert the extent mapping
59162306a36Sopenharmony_ci * @em_in:    extent we are inserting
59262306a36Sopenharmony_ci * @start:    start of the logical range btrfs_get_extent() is requesting
59362306a36Sopenharmony_ci * @len:      length of the logical range btrfs_get_extent() is requesting
59462306a36Sopenharmony_ci *
59562306a36Sopenharmony_ci * Note that @em_in's range may be different from [start, start+len),
59662306a36Sopenharmony_ci * but they must be overlapped.
59762306a36Sopenharmony_ci *
59862306a36Sopenharmony_ci * Insert @em_in into @em_tree. In case there is an overlapping range, handle
59962306a36Sopenharmony_ci * the -EEXIST by either:
60062306a36Sopenharmony_ci * a) Returning the existing extent in @em_in if @start is within the
60162306a36Sopenharmony_ci *    existing em.
60262306a36Sopenharmony_ci * b) Merge the existing extent with @em_in passed in.
60362306a36Sopenharmony_ci *
60462306a36Sopenharmony_ci * Return 0 on success, otherwise -EEXIST.
60562306a36Sopenharmony_ci *
60662306a36Sopenharmony_ci */
60762306a36Sopenharmony_ciint btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
60862306a36Sopenharmony_ci			     struct extent_map_tree *em_tree,
60962306a36Sopenharmony_ci			     struct extent_map **em_in, u64 start, u64 len)
61062306a36Sopenharmony_ci{
61162306a36Sopenharmony_ci	int ret;
61262306a36Sopenharmony_ci	struct extent_map *em = *em_in;
61362306a36Sopenharmony_ci
61462306a36Sopenharmony_ci	/*
61562306a36Sopenharmony_ci	 * Tree-checker should have rejected any inline extent with non-zero
61662306a36Sopenharmony_ci	 * file offset. Here just do a sanity check.
61762306a36Sopenharmony_ci	 */
61862306a36Sopenharmony_ci	if (em->block_start == EXTENT_MAP_INLINE)
61962306a36Sopenharmony_ci		ASSERT(em->start == 0);
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci	ret = add_extent_mapping(em_tree, em, 0);
62262306a36Sopenharmony_ci	/* it is possible that someone inserted the extent into the tree
62362306a36Sopenharmony_ci	 * while we had the lock dropped.  It is also possible that
62462306a36Sopenharmony_ci	 * an overlapping map exists in the tree
62562306a36Sopenharmony_ci	 */
62662306a36Sopenharmony_ci	if (ret == -EEXIST) {
62762306a36Sopenharmony_ci		struct extent_map *existing;
62862306a36Sopenharmony_ci
62962306a36Sopenharmony_ci		ret = 0;
63062306a36Sopenharmony_ci
63162306a36Sopenharmony_ci		existing = search_extent_mapping(em_tree, start, len);
63262306a36Sopenharmony_ci
63362306a36Sopenharmony_ci		trace_btrfs_handle_em_exist(fs_info, existing, em, start, len);
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_ci		/*
63662306a36Sopenharmony_ci		 * existing will always be non-NULL, since there must be
63762306a36Sopenharmony_ci		 * extent causing the -EEXIST.
63862306a36Sopenharmony_ci		 */
63962306a36Sopenharmony_ci		if (start >= existing->start &&
64062306a36Sopenharmony_ci		    start < extent_map_end(existing)) {
64162306a36Sopenharmony_ci			free_extent_map(em);
64262306a36Sopenharmony_ci			*em_in = existing;
64362306a36Sopenharmony_ci			ret = 0;
64462306a36Sopenharmony_ci		} else {
64562306a36Sopenharmony_ci			u64 orig_start = em->start;
64662306a36Sopenharmony_ci			u64 orig_len = em->len;
64762306a36Sopenharmony_ci
64862306a36Sopenharmony_ci			/*
64962306a36Sopenharmony_ci			 * The existing extent map is the one nearest to
65062306a36Sopenharmony_ci			 * the [start, start + len) range which overlaps
65162306a36Sopenharmony_ci			 */
65262306a36Sopenharmony_ci			ret = merge_extent_mapping(em_tree, existing,
65362306a36Sopenharmony_ci						   em, start);
65462306a36Sopenharmony_ci			if (ret) {
65562306a36Sopenharmony_ci				free_extent_map(em);
65662306a36Sopenharmony_ci				*em_in = NULL;
65762306a36Sopenharmony_ci				WARN_ONCE(ret,
65862306a36Sopenharmony_ci"unexpected error %d: merge existing(start %llu len %llu) with em(start %llu len %llu)\n",
65962306a36Sopenharmony_ci					  ret, existing->start, existing->len,
66062306a36Sopenharmony_ci					  orig_start, orig_len);
66162306a36Sopenharmony_ci			}
66262306a36Sopenharmony_ci			free_extent_map(existing);
66362306a36Sopenharmony_ci		}
66462306a36Sopenharmony_ci	}
66562306a36Sopenharmony_ci
66662306a36Sopenharmony_ci	ASSERT(ret == 0 || ret == -EEXIST);
66762306a36Sopenharmony_ci	return ret;
66862306a36Sopenharmony_ci}
66962306a36Sopenharmony_ci
67062306a36Sopenharmony_ci/*
67162306a36Sopenharmony_ci * Drop all extent maps from a tree in the fastest possible way, rescheduling
67262306a36Sopenharmony_ci * if needed. This avoids searching the tree, from the root down to the first
67362306a36Sopenharmony_ci * extent map, before each deletion.
67462306a36Sopenharmony_ci */
67562306a36Sopenharmony_cistatic void drop_all_extent_maps_fast(struct extent_map_tree *tree)
67662306a36Sopenharmony_ci{
67762306a36Sopenharmony_ci	write_lock(&tree->lock);
67862306a36Sopenharmony_ci	while (!RB_EMPTY_ROOT(&tree->map.rb_root)) {
67962306a36Sopenharmony_ci		struct extent_map *em;
68062306a36Sopenharmony_ci		struct rb_node *node;
68162306a36Sopenharmony_ci
68262306a36Sopenharmony_ci		node = rb_first_cached(&tree->map);
68362306a36Sopenharmony_ci		em = rb_entry(node, struct extent_map, rb_node);
68462306a36Sopenharmony_ci		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
68562306a36Sopenharmony_ci		clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
68662306a36Sopenharmony_ci		remove_extent_mapping(tree, em);
68762306a36Sopenharmony_ci		free_extent_map(em);
68862306a36Sopenharmony_ci		cond_resched_rwlock_write(&tree->lock);
68962306a36Sopenharmony_ci	}
69062306a36Sopenharmony_ci	write_unlock(&tree->lock);
69162306a36Sopenharmony_ci}
69262306a36Sopenharmony_ci
69362306a36Sopenharmony_ci/*
69462306a36Sopenharmony_ci * Drop all extent maps in a given range.
69562306a36Sopenharmony_ci *
69662306a36Sopenharmony_ci * @inode:       The target inode.
69762306a36Sopenharmony_ci * @start:       Start offset of the range.
69862306a36Sopenharmony_ci * @end:         End offset of the range (inclusive value).
69962306a36Sopenharmony_ci * @skip_pinned: Indicate if pinned extent maps should be ignored or not.
70062306a36Sopenharmony_ci *
70162306a36Sopenharmony_ci * This drops all the extent maps that intersect the given range [@start, @end].
70262306a36Sopenharmony_ci * Extent maps that partially overlap the range and extend behind or beyond it,
70362306a36Sopenharmony_ci * are split.
70462306a36Sopenharmony_ci * The caller should have locked an appropriate file range in the inode's io
70562306a36Sopenharmony_ci * tree before calling this function.
70662306a36Sopenharmony_ci */
70762306a36Sopenharmony_civoid btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
70862306a36Sopenharmony_ci				 bool skip_pinned)
70962306a36Sopenharmony_ci{
71062306a36Sopenharmony_ci	struct extent_map *split;
71162306a36Sopenharmony_ci	struct extent_map *split2;
71262306a36Sopenharmony_ci	struct extent_map *em;
71362306a36Sopenharmony_ci	struct extent_map_tree *em_tree = &inode->extent_tree;
71462306a36Sopenharmony_ci	u64 len = end - start + 1;
71562306a36Sopenharmony_ci
71662306a36Sopenharmony_ci	WARN_ON(end < start);
71762306a36Sopenharmony_ci	if (end == (u64)-1) {
71862306a36Sopenharmony_ci		if (start == 0 && !skip_pinned) {
71962306a36Sopenharmony_ci			drop_all_extent_maps_fast(em_tree);
72062306a36Sopenharmony_ci			return;
72162306a36Sopenharmony_ci		}
72262306a36Sopenharmony_ci		len = (u64)-1;
72362306a36Sopenharmony_ci	} else {
72462306a36Sopenharmony_ci		/* Make end offset exclusive for use in the loop below. */
72562306a36Sopenharmony_ci		end++;
72662306a36Sopenharmony_ci	}
72762306a36Sopenharmony_ci
72862306a36Sopenharmony_ci	/*
72962306a36Sopenharmony_ci	 * It's ok if we fail to allocate the extent maps, see the comment near
73062306a36Sopenharmony_ci	 * the bottom of the loop below. We only need two spare extent maps in
73162306a36Sopenharmony_ci	 * the worst case, where the first extent map that intersects our range
73262306a36Sopenharmony_ci	 * starts before the range and the last extent map that intersects our
73362306a36Sopenharmony_ci	 * range ends after our range (and they might be the same extent map),
73462306a36Sopenharmony_ci	 * because we need to split those two extent maps at the boundaries.
73562306a36Sopenharmony_ci	 */
73662306a36Sopenharmony_ci	split = alloc_extent_map();
73762306a36Sopenharmony_ci	split2 = alloc_extent_map();
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_ci	write_lock(&em_tree->lock);
74062306a36Sopenharmony_ci	em = lookup_extent_mapping(em_tree, start, len);
74162306a36Sopenharmony_ci
74262306a36Sopenharmony_ci	while (em) {
74362306a36Sopenharmony_ci		/* extent_map_end() returns exclusive value (last byte + 1). */
74462306a36Sopenharmony_ci		const u64 em_end = extent_map_end(em);
74562306a36Sopenharmony_ci		struct extent_map *next_em = NULL;
74662306a36Sopenharmony_ci		u64 gen;
74762306a36Sopenharmony_ci		unsigned long flags;
74862306a36Sopenharmony_ci		bool modified;
74962306a36Sopenharmony_ci		bool compressed;
75062306a36Sopenharmony_ci
75162306a36Sopenharmony_ci		if (em_end < end) {
75262306a36Sopenharmony_ci			next_em = next_extent_map(em);
75362306a36Sopenharmony_ci			if (next_em) {
75462306a36Sopenharmony_ci				if (next_em->start < end)
75562306a36Sopenharmony_ci					refcount_inc(&next_em->refs);
75662306a36Sopenharmony_ci				else
75762306a36Sopenharmony_ci					next_em = NULL;
75862306a36Sopenharmony_ci			}
75962306a36Sopenharmony_ci		}
76062306a36Sopenharmony_ci
76162306a36Sopenharmony_ci		if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
76262306a36Sopenharmony_ci			start = em_end;
76362306a36Sopenharmony_ci			goto next;
76462306a36Sopenharmony_ci		}
76562306a36Sopenharmony_ci
76662306a36Sopenharmony_ci		flags = em->flags;
76762306a36Sopenharmony_ci		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
76862306a36Sopenharmony_ci		/*
76962306a36Sopenharmony_ci		 * In case we split the extent map, we want to preserve the
77062306a36Sopenharmony_ci		 * EXTENT_FLAG_LOGGING flag on our extent map, but we don't want
77162306a36Sopenharmony_ci		 * it on the new extent maps.
77262306a36Sopenharmony_ci		 */
77362306a36Sopenharmony_ci		clear_bit(EXTENT_FLAG_LOGGING, &flags);
77462306a36Sopenharmony_ci		modified = !list_empty(&em->list);
77562306a36Sopenharmony_ci
77662306a36Sopenharmony_ci		/*
77762306a36Sopenharmony_ci		 * The extent map does not cross our target range, so no need to
77862306a36Sopenharmony_ci		 * split it, we can remove it directly.
77962306a36Sopenharmony_ci		 */
78062306a36Sopenharmony_ci		if (em->start >= start && em_end <= end)
78162306a36Sopenharmony_ci			goto remove_em;
78262306a36Sopenharmony_ci
78362306a36Sopenharmony_ci		gen = em->generation;
78462306a36Sopenharmony_ci		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
78562306a36Sopenharmony_ci
78662306a36Sopenharmony_ci		if (em->start < start) {
78762306a36Sopenharmony_ci			if (!split) {
78862306a36Sopenharmony_ci				split = split2;
78962306a36Sopenharmony_ci				split2 = NULL;
79062306a36Sopenharmony_ci				if (!split)
79162306a36Sopenharmony_ci					goto remove_em;
79262306a36Sopenharmony_ci			}
79362306a36Sopenharmony_ci			split->start = em->start;
79462306a36Sopenharmony_ci			split->len = start - em->start;
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
79762306a36Sopenharmony_ci				split->orig_start = em->orig_start;
79862306a36Sopenharmony_ci				split->block_start = em->block_start;
79962306a36Sopenharmony_ci
80062306a36Sopenharmony_ci				if (compressed)
80162306a36Sopenharmony_ci					split->block_len = em->block_len;
80262306a36Sopenharmony_ci				else
80362306a36Sopenharmony_ci					split->block_len = split->len;
80462306a36Sopenharmony_ci				split->orig_block_len = max(split->block_len,
80562306a36Sopenharmony_ci						em->orig_block_len);
80662306a36Sopenharmony_ci				split->ram_bytes = em->ram_bytes;
80762306a36Sopenharmony_ci			} else {
80862306a36Sopenharmony_ci				split->orig_start = split->start;
80962306a36Sopenharmony_ci				split->block_len = 0;
81062306a36Sopenharmony_ci				split->block_start = em->block_start;
81162306a36Sopenharmony_ci				split->orig_block_len = 0;
81262306a36Sopenharmony_ci				split->ram_bytes = split->len;
81362306a36Sopenharmony_ci			}
81462306a36Sopenharmony_ci
81562306a36Sopenharmony_ci			split->generation = gen;
81662306a36Sopenharmony_ci			split->flags = flags;
81762306a36Sopenharmony_ci			split->compress_type = em->compress_type;
81862306a36Sopenharmony_ci			replace_extent_mapping(em_tree, em, split, modified);
81962306a36Sopenharmony_ci			free_extent_map(split);
82062306a36Sopenharmony_ci			split = split2;
82162306a36Sopenharmony_ci			split2 = NULL;
82262306a36Sopenharmony_ci		}
82362306a36Sopenharmony_ci		if (em_end > end) {
82462306a36Sopenharmony_ci			if (!split) {
82562306a36Sopenharmony_ci				split = split2;
82662306a36Sopenharmony_ci				split2 = NULL;
82762306a36Sopenharmony_ci				if (!split)
82862306a36Sopenharmony_ci					goto remove_em;
82962306a36Sopenharmony_ci			}
83062306a36Sopenharmony_ci			split->start = end;
83162306a36Sopenharmony_ci			split->len = em_end - end;
83262306a36Sopenharmony_ci			split->block_start = em->block_start;
83362306a36Sopenharmony_ci			split->flags = flags;
83462306a36Sopenharmony_ci			split->compress_type = em->compress_type;
83562306a36Sopenharmony_ci			split->generation = gen;
83662306a36Sopenharmony_ci
83762306a36Sopenharmony_ci			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
83862306a36Sopenharmony_ci				split->orig_block_len = max(em->block_len,
83962306a36Sopenharmony_ci						    em->orig_block_len);
84062306a36Sopenharmony_ci
84162306a36Sopenharmony_ci				split->ram_bytes = em->ram_bytes;
84262306a36Sopenharmony_ci				if (compressed) {
84362306a36Sopenharmony_ci					split->block_len = em->block_len;
84462306a36Sopenharmony_ci					split->orig_start = em->orig_start;
84562306a36Sopenharmony_ci				} else {
84662306a36Sopenharmony_ci					const u64 diff = start + len - em->start;
84762306a36Sopenharmony_ci
84862306a36Sopenharmony_ci					split->block_len = split->len;
84962306a36Sopenharmony_ci					split->block_start += diff;
85062306a36Sopenharmony_ci					split->orig_start = em->orig_start;
85162306a36Sopenharmony_ci				}
85262306a36Sopenharmony_ci			} else {
85362306a36Sopenharmony_ci				split->ram_bytes = split->len;
85462306a36Sopenharmony_ci				split->orig_start = split->start;
85562306a36Sopenharmony_ci				split->block_len = 0;
85662306a36Sopenharmony_ci				split->orig_block_len = 0;
85762306a36Sopenharmony_ci			}
85862306a36Sopenharmony_ci
85962306a36Sopenharmony_ci			if (extent_map_in_tree(em)) {
86062306a36Sopenharmony_ci				replace_extent_mapping(em_tree, em, split,
86162306a36Sopenharmony_ci						       modified);
86262306a36Sopenharmony_ci			} else {
86362306a36Sopenharmony_ci				int ret;
86462306a36Sopenharmony_ci
86562306a36Sopenharmony_ci				ret = add_extent_mapping(em_tree, split,
86662306a36Sopenharmony_ci							 modified);
86762306a36Sopenharmony_ci				/* Logic error, shouldn't happen. */
86862306a36Sopenharmony_ci				ASSERT(ret == 0);
86962306a36Sopenharmony_ci				if (WARN_ON(ret != 0) && modified)
87062306a36Sopenharmony_ci					btrfs_set_inode_full_sync(inode);
87162306a36Sopenharmony_ci			}
87262306a36Sopenharmony_ci			free_extent_map(split);
87362306a36Sopenharmony_ci			split = NULL;
87462306a36Sopenharmony_ci		}
87562306a36Sopenharmony_ciremove_em:
87662306a36Sopenharmony_ci		if (extent_map_in_tree(em)) {
87762306a36Sopenharmony_ci			/*
87862306a36Sopenharmony_ci			 * If the extent map is still in the tree it means that
87962306a36Sopenharmony_ci			 * either of the following is true:
88062306a36Sopenharmony_ci			 *
88162306a36Sopenharmony_ci			 * 1) It fits entirely in our range (doesn't end beyond
88262306a36Sopenharmony_ci			 *    it or starts before it);
88362306a36Sopenharmony_ci			 *
88462306a36Sopenharmony_ci			 * 2) It starts before our range and/or ends after our
88562306a36Sopenharmony_ci			 *    range, and we were not able to allocate the extent
88662306a36Sopenharmony_ci			 *    maps for split operations, @split and @split2.
88762306a36Sopenharmony_ci			 *
88862306a36Sopenharmony_ci			 * If we are at case 2) then we just remove the entire
88962306a36Sopenharmony_ci			 * extent map - this is fine since if anyone needs it to
89062306a36Sopenharmony_ci			 * access the subranges outside our range, will just
89162306a36Sopenharmony_ci			 * load it again from the subvolume tree's file extent
89262306a36Sopenharmony_ci			 * item. However if the extent map was in the list of
89362306a36Sopenharmony_ci			 * modified extents, then we must mark the inode for a
89462306a36Sopenharmony_ci			 * full fsync, otherwise a fast fsync will miss this
89562306a36Sopenharmony_ci			 * extent if it's new and needs to be logged.
89662306a36Sopenharmony_ci			 */
89762306a36Sopenharmony_ci			if ((em->start < start || em_end > end) && modified) {
89862306a36Sopenharmony_ci				ASSERT(!split);
89962306a36Sopenharmony_ci				btrfs_set_inode_full_sync(inode);
90062306a36Sopenharmony_ci			}
90162306a36Sopenharmony_ci			remove_extent_mapping(em_tree, em);
90262306a36Sopenharmony_ci		}
90362306a36Sopenharmony_ci
90462306a36Sopenharmony_ci		/*
90562306a36Sopenharmony_ci		 * Once for the tree reference (we replaced or removed the
90662306a36Sopenharmony_ci		 * extent map from the tree).
90762306a36Sopenharmony_ci		 */
90862306a36Sopenharmony_ci		free_extent_map(em);
90962306a36Sopenharmony_cinext:
91062306a36Sopenharmony_ci		/* Once for us (for our lookup reference). */
91162306a36Sopenharmony_ci		free_extent_map(em);
91262306a36Sopenharmony_ci
91362306a36Sopenharmony_ci		em = next_em;
91462306a36Sopenharmony_ci	}
91562306a36Sopenharmony_ci
91662306a36Sopenharmony_ci	write_unlock(&em_tree->lock);
91762306a36Sopenharmony_ci
91862306a36Sopenharmony_ci	free_extent_map(split);
91962306a36Sopenharmony_ci	free_extent_map(split2);
92062306a36Sopenharmony_ci}
92162306a36Sopenharmony_ci
92262306a36Sopenharmony_ci/*
92362306a36Sopenharmony_ci * Replace a range in the inode's extent map tree with a new extent map.
92462306a36Sopenharmony_ci *
92562306a36Sopenharmony_ci * @inode:      The target inode.
92662306a36Sopenharmony_ci * @new_em:     The new extent map to add to the inode's extent map tree.
92762306a36Sopenharmony_ci * @modified:   Indicate if the new extent map should be added to the list of
92862306a36Sopenharmony_ci *              modified extents (for fast fsync tracking).
92962306a36Sopenharmony_ci *
93062306a36Sopenharmony_ci * Drops all the extent maps in the inode's extent map tree that intersect the
93162306a36Sopenharmony_ci * range of the new extent map and adds the new extent map to the tree.
93262306a36Sopenharmony_ci * The caller should have locked an appropriate file range in the inode's io
93362306a36Sopenharmony_ci * tree before calling this function.
93462306a36Sopenharmony_ci */
93562306a36Sopenharmony_ciint btrfs_replace_extent_map_range(struct btrfs_inode *inode,
93662306a36Sopenharmony_ci				   struct extent_map *new_em,
93762306a36Sopenharmony_ci				   bool modified)
93862306a36Sopenharmony_ci{
93962306a36Sopenharmony_ci	const u64 end = new_em->start + new_em->len - 1;
94062306a36Sopenharmony_ci	struct extent_map_tree *tree = &inode->extent_tree;
94162306a36Sopenharmony_ci	int ret;
94262306a36Sopenharmony_ci
94362306a36Sopenharmony_ci	ASSERT(!extent_map_in_tree(new_em));
94462306a36Sopenharmony_ci
94562306a36Sopenharmony_ci	/*
94662306a36Sopenharmony_ci	 * The caller has locked an appropriate file range in the inode's io
94762306a36Sopenharmony_ci	 * tree, but getting -EEXIST when adding the new extent map can still
94862306a36Sopenharmony_ci	 * happen in case there are extents that partially cover the range, and
94962306a36Sopenharmony_ci	 * this is due to two tasks operating on different parts of the extent.
95062306a36Sopenharmony_ci	 * See commit 18e83ac75bfe67 ("Btrfs: fix unexpected EEXIST from
95162306a36Sopenharmony_ci	 * btrfs_get_extent") for an example and details.
95262306a36Sopenharmony_ci	 */
95362306a36Sopenharmony_ci	do {
95462306a36Sopenharmony_ci		btrfs_drop_extent_map_range(inode, new_em->start, end, false);
95562306a36Sopenharmony_ci		write_lock(&tree->lock);
95662306a36Sopenharmony_ci		ret = add_extent_mapping(tree, new_em, modified);
95762306a36Sopenharmony_ci		write_unlock(&tree->lock);
95862306a36Sopenharmony_ci	} while (ret == -EEXIST);
95962306a36Sopenharmony_ci
96062306a36Sopenharmony_ci	return ret;
96162306a36Sopenharmony_ci}
96262306a36Sopenharmony_ci
96362306a36Sopenharmony_ci/*
96462306a36Sopenharmony_ci * Split off the first pre bytes from the extent_map at [start, start + len],
96562306a36Sopenharmony_ci * and set the block_start for it to new_logical.
96662306a36Sopenharmony_ci *
96762306a36Sopenharmony_ci * This function is used when an ordered_extent needs to be split.
96862306a36Sopenharmony_ci */
96962306a36Sopenharmony_ciint split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
97062306a36Sopenharmony_ci		     u64 new_logical)
97162306a36Sopenharmony_ci{
97262306a36Sopenharmony_ci	struct extent_map_tree *em_tree = &inode->extent_tree;
97362306a36Sopenharmony_ci	struct extent_map *em;
97462306a36Sopenharmony_ci	struct extent_map *split_pre = NULL;
97562306a36Sopenharmony_ci	struct extent_map *split_mid = NULL;
97662306a36Sopenharmony_ci	int ret = 0;
97762306a36Sopenharmony_ci	unsigned long flags;
97862306a36Sopenharmony_ci
97962306a36Sopenharmony_ci	ASSERT(pre != 0);
98062306a36Sopenharmony_ci	ASSERT(pre < len);
98162306a36Sopenharmony_ci
98262306a36Sopenharmony_ci	split_pre = alloc_extent_map();
98362306a36Sopenharmony_ci	if (!split_pre)
98462306a36Sopenharmony_ci		return -ENOMEM;
98562306a36Sopenharmony_ci	split_mid = alloc_extent_map();
98662306a36Sopenharmony_ci	if (!split_mid) {
98762306a36Sopenharmony_ci		ret = -ENOMEM;
98862306a36Sopenharmony_ci		goto out_free_pre;
98962306a36Sopenharmony_ci	}
99062306a36Sopenharmony_ci
99162306a36Sopenharmony_ci	lock_extent(&inode->io_tree, start, start + len - 1, NULL);
99262306a36Sopenharmony_ci	write_lock(&em_tree->lock);
99362306a36Sopenharmony_ci	em = lookup_extent_mapping(em_tree, start, len);
99462306a36Sopenharmony_ci	if (!em) {
99562306a36Sopenharmony_ci		ret = -EIO;
99662306a36Sopenharmony_ci		goto out_unlock;
99762306a36Sopenharmony_ci	}
99862306a36Sopenharmony_ci
99962306a36Sopenharmony_ci	ASSERT(em->len == len);
100062306a36Sopenharmony_ci	ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
100162306a36Sopenharmony_ci	ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
100262306a36Sopenharmony_ci	ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
100362306a36Sopenharmony_ci	ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
100462306a36Sopenharmony_ci	ASSERT(!list_empty(&em->list));
100562306a36Sopenharmony_ci
100662306a36Sopenharmony_ci	flags = em->flags;
100762306a36Sopenharmony_ci	clear_bit(EXTENT_FLAG_PINNED, &em->flags);
100862306a36Sopenharmony_ci
100962306a36Sopenharmony_ci	/* First, replace the em with a new extent_map starting from * em->start */
101062306a36Sopenharmony_ci	split_pre->start = em->start;
101162306a36Sopenharmony_ci	split_pre->len = pre;
101262306a36Sopenharmony_ci	split_pre->orig_start = split_pre->start;
101362306a36Sopenharmony_ci	split_pre->block_start = new_logical;
101462306a36Sopenharmony_ci	split_pre->block_len = split_pre->len;
101562306a36Sopenharmony_ci	split_pre->orig_block_len = split_pre->block_len;
101662306a36Sopenharmony_ci	split_pre->ram_bytes = split_pre->len;
101762306a36Sopenharmony_ci	split_pre->flags = flags;
101862306a36Sopenharmony_ci	split_pre->compress_type = em->compress_type;
101962306a36Sopenharmony_ci	split_pre->generation = em->generation;
102062306a36Sopenharmony_ci
102162306a36Sopenharmony_ci	replace_extent_mapping(em_tree, em, split_pre, 1);
102262306a36Sopenharmony_ci
102362306a36Sopenharmony_ci	/*
102462306a36Sopenharmony_ci	 * Now we only have an extent_map at:
102562306a36Sopenharmony_ci	 *     [em->start, em->start + pre]
102662306a36Sopenharmony_ci	 */
102762306a36Sopenharmony_ci
102862306a36Sopenharmony_ci	/* Insert the middle extent_map. */
102962306a36Sopenharmony_ci	split_mid->start = em->start + pre;
103062306a36Sopenharmony_ci	split_mid->len = em->len - pre;
103162306a36Sopenharmony_ci	split_mid->orig_start = split_mid->start;
103262306a36Sopenharmony_ci	split_mid->block_start = em->block_start + pre;
103362306a36Sopenharmony_ci	split_mid->block_len = split_mid->len;
103462306a36Sopenharmony_ci	split_mid->orig_block_len = split_mid->block_len;
103562306a36Sopenharmony_ci	split_mid->ram_bytes = split_mid->len;
103662306a36Sopenharmony_ci	split_mid->flags = flags;
103762306a36Sopenharmony_ci	split_mid->compress_type = em->compress_type;
103862306a36Sopenharmony_ci	split_mid->generation = em->generation;
103962306a36Sopenharmony_ci	add_extent_mapping(em_tree, split_mid, 1);
104062306a36Sopenharmony_ci
104162306a36Sopenharmony_ci	/* Once for us */
104262306a36Sopenharmony_ci	free_extent_map(em);
104362306a36Sopenharmony_ci	/* Once for the tree */
104462306a36Sopenharmony_ci	free_extent_map(em);
104562306a36Sopenharmony_ci
104662306a36Sopenharmony_ciout_unlock:
104762306a36Sopenharmony_ci	write_unlock(&em_tree->lock);
104862306a36Sopenharmony_ci	unlock_extent(&inode->io_tree, start, start + len - 1, NULL);
104962306a36Sopenharmony_ci	free_extent_map(split_mid);
105062306a36Sopenharmony_ciout_free_pre:
105162306a36Sopenharmony_ci	free_extent_map(split_pre);
105262306a36Sopenharmony_ci	return ret;
105362306a36Sopenharmony_ci}
1054