162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci 362306a36Sopenharmony_ci#include <linux/err.h> 462306a36Sopenharmony_ci#include <linux/slab.h> 562306a36Sopenharmony_ci#include <linux/spinlock.h> 662306a36Sopenharmony_ci#include "messages.h" 762306a36Sopenharmony_ci#include "ctree.h" 862306a36Sopenharmony_ci#include "volumes.h" 962306a36Sopenharmony_ci#include "extent_map.h" 1062306a36Sopenharmony_ci#include "compression.h" 1162306a36Sopenharmony_ci#include "btrfs_inode.h" 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_cistatic struct kmem_cache *extent_map_cache; 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_ciint __init extent_map_init(void) 1762306a36Sopenharmony_ci{ 1862306a36Sopenharmony_ci extent_map_cache = kmem_cache_create("btrfs_extent_map", 1962306a36Sopenharmony_ci sizeof(struct extent_map), 0, 2062306a36Sopenharmony_ci SLAB_MEM_SPREAD, NULL); 2162306a36Sopenharmony_ci if (!extent_map_cache) 2262306a36Sopenharmony_ci return -ENOMEM; 2362306a36Sopenharmony_ci return 0; 2462306a36Sopenharmony_ci} 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_civoid __cold extent_map_exit(void) 2762306a36Sopenharmony_ci{ 2862306a36Sopenharmony_ci kmem_cache_destroy(extent_map_cache); 2962306a36Sopenharmony_ci} 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci/* 3262306a36Sopenharmony_ci * Initialize the extent tree @tree. Should be called for each new inode or 3362306a36Sopenharmony_ci * other user of the extent_map interface. 3462306a36Sopenharmony_ci */ 3562306a36Sopenharmony_civoid extent_map_tree_init(struct extent_map_tree *tree) 3662306a36Sopenharmony_ci{ 3762306a36Sopenharmony_ci tree->map = RB_ROOT_CACHED; 3862306a36Sopenharmony_ci INIT_LIST_HEAD(&tree->modified_extents); 3962306a36Sopenharmony_ci rwlock_init(&tree->lock); 4062306a36Sopenharmony_ci} 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci/* 4362306a36Sopenharmony_ci * Allocate a new extent_map structure. The new structure is returned with a 4462306a36Sopenharmony_ci * reference count of one and needs to be freed using free_extent_map() 4562306a36Sopenharmony_ci */ 4662306a36Sopenharmony_cistruct extent_map *alloc_extent_map(void) 4762306a36Sopenharmony_ci{ 4862306a36Sopenharmony_ci struct extent_map *em; 4962306a36Sopenharmony_ci em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS); 5062306a36Sopenharmony_ci if (!em) 5162306a36Sopenharmony_ci return NULL; 5262306a36Sopenharmony_ci RB_CLEAR_NODE(&em->rb_node); 5362306a36Sopenharmony_ci em->compress_type = BTRFS_COMPRESS_NONE; 5462306a36Sopenharmony_ci refcount_set(&em->refs, 1); 5562306a36Sopenharmony_ci INIT_LIST_HEAD(&em->list); 5662306a36Sopenharmony_ci return em; 5762306a36Sopenharmony_ci} 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci/* 6062306a36Sopenharmony_ci * Drop the reference out on @em by one and free the structure if the reference 6162306a36Sopenharmony_ci * count hits zero. 6262306a36Sopenharmony_ci */ 6362306a36Sopenharmony_civoid free_extent_map(struct extent_map *em) 6462306a36Sopenharmony_ci{ 6562306a36Sopenharmony_ci if (!em) 6662306a36Sopenharmony_ci return; 6762306a36Sopenharmony_ci if (refcount_dec_and_test(&em->refs)) { 6862306a36Sopenharmony_ci WARN_ON(extent_map_in_tree(em)); 6962306a36Sopenharmony_ci WARN_ON(!list_empty(&em->list)); 7062306a36Sopenharmony_ci if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) 7162306a36Sopenharmony_ci kfree(em->map_lookup); 7262306a36Sopenharmony_ci kmem_cache_free(extent_map_cache, em); 7362306a36Sopenharmony_ci } 7462306a36Sopenharmony_ci} 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci/* Do the math around the end of an extent, handling wrapping. */ 7762306a36Sopenharmony_cistatic u64 range_end(u64 start, u64 len) 7862306a36Sopenharmony_ci{ 7962306a36Sopenharmony_ci if (start + len < start) 8062306a36Sopenharmony_ci return (u64)-1; 8162306a36Sopenharmony_ci return start + len; 8262306a36Sopenharmony_ci} 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_cistatic int tree_insert(struct rb_root_cached *root, struct extent_map *em) 8562306a36Sopenharmony_ci{ 8662306a36Sopenharmony_ci struct rb_node **p = &root->rb_root.rb_node; 8762306a36Sopenharmony_ci struct rb_node *parent = NULL; 8862306a36Sopenharmony_ci struct extent_map *entry = NULL; 8962306a36Sopenharmony_ci struct rb_node *orig_parent = NULL; 9062306a36Sopenharmony_ci u64 end = range_end(em->start, em->len); 9162306a36Sopenharmony_ci bool leftmost = true; 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci while (*p) { 9462306a36Sopenharmony_ci parent = *p; 9562306a36Sopenharmony_ci entry = rb_entry(parent, struct extent_map, rb_node); 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci if (em->start < entry->start) { 9862306a36Sopenharmony_ci p = &(*p)->rb_left; 9962306a36Sopenharmony_ci } else if (em->start >= extent_map_end(entry)) { 10062306a36Sopenharmony_ci p = &(*p)->rb_right; 10162306a36Sopenharmony_ci leftmost = false; 10262306a36Sopenharmony_ci } else { 10362306a36Sopenharmony_ci return -EEXIST; 10462306a36Sopenharmony_ci } 10562306a36Sopenharmony_ci } 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci orig_parent = parent; 10862306a36Sopenharmony_ci while (parent && em->start >= extent_map_end(entry)) { 10962306a36Sopenharmony_ci parent = rb_next(parent); 11062306a36Sopenharmony_ci entry = rb_entry(parent, struct extent_map, rb_node); 11162306a36Sopenharmony_ci } 11262306a36Sopenharmony_ci if (parent) 11362306a36Sopenharmony_ci if (end > entry->start && em->start < extent_map_end(entry)) 11462306a36Sopenharmony_ci return -EEXIST; 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci parent = orig_parent; 11762306a36Sopenharmony_ci entry = rb_entry(parent, struct extent_map, rb_node); 11862306a36Sopenharmony_ci while (parent && em->start < entry->start) { 11962306a36Sopenharmony_ci parent = rb_prev(parent); 12062306a36Sopenharmony_ci entry = rb_entry(parent, struct extent_map, rb_node); 12162306a36Sopenharmony_ci } 12262306a36Sopenharmony_ci if (parent) 12362306a36Sopenharmony_ci if (end > entry->start && em->start < extent_map_end(entry)) 12462306a36Sopenharmony_ci return -EEXIST; 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_ci rb_link_node(&em->rb_node, orig_parent, p); 12762306a36Sopenharmony_ci rb_insert_color_cached(&em->rb_node, root, leftmost); 12862306a36Sopenharmony_ci return 0; 12962306a36Sopenharmony_ci} 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci/* 13262306a36Sopenharmony_ci * Search through the tree for an extent_map with a given offset. If it can't 13362306a36Sopenharmony_ci * be found, try to find some neighboring extents 13462306a36Sopenharmony_ci */ 13562306a36Sopenharmony_cistatic struct rb_node *__tree_search(struct rb_root *root, u64 offset, 13662306a36Sopenharmony_ci struct rb_node **prev_or_next_ret) 13762306a36Sopenharmony_ci{ 13862306a36Sopenharmony_ci struct rb_node *n = root->rb_node; 13962306a36Sopenharmony_ci struct rb_node *prev = NULL; 14062306a36Sopenharmony_ci struct rb_node *orig_prev = NULL; 14162306a36Sopenharmony_ci struct extent_map *entry; 14262306a36Sopenharmony_ci struct extent_map *prev_entry = NULL; 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci ASSERT(prev_or_next_ret); 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_ci while (n) { 14762306a36Sopenharmony_ci entry = rb_entry(n, struct extent_map, rb_node); 14862306a36Sopenharmony_ci prev = n; 14962306a36Sopenharmony_ci prev_entry = entry; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci if (offset < entry->start) 15262306a36Sopenharmony_ci n = n->rb_left; 15362306a36Sopenharmony_ci else if (offset >= extent_map_end(entry)) 15462306a36Sopenharmony_ci n = n->rb_right; 15562306a36Sopenharmony_ci else 15662306a36Sopenharmony_ci return n; 15762306a36Sopenharmony_ci } 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci orig_prev = prev; 16062306a36Sopenharmony_ci while (prev && offset >= extent_map_end(prev_entry)) { 16162306a36Sopenharmony_ci prev = rb_next(prev); 16262306a36Sopenharmony_ci prev_entry = rb_entry(prev, struct extent_map, rb_node); 16362306a36Sopenharmony_ci } 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_ci /* 16662306a36Sopenharmony_ci * Previous extent map found, return as in this case the caller does not 16762306a36Sopenharmony_ci * care about the next one. 16862306a36Sopenharmony_ci */ 16962306a36Sopenharmony_ci if (prev) { 17062306a36Sopenharmony_ci *prev_or_next_ret = prev; 17162306a36Sopenharmony_ci return NULL; 17262306a36Sopenharmony_ci } 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci prev = orig_prev; 17562306a36Sopenharmony_ci prev_entry = rb_entry(prev, struct extent_map, rb_node); 17662306a36Sopenharmony_ci while (prev && offset < prev_entry->start) { 17762306a36Sopenharmony_ci prev = rb_prev(prev); 17862306a36Sopenharmony_ci prev_entry = rb_entry(prev, struct extent_map, rb_node); 17962306a36Sopenharmony_ci } 18062306a36Sopenharmony_ci *prev_or_next_ret = prev; 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci return NULL; 18362306a36Sopenharmony_ci} 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci/* Check to see if two extent_map structs are adjacent and safe to merge. */ 18662306a36Sopenharmony_cistatic int mergable_maps(struct extent_map *prev, struct extent_map *next) 18762306a36Sopenharmony_ci{ 18862306a36Sopenharmony_ci if (test_bit(EXTENT_FLAG_PINNED, &prev->flags)) 18962306a36Sopenharmony_ci return 0; 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci /* 19262306a36Sopenharmony_ci * don't merge compressed extents, we need to know their 19362306a36Sopenharmony_ci * actual size 19462306a36Sopenharmony_ci */ 19562306a36Sopenharmony_ci if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags)) 19662306a36Sopenharmony_ci return 0; 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) || 19962306a36Sopenharmony_ci test_bit(EXTENT_FLAG_LOGGING, &next->flags)) 20062306a36Sopenharmony_ci return 0; 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_ci /* 20362306a36Sopenharmony_ci * We don't want to merge stuff that hasn't been written to the log yet 20462306a36Sopenharmony_ci * since it may not reflect exactly what is on disk, and that would be 20562306a36Sopenharmony_ci * bad. 20662306a36Sopenharmony_ci */ 20762306a36Sopenharmony_ci if (!list_empty(&prev->list) || !list_empty(&next->list)) 20862306a36Sopenharmony_ci return 0; 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci ASSERT(next->block_start != EXTENT_MAP_DELALLOC && 21162306a36Sopenharmony_ci prev->block_start != EXTENT_MAP_DELALLOC); 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci if (prev->map_lookup || next->map_lookup) 21462306a36Sopenharmony_ci ASSERT(test_bit(EXTENT_FLAG_FS_MAPPING, &prev->flags) && 21562306a36Sopenharmony_ci test_bit(EXTENT_FLAG_FS_MAPPING, &next->flags)); 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci if (extent_map_end(prev) == next->start && 21862306a36Sopenharmony_ci prev->flags == next->flags && 21962306a36Sopenharmony_ci prev->map_lookup == next->map_lookup && 22062306a36Sopenharmony_ci ((next->block_start == EXTENT_MAP_HOLE && 22162306a36Sopenharmony_ci prev->block_start == EXTENT_MAP_HOLE) || 22262306a36Sopenharmony_ci (next->block_start == EXTENT_MAP_INLINE && 22362306a36Sopenharmony_ci prev->block_start == EXTENT_MAP_INLINE) || 22462306a36Sopenharmony_ci (next->block_start < EXTENT_MAP_LAST_BYTE - 1 && 22562306a36Sopenharmony_ci next->block_start == extent_map_block_end(prev)))) { 22662306a36Sopenharmony_ci return 1; 22762306a36Sopenharmony_ci } 22862306a36Sopenharmony_ci return 0; 22962306a36Sopenharmony_ci} 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_cistatic void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) 23262306a36Sopenharmony_ci{ 23362306a36Sopenharmony_ci struct extent_map *merge = NULL; 23462306a36Sopenharmony_ci struct rb_node *rb; 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci /* 23762306a36Sopenharmony_ci * We can't modify an extent map that is in the tree and that is being 23862306a36Sopenharmony_ci * used by another task, as it can cause that other task to see it in 23962306a36Sopenharmony_ci * inconsistent state during the merging. We always have 1 reference for 24062306a36Sopenharmony_ci * the tree and 1 for this task (which is unpinning the extent map or 24162306a36Sopenharmony_ci * clearing the logging flag), so anything > 2 means it's being used by 24262306a36Sopenharmony_ci * other tasks too. 24362306a36Sopenharmony_ci */ 24462306a36Sopenharmony_ci if (refcount_read(&em->refs) > 2) 24562306a36Sopenharmony_ci return; 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_ci if (em->start != 0) { 24862306a36Sopenharmony_ci rb = rb_prev(&em->rb_node); 24962306a36Sopenharmony_ci if (rb) 25062306a36Sopenharmony_ci merge = rb_entry(rb, struct extent_map, rb_node); 25162306a36Sopenharmony_ci if (rb && mergable_maps(merge, em)) { 25262306a36Sopenharmony_ci em->start = merge->start; 25362306a36Sopenharmony_ci em->orig_start = merge->orig_start; 25462306a36Sopenharmony_ci em->len += merge->len; 25562306a36Sopenharmony_ci em->block_len += merge->block_len; 25662306a36Sopenharmony_ci em->block_start = merge->block_start; 25762306a36Sopenharmony_ci em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; 25862306a36Sopenharmony_ci em->mod_start = merge->mod_start; 25962306a36Sopenharmony_ci em->generation = max(em->generation, merge->generation); 26062306a36Sopenharmony_ci set_bit(EXTENT_FLAG_MERGED, &em->flags); 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ci rb_erase_cached(&merge->rb_node, &tree->map); 26362306a36Sopenharmony_ci RB_CLEAR_NODE(&merge->rb_node); 26462306a36Sopenharmony_ci free_extent_map(merge); 26562306a36Sopenharmony_ci } 26662306a36Sopenharmony_ci } 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_ci rb = rb_next(&em->rb_node); 26962306a36Sopenharmony_ci if (rb) 27062306a36Sopenharmony_ci merge = rb_entry(rb, struct extent_map, rb_node); 27162306a36Sopenharmony_ci if (rb && mergable_maps(em, merge)) { 27262306a36Sopenharmony_ci em->len += merge->len; 27362306a36Sopenharmony_ci em->block_len += merge->block_len; 27462306a36Sopenharmony_ci rb_erase_cached(&merge->rb_node, &tree->map); 27562306a36Sopenharmony_ci RB_CLEAR_NODE(&merge->rb_node); 27662306a36Sopenharmony_ci em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; 27762306a36Sopenharmony_ci em->generation = max(em->generation, merge->generation); 27862306a36Sopenharmony_ci set_bit(EXTENT_FLAG_MERGED, &em->flags); 27962306a36Sopenharmony_ci free_extent_map(merge); 28062306a36Sopenharmony_ci } 28162306a36Sopenharmony_ci} 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ci/* 28462306a36Sopenharmony_ci * Unpin an extent from the cache. 28562306a36Sopenharmony_ci * 28662306a36Sopenharmony_ci * @tree: tree to unpin the extent in 28762306a36Sopenharmony_ci * @start: logical offset in the file 28862306a36Sopenharmony_ci * @len: length of the extent 28962306a36Sopenharmony_ci * @gen: generation that this extent has been modified in 29062306a36Sopenharmony_ci * 29162306a36Sopenharmony_ci * Called after an extent has been written to disk properly. Set the generation 29262306a36Sopenharmony_ci * to the generation that actually added the file item to the inode so we know 29362306a36Sopenharmony_ci * we need to sync this extent when we call fsync(). 29462306a36Sopenharmony_ci */ 29562306a36Sopenharmony_ciint unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, 29662306a36Sopenharmony_ci u64 gen) 29762306a36Sopenharmony_ci{ 29862306a36Sopenharmony_ci int ret = 0; 29962306a36Sopenharmony_ci struct extent_map *em; 30062306a36Sopenharmony_ci bool prealloc = false; 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci write_lock(&tree->lock); 30362306a36Sopenharmony_ci em = lookup_extent_mapping(tree, start, len); 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci WARN_ON(!em || em->start != start); 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_ci if (!em) 30862306a36Sopenharmony_ci goto out; 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci em->generation = gen; 31162306a36Sopenharmony_ci clear_bit(EXTENT_FLAG_PINNED, &em->flags); 31262306a36Sopenharmony_ci em->mod_start = em->start; 31362306a36Sopenharmony_ci em->mod_len = em->len; 31462306a36Sopenharmony_ci 31562306a36Sopenharmony_ci if (test_bit(EXTENT_FLAG_FILLING, &em->flags)) { 31662306a36Sopenharmony_ci prealloc = true; 31762306a36Sopenharmony_ci clear_bit(EXTENT_FLAG_FILLING, &em->flags); 31862306a36Sopenharmony_ci } 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_ci try_merge_map(tree, em); 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci if (prealloc) { 32362306a36Sopenharmony_ci em->mod_start = em->start; 32462306a36Sopenharmony_ci em->mod_len = em->len; 32562306a36Sopenharmony_ci } 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_ci free_extent_map(em); 32862306a36Sopenharmony_ciout: 32962306a36Sopenharmony_ci write_unlock(&tree->lock); 33062306a36Sopenharmony_ci return ret; 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci} 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_civoid clear_em_logging(struct extent_map_tree *tree, struct extent_map *em) 33562306a36Sopenharmony_ci{ 33662306a36Sopenharmony_ci lockdep_assert_held_write(&tree->lock); 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci clear_bit(EXTENT_FLAG_LOGGING, &em->flags); 33962306a36Sopenharmony_ci if (extent_map_in_tree(em)) 34062306a36Sopenharmony_ci try_merge_map(tree, em); 34162306a36Sopenharmony_ci} 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_cistatic inline void setup_extent_mapping(struct extent_map_tree *tree, 34462306a36Sopenharmony_ci struct extent_map *em, 34562306a36Sopenharmony_ci int modified) 34662306a36Sopenharmony_ci{ 34762306a36Sopenharmony_ci refcount_inc(&em->refs); 34862306a36Sopenharmony_ci em->mod_start = em->start; 34962306a36Sopenharmony_ci em->mod_len = em->len; 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_ci if (modified) 35262306a36Sopenharmony_ci list_move(&em->list, &tree->modified_extents); 35362306a36Sopenharmony_ci else 35462306a36Sopenharmony_ci try_merge_map(tree, em); 35562306a36Sopenharmony_ci} 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_cistatic void extent_map_device_set_bits(struct extent_map *em, unsigned bits) 35862306a36Sopenharmony_ci{ 35962306a36Sopenharmony_ci struct map_lookup *map = em->map_lookup; 36062306a36Sopenharmony_ci u64 stripe_size = em->orig_block_len; 36162306a36Sopenharmony_ci int i; 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ci for (i = 0; i < map->num_stripes; i++) { 36462306a36Sopenharmony_ci struct btrfs_io_stripe *stripe = &map->stripes[i]; 36562306a36Sopenharmony_ci struct btrfs_device *device = stripe->dev; 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci set_extent_bit(&device->alloc_state, stripe->physical, 36862306a36Sopenharmony_ci stripe->physical + stripe_size - 1, 36962306a36Sopenharmony_ci bits | EXTENT_NOWAIT, NULL); 37062306a36Sopenharmony_ci } 37162306a36Sopenharmony_ci} 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_cistatic void extent_map_device_clear_bits(struct extent_map *em, unsigned bits) 37462306a36Sopenharmony_ci{ 37562306a36Sopenharmony_ci struct map_lookup *map = em->map_lookup; 37662306a36Sopenharmony_ci u64 stripe_size = em->orig_block_len; 37762306a36Sopenharmony_ci int i; 37862306a36Sopenharmony_ci 37962306a36Sopenharmony_ci for (i = 0; i < map->num_stripes; i++) { 38062306a36Sopenharmony_ci struct btrfs_io_stripe *stripe = &map->stripes[i]; 38162306a36Sopenharmony_ci struct btrfs_device *device = stripe->dev; 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_ci __clear_extent_bit(&device->alloc_state, stripe->physical, 38462306a36Sopenharmony_ci stripe->physical + stripe_size - 1, 38562306a36Sopenharmony_ci bits | EXTENT_NOWAIT, 38662306a36Sopenharmony_ci NULL, NULL); 38762306a36Sopenharmony_ci } 38862306a36Sopenharmony_ci} 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_ci/* 39162306a36Sopenharmony_ci * Add new extent map to the extent tree 39262306a36Sopenharmony_ci * 39362306a36Sopenharmony_ci * @tree: tree to insert new map in 39462306a36Sopenharmony_ci * @em: map to insert 39562306a36Sopenharmony_ci * @modified: indicate whether the given @em should be added to the 39662306a36Sopenharmony_ci * modified list, which indicates the extent needs to be logged 39762306a36Sopenharmony_ci * 39862306a36Sopenharmony_ci * Insert @em into @tree or perform a simple forward/backward merge with 39962306a36Sopenharmony_ci * existing mappings. The extent_map struct passed in will be inserted 40062306a36Sopenharmony_ci * into the tree directly, with an additional reference taken, or a 40162306a36Sopenharmony_ci * reference dropped if the merge attempt was successful. 40262306a36Sopenharmony_ci */ 40362306a36Sopenharmony_ciint add_extent_mapping(struct extent_map_tree *tree, 40462306a36Sopenharmony_ci struct extent_map *em, int modified) 40562306a36Sopenharmony_ci{ 40662306a36Sopenharmony_ci int ret = 0; 40762306a36Sopenharmony_ci 40862306a36Sopenharmony_ci lockdep_assert_held_write(&tree->lock); 40962306a36Sopenharmony_ci 41062306a36Sopenharmony_ci ret = tree_insert(&tree->map, em); 41162306a36Sopenharmony_ci if (ret) 41262306a36Sopenharmony_ci goto out; 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_ci setup_extent_mapping(tree, em, modified); 41562306a36Sopenharmony_ci if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) { 41662306a36Sopenharmony_ci extent_map_device_set_bits(em, CHUNK_ALLOCATED); 41762306a36Sopenharmony_ci extent_map_device_clear_bits(em, CHUNK_TRIMMED); 41862306a36Sopenharmony_ci } 41962306a36Sopenharmony_ciout: 42062306a36Sopenharmony_ci return ret; 42162306a36Sopenharmony_ci} 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_cistatic struct extent_map * 42462306a36Sopenharmony_ci__lookup_extent_mapping(struct extent_map_tree *tree, 42562306a36Sopenharmony_ci u64 start, u64 len, int strict) 42662306a36Sopenharmony_ci{ 42762306a36Sopenharmony_ci struct extent_map *em; 42862306a36Sopenharmony_ci struct rb_node *rb_node; 42962306a36Sopenharmony_ci struct rb_node *prev_or_next = NULL; 43062306a36Sopenharmony_ci u64 end = range_end(start, len); 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci rb_node = __tree_search(&tree->map.rb_root, start, &prev_or_next); 43362306a36Sopenharmony_ci if (!rb_node) { 43462306a36Sopenharmony_ci if (prev_or_next) 43562306a36Sopenharmony_ci rb_node = prev_or_next; 43662306a36Sopenharmony_ci else 43762306a36Sopenharmony_ci return NULL; 43862306a36Sopenharmony_ci } 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci em = rb_entry(rb_node, struct extent_map, rb_node); 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_ci if (strict && !(end > em->start && start < extent_map_end(em))) 44362306a36Sopenharmony_ci return NULL; 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_ci refcount_inc(&em->refs); 44662306a36Sopenharmony_ci return em; 44762306a36Sopenharmony_ci} 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci/* 45062306a36Sopenharmony_ci * Lookup extent_map that intersects @start + @len range. 45162306a36Sopenharmony_ci * 45262306a36Sopenharmony_ci * @tree: tree to lookup in 45362306a36Sopenharmony_ci * @start: byte offset to start the search 45462306a36Sopenharmony_ci * @len: length of the lookup range 45562306a36Sopenharmony_ci * 45662306a36Sopenharmony_ci * Find and return the first extent_map struct in @tree that intersects the 45762306a36Sopenharmony_ci * [start, len] range. There may be additional objects in the tree that 45862306a36Sopenharmony_ci * intersect, so check the object returned carefully to make sure that no 45962306a36Sopenharmony_ci * additional lookups are needed. 46062306a36Sopenharmony_ci */ 46162306a36Sopenharmony_cistruct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, 46262306a36Sopenharmony_ci u64 start, u64 len) 46362306a36Sopenharmony_ci{ 46462306a36Sopenharmony_ci return __lookup_extent_mapping(tree, start, len, 1); 46562306a36Sopenharmony_ci} 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_ci/* 46862306a36Sopenharmony_ci * Find a nearby extent map intersecting @start + @len (not an exact search). 46962306a36Sopenharmony_ci * 47062306a36Sopenharmony_ci * @tree: tree to lookup in 47162306a36Sopenharmony_ci * @start: byte offset to start the search 47262306a36Sopenharmony_ci * @len: length of the lookup range 47362306a36Sopenharmony_ci * 47462306a36Sopenharmony_ci * Find and return the first extent_map struct in @tree that intersects the 47562306a36Sopenharmony_ci * [start, len] range. 47662306a36Sopenharmony_ci * 47762306a36Sopenharmony_ci * If one can't be found, any nearby extent may be returned 47862306a36Sopenharmony_ci */ 47962306a36Sopenharmony_cistruct extent_map *search_extent_mapping(struct extent_map_tree *tree, 48062306a36Sopenharmony_ci u64 start, u64 len) 48162306a36Sopenharmony_ci{ 48262306a36Sopenharmony_ci return __lookup_extent_mapping(tree, start, len, 0); 48362306a36Sopenharmony_ci} 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci/* 48662306a36Sopenharmony_ci * Remove an extent_map from the extent tree. 48762306a36Sopenharmony_ci * 48862306a36Sopenharmony_ci * @tree: extent tree to remove from 48962306a36Sopenharmony_ci * @em: extent map being removed 49062306a36Sopenharmony_ci * 49162306a36Sopenharmony_ci * Remove @em from @tree. No reference counts are dropped, and no checks 49262306a36Sopenharmony_ci * are done to see if the range is in use. 49362306a36Sopenharmony_ci */ 49462306a36Sopenharmony_civoid remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) 49562306a36Sopenharmony_ci{ 49662306a36Sopenharmony_ci lockdep_assert_held_write(&tree->lock); 49762306a36Sopenharmony_ci 49862306a36Sopenharmony_ci WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags)); 49962306a36Sopenharmony_ci rb_erase_cached(&em->rb_node, &tree->map); 50062306a36Sopenharmony_ci if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) 50162306a36Sopenharmony_ci list_del_init(&em->list); 50262306a36Sopenharmony_ci if (test_bit(EXTENT_FLAG_FS_MAPPING, &em->flags)) 50362306a36Sopenharmony_ci extent_map_device_clear_bits(em, CHUNK_ALLOCATED); 50462306a36Sopenharmony_ci RB_CLEAR_NODE(&em->rb_node); 50562306a36Sopenharmony_ci} 50662306a36Sopenharmony_ci 50762306a36Sopenharmony_cistatic void replace_extent_mapping(struct extent_map_tree *tree, 50862306a36Sopenharmony_ci struct extent_map *cur, 50962306a36Sopenharmony_ci struct extent_map *new, 51062306a36Sopenharmony_ci int modified) 51162306a36Sopenharmony_ci{ 51262306a36Sopenharmony_ci lockdep_assert_held_write(&tree->lock); 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_ci WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags)); 51562306a36Sopenharmony_ci ASSERT(extent_map_in_tree(cur)); 51662306a36Sopenharmony_ci if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags)) 51762306a36Sopenharmony_ci list_del_init(&cur->list); 51862306a36Sopenharmony_ci rb_replace_node_cached(&cur->rb_node, &new->rb_node, &tree->map); 51962306a36Sopenharmony_ci RB_CLEAR_NODE(&cur->rb_node); 52062306a36Sopenharmony_ci 52162306a36Sopenharmony_ci setup_extent_mapping(tree, new, modified); 52262306a36Sopenharmony_ci} 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_cistatic struct extent_map *next_extent_map(const struct extent_map *em) 52562306a36Sopenharmony_ci{ 52662306a36Sopenharmony_ci struct rb_node *next; 52762306a36Sopenharmony_ci 52862306a36Sopenharmony_ci next = rb_next(&em->rb_node); 52962306a36Sopenharmony_ci if (!next) 53062306a36Sopenharmony_ci return NULL; 53162306a36Sopenharmony_ci return container_of(next, struct extent_map, rb_node); 53262306a36Sopenharmony_ci} 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_cistatic struct extent_map *prev_extent_map(struct extent_map *em) 53562306a36Sopenharmony_ci{ 53662306a36Sopenharmony_ci struct rb_node *prev; 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci prev = rb_prev(&em->rb_node); 53962306a36Sopenharmony_ci if (!prev) 54062306a36Sopenharmony_ci return NULL; 54162306a36Sopenharmony_ci return container_of(prev, struct extent_map, rb_node); 54262306a36Sopenharmony_ci} 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_ci/* 54562306a36Sopenharmony_ci * Helper for btrfs_get_extent. Given an existing extent in the tree, 54662306a36Sopenharmony_ci * the existing extent is the nearest extent to map_start, 54762306a36Sopenharmony_ci * and an extent that you want to insert, deal with overlap and insert 54862306a36Sopenharmony_ci * the best fitted new extent into the tree. 54962306a36Sopenharmony_ci */ 55062306a36Sopenharmony_cistatic noinline int merge_extent_mapping(struct extent_map_tree *em_tree, 55162306a36Sopenharmony_ci struct extent_map *existing, 55262306a36Sopenharmony_ci struct extent_map *em, 55362306a36Sopenharmony_ci u64 map_start) 55462306a36Sopenharmony_ci{ 55562306a36Sopenharmony_ci struct extent_map *prev; 55662306a36Sopenharmony_ci struct extent_map *next; 55762306a36Sopenharmony_ci u64 start; 55862306a36Sopenharmony_ci u64 end; 55962306a36Sopenharmony_ci u64 start_diff; 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_ci BUG_ON(map_start < em->start || map_start >= extent_map_end(em)); 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci if (existing->start > map_start) { 56462306a36Sopenharmony_ci next = existing; 56562306a36Sopenharmony_ci prev = prev_extent_map(next); 56662306a36Sopenharmony_ci } else { 56762306a36Sopenharmony_ci prev = existing; 56862306a36Sopenharmony_ci next = next_extent_map(prev); 56962306a36Sopenharmony_ci } 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_ci start = prev ? extent_map_end(prev) : em->start; 57262306a36Sopenharmony_ci start = max_t(u64, start, em->start); 57362306a36Sopenharmony_ci end = next ? next->start : extent_map_end(em); 57462306a36Sopenharmony_ci end = min_t(u64, end, extent_map_end(em)); 57562306a36Sopenharmony_ci start_diff = start - em->start; 57662306a36Sopenharmony_ci em->start = start; 57762306a36Sopenharmony_ci em->len = end - start; 57862306a36Sopenharmony_ci if (em->block_start < EXTENT_MAP_LAST_BYTE && 57962306a36Sopenharmony_ci !test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { 58062306a36Sopenharmony_ci em->block_start += start_diff; 58162306a36Sopenharmony_ci em->block_len = em->len; 58262306a36Sopenharmony_ci } 58362306a36Sopenharmony_ci return add_extent_mapping(em_tree, em, 0); 58462306a36Sopenharmony_ci} 58562306a36Sopenharmony_ci 58662306a36Sopenharmony_ci/* 58762306a36Sopenharmony_ci * Add extent mapping into em_tree. 58862306a36Sopenharmony_ci * 58962306a36Sopenharmony_ci * @fs_info: the filesystem 59062306a36Sopenharmony_ci * @em_tree: extent tree into which we want to insert the extent mapping 59162306a36Sopenharmony_ci * @em_in: extent we are inserting 59262306a36Sopenharmony_ci * @start: start of the logical range btrfs_get_extent() is requesting 59362306a36Sopenharmony_ci * @len: length of the logical range btrfs_get_extent() is requesting 59462306a36Sopenharmony_ci * 59562306a36Sopenharmony_ci * Note that @em_in's range may be different from [start, start+len), 59662306a36Sopenharmony_ci * but they must be overlapped. 59762306a36Sopenharmony_ci * 59862306a36Sopenharmony_ci * Insert @em_in into @em_tree. In case there is an overlapping range, handle 59962306a36Sopenharmony_ci * the -EEXIST by either: 60062306a36Sopenharmony_ci * a) Returning the existing extent in @em_in if @start is within the 60162306a36Sopenharmony_ci * existing em. 60262306a36Sopenharmony_ci * b) Merge the existing extent with @em_in passed in. 60362306a36Sopenharmony_ci * 60462306a36Sopenharmony_ci * Return 0 on success, otherwise -EEXIST. 60562306a36Sopenharmony_ci * 60662306a36Sopenharmony_ci */ 60762306a36Sopenharmony_ciint btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info, 60862306a36Sopenharmony_ci struct extent_map_tree *em_tree, 60962306a36Sopenharmony_ci struct extent_map **em_in, u64 start, u64 len) 61062306a36Sopenharmony_ci{ 61162306a36Sopenharmony_ci int ret; 61262306a36Sopenharmony_ci struct extent_map *em = *em_in; 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci /* 61562306a36Sopenharmony_ci * Tree-checker should have rejected any inline extent with non-zero 61662306a36Sopenharmony_ci * file offset. Here just do a sanity check. 61762306a36Sopenharmony_ci */ 61862306a36Sopenharmony_ci if (em->block_start == EXTENT_MAP_INLINE) 61962306a36Sopenharmony_ci ASSERT(em->start == 0); 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci ret = add_extent_mapping(em_tree, em, 0); 62262306a36Sopenharmony_ci /* it is possible that someone inserted the extent into the tree 62362306a36Sopenharmony_ci * while we had the lock dropped. It is also possible that 62462306a36Sopenharmony_ci * an overlapping map exists in the tree 62562306a36Sopenharmony_ci */ 62662306a36Sopenharmony_ci if (ret == -EEXIST) { 62762306a36Sopenharmony_ci struct extent_map *existing; 62862306a36Sopenharmony_ci 62962306a36Sopenharmony_ci ret = 0; 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_ci existing = search_extent_mapping(em_tree, start, len); 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_ci trace_btrfs_handle_em_exist(fs_info, existing, em, start, len); 63462306a36Sopenharmony_ci 63562306a36Sopenharmony_ci /* 63662306a36Sopenharmony_ci * existing will always be non-NULL, since there must be 63762306a36Sopenharmony_ci * extent causing the -EEXIST. 63862306a36Sopenharmony_ci */ 63962306a36Sopenharmony_ci if (start >= existing->start && 64062306a36Sopenharmony_ci start < extent_map_end(existing)) { 64162306a36Sopenharmony_ci free_extent_map(em); 64262306a36Sopenharmony_ci *em_in = existing; 64362306a36Sopenharmony_ci ret = 0; 64462306a36Sopenharmony_ci } else { 64562306a36Sopenharmony_ci u64 orig_start = em->start; 64662306a36Sopenharmony_ci u64 orig_len = em->len; 64762306a36Sopenharmony_ci 64862306a36Sopenharmony_ci /* 64962306a36Sopenharmony_ci * The existing extent map is the one nearest to 65062306a36Sopenharmony_ci * the [start, start + len) range which overlaps 65162306a36Sopenharmony_ci */ 65262306a36Sopenharmony_ci ret = merge_extent_mapping(em_tree, existing, 65362306a36Sopenharmony_ci em, start); 65462306a36Sopenharmony_ci if (ret) { 65562306a36Sopenharmony_ci free_extent_map(em); 65662306a36Sopenharmony_ci *em_in = NULL; 65762306a36Sopenharmony_ci WARN_ONCE(ret, 65862306a36Sopenharmony_ci"unexpected error %d: merge existing(start %llu len %llu) with em(start %llu len %llu)\n", 65962306a36Sopenharmony_ci ret, existing->start, existing->len, 66062306a36Sopenharmony_ci orig_start, orig_len); 66162306a36Sopenharmony_ci } 66262306a36Sopenharmony_ci free_extent_map(existing); 66362306a36Sopenharmony_ci } 66462306a36Sopenharmony_ci } 66562306a36Sopenharmony_ci 66662306a36Sopenharmony_ci ASSERT(ret == 0 || ret == -EEXIST); 66762306a36Sopenharmony_ci return ret; 66862306a36Sopenharmony_ci} 66962306a36Sopenharmony_ci 67062306a36Sopenharmony_ci/* 67162306a36Sopenharmony_ci * Drop all extent maps from a tree in the fastest possible way, rescheduling 67262306a36Sopenharmony_ci * if needed. This avoids searching the tree, from the root down to the first 67362306a36Sopenharmony_ci * extent map, before each deletion. 67462306a36Sopenharmony_ci */ 67562306a36Sopenharmony_cistatic void drop_all_extent_maps_fast(struct extent_map_tree *tree) 67662306a36Sopenharmony_ci{ 67762306a36Sopenharmony_ci write_lock(&tree->lock); 67862306a36Sopenharmony_ci while (!RB_EMPTY_ROOT(&tree->map.rb_root)) { 67962306a36Sopenharmony_ci struct extent_map *em; 68062306a36Sopenharmony_ci struct rb_node *node; 68162306a36Sopenharmony_ci 68262306a36Sopenharmony_ci node = rb_first_cached(&tree->map); 68362306a36Sopenharmony_ci em = rb_entry(node, struct extent_map, rb_node); 68462306a36Sopenharmony_ci clear_bit(EXTENT_FLAG_PINNED, &em->flags); 68562306a36Sopenharmony_ci clear_bit(EXTENT_FLAG_LOGGING, &em->flags); 68662306a36Sopenharmony_ci remove_extent_mapping(tree, em); 68762306a36Sopenharmony_ci free_extent_map(em); 68862306a36Sopenharmony_ci cond_resched_rwlock_write(&tree->lock); 68962306a36Sopenharmony_ci } 69062306a36Sopenharmony_ci write_unlock(&tree->lock); 69162306a36Sopenharmony_ci} 69262306a36Sopenharmony_ci 69362306a36Sopenharmony_ci/* 69462306a36Sopenharmony_ci * Drop all extent maps in a given range. 69562306a36Sopenharmony_ci * 69662306a36Sopenharmony_ci * @inode: The target inode. 69762306a36Sopenharmony_ci * @start: Start offset of the range. 69862306a36Sopenharmony_ci * @end: End offset of the range (inclusive value). 69962306a36Sopenharmony_ci * @skip_pinned: Indicate if pinned extent maps should be ignored or not. 70062306a36Sopenharmony_ci * 70162306a36Sopenharmony_ci * This drops all the extent maps that intersect the given range [@start, @end]. 70262306a36Sopenharmony_ci * Extent maps that partially overlap the range and extend behind or beyond it, 70362306a36Sopenharmony_ci * are split. 70462306a36Sopenharmony_ci * The caller should have locked an appropriate file range in the inode's io 70562306a36Sopenharmony_ci * tree before calling this function. 70662306a36Sopenharmony_ci */ 70762306a36Sopenharmony_civoid btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end, 70862306a36Sopenharmony_ci bool skip_pinned) 70962306a36Sopenharmony_ci{ 71062306a36Sopenharmony_ci struct extent_map *split; 71162306a36Sopenharmony_ci struct extent_map *split2; 71262306a36Sopenharmony_ci struct extent_map *em; 71362306a36Sopenharmony_ci struct extent_map_tree *em_tree = &inode->extent_tree; 71462306a36Sopenharmony_ci u64 len = end - start + 1; 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ci WARN_ON(end < start); 71762306a36Sopenharmony_ci if (end == (u64)-1) { 71862306a36Sopenharmony_ci if (start == 0 && !skip_pinned) { 71962306a36Sopenharmony_ci drop_all_extent_maps_fast(em_tree); 72062306a36Sopenharmony_ci return; 72162306a36Sopenharmony_ci } 72262306a36Sopenharmony_ci len = (u64)-1; 72362306a36Sopenharmony_ci } else { 72462306a36Sopenharmony_ci /* Make end offset exclusive for use in the loop below. */ 72562306a36Sopenharmony_ci end++; 72662306a36Sopenharmony_ci } 72762306a36Sopenharmony_ci 72862306a36Sopenharmony_ci /* 72962306a36Sopenharmony_ci * It's ok if we fail to allocate the extent maps, see the comment near 73062306a36Sopenharmony_ci * the bottom of the loop below. We only need two spare extent maps in 73162306a36Sopenharmony_ci * the worst case, where the first extent map that intersects our range 73262306a36Sopenharmony_ci * starts before the range and the last extent map that intersects our 73362306a36Sopenharmony_ci * range ends after our range (and they might be the same extent map), 73462306a36Sopenharmony_ci * because we need to split those two extent maps at the boundaries. 73562306a36Sopenharmony_ci */ 73662306a36Sopenharmony_ci split = alloc_extent_map(); 73762306a36Sopenharmony_ci split2 = alloc_extent_map(); 73862306a36Sopenharmony_ci 73962306a36Sopenharmony_ci write_lock(&em_tree->lock); 74062306a36Sopenharmony_ci em = lookup_extent_mapping(em_tree, start, len); 74162306a36Sopenharmony_ci 74262306a36Sopenharmony_ci while (em) { 74362306a36Sopenharmony_ci /* extent_map_end() returns exclusive value (last byte + 1). */ 74462306a36Sopenharmony_ci const u64 em_end = extent_map_end(em); 74562306a36Sopenharmony_ci struct extent_map *next_em = NULL; 74662306a36Sopenharmony_ci u64 gen; 74762306a36Sopenharmony_ci unsigned long flags; 74862306a36Sopenharmony_ci bool modified; 74962306a36Sopenharmony_ci bool compressed; 75062306a36Sopenharmony_ci 75162306a36Sopenharmony_ci if (em_end < end) { 75262306a36Sopenharmony_ci next_em = next_extent_map(em); 75362306a36Sopenharmony_ci if (next_em) { 75462306a36Sopenharmony_ci if (next_em->start < end) 75562306a36Sopenharmony_ci refcount_inc(&next_em->refs); 75662306a36Sopenharmony_ci else 75762306a36Sopenharmony_ci next_em = NULL; 75862306a36Sopenharmony_ci } 75962306a36Sopenharmony_ci } 76062306a36Sopenharmony_ci 76162306a36Sopenharmony_ci if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) { 76262306a36Sopenharmony_ci start = em_end; 76362306a36Sopenharmony_ci goto next; 76462306a36Sopenharmony_ci } 76562306a36Sopenharmony_ci 76662306a36Sopenharmony_ci flags = em->flags; 76762306a36Sopenharmony_ci clear_bit(EXTENT_FLAG_PINNED, &em->flags); 76862306a36Sopenharmony_ci /* 76962306a36Sopenharmony_ci * In case we split the extent map, we want to preserve the 77062306a36Sopenharmony_ci * EXTENT_FLAG_LOGGING flag on our extent map, but we don't want 77162306a36Sopenharmony_ci * it on the new extent maps. 77262306a36Sopenharmony_ci */ 77362306a36Sopenharmony_ci clear_bit(EXTENT_FLAG_LOGGING, &flags); 77462306a36Sopenharmony_ci modified = !list_empty(&em->list); 77562306a36Sopenharmony_ci 77662306a36Sopenharmony_ci /* 77762306a36Sopenharmony_ci * The extent map does not cross our target range, so no need to 77862306a36Sopenharmony_ci * split it, we can remove it directly. 77962306a36Sopenharmony_ci */ 78062306a36Sopenharmony_ci if (em->start >= start && em_end <= end) 78162306a36Sopenharmony_ci goto remove_em; 78262306a36Sopenharmony_ci 78362306a36Sopenharmony_ci gen = em->generation; 78462306a36Sopenharmony_ci compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 78562306a36Sopenharmony_ci 78662306a36Sopenharmony_ci if (em->start < start) { 78762306a36Sopenharmony_ci if (!split) { 78862306a36Sopenharmony_ci split = split2; 78962306a36Sopenharmony_ci split2 = NULL; 79062306a36Sopenharmony_ci if (!split) 79162306a36Sopenharmony_ci goto remove_em; 79262306a36Sopenharmony_ci } 79362306a36Sopenharmony_ci split->start = em->start; 79462306a36Sopenharmony_ci split->len = start - em->start; 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci if (em->block_start < EXTENT_MAP_LAST_BYTE) { 79762306a36Sopenharmony_ci split->orig_start = em->orig_start; 79862306a36Sopenharmony_ci split->block_start = em->block_start; 79962306a36Sopenharmony_ci 80062306a36Sopenharmony_ci if (compressed) 80162306a36Sopenharmony_ci split->block_len = em->block_len; 80262306a36Sopenharmony_ci else 80362306a36Sopenharmony_ci split->block_len = split->len; 80462306a36Sopenharmony_ci split->orig_block_len = max(split->block_len, 80562306a36Sopenharmony_ci em->orig_block_len); 80662306a36Sopenharmony_ci split->ram_bytes = em->ram_bytes; 80762306a36Sopenharmony_ci } else { 80862306a36Sopenharmony_ci split->orig_start = split->start; 80962306a36Sopenharmony_ci split->block_len = 0; 81062306a36Sopenharmony_ci split->block_start = em->block_start; 81162306a36Sopenharmony_ci split->orig_block_len = 0; 81262306a36Sopenharmony_ci split->ram_bytes = split->len; 81362306a36Sopenharmony_ci } 81462306a36Sopenharmony_ci 81562306a36Sopenharmony_ci split->generation = gen; 81662306a36Sopenharmony_ci split->flags = flags; 81762306a36Sopenharmony_ci split->compress_type = em->compress_type; 81862306a36Sopenharmony_ci replace_extent_mapping(em_tree, em, split, modified); 81962306a36Sopenharmony_ci free_extent_map(split); 82062306a36Sopenharmony_ci split = split2; 82162306a36Sopenharmony_ci split2 = NULL; 82262306a36Sopenharmony_ci } 82362306a36Sopenharmony_ci if (em_end > end) { 82462306a36Sopenharmony_ci if (!split) { 82562306a36Sopenharmony_ci split = split2; 82662306a36Sopenharmony_ci split2 = NULL; 82762306a36Sopenharmony_ci if (!split) 82862306a36Sopenharmony_ci goto remove_em; 82962306a36Sopenharmony_ci } 83062306a36Sopenharmony_ci split->start = end; 83162306a36Sopenharmony_ci split->len = em_end - end; 83262306a36Sopenharmony_ci split->block_start = em->block_start; 83362306a36Sopenharmony_ci split->flags = flags; 83462306a36Sopenharmony_ci split->compress_type = em->compress_type; 83562306a36Sopenharmony_ci split->generation = gen; 83662306a36Sopenharmony_ci 83762306a36Sopenharmony_ci if (em->block_start < EXTENT_MAP_LAST_BYTE) { 83862306a36Sopenharmony_ci split->orig_block_len = max(em->block_len, 83962306a36Sopenharmony_ci em->orig_block_len); 84062306a36Sopenharmony_ci 84162306a36Sopenharmony_ci split->ram_bytes = em->ram_bytes; 84262306a36Sopenharmony_ci if (compressed) { 84362306a36Sopenharmony_ci split->block_len = em->block_len; 84462306a36Sopenharmony_ci split->orig_start = em->orig_start; 84562306a36Sopenharmony_ci } else { 84662306a36Sopenharmony_ci const u64 diff = start + len - em->start; 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_ci split->block_len = split->len; 84962306a36Sopenharmony_ci split->block_start += diff; 85062306a36Sopenharmony_ci split->orig_start = em->orig_start; 85162306a36Sopenharmony_ci } 85262306a36Sopenharmony_ci } else { 85362306a36Sopenharmony_ci split->ram_bytes = split->len; 85462306a36Sopenharmony_ci split->orig_start = split->start; 85562306a36Sopenharmony_ci split->block_len = 0; 85662306a36Sopenharmony_ci split->orig_block_len = 0; 85762306a36Sopenharmony_ci } 85862306a36Sopenharmony_ci 85962306a36Sopenharmony_ci if (extent_map_in_tree(em)) { 86062306a36Sopenharmony_ci replace_extent_mapping(em_tree, em, split, 86162306a36Sopenharmony_ci modified); 86262306a36Sopenharmony_ci } else { 86362306a36Sopenharmony_ci int ret; 86462306a36Sopenharmony_ci 86562306a36Sopenharmony_ci ret = add_extent_mapping(em_tree, split, 86662306a36Sopenharmony_ci modified); 86762306a36Sopenharmony_ci /* Logic error, shouldn't happen. */ 86862306a36Sopenharmony_ci ASSERT(ret == 0); 86962306a36Sopenharmony_ci if (WARN_ON(ret != 0) && modified) 87062306a36Sopenharmony_ci btrfs_set_inode_full_sync(inode); 87162306a36Sopenharmony_ci } 87262306a36Sopenharmony_ci free_extent_map(split); 87362306a36Sopenharmony_ci split = NULL; 87462306a36Sopenharmony_ci } 87562306a36Sopenharmony_ciremove_em: 87662306a36Sopenharmony_ci if (extent_map_in_tree(em)) { 87762306a36Sopenharmony_ci /* 87862306a36Sopenharmony_ci * If the extent map is still in the tree it means that 87962306a36Sopenharmony_ci * either of the following is true: 88062306a36Sopenharmony_ci * 88162306a36Sopenharmony_ci * 1) It fits entirely in our range (doesn't end beyond 88262306a36Sopenharmony_ci * it or starts before it); 88362306a36Sopenharmony_ci * 88462306a36Sopenharmony_ci * 2) It starts before our range and/or ends after our 88562306a36Sopenharmony_ci * range, and we were not able to allocate the extent 88662306a36Sopenharmony_ci * maps for split operations, @split and @split2. 88762306a36Sopenharmony_ci * 88862306a36Sopenharmony_ci * If we are at case 2) then we just remove the entire 88962306a36Sopenharmony_ci * extent map - this is fine since if anyone needs it to 89062306a36Sopenharmony_ci * access the subranges outside our range, will just 89162306a36Sopenharmony_ci * load it again from the subvolume tree's file extent 89262306a36Sopenharmony_ci * item. However if the extent map was in the list of 89362306a36Sopenharmony_ci * modified extents, then we must mark the inode for a 89462306a36Sopenharmony_ci * full fsync, otherwise a fast fsync will miss this 89562306a36Sopenharmony_ci * extent if it's new and needs to be logged. 89662306a36Sopenharmony_ci */ 89762306a36Sopenharmony_ci if ((em->start < start || em_end > end) && modified) { 89862306a36Sopenharmony_ci ASSERT(!split); 89962306a36Sopenharmony_ci btrfs_set_inode_full_sync(inode); 90062306a36Sopenharmony_ci } 90162306a36Sopenharmony_ci remove_extent_mapping(em_tree, em); 90262306a36Sopenharmony_ci } 90362306a36Sopenharmony_ci 90462306a36Sopenharmony_ci /* 90562306a36Sopenharmony_ci * Once for the tree reference (we replaced or removed the 90662306a36Sopenharmony_ci * extent map from the tree). 90762306a36Sopenharmony_ci */ 90862306a36Sopenharmony_ci free_extent_map(em); 90962306a36Sopenharmony_cinext: 91062306a36Sopenharmony_ci /* Once for us (for our lookup reference). */ 91162306a36Sopenharmony_ci free_extent_map(em); 91262306a36Sopenharmony_ci 91362306a36Sopenharmony_ci em = next_em; 91462306a36Sopenharmony_ci } 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_ci write_unlock(&em_tree->lock); 91762306a36Sopenharmony_ci 91862306a36Sopenharmony_ci free_extent_map(split); 91962306a36Sopenharmony_ci free_extent_map(split2); 92062306a36Sopenharmony_ci} 92162306a36Sopenharmony_ci 92262306a36Sopenharmony_ci/* 92362306a36Sopenharmony_ci * Replace a range in the inode's extent map tree with a new extent map. 92462306a36Sopenharmony_ci * 92562306a36Sopenharmony_ci * @inode: The target inode. 92662306a36Sopenharmony_ci * @new_em: The new extent map to add to the inode's extent map tree. 92762306a36Sopenharmony_ci * @modified: Indicate if the new extent map should be added to the list of 92862306a36Sopenharmony_ci * modified extents (for fast fsync tracking). 92962306a36Sopenharmony_ci * 93062306a36Sopenharmony_ci * Drops all the extent maps in the inode's extent map tree that intersect the 93162306a36Sopenharmony_ci * range of the new extent map and adds the new extent map to the tree. 93262306a36Sopenharmony_ci * The caller should have locked an appropriate file range in the inode's io 93362306a36Sopenharmony_ci * tree before calling this function. 93462306a36Sopenharmony_ci */ 93562306a36Sopenharmony_ciint btrfs_replace_extent_map_range(struct btrfs_inode *inode, 93662306a36Sopenharmony_ci struct extent_map *new_em, 93762306a36Sopenharmony_ci bool modified) 93862306a36Sopenharmony_ci{ 93962306a36Sopenharmony_ci const u64 end = new_em->start + new_em->len - 1; 94062306a36Sopenharmony_ci struct extent_map_tree *tree = &inode->extent_tree; 94162306a36Sopenharmony_ci int ret; 94262306a36Sopenharmony_ci 94362306a36Sopenharmony_ci ASSERT(!extent_map_in_tree(new_em)); 94462306a36Sopenharmony_ci 94562306a36Sopenharmony_ci /* 94662306a36Sopenharmony_ci * The caller has locked an appropriate file range in the inode's io 94762306a36Sopenharmony_ci * tree, but getting -EEXIST when adding the new extent map can still 94862306a36Sopenharmony_ci * happen in case there are extents that partially cover the range, and 94962306a36Sopenharmony_ci * this is due to two tasks operating on different parts of the extent. 95062306a36Sopenharmony_ci * See commit 18e83ac75bfe67 ("Btrfs: fix unexpected EEXIST from 95162306a36Sopenharmony_ci * btrfs_get_extent") for an example and details. 95262306a36Sopenharmony_ci */ 95362306a36Sopenharmony_ci do { 95462306a36Sopenharmony_ci btrfs_drop_extent_map_range(inode, new_em->start, end, false); 95562306a36Sopenharmony_ci write_lock(&tree->lock); 95662306a36Sopenharmony_ci ret = add_extent_mapping(tree, new_em, modified); 95762306a36Sopenharmony_ci write_unlock(&tree->lock); 95862306a36Sopenharmony_ci } while (ret == -EEXIST); 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ci return ret; 96162306a36Sopenharmony_ci} 96262306a36Sopenharmony_ci 96362306a36Sopenharmony_ci/* 96462306a36Sopenharmony_ci * Split off the first pre bytes from the extent_map at [start, start + len], 96562306a36Sopenharmony_ci * and set the block_start for it to new_logical. 96662306a36Sopenharmony_ci * 96762306a36Sopenharmony_ci * This function is used when an ordered_extent needs to be split. 96862306a36Sopenharmony_ci */ 96962306a36Sopenharmony_ciint split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre, 97062306a36Sopenharmony_ci u64 new_logical) 97162306a36Sopenharmony_ci{ 97262306a36Sopenharmony_ci struct extent_map_tree *em_tree = &inode->extent_tree; 97362306a36Sopenharmony_ci struct extent_map *em; 97462306a36Sopenharmony_ci struct extent_map *split_pre = NULL; 97562306a36Sopenharmony_ci struct extent_map *split_mid = NULL; 97662306a36Sopenharmony_ci int ret = 0; 97762306a36Sopenharmony_ci unsigned long flags; 97862306a36Sopenharmony_ci 97962306a36Sopenharmony_ci ASSERT(pre != 0); 98062306a36Sopenharmony_ci ASSERT(pre < len); 98162306a36Sopenharmony_ci 98262306a36Sopenharmony_ci split_pre = alloc_extent_map(); 98362306a36Sopenharmony_ci if (!split_pre) 98462306a36Sopenharmony_ci return -ENOMEM; 98562306a36Sopenharmony_ci split_mid = alloc_extent_map(); 98662306a36Sopenharmony_ci if (!split_mid) { 98762306a36Sopenharmony_ci ret = -ENOMEM; 98862306a36Sopenharmony_ci goto out_free_pre; 98962306a36Sopenharmony_ci } 99062306a36Sopenharmony_ci 99162306a36Sopenharmony_ci lock_extent(&inode->io_tree, start, start + len - 1, NULL); 99262306a36Sopenharmony_ci write_lock(&em_tree->lock); 99362306a36Sopenharmony_ci em = lookup_extent_mapping(em_tree, start, len); 99462306a36Sopenharmony_ci if (!em) { 99562306a36Sopenharmony_ci ret = -EIO; 99662306a36Sopenharmony_ci goto out_unlock; 99762306a36Sopenharmony_ci } 99862306a36Sopenharmony_ci 99962306a36Sopenharmony_ci ASSERT(em->len == len); 100062306a36Sopenharmony_ci ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)); 100162306a36Sopenharmony_ci ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE); 100262306a36Sopenharmony_ci ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags)); 100362306a36Sopenharmony_ci ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags)); 100462306a36Sopenharmony_ci ASSERT(!list_empty(&em->list)); 100562306a36Sopenharmony_ci 100662306a36Sopenharmony_ci flags = em->flags; 100762306a36Sopenharmony_ci clear_bit(EXTENT_FLAG_PINNED, &em->flags); 100862306a36Sopenharmony_ci 100962306a36Sopenharmony_ci /* First, replace the em with a new extent_map starting from * em->start */ 101062306a36Sopenharmony_ci split_pre->start = em->start; 101162306a36Sopenharmony_ci split_pre->len = pre; 101262306a36Sopenharmony_ci split_pre->orig_start = split_pre->start; 101362306a36Sopenharmony_ci split_pre->block_start = new_logical; 101462306a36Sopenharmony_ci split_pre->block_len = split_pre->len; 101562306a36Sopenharmony_ci split_pre->orig_block_len = split_pre->block_len; 101662306a36Sopenharmony_ci split_pre->ram_bytes = split_pre->len; 101762306a36Sopenharmony_ci split_pre->flags = flags; 101862306a36Sopenharmony_ci split_pre->compress_type = em->compress_type; 101962306a36Sopenharmony_ci split_pre->generation = em->generation; 102062306a36Sopenharmony_ci 102162306a36Sopenharmony_ci replace_extent_mapping(em_tree, em, split_pre, 1); 102262306a36Sopenharmony_ci 102362306a36Sopenharmony_ci /* 102462306a36Sopenharmony_ci * Now we only have an extent_map at: 102562306a36Sopenharmony_ci * [em->start, em->start + pre] 102662306a36Sopenharmony_ci */ 102762306a36Sopenharmony_ci 102862306a36Sopenharmony_ci /* Insert the middle extent_map. */ 102962306a36Sopenharmony_ci split_mid->start = em->start + pre; 103062306a36Sopenharmony_ci split_mid->len = em->len - pre; 103162306a36Sopenharmony_ci split_mid->orig_start = split_mid->start; 103262306a36Sopenharmony_ci split_mid->block_start = em->block_start + pre; 103362306a36Sopenharmony_ci split_mid->block_len = split_mid->len; 103462306a36Sopenharmony_ci split_mid->orig_block_len = split_mid->block_len; 103562306a36Sopenharmony_ci split_mid->ram_bytes = split_mid->len; 103662306a36Sopenharmony_ci split_mid->flags = flags; 103762306a36Sopenharmony_ci split_mid->compress_type = em->compress_type; 103862306a36Sopenharmony_ci split_mid->generation = em->generation; 103962306a36Sopenharmony_ci add_extent_mapping(em_tree, split_mid, 1); 104062306a36Sopenharmony_ci 104162306a36Sopenharmony_ci /* Once for us */ 104262306a36Sopenharmony_ci free_extent_map(em); 104362306a36Sopenharmony_ci /* Once for the tree */ 104462306a36Sopenharmony_ci free_extent_map(em); 104562306a36Sopenharmony_ci 104662306a36Sopenharmony_ciout_unlock: 104762306a36Sopenharmony_ci write_unlock(&em_tree->lock); 104862306a36Sopenharmony_ci unlock_extent(&inode->io_tree, start, start + len - 1, NULL); 104962306a36Sopenharmony_ci free_extent_map(split_mid); 105062306a36Sopenharmony_ciout_free_pre: 105162306a36Sopenharmony_ci free_extent_map(split_pre); 105262306a36Sopenharmony_ci return ret; 105362306a36Sopenharmony_ci} 1054