// SPDX-License-Identifier: GPL-2.0

#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/prefetch.h>
#include <linux/cleancache.h>
#include "extent_io.h"
#include "extent-io-tree.h"
#include "extent_map.h"
#include "ctree.h"
#include "btrfs_inode.h"
#include "volumes.h"
#include "check-integrity.h"
#include "locking.h"
#include "rcu-string.h"
#include "backref.h"
#include "disk-io.h"

/* Slab caches and the shared bio_set, created in the *_init() functions below. */
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
static struct bio_set btrfs_bioset;

/* An extent_state is linked into a tree iff its rb_node is non-empty. */
static inline bool extent_state_in_tree(const struct extent_state *state)
{
        return !RB_EMPTY_NODE(&state->rb_node);
}

#ifdef CONFIG_BTRFS_DEBUG
/* Global list of live extent_state objects, for leak checking on exit. */
static LIST_HEAD(states);
static DEFINE_SPINLOCK(leak_lock);

/*
 * Track a newly allocated object on a leak-debug list.  IRQ-safe locking is
 * used because allocation can happen from (at least) varying contexts.
 */
static inline void btrfs_leak_debug_add(spinlock_t *lock,
                                        struct list_head *new,
                                        struct list_head *head)
{
        unsigned long flags;

        spin_lock_irqsave(lock, flags);
        list_add(new, head);
        spin_unlock_irqrestore(lock, flags);
}

/* Remove an object from its leak-debug list (counterpart of the above). */
static inline void btrfs_leak_debug_del(spinlock_t *lock,
                                        struct list_head *entry)
{
        unsigned long flags;

        spin_lock_irqsave(lock, flags);
        list_del(entry);
        spin_unlock_irqrestore(lock, flags);
}

/*
 * Report and free any extent buffers still tracked in fs_info->allocated_ebs.
 * Called at teardown; anything still on the list is a leak.
 */
void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info)
{
        struct extent_buffer *eb;
        unsigned long flags;

        /*
         * If we didn't get into open_ctree our allocated_ebs will not be
         * initialized, so just skip this.
         */
        if (!fs_info->allocated_ebs.next)
                return;

        spin_lock_irqsave(&fs_info->eb_leak_lock, flags);
        while (!list_empty(&fs_info->allocated_ebs)) {
                eb = list_first_entry(&fs_info->allocated_ebs,
                                      struct extent_buffer, leak_list);
                pr_err(
        "BTRFS: buffer leak start %llu len %lu refs %d bflags %lu owner %llu\n",
                       eb->start, eb->len, atomic_read(&eb->refs), eb->bflags,
                       btrfs_header_owner(eb));
                list_del(&eb->leak_list);
                kmem_cache_free(extent_buffer_cache, eb);
        }
        spin_unlock_irqrestore(&fs_info->eb_leak_lock, flags);
}

/*
 * Report and free any extent_state objects still on the global 'states'
 * list.  Called when the extent_state cache is torn down.
 */
static inline void btrfs_extent_state_leak_debug_check(void)
{
        struct extent_state *state;

        while (!list_empty(&states)) {
                state = list_entry(states.next, struct extent_state, leak_list);
                pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
                       state->start, state->end, state->state,
                       extent_state_in_tree(state),
                       refcount_read(&state->refs));
                list_del(&state->leak_list);
                kmem_cache_free(extent_state_cache, state);
        }
}

#define btrfs_debug_check_extent_io_range(tree, start, end)             \
        __btrfs_debug_check_extent_io_range(__func__, (tree), (start), (end))
/*
 * Sanity-log suspicious ranges passed to the extent io tree: for data inodes,
 * warn (ratelimited) about an 'end' that is even and not at i_size - 1, which
 * suggests an off-by-one in the caller (ranges are inclusive).
 */
static inline void __btrfs_debug_check_extent_io_range(const char *caller,
                struct extent_io_tree *tree, u64 start, u64 end)
{
        struct inode *inode = tree->private_data;
        u64 isize;

        if (!inode || !is_data_inode(inode))
                return;

        isize = i_size_read(inode);
        if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
                btrfs_debug_rl(BTRFS_I(inode)->root->fs_info,
                    "%s: ino %llu isize %llu odd range [%llu,%llu]",
                        caller, btrfs_ino(BTRFS_I(inode)), isize, start, end);
        }
}
#else
/* Non-debug builds: all of the leak/range checking compiles away. */
#define btrfs_leak_debug_add(lock, new, head)   do {} while (0)
#define btrfs_leak_debug_del(lock, entry)       do {} while (0)
#define btrfs_extent_state_leak_debug_check()   do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e)      do {} while (0)
#endif

/* Generic [start, end] node used for rb-tree searches in this file. */
struct tree_entry {
        u64 start;
        u64 end;
        struct rb_node rb_node;
};

/* Per-writeback-call state: the bio being built plus behavior flags. */
struct extent_page_data {
        struct bio *bio;
        /* tells writepage not to lock the state bits for this range
         * it still does the unlocking
         */
        unsigned int extent_locked:1;

        /* tells the submit_bio code to use REQ_SYNC */
        unsigned int sync_io:1;
};

/*
 * Record a state-bit change in @changeset (bytes + range ulist).  No-op when
 * @changeset is NULL or the bits are already in the requested set/clear
 * state.  Returns 0 or the error from ulist_add() (GFP_ATOMIC allocation).
 */
static int add_extent_changeset(struct extent_state *state, unsigned bits,
                                 struct extent_changeset *changeset,
                                 int set)
{
        int ret;

        if (!changeset)
                return 0;
        if (set && (state->state & bits) == bits)
                return 0;
        if (!set && (state->state & bits) == 0)
                return 0;
        changeset->bytes_changed += state->end - state->start + 1;
        ret = ulist_add(&changeset->range_changed, state->start, state->end,
                        GFP_ATOMIC);
        return ret;
}

/*
 * Hand a built bio to the data or metadata submission path, depending on the
 * inode stashed in bio->bi_private (an extent_io_tree).  bi_private is
 * cleared first since the block layer owns it after submission.  Returns a
 * negative errno on failure.
 */
int __must_check submit_one_bio(struct bio *bio, int mirror_num,
                                unsigned long bio_flags)
{
        blk_status_t ret = 0;
        struct extent_io_tree *tree = bio->bi_private;

        bio->bi_private = NULL;

        if (is_data_inode(tree->private_data))
                ret = btrfs_submit_data_bio(tree->private_data, bio, mirror_num,
                                            bio_flags);
        else
                ret = btrfs_submit_metadata_bio(tree->private_data, bio,
                                                mirror_num, bio_flags);

        return blk_status_to_errno(ret);
}

/* Cleanup unsubmitted bios: fail the pending bio with @ret and drop it. */
static void end_write_bio(struct extent_page_data *epd, int ret)
{
        if (epd->bio) {
                epd->bio->bi_status = errno_to_blk_status(ret);
                bio_endio(epd->bio);
                epd->bio = NULL;
        }
}

/*
 * Submit bio from extent page data via submit_one_bio
 *
 * Return 0 if everything is OK.
 * Return <0 for error.
 */
static int __must_check flush_write_bio(struct extent_page_data *epd)
{
        int ret = 0;

        if (epd->bio) {
                ret = submit_one_bio(epd->bio, 0, 0);
                /*
                 * Clean up of epd->bio is handled by its endio function.
                 * And endio is either triggered by successful bio execution
                 * or the error handler of submit bio hook.
                 * So at this point, no matter what happened, we don't need
                 * to clean up epd->bio.
                 */
                epd->bio = NULL;
        }
        return ret;
}

/* Create the extent_state slab cache.  Returns -ENOMEM on failure. */
int __init extent_state_cache_init(void)
{
        extent_state_cache = kmem_cache_create("btrfs_extent_state",
                        sizeof(struct extent_state), 0,
                        SLAB_MEM_SPREAD, NULL);
        if (!extent_state_cache)
                return -ENOMEM;
        return 0;
}

/*
 * Create the extent_buffer slab cache and the btrfs bio_set (with integrity
 * support).  On any failure everything already set up is unwound and
 * -ENOMEM is returned.
 */
int __init extent_io_init(void)
{
        extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
                        sizeof(struct extent_buffer), 0,
                        SLAB_MEM_SPREAD, NULL);
        if (!extent_buffer_cache)
                return -ENOMEM;

        if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
                        offsetof(struct btrfs_io_bio, bio),
                        BIOSET_NEED_BVECS))
                goto free_buffer_cache;

        if (bioset_integrity_create(&btrfs_bioset, BIO_POOL_SIZE))
                goto free_bioset;

        return 0;

free_bioset:
        bioset_exit(&btrfs_bioset);

free_buffer_cache:
        kmem_cache_destroy(extent_buffer_cache);
        extent_buffer_cache = NULL;
        return -ENOMEM;
}

/* Tear down the extent_state cache, reporting leaks first (debug builds). */
void __cold extent_state_cache_exit(void)
{
        btrfs_extent_state_leak_debug_check();
        kmem_cache_destroy(extent_state_cache);
}

/* Tear down the extent_buffer cache and the btrfs bio_set. */
void __cold extent_io_exit(void)
{
        /*
         * Make sure all delayed rcu free are flushed before we
         * destroy caches.
         */
        rcu_barrier();
        kmem_cache_destroy(extent_buffer_cache);
        bioset_exit(&btrfs_bioset);
}

/*
 * For the file_extent_tree, we want to hold the inode lock when we lookup and
 * update the disk_i_size, but lockdep will complain because our io_tree we hold
 * the tree lock and get the inode lock when setting delalloc. These two things
 * are unrelated, so make a class for the file_extent_tree so we don't get the
 * two locking patterns mixed up.
 */
static struct lock_class_key file_extent_tree_class;

/*
 * Initialize an extent_io_tree: empty rb-tree, zeroed dirty accounting, and
 * the owner/private_data identity.  File-extent trees get their own lockdep
 * class (see comment above).
 */
void extent_io_tree_init(struct btrfs_fs_info *fs_info,
                         struct extent_io_tree *tree, unsigned int owner,
                         void *private_data)
{
        tree->fs_info = fs_info;
        tree->state = RB_ROOT;
        tree->dirty_bytes = 0;
        spin_lock_init(&tree->lock);
        tree->private_data = private_data;
        tree->owner = owner;
        if (owner == IO_TREE_INODE_FILE_EXTENT)
                lockdep_set_class(&tree->lock, &file_extent_tree_class);
}

/*
 * Empty the tree, freeing every extent_state in it.  The tree lock is
 * dropped/retaken periodically via cond_resched_lock() to stay preemptible
 * on large trees.
 */
void extent_io_tree_release(struct extent_io_tree *tree)
{
        spin_lock(&tree->lock);
        /*
         * Do a single barrier for the waitqueue_active check here, the state
         * of the waitqueue should not change once extent_io_tree_release is
         * called.
         */
        smp_mb();
        while (!RB_EMPTY_ROOT(&tree->state)) {
                struct rb_node *node;
                struct extent_state *state;

                node = rb_first(&tree->state);
                state = rb_entry(node, struct extent_state, rb_node);
                rb_erase(&state->rb_node, &tree->state);
                RB_CLEAR_NODE(&state->rb_node);
                /*
                 * btree io trees aren't supposed to have tasks waiting for
                 * changes in the flags of extent states ever.
                 */
                ASSERT(!waitqueue_active(&state->wq));
                free_extent_state(state);

                cond_resched_lock(&tree->lock);
        }
        spin_unlock(&tree->lock);
}

/*
 * Allocate and initialize an extent_state (refcount 1, not in any tree).
 * Returns NULL on allocation failure.
 */
static struct extent_state *alloc_extent_state(gfp_t mask)
{
        struct extent_state *state;

        /*
         * The given mask might be not appropriate for the slab allocator,
         * drop the unsupported bits
         */
        mask &= ~(__GFP_DMA32|__GFP_HIGHMEM);
        state = kmem_cache_alloc(extent_state_cache, mask);
        if (!state)
                return state;
        state->state = 0;
        state->failrec = NULL;
        RB_CLEAR_NODE(&state->rb_node);
        btrfs_leak_debug_add(&leak_lock, &state->leak_list, &states);
        refcount_set(&state->refs, 1);
        init_waitqueue_head(&state->wq);
        trace_alloc_extent_state(state, mask, _RET_IP_);
        return state;
}

/*
 * Drop a reference on @state, freeing it when the count hits zero.  NULL is
 * accepted and ignored.  A state still linked into a tree at free time
 * triggers a WARN.
 */
void free_extent_state(struct extent_state *state)
{
        if (!state)
                return;
        if (refcount_dec_and_test(&state->refs)) {
                WARN_ON(extent_state_in_tree(state));
                btrfs_leak_debug_del(&leak_lock, &state->leak_list);
                trace_free_extent_state(state, _RET_IP_);
                kmem_cache_free(extent_state_cache, state);
        }
}

/*
 * Insert @node into @root keyed by [start, end] of the containing
 * tree_entry.  Three ways to find the link point, in priority order:
 * a precomputed (p_in, parent_in) pair, a walk starting at @search_start,
 * or a walk from the root.  Returns NULL on success, or the existing node
 * whose range already contains @offset (nothing inserted in that case).
 */
static struct rb_node *tree_insert(struct rb_root *root,
                                   struct rb_node *search_start,
                                   u64 offset,
                                   struct rb_node *node,
                                   struct rb_node ***p_in,
                                   struct rb_node **parent_in)
{
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct tree_entry *entry;

        if (p_in && parent_in) {
                p = *p_in;
                parent = *parent_in;
                goto do_insert;
        }

        p = search_start ? &search_start : &root->rb_node;
        while (*p) {
                parent = *p;
                entry = rb_entry(parent, struct tree_entry, rb_node);

                if (offset < entry->start)
                        p = &(*p)->rb_left;
                else if (offset > entry->end)
                        p = &(*p)->rb_right;
                else
                        return parent;
        }

do_insert:
        rb_link_node(node, parent, p);
        rb_insert_color(node, root);
        return NULL;
}

/**
 * __etree_search - search @tree for an entry that contains @offset. Such
 * entry would have entry->start <= offset && entry->end >= offset.
 *
 * @tree - the tree to search
 * @offset - offset that should fall within an entry in @tree
 * @next_ret - pointer to the first entry whose range ends after @offset
 * @prev_ret - pointer to the first entry whose range begins before @offset
 * @p_ret - pointer where new node should be anchored (used when inserting an
 *          entry in the tree)
 * @parent_ret - points to entry which would have been the parent of the entry,
 *               containing @offset
 *
 * This function returns a pointer to the entry that contains @offset byte
If no such entry exists, then NULL is returned and the other 4068c2ecf20Sopenharmony_ci * pointer arguments to the function are filled, otherwise the found entry is 4078c2ecf20Sopenharmony_ci * returned and other pointers are left untouched. 4088c2ecf20Sopenharmony_ci */ 4098c2ecf20Sopenharmony_cistatic struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset, 4108c2ecf20Sopenharmony_ci struct rb_node **next_ret, 4118c2ecf20Sopenharmony_ci struct rb_node **prev_ret, 4128c2ecf20Sopenharmony_ci struct rb_node ***p_ret, 4138c2ecf20Sopenharmony_ci struct rb_node **parent_ret) 4148c2ecf20Sopenharmony_ci{ 4158c2ecf20Sopenharmony_ci struct rb_root *root = &tree->state; 4168c2ecf20Sopenharmony_ci struct rb_node **n = &root->rb_node; 4178c2ecf20Sopenharmony_ci struct rb_node *prev = NULL; 4188c2ecf20Sopenharmony_ci struct rb_node *orig_prev = NULL; 4198c2ecf20Sopenharmony_ci struct tree_entry *entry; 4208c2ecf20Sopenharmony_ci struct tree_entry *prev_entry = NULL; 4218c2ecf20Sopenharmony_ci 4228c2ecf20Sopenharmony_ci while (*n) { 4238c2ecf20Sopenharmony_ci prev = *n; 4248c2ecf20Sopenharmony_ci entry = rb_entry(prev, struct tree_entry, rb_node); 4258c2ecf20Sopenharmony_ci prev_entry = entry; 4268c2ecf20Sopenharmony_ci 4278c2ecf20Sopenharmony_ci if (offset < entry->start) 4288c2ecf20Sopenharmony_ci n = &(*n)->rb_left; 4298c2ecf20Sopenharmony_ci else if (offset > entry->end) 4308c2ecf20Sopenharmony_ci n = &(*n)->rb_right; 4318c2ecf20Sopenharmony_ci else 4328c2ecf20Sopenharmony_ci return *n; 4338c2ecf20Sopenharmony_ci } 4348c2ecf20Sopenharmony_ci 4358c2ecf20Sopenharmony_ci if (p_ret) 4368c2ecf20Sopenharmony_ci *p_ret = n; 4378c2ecf20Sopenharmony_ci if (parent_ret) 4388c2ecf20Sopenharmony_ci *parent_ret = prev; 4398c2ecf20Sopenharmony_ci 4408c2ecf20Sopenharmony_ci if (next_ret) { 4418c2ecf20Sopenharmony_ci orig_prev = prev; 4428c2ecf20Sopenharmony_ci while (prev && offset > prev_entry->end) { 4438c2ecf20Sopenharmony_ci prev = rb_next(prev); 
4448c2ecf20Sopenharmony_ci prev_entry = rb_entry(prev, struct tree_entry, rb_node); 4458c2ecf20Sopenharmony_ci } 4468c2ecf20Sopenharmony_ci *next_ret = prev; 4478c2ecf20Sopenharmony_ci prev = orig_prev; 4488c2ecf20Sopenharmony_ci } 4498c2ecf20Sopenharmony_ci 4508c2ecf20Sopenharmony_ci if (prev_ret) { 4518c2ecf20Sopenharmony_ci prev_entry = rb_entry(prev, struct tree_entry, rb_node); 4528c2ecf20Sopenharmony_ci while (prev && offset < prev_entry->start) { 4538c2ecf20Sopenharmony_ci prev = rb_prev(prev); 4548c2ecf20Sopenharmony_ci prev_entry = rb_entry(prev, struct tree_entry, rb_node); 4558c2ecf20Sopenharmony_ci } 4568c2ecf20Sopenharmony_ci *prev_ret = prev; 4578c2ecf20Sopenharmony_ci } 4588c2ecf20Sopenharmony_ci return NULL; 4598c2ecf20Sopenharmony_ci} 4608c2ecf20Sopenharmony_ci 4618c2ecf20Sopenharmony_cistatic inline struct rb_node * 4628c2ecf20Sopenharmony_citree_search_for_insert(struct extent_io_tree *tree, 4638c2ecf20Sopenharmony_ci u64 offset, 4648c2ecf20Sopenharmony_ci struct rb_node ***p_ret, 4658c2ecf20Sopenharmony_ci struct rb_node **parent_ret) 4668c2ecf20Sopenharmony_ci{ 4678c2ecf20Sopenharmony_ci struct rb_node *next= NULL; 4688c2ecf20Sopenharmony_ci struct rb_node *ret; 4698c2ecf20Sopenharmony_ci 4708c2ecf20Sopenharmony_ci ret = __etree_search(tree, offset, &next, NULL, p_ret, parent_ret); 4718c2ecf20Sopenharmony_ci if (!ret) 4728c2ecf20Sopenharmony_ci return next; 4738c2ecf20Sopenharmony_ci return ret; 4748c2ecf20Sopenharmony_ci} 4758c2ecf20Sopenharmony_ci 4768c2ecf20Sopenharmony_cistatic inline struct rb_node *tree_search(struct extent_io_tree *tree, 4778c2ecf20Sopenharmony_ci u64 offset) 4788c2ecf20Sopenharmony_ci{ 4798c2ecf20Sopenharmony_ci return tree_search_for_insert(tree, offset, NULL, NULL); 4808c2ecf20Sopenharmony_ci} 4818c2ecf20Sopenharmony_ci 4828c2ecf20Sopenharmony_ci/* 4838c2ecf20Sopenharmony_ci * utility function to look for merge candidates inside a given range. 
4848c2ecf20Sopenharmony_ci * Any extents with matching state are merged together into a single 4858c2ecf20Sopenharmony_ci * extent in the tree. Extents with EXTENT_IO in their state field 4868c2ecf20Sopenharmony_ci * are not merged because the end_io handlers need to be able to do 4878c2ecf20Sopenharmony_ci * operations on them without sleeping (or doing allocations/splits). 4888c2ecf20Sopenharmony_ci * 4898c2ecf20Sopenharmony_ci * This should be called with the tree lock held. 4908c2ecf20Sopenharmony_ci */ 4918c2ecf20Sopenharmony_cistatic void merge_state(struct extent_io_tree *tree, 4928c2ecf20Sopenharmony_ci struct extent_state *state) 4938c2ecf20Sopenharmony_ci{ 4948c2ecf20Sopenharmony_ci struct extent_state *other; 4958c2ecf20Sopenharmony_ci struct rb_node *other_node; 4968c2ecf20Sopenharmony_ci 4978c2ecf20Sopenharmony_ci if (state->state & (EXTENT_LOCKED | EXTENT_BOUNDARY)) 4988c2ecf20Sopenharmony_ci return; 4998c2ecf20Sopenharmony_ci 5008c2ecf20Sopenharmony_ci other_node = rb_prev(&state->rb_node); 5018c2ecf20Sopenharmony_ci if (other_node) { 5028c2ecf20Sopenharmony_ci other = rb_entry(other_node, struct extent_state, rb_node); 5038c2ecf20Sopenharmony_ci if (other->end == state->start - 1 && 5048c2ecf20Sopenharmony_ci other->state == state->state) { 5058c2ecf20Sopenharmony_ci if (tree->private_data && 5068c2ecf20Sopenharmony_ci is_data_inode(tree->private_data)) 5078c2ecf20Sopenharmony_ci btrfs_merge_delalloc_extent(tree->private_data, 5088c2ecf20Sopenharmony_ci state, other); 5098c2ecf20Sopenharmony_ci state->start = other->start; 5108c2ecf20Sopenharmony_ci rb_erase(&other->rb_node, &tree->state); 5118c2ecf20Sopenharmony_ci RB_CLEAR_NODE(&other->rb_node); 5128c2ecf20Sopenharmony_ci free_extent_state(other); 5138c2ecf20Sopenharmony_ci } 5148c2ecf20Sopenharmony_ci } 5158c2ecf20Sopenharmony_ci other_node = rb_next(&state->rb_node); 5168c2ecf20Sopenharmony_ci if (other_node) { 5178c2ecf20Sopenharmony_ci other = rb_entry(other_node, struct extent_state, 
rb_node); 5188c2ecf20Sopenharmony_ci if (other->start == state->end + 1 && 5198c2ecf20Sopenharmony_ci other->state == state->state) { 5208c2ecf20Sopenharmony_ci if (tree->private_data && 5218c2ecf20Sopenharmony_ci is_data_inode(tree->private_data)) 5228c2ecf20Sopenharmony_ci btrfs_merge_delalloc_extent(tree->private_data, 5238c2ecf20Sopenharmony_ci state, other); 5248c2ecf20Sopenharmony_ci state->end = other->end; 5258c2ecf20Sopenharmony_ci rb_erase(&other->rb_node, &tree->state); 5268c2ecf20Sopenharmony_ci RB_CLEAR_NODE(&other->rb_node); 5278c2ecf20Sopenharmony_ci free_extent_state(other); 5288c2ecf20Sopenharmony_ci } 5298c2ecf20Sopenharmony_ci } 5308c2ecf20Sopenharmony_ci} 5318c2ecf20Sopenharmony_ci 5328c2ecf20Sopenharmony_cistatic void set_state_bits(struct extent_io_tree *tree, 5338c2ecf20Sopenharmony_ci struct extent_state *state, unsigned *bits, 5348c2ecf20Sopenharmony_ci struct extent_changeset *changeset); 5358c2ecf20Sopenharmony_ci 5368c2ecf20Sopenharmony_ci/* 5378c2ecf20Sopenharmony_ci * insert an extent_state struct into the tree. 'bits' are set on the 5388c2ecf20Sopenharmony_ci * struct before it is inserted. 5398c2ecf20Sopenharmony_ci * 5408c2ecf20Sopenharmony_ci * This may return -EEXIST if the extent is already there, in which case the 5418c2ecf20Sopenharmony_ci * state struct is freed. 5428c2ecf20Sopenharmony_ci * 5438c2ecf20Sopenharmony_ci * The tree lock is not taken internally. This is a utility function and 5448c2ecf20Sopenharmony_ci * probably isn't what you want to call (see set/clear_extent_bit). 
 */
static int insert_state(struct extent_io_tree *tree,
                        struct extent_state *state, u64 start, u64 end,
                        struct rb_node ***p,
                        struct rb_node **parent,
                        unsigned *bits, struct extent_changeset *changeset)
{
        struct rb_node *node;

        if (end < start) {
                btrfs_err(tree->fs_info,
                        "insert state: end < start %llu %llu", end, start);
                WARN_ON(1);
        }
        state->start = start;
        state->end = end;

        set_state_bits(tree, state, bits, changeset);

        node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent);
        if (node) {
                struct extent_state *found;
                found = rb_entry(node, struct extent_state, rb_node);
                btrfs_err(tree->fs_info,
                       "found node %llu %llu on insert of %llu %llu",
                       found->start, found->end, start, end);
                return -EEXIST;
        }
        merge_state(tree, state);
        return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling,
 * the tree has 'orig' at [orig->start, orig->end].  After calling, there
 * are two extent state structs in the tree:
 * prealloc: [orig->start, split - 1]
 * orig: [ split, orig->end ]
 *
 * The tree locks are not taken by this function. They need to be held
 * by the caller.
 */
static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
                       struct extent_state *prealloc, u64 split)
{
        struct rb_node *node;

        if (tree->private_data && is_data_inode(tree->private_data))
                btrfs_split_delalloc_extent(tree->private_data, orig, split);

        /* prealloc takes the front half, orig shrinks to the back half. */
        prealloc->start = orig->start;
        prealloc->end = split - 1;
        prealloc->state = orig->state;
        orig->start = split;

        node = tree_insert(&tree->state, &orig->rb_node, prealloc->end,
                           &prealloc->rb_node, NULL, NULL);
        if (node) {
                free_extent_state(prealloc);
                return -EEXIST;
        }
        return 0;
}

/* In-order successor of @state in its tree, or NULL at the end. */
static struct extent_state *next_state(struct extent_state *state)
{
        struct rb_node *next = rb_next(&state->rb_node);
        if (next)
                return rb_entry(next, struct extent_state, rb_node);
        else
                return NULL;
}

/*
 * utility function to clear some bits in an extent state struct.
 * it will optionally wake up anyone waiting on this state (wake == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
                                            struct extent_state *state,
                                            unsigned *bits, int wake,
                                            struct extent_changeset *changeset)
{
        struct extent_state *next;
        unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS;
        int ret;

        /* Keep the tree-wide dirty byte accounting in sync. */
        if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
                u64 range = state->end - state->start + 1;
                WARN_ON(range > tree->dirty_bytes);
                tree->dirty_bytes -= range;
        }

        if (tree->private_data && is_data_inode(tree->private_data))
                btrfs_clear_delalloc_extent(tree->private_data, state, bits);

        ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
        BUG_ON(ret < 0);
        state->state &= ~bits_to_clear;
        if (wake)
                wake_up(&state->wq);
        if (state->state == 0) {
                /* Nothing left set: remove and free, returning the successor
                 * (computed before the node is erased). */
                next = next_state(state);
                if (extent_state_in_tree(state)) {
                        rb_erase(&state->rb_node, &tree->state);
                        RB_CLEAR_NODE(&state->rb_node);
                        free_extent_state(state);
                } else {
                        WARN_ON(1);
                }
        } else {
                merge_state(tree, state);
                next = next_state(state);
        }
        return next;
}

/* Return @prealloc if usable, otherwise try an atomic allocation. */
static struct extent_state *
alloc_extent_state_atomic(struct extent_state *prealloc)
{
        if (!prealloc)
                prealloc = alloc_extent_state(GFP_ATOMIC);

        return prealloc;
}

/* Fatal: the tree changed underneath us while we held its lock. */
static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
{
        btrfs_panic(tree->fs_info, err,
        "locking error: extent tree was modified by another thread while locked");
}

/*
 * clear some bits on a range in the tree.  This may require splitting
 * or inserting elements in the tree, so the gfp mask is used to
 * indicate which allocations or sleeping are allowed.
 *
 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
 * the given range from the tree regardless of state (ie for truncate).
 *
 * the range [start, end] is inclusive.
6928c2ecf20Sopenharmony_ci * 6938c2ecf20Sopenharmony_ci * This takes the tree lock, and returns 0 on success and < 0 on error. 6948c2ecf20Sopenharmony_ci */ 6958c2ecf20Sopenharmony_ciint __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 6968c2ecf20Sopenharmony_ci unsigned bits, int wake, int delete, 6978c2ecf20Sopenharmony_ci struct extent_state **cached_state, 6988c2ecf20Sopenharmony_ci gfp_t mask, struct extent_changeset *changeset) 6998c2ecf20Sopenharmony_ci{ 7008c2ecf20Sopenharmony_ci struct extent_state *state; 7018c2ecf20Sopenharmony_ci struct extent_state *cached; 7028c2ecf20Sopenharmony_ci struct extent_state *prealloc = NULL; 7038c2ecf20Sopenharmony_ci struct rb_node *node; 7048c2ecf20Sopenharmony_ci u64 last_end; 7058c2ecf20Sopenharmony_ci int err; 7068c2ecf20Sopenharmony_ci int clear = 0; 7078c2ecf20Sopenharmony_ci 7088c2ecf20Sopenharmony_ci btrfs_debug_check_extent_io_range(tree, start, end); 7098c2ecf20Sopenharmony_ci trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits); 7108c2ecf20Sopenharmony_ci 7118c2ecf20Sopenharmony_ci if (bits & EXTENT_DELALLOC) 7128c2ecf20Sopenharmony_ci bits |= EXTENT_NORESERVE; 7138c2ecf20Sopenharmony_ci 7148c2ecf20Sopenharmony_ci if (delete) 7158c2ecf20Sopenharmony_ci bits |= ~EXTENT_CTLBITS; 7168c2ecf20Sopenharmony_ci 7178c2ecf20Sopenharmony_ci if (bits & (EXTENT_LOCKED | EXTENT_BOUNDARY)) 7188c2ecf20Sopenharmony_ci clear = 1; 7198c2ecf20Sopenharmony_ciagain: 7208c2ecf20Sopenharmony_ci if (!prealloc && gfpflags_allow_blocking(mask)) { 7218c2ecf20Sopenharmony_ci /* 7228c2ecf20Sopenharmony_ci * Don't care for allocation failure here because we might end 7238c2ecf20Sopenharmony_ci * up not needing the pre-allocated extent state at all, which 7248c2ecf20Sopenharmony_ci * is the case if we only have in the tree extent states that 7258c2ecf20Sopenharmony_ci * cover our input range and don't cover too any other range. 
7268c2ecf20Sopenharmony_ci * If we end up needing a new extent state we allocate it later. 7278c2ecf20Sopenharmony_ci */ 7288c2ecf20Sopenharmony_ci prealloc = alloc_extent_state(mask); 7298c2ecf20Sopenharmony_ci } 7308c2ecf20Sopenharmony_ci 7318c2ecf20Sopenharmony_ci spin_lock(&tree->lock); 7328c2ecf20Sopenharmony_ci if (cached_state) { 7338c2ecf20Sopenharmony_ci cached = *cached_state; 7348c2ecf20Sopenharmony_ci 7358c2ecf20Sopenharmony_ci if (clear) { 7368c2ecf20Sopenharmony_ci *cached_state = NULL; 7378c2ecf20Sopenharmony_ci cached_state = NULL; 7388c2ecf20Sopenharmony_ci } 7398c2ecf20Sopenharmony_ci 7408c2ecf20Sopenharmony_ci if (cached && extent_state_in_tree(cached) && 7418c2ecf20Sopenharmony_ci cached->start <= start && cached->end > start) { 7428c2ecf20Sopenharmony_ci if (clear) 7438c2ecf20Sopenharmony_ci refcount_dec(&cached->refs); 7448c2ecf20Sopenharmony_ci state = cached; 7458c2ecf20Sopenharmony_ci goto hit_next; 7468c2ecf20Sopenharmony_ci } 7478c2ecf20Sopenharmony_ci if (clear) 7488c2ecf20Sopenharmony_ci free_extent_state(cached); 7498c2ecf20Sopenharmony_ci } 7508c2ecf20Sopenharmony_ci /* 7518c2ecf20Sopenharmony_ci * this search will find the extents that end after 7528c2ecf20Sopenharmony_ci * our range starts 7538c2ecf20Sopenharmony_ci */ 7548c2ecf20Sopenharmony_ci node = tree_search(tree, start); 7558c2ecf20Sopenharmony_ci if (!node) 7568c2ecf20Sopenharmony_ci goto out; 7578c2ecf20Sopenharmony_ci state = rb_entry(node, struct extent_state, rb_node); 7588c2ecf20Sopenharmony_cihit_next: 7598c2ecf20Sopenharmony_ci if (state->start > end) 7608c2ecf20Sopenharmony_ci goto out; 7618c2ecf20Sopenharmony_ci WARN_ON(state->end < start); 7628c2ecf20Sopenharmony_ci last_end = state->end; 7638c2ecf20Sopenharmony_ci 7648c2ecf20Sopenharmony_ci /* the state doesn't have the wanted bits, go ahead */ 7658c2ecf20Sopenharmony_ci if (!(state->state & bits)) { 7668c2ecf20Sopenharmony_ci state = next_state(state); 7678c2ecf20Sopenharmony_ci goto next; 
7688c2ecf20Sopenharmony_ci } 7698c2ecf20Sopenharmony_ci 7708c2ecf20Sopenharmony_ci /* 7718c2ecf20Sopenharmony_ci * | ---- desired range ---- | 7728c2ecf20Sopenharmony_ci * | state | or 7738c2ecf20Sopenharmony_ci * | ------------- state -------------- | 7748c2ecf20Sopenharmony_ci * 7758c2ecf20Sopenharmony_ci * We need to split the extent we found, and may flip 7768c2ecf20Sopenharmony_ci * bits on second half. 7778c2ecf20Sopenharmony_ci * 7788c2ecf20Sopenharmony_ci * If the extent we found extends past our range, we 7798c2ecf20Sopenharmony_ci * just split and search again. It'll get split again 7808c2ecf20Sopenharmony_ci * the next time though. 7818c2ecf20Sopenharmony_ci * 7828c2ecf20Sopenharmony_ci * If the extent we found is inside our range, we clear 7838c2ecf20Sopenharmony_ci * the desired bit on it. 7848c2ecf20Sopenharmony_ci */ 7858c2ecf20Sopenharmony_ci 7868c2ecf20Sopenharmony_ci if (state->start < start) { 7878c2ecf20Sopenharmony_ci prealloc = alloc_extent_state_atomic(prealloc); 7888c2ecf20Sopenharmony_ci BUG_ON(!prealloc); 7898c2ecf20Sopenharmony_ci err = split_state(tree, state, prealloc, start); 7908c2ecf20Sopenharmony_ci if (err) 7918c2ecf20Sopenharmony_ci extent_io_tree_panic(tree, err); 7928c2ecf20Sopenharmony_ci 7938c2ecf20Sopenharmony_ci prealloc = NULL; 7948c2ecf20Sopenharmony_ci if (err) 7958c2ecf20Sopenharmony_ci goto out; 7968c2ecf20Sopenharmony_ci if (state->end <= end) { 7978c2ecf20Sopenharmony_ci state = clear_state_bit(tree, state, &bits, wake, 7988c2ecf20Sopenharmony_ci changeset); 7998c2ecf20Sopenharmony_ci goto next; 8008c2ecf20Sopenharmony_ci } 8018c2ecf20Sopenharmony_ci goto search_again; 8028c2ecf20Sopenharmony_ci } 8038c2ecf20Sopenharmony_ci /* 8048c2ecf20Sopenharmony_ci * | ---- desired range ---- | 8058c2ecf20Sopenharmony_ci * | state | 8068c2ecf20Sopenharmony_ci * We need to split the extent, and clear the bit 8078c2ecf20Sopenharmony_ci * on the first half 8088c2ecf20Sopenharmony_ci */ 8098c2ecf20Sopenharmony_ci if (state->start <= 
end && state->end > end) { 8108c2ecf20Sopenharmony_ci prealloc = alloc_extent_state_atomic(prealloc); 8118c2ecf20Sopenharmony_ci BUG_ON(!prealloc); 8128c2ecf20Sopenharmony_ci err = split_state(tree, state, prealloc, end + 1); 8138c2ecf20Sopenharmony_ci if (err) 8148c2ecf20Sopenharmony_ci extent_io_tree_panic(tree, err); 8158c2ecf20Sopenharmony_ci 8168c2ecf20Sopenharmony_ci if (wake) 8178c2ecf20Sopenharmony_ci wake_up(&state->wq); 8188c2ecf20Sopenharmony_ci 8198c2ecf20Sopenharmony_ci clear_state_bit(tree, prealloc, &bits, wake, changeset); 8208c2ecf20Sopenharmony_ci 8218c2ecf20Sopenharmony_ci prealloc = NULL; 8228c2ecf20Sopenharmony_ci goto out; 8238c2ecf20Sopenharmony_ci } 8248c2ecf20Sopenharmony_ci 8258c2ecf20Sopenharmony_ci state = clear_state_bit(tree, state, &bits, wake, changeset); 8268c2ecf20Sopenharmony_cinext: 8278c2ecf20Sopenharmony_ci if (last_end == (u64)-1) 8288c2ecf20Sopenharmony_ci goto out; 8298c2ecf20Sopenharmony_ci start = last_end + 1; 8308c2ecf20Sopenharmony_ci if (start <= end && state && !need_resched()) 8318c2ecf20Sopenharmony_ci goto hit_next; 8328c2ecf20Sopenharmony_ci 8338c2ecf20Sopenharmony_cisearch_again: 8348c2ecf20Sopenharmony_ci if (start > end) 8358c2ecf20Sopenharmony_ci goto out; 8368c2ecf20Sopenharmony_ci spin_unlock(&tree->lock); 8378c2ecf20Sopenharmony_ci if (gfpflags_allow_blocking(mask)) 8388c2ecf20Sopenharmony_ci cond_resched(); 8398c2ecf20Sopenharmony_ci goto again; 8408c2ecf20Sopenharmony_ci 8418c2ecf20Sopenharmony_ciout: 8428c2ecf20Sopenharmony_ci spin_unlock(&tree->lock); 8438c2ecf20Sopenharmony_ci if (prealloc) 8448c2ecf20Sopenharmony_ci free_extent_state(prealloc); 8458c2ecf20Sopenharmony_ci 8468c2ecf20Sopenharmony_ci return 0; 8478c2ecf20Sopenharmony_ci 8488c2ecf20Sopenharmony_ci} 8498c2ecf20Sopenharmony_ci 8508c2ecf20Sopenharmony_cistatic void wait_on_state(struct extent_io_tree *tree, 8518c2ecf20Sopenharmony_ci struct extent_state *state) 8528c2ecf20Sopenharmony_ci __releases(tree->lock) 8538c2ecf20Sopenharmony_ci 
__acquires(tree->lock) 8548c2ecf20Sopenharmony_ci{ 8558c2ecf20Sopenharmony_ci DEFINE_WAIT(wait); 8568c2ecf20Sopenharmony_ci prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); 8578c2ecf20Sopenharmony_ci spin_unlock(&tree->lock); 8588c2ecf20Sopenharmony_ci schedule(); 8598c2ecf20Sopenharmony_ci spin_lock(&tree->lock); 8608c2ecf20Sopenharmony_ci finish_wait(&state->wq, &wait); 8618c2ecf20Sopenharmony_ci} 8628c2ecf20Sopenharmony_ci 8638c2ecf20Sopenharmony_ci/* 8648c2ecf20Sopenharmony_ci * waits for one or more bits to clear on a range in the state tree. 8658c2ecf20Sopenharmony_ci * The range [start, end] is inclusive. 8668c2ecf20Sopenharmony_ci * The tree lock is taken by this function 8678c2ecf20Sopenharmony_ci */ 8688c2ecf20Sopenharmony_cistatic void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 8698c2ecf20Sopenharmony_ci unsigned long bits) 8708c2ecf20Sopenharmony_ci{ 8718c2ecf20Sopenharmony_ci struct extent_state *state; 8728c2ecf20Sopenharmony_ci struct rb_node *node; 8738c2ecf20Sopenharmony_ci 8748c2ecf20Sopenharmony_ci btrfs_debug_check_extent_io_range(tree, start, end); 8758c2ecf20Sopenharmony_ci 8768c2ecf20Sopenharmony_ci spin_lock(&tree->lock); 8778c2ecf20Sopenharmony_ciagain: 8788c2ecf20Sopenharmony_ci while (1) { 8798c2ecf20Sopenharmony_ci /* 8808c2ecf20Sopenharmony_ci * this search will find all the extents that end after 8818c2ecf20Sopenharmony_ci * our range starts 8828c2ecf20Sopenharmony_ci */ 8838c2ecf20Sopenharmony_ci node = tree_search(tree, start); 8848c2ecf20Sopenharmony_ciprocess_node: 8858c2ecf20Sopenharmony_ci if (!node) 8868c2ecf20Sopenharmony_ci break; 8878c2ecf20Sopenharmony_ci 8888c2ecf20Sopenharmony_ci state = rb_entry(node, struct extent_state, rb_node); 8898c2ecf20Sopenharmony_ci 8908c2ecf20Sopenharmony_ci if (state->start > end) 8918c2ecf20Sopenharmony_ci goto out; 8928c2ecf20Sopenharmony_ci 8938c2ecf20Sopenharmony_ci if (state->state & bits) { 8948c2ecf20Sopenharmony_ci start = state->start; 
8958c2ecf20Sopenharmony_ci refcount_inc(&state->refs); 8968c2ecf20Sopenharmony_ci wait_on_state(tree, state); 8978c2ecf20Sopenharmony_ci free_extent_state(state); 8988c2ecf20Sopenharmony_ci goto again; 8998c2ecf20Sopenharmony_ci } 9008c2ecf20Sopenharmony_ci start = state->end + 1; 9018c2ecf20Sopenharmony_ci 9028c2ecf20Sopenharmony_ci if (start > end) 9038c2ecf20Sopenharmony_ci break; 9048c2ecf20Sopenharmony_ci 9058c2ecf20Sopenharmony_ci if (!cond_resched_lock(&tree->lock)) { 9068c2ecf20Sopenharmony_ci node = rb_next(node); 9078c2ecf20Sopenharmony_ci goto process_node; 9088c2ecf20Sopenharmony_ci } 9098c2ecf20Sopenharmony_ci } 9108c2ecf20Sopenharmony_ciout: 9118c2ecf20Sopenharmony_ci spin_unlock(&tree->lock); 9128c2ecf20Sopenharmony_ci} 9138c2ecf20Sopenharmony_ci 9148c2ecf20Sopenharmony_cistatic void set_state_bits(struct extent_io_tree *tree, 9158c2ecf20Sopenharmony_ci struct extent_state *state, 9168c2ecf20Sopenharmony_ci unsigned *bits, struct extent_changeset *changeset) 9178c2ecf20Sopenharmony_ci{ 9188c2ecf20Sopenharmony_ci unsigned bits_to_set = *bits & ~EXTENT_CTLBITS; 9198c2ecf20Sopenharmony_ci int ret; 9208c2ecf20Sopenharmony_ci 9218c2ecf20Sopenharmony_ci if (tree->private_data && is_data_inode(tree->private_data)) 9228c2ecf20Sopenharmony_ci btrfs_set_delalloc_extent(tree->private_data, state, bits); 9238c2ecf20Sopenharmony_ci 9248c2ecf20Sopenharmony_ci if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { 9258c2ecf20Sopenharmony_ci u64 range = state->end - state->start + 1; 9268c2ecf20Sopenharmony_ci tree->dirty_bytes += range; 9278c2ecf20Sopenharmony_ci } 9288c2ecf20Sopenharmony_ci ret = add_extent_changeset(state, bits_to_set, changeset, 1); 9298c2ecf20Sopenharmony_ci BUG_ON(ret < 0); 9308c2ecf20Sopenharmony_ci state->state |= bits_to_set; 9318c2ecf20Sopenharmony_ci} 9328c2ecf20Sopenharmony_ci 9338c2ecf20Sopenharmony_cistatic void cache_state_if_flags(struct extent_state *state, 9348c2ecf20Sopenharmony_ci struct extent_state **cached_ptr, 
9358c2ecf20Sopenharmony_ci unsigned flags) 9368c2ecf20Sopenharmony_ci{ 9378c2ecf20Sopenharmony_ci if (cached_ptr && !(*cached_ptr)) { 9388c2ecf20Sopenharmony_ci if (!flags || (state->state & flags)) { 9398c2ecf20Sopenharmony_ci *cached_ptr = state; 9408c2ecf20Sopenharmony_ci refcount_inc(&state->refs); 9418c2ecf20Sopenharmony_ci } 9428c2ecf20Sopenharmony_ci } 9438c2ecf20Sopenharmony_ci} 9448c2ecf20Sopenharmony_ci 9458c2ecf20Sopenharmony_cistatic void cache_state(struct extent_state *state, 9468c2ecf20Sopenharmony_ci struct extent_state **cached_ptr) 9478c2ecf20Sopenharmony_ci{ 9488c2ecf20Sopenharmony_ci return cache_state_if_flags(state, cached_ptr, 9498c2ecf20Sopenharmony_ci EXTENT_LOCKED | EXTENT_BOUNDARY); 9508c2ecf20Sopenharmony_ci} 9518c2ecf20Sopenharmony_ci 9528c2ecf20Sopenharmony_ci/* 9538c2ecf20Sopenharmony_ci * set some bits on a range in the tree. This may require allocations or 9548c2ecf20Sopenharmony_ci * sleeping, so the gfp mask is used to indicate what is allowed. 9558c2ecf20Sopenharmony_ci * 9568c2ecf20Sopenharmony_ci * If any of the exclusive bits are set, this will fail with -EEXIST if some 9578c2ecf20Sopenharmony_ci * part of the range already has the desired bits set. The start of the 9588c2ecf20Sopenharmony_ci * existing range is returned in failed_start in this case. 9598c2ecf20Sopenharmony_ci * 9608c2ecf20Sopenharmony_ci * [start, end] is inclusive This takes the tree lock. 
9618c2ecf20Sopenharmony_ci */ 9628c2ecf20Sopenharmony_ci 9638c2ecf20Sopenharmony_cistatic int __must_check 9648c2ecf20Sopenharmony_ci__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 9658c2ecf20Sopenharmony_ci unsigned bits, unsigned exclusive_bits, 9668c2ecf20Sopenharmony_ci u64 *failed_start, struct extent_state **cached_state, 9678c2ecf20Sopenharmony_ci gfp_t mask, struct extent_changeset *changeset) 9688c2ecf20Sopenharmony_ci{ 9698c2ecf20Sopenharmony_ci struct extent_state *state; 9708c2ecf20Sopenharmony_ci struct extent_state *prealloc = NULL; 9718c2ecf20Sopenharmony_ci struct rb_node *node; 9728c2ecf20Sopenharmony_ci struct rb_node **p; 9738c2ecf20Sopenharmony_ci struct rb_node *parent; 9748c2ecf20Sopenharmony_ci int err = 0; 9758c2ecf20Sopenharmony_ci u64 last_start; 9768c2ecf20Sopenharmony_ci u64 last_end; 9778c2ecf20Sopenharmony_ci 9788c2ecf20Sopenharmony_ci btrfs_debug_check_extent_io_range(tree, start, end); 9798c2ecf20Sopenharmony_ci trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits); 9808c2ecf20Sopenharmony_ci 9818c2ecf20Sopenharmony_ciagain: 9828c2ecf20Sopenharmony_ci if (!prealloc && gfpflags_allow_blocking(mask)) { 9838c2ecf20Sopenharmony_ci /* 9848c2ecf20Sopenharmony_ci * Don't care for allocation failure here because we might end 9858c2ecf20Sopenharmony_ci * up not needing the pre-allocated extent state at all, which 9868c2ecf20Sopenharmony_ci * is the case if we only have in the tree extent states that 9878c2ecf20Sopenharmony_ci * cover our input range and don't cover too any other range. 9888c2ecf20Sopenharmony_ci * If we end up needing a new extent state we allocate it later. 
9898c2ecf20Sopenharmony_ci */ 9908c2ecf20Sopenharmony_ci prealloc = alloc_extent_state(mask); 9918c2ecf20Sopenharmony_ci } 9928c2ecf20Sopenharmony_ci 9938c2ecf20Sopenharmony_ci spin_lock(&tree->lock); 9948c2ecf20Sopenharmony_ci if (cached_state && *cached_state) { 9958c2ecf20Sopenharmony_ci state = *cached_state; 9968c2ecf20Sopenharmony_ci if (state->start <= start && state->end > start && 9978c2ecf20Sopenharmony_ci extent_state_in_tree(state)) { 9988c2ecf20Sopenharmony_ci node = &state->rb_node; 9998c2ecf20Sopenharmony_ci goto hit_next; 10008c2ecf20Sopenharmony_ci } 10018c2ecf20Sopenharmony_ci } 10028c2ecf20Sopenharmony_ci /* 10038c2ecf20Sopenharmony_ci * this search will find all the extents that end after 10048c2ecf20Sopenharmony_ci * our range starts. 10058c2ecf20Sopenharmony_ci */ 10068c2ecf20Sopenharmony_ci node = tree_search_for_insert(tree, start, &p, &parent); 10078c2ecf20Sopenharmony_ci if (!node) { 10088c2ecf20Sopenharmony_ci prealloc = alloc_extent_state_atomic(prealloc); 10098c2ecf20Sopenharmony_ci BUG_ON(!prealloc); 10108c2ecf20Sopenharmony_ci err = insert_state(tree, prealloc, start, end, 10118c2ecf20Sopenharmony_ci &p, &parent, &bits, changeset); 10128c2ecf20Sopenharmony_ci if (err) 10138c2ecf20Sopenharmony_ci extent_io_tree_panic(tree, err); 10148c2ecf20Sopenharmony_ci 10158c2ecf20Sopenharmony_ci cache_state(prealloc, cached_state); 10168c2ecf20Sopenharmony_ci prealloc = NULL; 10178c2ecf20Sopenharmony_ci goto out; 10188c2ecf20Sopenharmony_ci } 10198c2ecf20Sopenharmony_ci state = rb_entry(node, struct extent_state, rb_node); 10208c2ecf20Sopenharmony_cihit_next: 10218c2ecf20Sopenharmony_ci last_start = state->start; 10228c2ecf20Sopenharmony_ci last_end = state->end; 10238c2ecf20Sopenharmony_ci 10248c2ecf20Sopenharmony_ci /* 10258c2ecf20Sopenharmony_ci * | ---- desired range ---- | 10268c2ecf20Sopenharmony_ci * | state | 10278c2ecf20Sopenharmony_ci * 10288c2ecf20Sopenharmony_ci * Just lock what we found and keep going 10298c2ecf20Sopenharmony_ci */ 
10308c2ecf20Sopenharmony_ci if (state->start == start && state->end <= end) { 10318c2ecf20Sopenharmony_ci if (state->state & exclusive_bits) { 10328c2ecf20Sopenharmony_ci *failed_start = state->start; 10338c2ecf20Sopenharmony_ci err = -EEXIST; 10348c2ecf20Sopenharmony_ci goto out; 10358c2ecf20Sopenharmony_ci } 10368c2ecf20Sopenharmony_ci 10378c2ecf20Sopenharmony_ci set_state_bits(tree, state, &bits, changeset); 10388c2ecf20Sopenharmony_ci cache_state(state, cached_state); 10398c2ecf20Sopenharmony_ci merge_state(tree, state); 10408c2ecf20Sopenharmony_ci if (last_end == (u64)-1) 10418c2ecf20Sopenharmony_ci goto out; 10428c2ecf20Sopenharmony_ci start = last_end + 1; 10438c2ecf20Sopenharmony_ci state = next_state(state); 10448c2ecf20Sopenharmony_ci if (start < end && state && state->start == start && 10458c2ecf20Sopenharmony_ci !need_resched()) 10468c2ecf20Sopenharmony_ci goto hit_next; 10478c2ecf20Sopenharmony_ci goto search_again; 10488c2ecf20Sopenharmony_ci } 10498c2ecf20Sopenharmony_ci 10508c2ecf20Sopenharmony_ci /* 10518c2ecf20Sopenharmony_ci * | ---- desired range ---- | 10528c2ecf20Sopenharmony_ci * | state | 10538c2ecf20Sopenharmony_ci * or 10548c2ecf20Sopenharmony_ci * | ------------- state -------------- | 10558c2ecf20Sopenharmony_ci * 10568c2ecf20Sopenharmony_ci * We need to split the extent we found, and may flip bits on 10578c2ecf20Sopenharmony_ci * second half. 10588c2ecf20Sopenharmony_ci * 10598c2ecf20Sopenharmony_ci * If the extent we found extends past our 10608c2ecf20Sopenharmony_ci * range, we just split and search again. It'll get split 10618c2ecf20Sopenharmony_ci * again the next time though. 10628c2ecf20Sopenharmony_ci * 10638c2ecf20Sopenharmony_ci * If the extent we found is inside our range, we set the 10648c2ecf20Sopenharmony_ci * desired bit on it. 
10658c2ecf20Sopenharmony_ci */ 10668c2ecf20Sopenharmony_ci if (state->start < start) { 10678c2ecf20Sopenharmony_ci if (state->state & exclusive_bits) { 10688c2ecf20Sopenharmony_ci *failed_start = start; 10698c2ecf20Sopenharmony_ci err = -EEXIST; 10708c2ecf20Sopenharmony_ci goto out; 10718c2ecf20Sopenharmony_ci } 10728c2ecf20Sopenharmony_ci 10738c2ecf20Sopenharmony_ci /* 10748c2ecf20Sopenharmony_ci * If this extent already has all the bits we want set, then 10758c2ecf20Sopenharmony_ci * skip it, not necessary to split it or do anything with it. 10768c2ecf20Sopenharmony_ci */ 10778c2ecf20Sopenharmony_ci if ((state->state & bits) == bits) { 10788c2ecf20Sopenharmony_ci start = state->end + 1; 10798c2ecf20Sopenharmony_ci cache_state(state, cached_state); 10808c2ecf20Sopenharmony_ci goto search_again; 10818c2ecf20Sopenharmony_ci } 10828c2ecf20Sopenharmony_ci 10838c2ecf20Sopenharmony_ci prealloc = alloc_extent_state_atomic(prealloc); 10848c2ecf20Sopenharmony_ci BUG_ON(!prealloc); 10858c2ecf20Sopenharmony_ci err = split_state(tree, state, prealloc, start); 10868c2ecf20Sopenharmony_ci if (err) 10878c2ecf20Sopenharmony_ci extent_io_tree_panic(tree, err); 10888c2ecf20Sopenharmony_ci 10898c2ecf20Sopenharmony_ci prealloc = NULL; 10908c2ecf20Sopenharmony_ci if (err) 10918c2ecf20Sopenharmony_ci goto out; 10928c2ecf20Sopenharmony_ci if (state->end <= end) { 10938c2ecf20Sopenharmony_ci set_state_bits(tree, state, &bits, changeset); 10948c2ecf20Sopenharmony_ci cache_state(state, cached_state); 10958c2ecf20Sopenharmony_ci merge_state(tree, state); 10968c2ecf20Sopenharmony_ci if (last_end == (u64)-1) 10978c2ecf20Sopenharmony_ci goto out; 10988c2ecf20Sopenharmony_ci start = last_end + 1; 10998c2ecf20Sopenharmony_ci state = next_state(state); 11008c2ecf20Sopenharmony_ci if (start < end && state && state->start == start && 11018c2ecf20Sopenharmony_ci !need_resched()) 11028c2ecf20Sopenharmony_ci goto hit_next; 11038c2ecf20Sopenharmony_ci } 11048c2ecf20Sopenharmony_ci goto search_again; 
11058c2ecf20Sopenharmony_ci } 11068c2ecf20Sopenharmony_ci /* 11078c2ecf20Sopenharmony_ci * | ---- desired range ---- | 11088c2ecf20Sopenharmony_ci * | state | or | state | 11098c2ecf20Sopenharmony_ci * 11108c2ecf20Sopenharmony_ci * There's a hole, we need to insert something in it and 11118c2ecf20Sopenharmony_ci * ignore the extent we found. 11128c2ecf20Sopenharmony_ci */ 11138c2ecf20Sopenharmony_ci if (state->start > start) { 11148c2ecf20Sopenharmony_ci u64 this_end; 11158c2ecf20Sopenharmony_ci if (end < last_start) 11168c2ecf20Sopenharmony_ci this_end = end; 11178c2ecf20Sopenharmony_ci else 11188c2ecf20Sopenharmony_ci this_end = last_start - 1; 11198c2ecf20Sopenharmony_ci 11208c2ecf20Sopenharmony_ci prealloc = alloc_extent_state_atomic(prealloc); 11218c2ecf20Sopenharmony_ci BUG_ON(!prealloc); 11228c2ecf20Sopenharmony_ci 11238c2ecf20Sopenharmony_ci /* 11248c2ecf20Sopenharmony_ci * Avoid to free 'prealloc' if it can be merged with 11258c2ecf20Sopenharmony_ci * the later extent. 11268c2ecf20Sopenharmony_ci */ 11278c2ecf20Sopenharmony_ci err = insert_state(tree, prealloc, start, this_end, 11288c2ecf20Sopenharmony_ci NULL, NULL, &bits, changeset); 11298c2ecf20Sopenharmony_ci if (err) 11308c2ecf20Sopenharmony_ci extent_io_tree_panic(tree, err); 11318c2ecf20Sopenharmony_ci 11328c2ecf20Sopenharmony_ci cache_state(prealloc, cached_state); 11338c2ecf20Sopenharmony_ci prealloc = NULL; 11348c2ecf20Sopenharmony_ci start = this_end + 1; 11358c2ecf20Sopenharmony_ci goto search_again; 11368c2ecf20Sopenharmony_ci } 11378c2ecf20Sopenharmony_ci /* 11388c2ecf20Sopenharmony_ci * | ---- desired range ---- | 11398c2ecf20Sopenharmony_ci * | state | 11408c2ecf20Sopenharmony_ci * We need to split the extent, and set the bit 11418c2ecf20Sopenharmony_ci * on the first half 11428c2ecf20Sopenharmony_ci */ 11438c2ecf20Sopenharmony_ci if (state->start <= end && state->end > end) { 11448c2ecf20Sopenharmony_ci if (state->state & exclusive_bits) { 11458c2ecf20Sopenharmony_ci *failed_start = start; 
11468c2ecf20Sopenharmony_ci err = -EEXIST; 11478c2ecf20Sopenharmony_ci goto out; 11488c2ecf20Sopenharmony_ci } 11498c2ecf20Sopenharmony_ci 11508c2ecf20Sopenharmony_ci prealloc = alloc_extent_state_atomic(prealloc); 11518c2ecf20Sopenharmony_ci BUG_ON(!prealloc); 11528c2ecf20Sopenharmony_ci err = split_state(tree, state, prealloc, end + 1); 11538c2ecf20Sopenharmony_ci if (err) 11548c2ecf20Sopenharmony_ci extent_io_tree_panic(tree, err); 11558c2ecf20Sopenharmony_ci 11568c2ecf20Sopenharmony_ci set_state_bits(tree, prealloc, &bits, changeset); 11578c2ecf20Sopenharmony_ci cache_state(prealloc, cached_state); 11588c2ecf20Sopenharmony_ci merge_state(tree, prealloc); 11598c2ecf20Sopenharmony_ci prealloc = NULL; 11608c2ecf20Sopenharmony_ci goto out; 11618c2ecf20Sopenharmony_ci } 11628c2ecf20Sopenharmony_ci 11638c2ecf20Sopenharmony_cisearch_again: 11648c2ecf20Sopenharmony_ci if (start > end) 11658c2ecf20Sopenharmony_ci goto out; 11668c2ecf20Sopenharmony_ci spin_unlock(&tree->lock); 11678c2ecf20Sopenharmony_ci if (gfpflags_allow_blocking(mask)) 11688c2ecf20Sopenharmony_ci cond_resched(); 11698c2ecf20Sopenharmony_ci goto again; 11708c2ecf20Sopenharmony_ci 11718c2ecf20Sopenharmony_ciout: 11728c2ecf20Sopenharmony_ci spin_unlock(&tree->lock); 11738c2ecf20Sopenharmony_ci if (prealloc) 11748c2ecf20Sopenharmony_ci free_extent_state(prealloc); 11758c2ecf20Sopenharmony_ci 11768c2ecf20Sopenharmony_ci return err; 11778c2ecf20Sopenharmony_ci 11788c2ecf20Sopenharmony_ci} 11798c2ecf20Sopenharmony_ci 11808c2ecf20Sopenharmony_ciint set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, 11818c2ecf20Sopenharmony_ci unsigned bits, u64 * failed_start, 11828c2ecf20Sopenharmony_ci struct extent_state **cached_state, gfp_t mask) 11838c2ecf20Sopenharmony_ci{ 11848c2ecf20Sopenharmony_ci return __set_extent_bit(tree, start, end, bits, 0, failed_start, 11858c2ecf20Sopenharmony_ci cached_state, mask, NULL); 11868c2ecf20Sopenharmony_ci} 11878c2ecf20Sopenharmony_ci 11888c2ecf20Sopenharmony_ci 
11898c2ecf20Sopenharmony_ci/** 11908c2ecf20Sopenharmony_ci * convert_extent_bit - convert all bits in a given range from one bit to 11918c2ecf20Sopenharmony_ci * another 11928c2ecf20Sopenharmony_ci * @tree: the io tree to search 11938c2ecf20Sopenharmony_ci * @start: the start offset in bytes 11948c2ecf20Sopenharmony_ci * @end: the end offset in bytes (inclusive) 11958c2ecf20Sopenharmony_ci * @bits: the bits to set in this range 11968c2ecf20Sopenharmony_ci * @clear_bits: the bits to clear in this range 11978c2ecf20Sopenharmony_ci * @cached_state: state that we're going to cache 11988c2ecf20Sopenharmony_ci * 11998c2ecf20Sopenharmony_ci * This will go through and set bits for the given range. If any states exist 12008c2ecf20Sopenharmony_ci * already in this range they are set with the given bit and cleared of the 12018c2ecf20Sopenharmony_ci * clear_bits. This is only meant to be used by things that are mergeable, ie 12028c2ecf20Sopenharmony_ci * converting from say DELALLOC to DIRTY. This is not meant to be used with 12038c2ecf20Sopenharmony_ci * boundary bits like LOCK. 12048c2ecf20Sopenharmony_ci * 12058c2ecf20Sopenharmony_ci * All allocations are done with GFP_NOFS. 
 */
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		       unsigned bits, unsigned clear_bits,
		       struct extent_state **cached_state)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	struct rb_node **p;
	struct rb_node *parent;
	int err = 0;
	u64 last_start;
	u64 last_end;
	bool first_iteration = true;

	btrfs_debug_check_extent_io_range(tree, start, end);
	trace_btrfs_convert_extent_bit(tree, start, end - start + 1, bits,
				       clear_bits);

again:
	if (!prealloc) {
		/*
		 * Best effort, don't worry if extent state allocation fails
		 * here for the first iteration. We might have a cached state
		 * that matches exactly the target range, in which case no
		 * extent state allocations are needed. We'll only know this
		 * after locking the tree.
		 */
		prealloc = alloc_extent_state(GFP_NOFS);
		if (!prealloc && !first_iteration)
			return -ENOMEM;
	}

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		/*
		 * Fast path: the cached state covers 'start' and is still
		 * linked into the tree, so skip the rbtree search entirely.
		 */
		if (state->start <= start && state->end > start &&
		    extent_state_in_tree(state)) {
			node = &state->rb_node;
			goto hit_next;
		}
	}

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search_for_insert(tree, start, &p, &parent);
	if (!node) {
		/* Nothing at or after 'start': insert one state for the whole range. */
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}
		err = insert_state(tree, prealloc, start, end,
				   &p, &parent, &bits, NULL);
		if (err)
			extent_io_tree_panic(tree, err);
		/* 'prealloc' is now owned by the tree; don't free it below. */
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
hit_next:
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		set_state_bits(tree, state, &bits, NULL);
		cache_state(state, cached_state);
		state = clear_state_bit(tree, state, &clear_bits, 0, NULL);
		if (last_end == (u64)-1)
			goto out;
		start = last_end + 1;
		/*
		 * If the next state is adjacent, keep converting without
		 * dropping the lock and re-searching the tree.
		 */
		if (start < end && state && state->start == start &&
		    !need_resched())
			goto hit_next;
		goto search_again;
	}

	/*
	 * | ---- desired range ---- |
	 * | state |
	 * or
	 * | ------------- state -------------- |
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again. It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		if (err)
			extent_io_tree_panic(tree, err);
		prealloc = NULL;
		/*
		 * NOTE(review): likely dead if extent_io_tree_panic() never
		 * returns -- confirm; kept byte-identical to the original.
		 */
		if (err)
			goto out;
		if (state->end <= end) {
			set_state_bits(tree, state, &bits, NULL);
			cache_state(state, cached_state);
			state = clear_state_bit(tree, state, &clear_bits, 0,
						NULL);
			if (last_end == (u64)-1)
				goto out;
			start = last_end + 1;
			if (start < end && state && state->start == start &&
			    !need_resched())
				goto hit_next;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *     | state | or | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;

		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}

		/*
		 * Avoid to free 'prealloc' if it can be merged with
		 * the later extent.
		 */
		err = insert_state(tree, prealloc, start, this_end,
				   NULL, NULL, &bits, NULL);
		if (err)
			extent_io_tree_panic(tree, err);
		cache_state(prealloc, cached_state);
		prealloc = NULL;
		start = this_end + 1;
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		prealloc = alloc_extent_state_atomic(prealloc);
		if (!prealloc) {
			err = -ENOMEM;
			goto out;
		}

		err = split_state(tree, state, prealloc, end + 1);
		if (err)
			extent_io_tree_panic(tree, err);

		set_state_bits(tree, prealloc, &bits, NULL);
		cache_state(prealloc, cached_state);
		clear_state_bit(tree, prealloc, &clear_bits, 0, NULL);
		prealloc = NULL;
		goto out;
	}

search_again:
	if (start > end)
		goto out;
	/* Drop the lock before looping; allocation failure is fatal from now on. */
	spin_unlock(&tree->lock);
	cond_resched();
	first_iteration = false;
	goto again;

out:
	spin_unlock(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;
}

/* wrappers around set/clear extent bit */
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
			   unsigned bits, struct extent_changeset *changeset)
{
	/*
	 * We don't support EXTENT_LOCKED yet, as current changeset will
	 * record any bits changed, so for EXTENT_LOCKED case, it will
	 * either fail with -EEXIST or changeset will record the whole
	 * range.
	 */
	BUG_ON(bits & EXTENT_LOCKED);

	return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
				changeset);
}

/*
 * Set bits without sleeping: uses GFP_NOWAIT for any extent state
 * allocations, so this is safe in atomic-ish contexts.
 */
int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
			   unsigned bits)
{
	return __set_extent_bit(tree, start, end, bits, 0, NULL, NULL,
				GFP_NOWAIT, NULL);
}

/* Clear bits on [start, end], optionally waking waiters / deleting states. */
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
		     unsigned bits, int wake, int delete,
		     struct extent_state **cached)
{
	return __clear_extent_bit(tree, start, end, bits, wake, delete,
				  cached, GFP_NOFS, NULL);
}

int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		unsigned bits, struct extent_changeset *changeset)
{
	/*
	 * Don't support EXTENT_LOCKED case, same reason as
	 * set_record_extent_bits().
	 */
	BUG_ON(bits & EXTENT_LOCKED);

	return __clear_extent_bit(tree, start, end, bits, 0, 0, NULL, GFP_NOFS,
				  changeset);
}

/*
 * either insert or lock state struct between start and end use mask to tell
 * us if waiting is desired.
 */
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
		     struct extent_state **cached_state)
{
	int err;
	u64 failed_start;

	while (1) {
		err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
				       EXTENT_LOCKED, &failed_start,
				       cached_state, GFP_NOFS, NULL);
		if (err == -EEXIST) {
			/*
			 * Part of the range is already locked: wait for it to
			 * be unlocked, then retry from the conflict point.
			 */
			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
			start = failed_start;
		} else
			break;
		WARN_ON(start > end);
	}
	return err;
}

/*
 * Try to lock [start, end] without waiting.  Returns 1 if the whole range
 * was locked, 0 if any part was already locked; in the latter case any
 * portion locked before the conflict point is unlocked again.
 */
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
	int err;
	u64 failed_start;

	err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED,
			       &failed_start, NULL, GFP_NOFS, NULL);
	if (err == -EEXIST) {
		if (failed_start > start)
			clear_extent_bit(tree, start, failed_start - 1,
					 EXTENT_LOCKED, 1, 0, NULL);
		return 0;
	}
	return 1;
}

/* Clear the dirty flag for writeback on every page in [start, end]. */
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_SHIFT;
	unsigned long end_index = end >> PAGE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(inode->i_mapping, index);
		BUG_ON(!page); /* Pages should be in the extent_io_tree */
		clear_page_dirty_for_io(page);
		put_page(page);
		index++;
	}
}

/* Re-dirty every page in [start, end] and account for the redirty. */
void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_SHIFT;
	unsigned long end_index = end >> PAGE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(inode->i_mapping, index);
		BUG_ON(!page); /* Pages should be in the extent_io_tree */
		__set_page_dirty_nobuffers(page);
		account_page_redirty(page);
		put_page(page);
		index++;
	}
}

/* find the first state struct with 'bits' set after 'start', and
 * return it.  tree->lock must be held.  NULL will returned if
 * nothing was found after 'start'
 */
static struct extent_state *
find_first_extent_bit_state(struct extent_io_tree *tree,
			    u64 start, unsigned bits)
{
	struct rb_node *node;
	struct extent_state *state;

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, start);
	if (!node)
		goto out;

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->end >= start && (state->state & bits))
			return state;

		node = rb_next(node);
		if (!node)
			break;
	}
out:
	return NULL;
}

/*
 * find the first offset in the io tree with 'bits' set. zero is
 * returned if we find something, and *start_ret and *end_ret are
 * set to reflect the state struct that was found.
 *
 * If nothing was found, 1 is returned. If found something, return 0.
 */
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
			  u64 *start_ret, u64 *end_ret, unsigned bits,
			  struct extent_state **cached_state)
{
	struct extent_state *state;
	int ret = 1;

	spin_lock(&tree->lock);
	if (cached_state && *cached_state) {
		state = *cached_state;
		/*
		 * Fast path: the cached state ends right before 'start', so
		 * walk forward from it instead of searching from the root.
		 * Either way the cache is consumed (freed) here.
		 */
		if (state->end == start - 1 && extent_state_in_tree(state)) {
			while ((state = next_state(state)) != NULL) {
				if (state->state & bits)
					goto got_it;
			}
			free_extent_state(*cached_state);
			*cached_state = NULL;
			goto out;
		}
		free_extent_state(*cached_state);
		*cached_state = NULL;
	}

	state = find_first_extent_bit_state(tree, start, bits);
got_it:
	if (state) {
		cache_state_if_flags(state, cached_state, 0);
		*start_ret = state->start;
		*end_ret = state->end;
		ret = 0;
	}
out:
	spin_unlock(&tree->lock);
	return ret;
}

/**
 * find_contiguous_extent_bit: find a contiguous area of bits
 * @tree - io tree to check
 * @start - offset to start the search from
 * @start_ret - the first offset we found with the bits set
 * @end_ret - the final contiguous range of the bits that were set
 * @bits - bits to look for
 *
 * set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
 * to set bits appropriately, and then merge them again. During this time it
 * will drop the tree->lock, so use this helper if you want to find the actual
 * contiguous area for given bits. We will search to the first bit we find, and
 * then walk down the tree until we find a non-contiguous area. The area
 * returned will be the full contiguous area with the bits set.
 *
 * Returns 0 if a range was found, 1 otherwise.
 */
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
			       u64 *start_ret, u64 *end_ret, unsigned bits)
{
	struct extent_state *state;
	int ret = 1;

	spin_lock(&tree->lock);
	state = find_first_extent_bit_state(tree, start, bits);
	if (state) {
		*start_ret = state->start;
		*end_ret = state->end;
		/* Extend *end_ret while successor states are adjacent. */
		while ((state = next_state(state)) != NULL) {
			if (state->start > (*end_ret + 1))
				break;
			*end_ret = state->end;
		}
		ret = 0;
	}
	spin_unlock(&tree->lock);
	return ret;
}

/**
 * find_first_clear_extent_bit - find the first range that has @bits not set.
 * This range could start before @start.
 *
 * @tree - the tree to search
 * @start - the offset at/after which the found extent should start
 * @start_ret - records the beginning of the range
 * @end_ret - records the end of the range (inclusive)
 * @bits - the set of bits which must be unset
 *
 * Since unallocated range is also considered one which doesn't have the bits
 * set it's possible that @end_ret contains -1, this happens in case the range
 * spans (last_range_end, end of device]. In this case it's up to the caller to
 * trim @end_ret to the appropriate size.
 */
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
				 u64 *start_ret, u64 *end_ret, unsigned bits)
{
	struct extent_state *state;
	struct rb_node *node, *prev = NULL, *next;

	spin_lock(&tree->lock);

	/* Find first extent with bits cleared */
	while (1) {
		node = __etree_search(tree, start, &next, &prev, NULL, NULL);
		if (!node && !next && !prev) {
			/*
			 * Tree is completely empty, send full range and let
			 * caller deal with it
			 */
			*start_ret = 0;
			*end_ret = -1;
			goto out;
		} else if (!node && !next) {
			/*
			 * We are past the last allocated chunk, set start at
			 * the end of the last extent.
			 */
			state = rb_entry(prev, struct extent_state, rb_node);
			*start_ret = state->end + 1;
			*end_ret = -1;
			goto out;
		} else if (!node) {
			node = next;
		}
		/*
		 * At this point 'node' either contains 'start' or start is
		 * before 'node'
		 */
		state = rb_entry(node, struct extent_state, rb_node);

		if (in_range(start, state->start, state->end - state->start + 1)) {
			if (state->state & bits) {
				/*
				 * |--range with bits sets--|
				 *    |
				 *    start
				 */
				start = state->end + 1;
			} else {
				/*
				 * 'start' falls within a range that doesn't
				 * have the bits set, so take its start as
				 * the beginning of the desired range
				 *
				 * |--range with bits cleared----|
				 *      |
				 *      start
				 */
				*start_ret = state->start;
				break;
			}
		} else {
			/*
			 * |---prev range---|---hole/unset---|---node range---|
			 *                          |
			 *                          start
			 *
			 * or
			 *
			 * |---hole/unset--||--first node--|
			 * 0   |
			 *     start
			 */
			if (prev) {
				state = rb_entry(prev, struct extent_state,
						 rb_node);
				*start_ret = state->end + 1;
			} else {
				*start_ret = 0;
			}
			break;
		}
	}

	/*
	 * Find the longest stretch from start until an entry which has the
	 * bits set
	 */
	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->end >= start && !(state->state & bits)) {
			*end_ret = state->end;
		} else {
			*end_ret = state->start - 1;
			break;
		}

		node = rb_next(node);
		if (!node)
			break;
	}
out:
	spin_unlock(&tree->lock);
}

/*
 * find a contiguous range of bytes in the file marked as delalloc, not
 * more than 'max_bytes'.  start and end are used to return the range,
 *
 * true is returned if we find something, false if nothing was in the tree
 */
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
			       u64 *end, u64 max_bytes,
			       struct extent_state **cached_state)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = *start;
	bool found = false;
	u64 total_bytes = 0;

	spin_lock(&tree->lock);

	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(tree, cur_start);
	if (!node) {
		*end = (u64)-1;
		goto out;
	}

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		/*
		 * Stop extending once states are no longer adjacent, or an
		 * EXTENT_BOUNDARY state is hit (it must not be merged over).
		 */
		if (found && (state->start != cur_start ||
			      (state->state & EXTENT_BOUNDARY))) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			if (!found)
				*end = state->end;
			goto out;
		}
		if (!found) {
			*start = state->start;
			/* Cache the first state; caller owns the extra ref. */
			*cached_state = state;
			refcount_inc(&state->refs);
		}
		found = true;
		*end = state->end;
		cur_start = state->end + 1;
		node = rb_next(node);
		total_bytes += state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
		if (!node)
			break;
	}
out:
	spin_unlock(&tree->lock);
	return found;
}

static int __process_pages_contig(struct address_space *mapping,
				  struct page *locked_page,
				  pgoff_t start_index, pgoff_t end_index,
				  unsigned long page_ops, pgoff_t *index_ret);

/*
 * Unlock the pages covering [start, end], skipping the case where the
 * range is just the already-locked @locked_page.
 */
static noinline void __unlock_for_delalloc(struct inode *inode,
					   struct page *locked_page,
					   u64 start, u64 end)
{
	unsigned long index = start >> PAGE_SHIFT;
	unsigned long end_index = end >> PAGE_SHIFT;

	ASSERT(locked_page);
	if (index == locked_page->index && end_index == index)
		return;

	__process_pages_contig(inode->i_mapping, locked_page, index, end_index,
			       PAGE_UNLOCK, NULL);
}

/*
 * Lock all pages in [delalloc_start, delalloc_end].  Returns 0 on success
 * (trivially so when the range is only @locked_page); on -EAGAIN any pages
 * locked before the failure point are unlocked again.
 */
static noinline int lock_delalloc_pages(struct inode *inode,
					struct page *locked_page,
					u64 delalloc_start,
					u64 delalloc_end)
{
	unsigned long index = delalloc_start >> PAGE_SHIFT;
	unsigned long index_ret = index;
	unsigned long end_index = delalloc_end >> PAGE_SHIFT;
	int ret;

	ASSERT(locked_page);
	if (index == locked_page->index && index == end_index)
		return 0;

	ret = __process_pages_contig(inode->i_mapping, locked_page, index,
				     end_index, PAGE_LOCK, &index_ret);
	if (ret == -EAGAIN)
		__unlock_for_delalloc(inode, locked_page, delalloc_start,
				      (u64)index_ret << PAGE_SHIFT);
	return ret;
}

/*
 * Find and lock a contiguous range of bytes in the file marked as delalloc, no
 * more than @max_bytes.
@Start and @end are used to return the range, 18558c2ecf20Sopenharmony_ci * 18568c2ecf20Sopenharmony_ci * Return: true if we find something 18578c2ecf20Sopenharmony_ci * false if nothing was in the tree 18588c2ecf20Sopenharmony_ci */ 18598c2ecf20Sopenharmony_ciEXPORT_FOR_TESTS 18608c2ecf20Sopenharmony_cinoinline_for_stack bool find_lock_delalloc_range(struct inode *inode, 18618c2ecf20Sopenharmony_ci struct page *locked_page, u64 *start, 18628c2ecf20Sopenharmony_ci u64 *end) 18638c2ecf20Sopenharmony_ci{ 18648c2ecf20Sopenharmony_ci struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 18658c2ecf20Sopenharmony_ci u64 max_bytes = BTRFS_MAX_EXTENT_SIZE; 18668c2ecf20Sopenharmony_ci u64 delalloc_start; 18678c2ecf20Sopenharmony_ci u64 delalloc_end; 18688c2ecf20Sopenharmony_ci bool found; 18698c2ecf20Sopenharmony_ci struct extent_state *cached_state = NULL; 18708c2ecf20Sopenharmony_ci int ret; 18718c2ecf20Sopenharmony_ci int loops = 0; 18728c2ecf20Sopenharmony_ci 18738c2ecf20Sopenharmony_ciagain: 18748c2ecf20Sopenharmony_ci /* step one, find a bunch of delalloc bytes starting at start */ 18758c2ecf20Sopenharmony_ci delalloc_start = *start; 18768c2ecf20Sopenharmony_ci delalloc_end = 0; 18778c2ecf20Sopenharmony_ci found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end, 18788c2ecf20Sopenharmony_ci max_bytes, &cached_state); 18798c2ecf20Sopenharmony_ci if (!found || delalloc_end <= *start) { 18808c2ecf20Sopenharmony_ci *start = delalloc_start; 18818c2ecf20Sopenharmony_ci *end = delalloc_end; 18828c2ecf20Sopenharmony_ci free_extent_state(cached_state); 18838c2ecf20Sopenharmony_ci return false; 18848c2ecf20Sopenharmony_ci } 18858c2ecf20Sopenharmony_ci 18868c2ecf20Sopenharmony_ci /* 18878c2ecf20Sopenharmony_ci * start comes from the offset of locked_page. 
We have to lock 18888c2ecf20Sopenharmony_ci * pages in order, so we can't process delalloc bytes before 18898c2ecf20Sopenharmony_ci * locked_page 18908c2ecf20Sopenharmony_ci */ 18918c2ecf20Sopenharmony_ci if (delalloc_start < *start) 18928c2ecf20Sopenharmony_ci delalloc_start = *start; 18938c2ecf20Sopenharmony_ci 18948c2ecf20Sopenharmony_ci /* 18958c2ecf20Sopenharmony_ci * make sure to limit the number of pages we try to lock down 18968c2ecf20Sopenharmony_ci */ 18978c2ecf20Sopenharmony_ci if (delalloc_end + 1 - delalloc_start > max_bytes) 18988c2ecf20Sopenharmony_ci delalloc_end = delalloc_start + max_bytes - 1; 18998c2ecf20Sopenharmony_ci 19008c2ecf20Sopenharmony_ci /* step two, lock all the pages after the page that has start */ 19018c2ecf20Sopenharmony_ci ret = lock_delalloc_pages(inode, locked_page, 19028c2ecf20Sopenharmony_ci delalloc_start, delalloc_end); 19038c2ecf20Sopenharmony_ci ASSERT(!ret || ret == -EAGAIN); 19048c2ecf20Sopenharmony_ci if (ret == -EAGAIN) { 19058c2ecf20Sopenharmony_ci /* some of the pages are gone, lets avoid looping by 19068c2ecf20Sopenharmony_ci * shortening the size of the delalloc range we're searching 19078c2ecf20Sopenharmony_ci */ 19088c2ecf20Sopenharmony_ci free_extent_state(cached_state); 19098c2ecf20Sopenharmony_ci cached_state = NULL; 19108c2ecf20Sopenharmony_ci if (!loops) { 19118c2ecf20Sopenharmony_ci max_bytes = PAGE_SIZE; 19128c2ecf20Sopenharmony_ci loops = 1; 19138c2ecf20Sopenharmony_ci goto again; 19148c2ecf20Sopenharmony_ci } else { 19158c2ecf20Sopenharmony_ci found = false; 19168c2ecf20Sopenharmony_ci goto out_failed; 19178c2ecf20Sopenharmony_ci } 19188c2ecf20Sopenharmony_ci } 19198c2ecf20Sopenharmony_ci 19208c2ecf20Sopenharmony_ci /* step three, lock the state bits for the whole range */ 19218c2ecf20Sopenharmony_ci lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state); 19228c2ecf20Sopenharmony_ci 19238c2ecf20Sopenharmony_ci /* then test to make sure it is all still delalloc */ 19248c2ecf20Sopenharmony_ci 
ret = test_range_bit(tree, delalloc_start, delalloc_end, 19258c2ecf20Sopenharmony_ci EXTENT_DELALLOC, 1, cached_state); 19268c2ecf20Sopenharmony_ci if (!ret) { 19278c2ecf20Sopenharmony_ci unlock_extent_cached(tree, delalloc_start, delalloc_end, 19288c2ecf20Sopenharmony_ci &cached_state); 19298c2ecf20Sopenharmony_ci __unlock_for_delalloc(inode, locked_page, 19308c2ecf20Sopenharmony_ci delalloc_start, delalloc_end); 19318c2ecf20Sopenharmony_ci cond_resched(); 19328c2ecf20Sopenharmony_ci goto again; 19338c2ecf20Sopenharmony_ci } 19348c2ecf20Sopenharmony_ci free_extent_state(cached_state); 19358c2ecf20Sopenharmony_ci *start = delalloc_start; 19368c2ecf20Sopenharmony_ci *end = delalloc_end; 19378c2ecf20Sopenharmony_ciout_failed: 19388c2ecf20Sopenharmony_ci return found; 19398c2ecf20Sopenharmony_ci} 19408c2ecf20Sopenharmony_ci 19418c2ecf20Sopenharmony_cistatic int __process_pages_contig(struct address_space *mapping, 19428c2ecf20Sopenharmony_ci struct page *locked_page, 19438c2ecf20Sopenharmony_ci pgoff_t start_index, pgoff_t end_index, 19448c2ecf20Sopenharmony_ci unsigned long page_ops, pgoff_t *index_ret) 19458c2ecf20Sopenharmony_ci{ 19468c2ecf20Sopenharmony_ci unsigned long nr_pages = end_index - start_index + 1; 19478c2ecf20Sopenharmony_ci unsigned long pages_locked = 0; 19488c2ecf20Sopenharmony_ci pgoff_t index = start_index; 19498c2ecf20Sopenharmony_ci struct page *pages[16]; 19508c2ecf20Sopenharmony_ci unsigned ret; 19518c2ecf20Sopenharmony_ci int err = 0; 19528c2ecf20Sopenharmony_ci int i; 19538c2ecf20Sopenharmony_ci 19548c2ecf20Sopenharmony_ci if (page_ops & PAGE_LOCK) { 19558c2ecf20Sopenharmony_ci ASSERT(page_ops == PAGE_LOCK); 19568c2ecf20Sopenharmony_ci ASSERT(index_ret && *index_ret == start_index); 19578c2ecf20Sopenharmony_ci } 19588c2ecf20Sopenharmony_ci 19598c2ecf20Sopenharmony_ci if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0) 19608c2ecf20Sopenharmony_ci mapping_set_error(mapping, -EIO); 19618c2ecf20Sopenharmony_ci 19628c2ecf20Sopenharmony_ci while 
(nr_pages > 0) { 19638c2ecf20Sopenharmony_ci ret = find_get_pages_contig(mapping, index, 19648c2ecf20Sopenharmony_ci min_t(unsigned long, 19658c2ecf20Sopenharmony_ci nr_pages, ARRAY_SIZE(pages)), pages); 19668c2ecf20Sopenharmony_ci if (ret == 0) { 19678c2ecf20Sopenharmony_ci /* 19688c2ecf20Sopenharmony_ci * Only if we're going to lock these pages, 19698c2ecf20Sopenharmony_ci * can we find nothing at @index. 19708c2ecf20Sopenharmony_ci */ 19718c2ecf20Sopenharmony_ci ASSERT(page_ops & PAGE_LOCK); 19728c2ecf20Sopenharmony_ci err = -EAGAIN; 19738c2ecf20Sopenharmony_ci goto out; 19748c2ecf20Sopenharmony_ci } 19758c2ecf20Sopenharmony_ci 19768c2ecf20Sopenharmony_ci for (i = 0; i < ret; i++) { 19778c2ecf20Sopenharmony_ci if (page_ops & PAGE_SET_PRIVATE2) 19788c2ecf20Sopenharmony_ci SetPagePrivate2(pages[i]); 19798c2ecf20Sopenharmony_ci 19808c2ecf20Sopenharmony_ci if (locked_page && pages[i] == locked_page) { 19818c2ecf20Sopenharmony_ci put_page(pages[i]); 19828c2ecf20Sopenharmony_ci pages_locked++; 19838c2ecf20Sopenharmony_ci continue; 19848c2ecf20Sopenharmony_ci } 19858c2ecf20Sopenharmony_ci if (page_ops & PAGE_CLEAR_DIRTY) 19868c2ecf20Sopenharmony_ci clear_page_dirty_for_io(pages[i]); 19878c2ecf20Sopenharmony_ci if (page_ops & PAGE_SET_WRITEBACK) 19888c2ecf20Sopenharmony_ci set_page_writeback(pages[i]); 19898c2ecf20Sopenharmony_ci if (page_ops & PAGE_SET_ERROR) 19908c2ecf20Sopenharmony_ci SetPageError(pages[i]); 19918c2ecf20Sopenharmony_ci if (page_ops & PAGE_END_WRITEBACK) 19928c2ecf20Sopenharmony_ci end_page_writeback(pages[i]); 19938c2ecf20Sopenharmony_ci if (page_ops & PAGE_UNLOCK) 19948c2ecf20Sopenharmony_ci unlock_page(pages[i]); 19958c2ecf20Sopenharmony_ci if (page_ops & PAGE_LOCK) { 19968c2ecf20Sopenharmony_ci lock_page(pages[i]); 19978c2ecf20Sopenharmony_ci if (!PageDirty(pages[i]) || 19988c2ecf20Sopenharmony_ci pages[i]->mapping != mapping) { 19998c2ecf20Sopenharmony_ci unlock_page(pages[i]); 20008c2ecf20Sopenharmony_ci for (; i < ret; i++) 
20018c2ecf20Sopenharmony_ci put_page(pages[i]); 20028c2ecf20Sopenharmony_ci err = -EAGAIN; 20038c2ecf20Sopenharmony_ci goto out; 20048c2ecf20Sopenharmony_ci } 20058c2ecf20Sopenharmony_ci } 20068c2ecf20Sopenharmony_ci put_page(pages[i]); 20078c2ecf20Sopenharmony_ci pages_locked++; 20088c2ecf20Sopenharmony_ci } 20098c2ecf20Sopenharmony_ci nr_pages -= ret; 20108c2ecf20Sopenharmony_ci index += ret; 20118c2ecf20Sopenharmony_ci cond_resched(); 20128c2ecf20Sopenharmony_ci } 20138c2ecf20Sopenharmony_ciout: 20148c2ecf20Sopenharmony_ci if (err && index_ret) 20158c2ecf20Sopenharmony_ci *index_ret = start_index + pages_locked - 1; 20168c2ecf20Sopenharmony_ci return err; 20178c2ecf20Sopenharmony_ci} 20188c2ecf20Sopenharmony_ci 20198c2ecf20Sopenharmony_civoid extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end, 20208c2ecf20Sopenharmony_ci struct page *locked_page, 20218c2ecf20Sopenharmony_ci unsigned clear_bits, 20228c2ecf20Sopenharmony_ci unsigned long page_ops) 20238c2ecf20Sopenharmony_ci{ 20248c2ecf20Sopenharmony_ci clear_extent_bit(&inode->io_tree, start, end, clear_bits, 1, 0, NULL); 20258c2ecf20Sopenharmony_ci 20268c2ecf20Sopenharmony_ci __process_pages_contig(inode->vfs_inode.i_mapping, locked_page, 20278c2ecf20Sopenharmony_ci start >> PAGE_SHIFT, end >> PAGE_SHIFT, 20288c2ecf20Sopenharmony_ci page_ops, NULL); 20298c2ecf20Sopenharmony_ci} 20308c2ecf20Sopenharmony_ci 20318c2ecf20Sopenharmony_ci/* 20328c2ecf20Sopenharmony_ci * count the number of bytes in the tree that have a given bit(s) 20338c2ecf20Sopenharmony_ci * set. This can be fairly slow, except for EXTENT_DIRTY which is 20348c2ecf20Sopenharmony_ci * cached. The total number found is returned. 
20358c2ecf20Sopenharmony_ci */ 20368c2ecf20Sopenharmony_ciu64 count_range_bits(struct extent_io_tree *tree, 20378c2ecf20Sopenharmony_ci u64 *start, u64 search_end, u64 max_bytes, 20388c2ecf20Sopenharmony_ci unsigned bits, int contig) 20398c2ecf20Sopenharmony_ci{ 20408c2ecf20Sopenharmony_ci struct rb_node *node; 20418c2ecf20Sopenharmony_ci struct extent_state *state; 20428c2ecf20Sopenharmony_ci u64 cur_start = *start; 20438c2ecf20Sopenharmony_ci u64 total_bytes = 0; 20448c2ecf20Sopenharmony_ci u64 last = 0; 20458c2ecf20Sopenharmony_ci int found = 0; 20468c2ecf20Sopenharmony_ci 20478c2ecf20Sopenharmony_ci if (WARN_ON(search_end <= cur_start)) 20488c2ecf20Sopenharmony_ci return 0; 20498c2ecf20Sopenharmony_ci 20508c2ecf20Sopenharmony_ci spin_lock(&tree->lock); 20518c2ecf20Sopenharmony_ci if (cur_start == 0 && bits == EXTENT_DIRTY) { 20528c2ecf20Sopenharmony_ci total_bytes = tree->dirty_bytes; 20538c2ecf20Sopenharmony_ci goto out; 20548c2ecf20Sopenharmony_ci } 20558c2ecf20Sopenharmony_ci /* 20568c2ecf20Sopenharmony_ci * this search will find all the extents that end after 20578c2ecf20Sopenharmony_ci * our range starts. 
20588c2ecf20Sopenharmony_ci */ 20598c2ecf20Sopenharmony_ci node = tree_search(tree, cur_start); 20608c2ecf20Sopenharmony_ci if (!node) 20618c2ecf20Sopenharmony_ci goto out; 20628c2ecf20Sopenharmony_ci 20638c2ecf20Sopenharmony_ci while (1) { 20648c2ecf20Sopenharmony_ci state = rb_entry(node, struct extent_state, rb_node); 20658c2ecf20Sopenharmony_ci if (state->start > search_end) 20668c2ecf20Sopenharmony_ci break; 20678c2ecf20Sopenharmony_ci if (contig && found && state->start > last + 1) 20688c2ecf20Sopenharmony_ci break; 20698c2ecf20Sopenharmony_ci if (state->end >= cur_start && (state->state & bits) == bits) { 20708c2ecf20Sopenharmony_ci total_bytes += min(search_end, state->end) + 1 - 20718c2ecf20Sopenharmony_ci max(cur_start, state->start); 20728c2ecf20Sopenharmony_ci if (total_bytes >= max_bytes) 20738c2ecf20Sopenharmony_ci break; 20748c2ecf20Sopenharmony_ci if (!found) { 20758c2ecf20Sopenharmony_ci *start = max(cur_start, state->start); 20768c2ecf20Sopenharmony_ci found = 1; 20778c2ecf20Sopenharmony_ci } 20788c2ecf20Sopenharmony_ci last = state->end; 20798c2ecf20Sopenharmony_ci } else if (contig && found) { 20808c2ecf20Sopenharmony_ci break; 20818c2ecf20Sopenharmony_ci } 20828c2ecf20Sopenharmony_ci node = rb_next(node); 20838c2ecf20Sopenharmony_ci if (!node) 20848c2ecf20Sopenharmony_ci break; 20858c2ecf20Sopenharmony_ci } 20868c2ecf20Sopenharmony_ciout: 20878c2ecf20Sopenharmony_ci spin_unlock(&tree->lock); 20888c2ecf20Sopenharmony_ci return total_bytes; 20898c2ecf20Sopenharmony_ci} 20908c2ecf20Sopenharmony_ci 20918c2ecf20Sopenharmony_ci/* 20928c2ecf20Sopenharmony_ci * set the private field for a given byte offset in the tree. If there isn't 20938c2ecf20Sopenharmony_ci * an extent_state there already, this does nothing. 
20948c2ecf20Sopenharmony_ci */ 20958c2ecf20Sopenharmony_ciint set_state_failrec(struct extent_io_tree *tree, u64 start, 20968c2ecf20Sopenharmony_ci struct io_failure_record *failrec) 20978c2ecf20Sopenharmony_ci{ 20988c2ecf20Sopenharmony_ci struct rb_node *node; 20998c2ecf20Sopenharmony_ci struct extent_state *state; 21008c2ecf20Sopenharmony_ci int ret = 0; 21018c2ecf20Sopenharmony_ci 21028c2ecf20Sopenharmony_ci spin_lock(&tree->lock); 21038c2ecf20Sopenharmony_ci /* 21048c2ecf20Sopenharmony_ci * this search will find all the extents that end after 21058c2ecf20Sopenharmony_ci * our range starts. 21068c2ecf20Sopenharmony_ci */ 21078c2ecf20Sopenharmony_ci node = tree_search(tree, start); 21088c2ecf20Sopenharmony_ci if (!node) { 21098c2ecf20Sopenharmony_ci ret = -ENOENT; 21108c2ecf20Sopenharmony_ci goto out; 21118c2ecf20Sopenharmony_ci } 21128c2ecf20Sopenharmony_ci state = rb_entry(node, struct extent_state, rb_node); 21138c2ecf20Sopenharmony_ci if (state->start != start) { 21148c2ecf20Sopenharmony_ci ret = -ENOENT; 21158c2ecf20Sopenharmony_ci goto out; 21168c2ecf20Sopenharmony_ci } 21178c2ecf20Sopenharmony_ci state->failrec = failrec; 21188c2ecf20Sopenharmony_ciout: 21198c2ecf20Sopenharmony_ci spin_unlock(&tree->lock); 21208c2ecf20Sopenharmony_ci return ret; 21218c2ecf20Sopenharmony_ci} 21228c2ecf20Sopenharmony_ci 21238c2ecf20Sopenharmony_cistruct io_failure_record *get_state_failrec(struct extent_io_tree *tree, u64 start) 21248c2ecf20Sopenharmony_ci{ 21258c2ecf20Sopenharmony_ci struct rb_node *node; 21268c2ecf20Sopenharmony_ci struct extent_state *state; 21278c2ecf20Sopenharmony_ci struct io_failure_record *failrec; 21288c2ecf20Sopenharmony_ci 21298c2ecf20Sopenharmony_ci spin_lock(&tree->lock); 21308c2ecf20Sopenharmony_ci /* 21318c2ecf20Sopenharmony_ci * this search will find all the extents that end after 21328c2ecf20Sopenharmony_ci * our range starts. 
21338c2ecf20Sopenharmony_ci */ 21348c2ecf20Sopenharmony_ci node = tree_search(tree, start); 21358c2ecf20Sopenharmony_ci if (!node) { 21368c2ecf20Sopenharmony_ci failrec = ERR_PTR(-ENOENT); 21378c2ecf20Sopenharmony_ci goto out; 21388c2ecf20Sopenharmony_ci } 21398c2ecf20Sopenharmony_ci state = rb_entry(node, struct extent_state, rb_node); 21408c2ecf20Sopenharmony_ci if (state->start != start) { 21418c2ecf20Sopenharmony_ci failrec = ERR_PTR(-ENOENT); 21428c2ecf20Sopenharmony_ci goto out; 21438c2ecf20Sopenharmony_ci } 21448c2ecf20Sopenharmony_ci 21458c2ecf20Sopenharmony_ci failrec = state->failrec; 21468c2ecf20Sopenharmony_ciout: 21478c2ecf20Sopenharmony_ci spin_unlock(&tree->lock); 21488c2ecf20Sopenharmony_ci return failrec; 21498c2ecf20Sopenharmony_ci} 21508c2ecf20Sopenharmony_ci 21518c2ecf20Sopenharmony_ci/* 21528c2ecf20Sopenharmony_ci * searches a range in the state tree for a given mask. 21538c2ecf20Sopenharmony_ci * If 'filled' == 1, this returns 1 only if every extent in the tree 21548c2ecf20Sopenharmony_ci * has the bits set. Otherwise, 1 is returned if any bit in the 21558c2ecf20Sopenharmony_ci * range is found set. 
21568c2ecf20Sopenharmony_ci */ 21578c2ecf20Sopenharmony_ciint test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, 21588c2ecf20Sopenharmony_ci unsigned bits, int filled, struct extent_state *cached) 21598c2ecf20Sopenharmony_ci{ 21608c2ecf20Sopenharmony_ci struct extent_state *state = NULL; 21618c2ecf20Sopenharmony_ci struct rb_node *node; 21628c2ecf20Sopenharmony_ci int bitset = 0; 21638c2ecf20Sopenharmony_ci 21648c2ecf20Sopenharmony_ci spin_lock(&tree->lock); 21658c2ecf20Sopenharmony_ci if (cached && extent_state_in_tree(cached) && cached->start <= start && 21668c2ecf20Sopenharmony_ci cached->end > start) 21678c2ecf20Sopenharmony_ci node = &cached->rb_node; 21688c2ecf20Sopenharmony_ci else 21698c2ecf20Sopenharmony_ci node = tree_search(tree, start); 21708c2ecf20Sopenharmony_ci while (node && start <= end) { 21718c2ecf20Sopenharmony_ci state = rb_entry(node, struct extent_state, rb_node); 21728c2ecf20Sopenharmony_ci 21738c2ecf20Sopenharmony_ci if (filled && state->start > start) { 21748c2ecf20Sopenharmony_ci bitset = 0; 21758c2ecf20Sopenharmony_ci break; 21768c2ecf20Sopenharmony_ci } 21778c2ecf20Sopenharmony_ci 21788c2ecf20Sopenharmony_ci if (state->start > end) 21798c2ecf20Sopenharmony_ci break; 21808c2ecf20Sopenharmony_ci 21818c2ecf20Sopenharmony_ci if (state->state & bits) { 21828c2ecf20Sopenharmony_ci bitset = 1; 21838c2ecf20Sopenharmony_ci if (!filled) 21848c2ecf20Sopenharmony_ci break; 21858c2ecf20Sopenharmony_ci } else if (filled) { 21868c2ecf20Sopenharmony_ci bitset = 0; 21878c2ecf20Sopenharmony_ci break; 21888c2ecf20Sopenharmony_ci } 21898c2ecf20Sopenharmony_ci 21908c2ecf20Sopenharmony_ci if (state->end == (u64)-1) 21918c2ecf20Sopenharmony_ci break; 21928c2ecf20Sopenharmony_ci 21938c2ecf20Sopenharmony_ci start = state->end + 1; 21948c2ecf20Sopenharmony_ci if (start > end) 21958c2ecf20Sopenharmony_ci break; 21968c2ecf20Sopenharmony_ci node = rb_next(node); 21978c2ecf20Sopenharmony_ci if (!node) { 21988c2ecf20Sopenharmony_ci if (filled) 
21998c2ecf20Sopenharmony_ci bitset = 0; 22008c2ecf20Sopenharmony_ci break; 22018c2ecf20Sopenharmony_ci } 22028c2ecf20Sopenharmony_ci } 22038c2ecf20Sopenharmony_ci spin_unlock(&tree->lock); 22048c2ecf20Sopenharmony_ci return bitset; 22058c2ecf20Sopenharmony_ci} 22068c2ecf20Sopenharmony_ci 22078c2ecf20Sopenharmony_ci/* 22088c2ecf20Sopenharmony_ci * helper function to set a given page up to date if all the 22098c2ecf20Sopenharmony_ci * extents in the tree for that page are up to date 22108c2ecf20Sopenharmony_ci */ 22118c2ecf20Sopenharmony_cistatic void check_page_uptodate(struct extent_io_tree *tree, struct page *page) 22128c2ecf20Sopenharmony_ci{ 22138c2ecf20Sopenharmony_ci u64 start = page_offset(page); 22148c2ecf20Sopenharmony_ci u64 end = start + PAGE_SIZE - 1; 22158c2ecf20Sopenharmony_ci if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) 22168c2ecf20Sopenharmony_ci SetPageUptodate(page); 22178c2ecf20Sopenharmony_ci} 22188c2ecf20Sopenharmony_ci 22198c2ecf20Sopenharmony_ciint free_io_failure(struct extent_io_tree *failure_tree, 22208c2ecf20Sopenharmony_ci struct extent_io_tree *io_tree, 22218c2ecf20Sopenharmony_ci struct io_failure_record *rec) 22228c2ecf20Sopenharmony_ci{ 22238c2ecf20Sopenharmony_ci int ret; 22248c2ecf20Sopenharmony_ci int err = 0; 22258c2ecf20Sopenharmony_ci 22268c2ecf20Sopenharmony_ci set_state_failrec(failure_tree, rec->start, NULL); 22278c2ecf20Sopenharmony_ci ret = clear_extent_bits(failure_tree, rec->start, 22288c2ecf20Sopenharmony_ci rec->start + rec->len - 1, 22298c2ecf20Sopenharmony_ci EXTENT_LOCKED | EXTENT_DIRTY); 22308c2ecf20Sopenharmony_ci if (ret) 22318c2ecf20Sopenharmony_ci err = ret; 22328c2ecf20Sopenharmony_ci 22338c2ecf20Sopenharmony_ci ret = clear_extent_bits(io_tree, rec->start, 22348c2ecf20Sopenharmony_ci rec->start + rec->len - 1, 22358c2ecf20Sopenharmony_ci EXTENT_DAMAGED); 22368c2ecf20Sopenharmony_ci if (ret && !err) 22378c2ecf20Sopenharmony_ci err = ret; 22388c2ecf20Sopenharmony_ci 22398c2ecf20Sopenharmony_ci 
kfree(rec); 22408c2ecf20Sopenharmony_ci return err; 22418c2ecf20Sopenharmony_ci} 22428c2ecf20Sopenharmony_ci 22438c2ecf20Sopenharmony_ci/* 22448c2ecf20Sopenharmony_ci * this bypasses the standard btrfs submit functions deliberately, as 22458c2ecf20Sopenharmony_ci * the standard behavior is to write all copies in a raid setup. here we only 22468c2ecf20Sopenharmony_ci * want to write the one bad copy. so we do the mapping for ourselves and issue 22478c2ecf20Sopenharmony_ci * submit_bio directly. 22488c2ecf20Sopenharmony_ci * to avoid any synchronization issues, wait for the data after writing, which 22498c2ecf20Sopenharmony_ci * actually prevents the read that triggered the error from finishing. 22508c2ecf20Sopenharmony_ci * currently, there can be no more than two copies of every data bit. thus, 22518c2ecf20Sopenharmony_ci * exactly one rewrite is required. 22528c2ecf20Sopenharmony_ci */ 22538c2ecf20Sopenharmony_ciint repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, 22548c2ecf20Sopenharmony_ci u64 length, u64 logical, struct page *page, 22558c2ecf20Sopenharmony_ci unsigned int pg_offset, int mirror_num) 22568c2ecf20Sopenharmony_ci{ 22578c2ecf20Sopenharmony_ci struct bio *bio; 22588c2ecf20Sopenharmony_ci struct btrfs_device *dev; 22598c2ecf20Sopenharmony_ci u64 map_length = 0; 22608c2ecf20Sopenharmony_ci u64 sector; 22618c2ecf20Sopenharmony_ci struct btrfs_bio *bbio = NULL; 22628c2ecf20Sopenharmony_ci int ret; 22638c2ecf20Sopenharmony_ci 22648c2ecf20Sopenharmony_ci ASSERT(!(fs_info->sb->s_flags & SB_RDONLY)); 22658c2ecf20Sopenharmony_ci BUG_ON(!mirror_num); 22668c2ecf20Sopenharmony_ci 22678c2ecf20Sopenharmony_ci bio = btrfs_io_bio_alloc(1); 22688c2ecf20Sopenharmony_ci bio->bi_iter.bi_size = 0; 22698c2ecf20Sopenharmony_ci map_length = length; 22708c2ecf20Sopenharmony_ci 22718c2ecf20Sopenharmony_ci /* 22728c2ecf20Sopenharmony_ci * Avoid races with device replace and make sure our bbio has devices 22738c2ecf20Sopenharmony_ci * associated to its 
stripes that don't go away while we are doing the 22748c2ecf20Sopenharmony_ci * read repair operation. 22758c2ecf20Sopenharmony_ci */ 22768c2ecf20Sopenharmony_ci btrfs_bio_counter_inc_blocked(fs_info); 22778c2ecf20Sopenharmony_ci if (btrfs_is_parity_mirror(fs_info, logical, length)) { 22788c2ecf20Sopenharmony_ci /* 22798c2ecf20Sopenharmony_ci * Note that we don't use BTRFS_MAP_WRITE because it's supposed 22808c2ecf20Sopenharmony_ci * to update all raid stripes, but here we just want to correct 22818c2ecf20Sopenharmony_ci * bad stripe, thus BTRFS_MAP_READ is abused to only get the bad 22828c2ecf20Sopenharmony_ci * stripe's dev and sector. 22838c2ecf20Sopenharmony_ci */ 22848c2ecf20Sopenharmony_ci ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical, 22858c2ecf20Sopenharmony_ci &map_length, &bbio, 0); 22868c2ecf20Sopenharmony_ci if (ret) { 22878c2ecf20Sopenharmony_ci btrfs_bio_counter_dec(fs_info); 22888c2ecf20Sopenharmony_ci bio_put(bio); 22898c2ecf20Sopenharmony_ci return -EIO; 22908c2ecf20Sopenharmony_ci } 22918c2ecf20Sopenharmony_ci ASSERT(bbio->mirror_num == 1); 22928c2ecf20Sopenharmony_ci } else { 22938c2ecf20Sopenharmony_ci ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, 22948c2ecf20Sopenharmony_ci &map_length, &bbio, mirror_num); 22958c2ecf20Sopenharmony_ci if (ret) { 22968c2ecf20Sopenharmony_ci btrfs_bio_counter_dec(fs_info); 22978c2ecf20Sopenharmony_ci bio_put(bio); 22988c2ecf20Sopenharmony_ci return -EIO; 22998c2ecf20Sopenharmony_ci } 23008c2ecf20Sopenharmony_ci BUG_ON(mirror_num != bbio->mirror_num); 23018c2ecf20Sopenharmony_ci } 23028c2ecf20Sopenharmony_ci 23038c2ecf20Sopenharmony_ci sector = bbio->stripes[bbio->mirror_num - 1].physical >> 9; 23048c2ecf20Sopenharmony_ci bio->bi_iter.bi_sector = sector; 23058c2ecf20Sopenharmony_ci dev = bbio->stripes[bbio->mirror_num - 1].dev; 23068c2ecf20Sopenharmony_ci btrfs_put_bbio(bbio); 23078c2ecf20Sopenharmony_ci if (!dev || !dev->bdev || 23088c2ecf20Sopenharmony_ci !test_bit(BTRFS_DEV_STATE_WRITEABLE, 
&dev->dev_state)) { 23098c2ecf20Sopenharmony_ci btrfs_bio_counter_dec(fs_info); 23108c2ecf20Sopenharmony_ci bio_put(bio); 23118c2ecf20Sopenharmony_ci return -EIO; 23128c2ecf20Sopenharmony_ci } 23138c2ecf20Sopenharmony_ci bio_set_dev(bio, dev->bdev); 23148c2ecf20Sopenharmony_ci bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; 23158c2ecf20Sopenharmony_ci bio_add_page(bio, page, length, pg_offset); 23168c2ecf20Sopenharmony_ci 23178c2ecf20Sopenharmony_ci if (btrfsic_submit_bio_wait(bio)) { 23188c2ecf20Sopenharmony_ci /* try to remap that extent elsewhere? */ 23198c2ecf20Sopenharmony_ci btrfs_bio_counter_dec(fs_info); 23208c2ecf20Sopenharmony_ci bio_put(bio); 23218c2ecf20Sopenharmony_ci btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); 23228c2ecf20Sopenharmony_ci return -EIO; 23238c2ecf20Sopenharmony_ci } 23248c2ecf20Sopenharmony_ci 23258c2ecf20Sopenharmony_ci btrfs_info_rl_in_rcu(fs_info, 23268c2ecf20Sopenharmony_ci "read error corrected: ino %llu off %llu (dev %s sector %llu)", 23278c2ecf20Sopenharmony_ci ino, start, 23288c2ecf20Sopenharmony_ci rcu_str_deref(dev->name), sector); 23298c2ecf20Sopenharmony_ci btrfs_bio_counter_dec(fs_info); 23308c2ecf20Sopenharmony_ci bio_put(bio); 23318c2ecf20Sopenharmony_ci return 0; 23328c2ecf20Sopenharmony_ci} 23338c2ecf20Sopenharmony_ci 23348c2ecf20Sopenharmony_ciint btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num) 23358c2ecf20Sopenharmony_ci{ 23368c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = eb->fs_info; 23378c2ecf20Sopenharmony_ci u64 start = eb->start; 23388c2ecf20Sopenharmony_ci int i, num_pages = num_extent_pages(eb); 23398c2ecf20Sopenharmony_ci int ret = 0; 23408c2ecf20Sopenharmony_ci 23418c2ecf20Sopenharmony_ci if (sb_rdonly(fs_info->sb)) 23428c2ecf20Sopenharmony_ci return -EROFS; 23438c2ecf20Sopenharmony_ci 23448c2ecf20Sopenharmony_ci for (i = 0; i < num_pages; i++) { 23458c2ecf20Sopenharmony_ci struct page *p = eb->pages[i]; 23468c2ecf20Sopenharmony_ci 23478c2ecf20Sopenharmony_ci ret = 
repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p, 23488c2ecf20Sopenharmony_ci start - page_offset(p), mirror_num); 23498c2ecf20Sopenharmony_ci if (ret) 23508c2ecf20Sopenharmony_ci break; 23518c2ecf20Sopenharmony_ci start += PAGE_SIZE; 23528c2ecf20Sopenharmony_ci } 23538c2ecf20Sopenharmony_ci 23548c2ecf20Sopenharmony_ci return ret; 23558c2ecf20Sopenharmony_ci} 23568c2ecf20Sopenharmony_ci 23578c2ecf20Sopenharmony_ci/* 23588c2ecf20Sopenharmony_ci * each time an IO finishes, we do a fast check in the IO failure tree 23598c2ecf20Sopenharmony_ci * to see if we need to process or clean up an io_failure_record 23608c2ecf20Sopenharmony_ci */ 23618c2ecf20Sopenharmony_ciint clean_io_failure(struct btrfs_fs_info *fs_info, 23628c2ecf20Sopenharmony_ci struct extent_io_tree *failure_tree, 23638c2ecf20Sopenharmony_ci struct extent_io_tree *io_tree, u64 start, 23648c2ecf20Sopenharmony_ci struct page *page, u64 ino, unsigned int pg_offset) 23658c2ecf20Sopenharmony_ci{ 23668c2ecf20Sopenharmony_ci u64 private; 23678c2ecf20Sopenharmony_ci struct io_failure_record *failrec; 23688c2ecf20Sopenharmony_ci struct extent_state *state; 23698c2ecf20Sopenharmony_ci int num_copies; 23708c2ecf20Sopenharmony_ci int ret; 23718c2ecf20Sopenharmony_ci 23728c2ecf20Sopenharmony_ci private = 0; 23738c2ecf20Sopenharmony_ci ret = count_range_bits(failure_tree, &private, (u64)-1, 1, 23748c2ecf20Sopenharmony_ci EXTENT_DIRTY, 0); 23758c2ecf20Sopenharmony_ci if (!ret) 23768c2ecf20Sopenharmony_ci return 0; 23778c2ecf20Sopenharmony_ci 23788c2ecf20Sopenharmony_ci failrec = get_state_failrec(failure_tree, start); 23798c2ecf20Sopenharmony_ci if (IS_ERR(failrec)) 23808c2ecf20Sopenharmony_ci return 0; 23818c2ecf20Sopenharmony_ci 23828c2ecf20Sopenharmony_ci BUG_ON(!failrec->this_mirror); 23838c2ecf20Sopenharmony_ci 23848c2ecf20Sopenharmony_ci if (failrec->in_validation) { 23858c2ecf20Sopenharmony_ci /* there was no real error, just free the record */ 23868c2ecf20Sopenharmony_ci btrfs_debug(fs_info, 
23878c2ecf20Sopenharmony_ci "clean_io_failure: freeing dummy error at %llu", 23888c2ecf20Sopenharmony_ci failrec->start); 23898c2ecf20Sopenharmony_ci goto out; 23908c2ecf20Sopenharmony_ci } 23918c2ecf20Sopenharmony_ci if (sb_rdonly(fs_info->sb)) 23928c2ecf20Sopenharmony_ci goto out; 23938c2ecf20Sopenharmony_ci 23948c2ecf20Sopenharmony_ci spin_lock(&io_tree->lock); 23958c2ecf20Sopenharmony_ci state = find_first_extent_bit_state(io_tree, 23968c2ecf20Sopenharmony_ci failrec->start, 23978c2ecf20Sopenharmony_ci EXTENT_LOCKED); 23988c2ecf20Sopenharmony_ci spin_unlock(&io_tree->lock); 23998c2ecf20Sopenharmony_ci 24008c2ecf20Sopenharmony_ci if (state && state->start <= failrec->start && 24018c2ecf20Sopenharmony_ci state->end >= failrec->start + failrec->len - 1) { 24028c2ecf20Sopenharmony_ci num_copies = btrfs_num_copies(fs_info, failrec->logical, 24038c2ecf20Sopenharmony_ci failrec->len); 24048c2ecf20Sopenharmony_ci if (num_copies > 1) { 24058c2ecf20Sopenharmony_ci repair_io_failure(fs_info, ino, start, failrec->len, 24068c2ecf20Sopenharmony_ci failrec->logical, page, pg_offset, 24078c2ecf20Sopenharmony_ci failrec->failed_mirror); 24088c2ecf20Sopenharmony_ci } 24098c2ecf20Sopenharmony_ci } 24108c2ecf20Sopenharmony_ci 24118c2ecf20Sopenharmony_ciout: 24128c2ecf20Sopenharmony_ci free_io_failure(failure_tree, io_tree, failrec); 24138c2ecf20Sopenharmony_ci 24148c2ecf20Sopenharmony_ci return 0; 24158c2ecf20Sopenharmony_ci} 24168c2ecf20Sopenharmony_ci 24178c2ecf20Sopenharmony_ci/* 24188c2ecf20Sopenharmony_ci * Can be called when 24198c2ecf20Sopenharmony_ci * - hold extent lock 24208c2ecf20Sopenharmony_ci * - under ordered extent 24218c2ecf20Sopenharmony_ci * - the inode is freeing 24228c2ecf20Sopenharmony_ci */ 24238c2ecf20Sopenharmony_civoid btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end) 24248c2ecf20Sopenharmony_ci{ 24258c2ecf20Sopenharmony_ci struct extent_io_tree *failure_tree = &inode->io_failure_tree; 24268c2ecf20Sopenharmony_ci struct 
io_failure_record *failrec; 24278c2ecf20Sopenharmony_ci struct extent_state *state, *next; 24288c2ecf20Sopenharmony_ci 24298c2ecf20Sopenharmony_ci if (RB_EMPTY_ROOT(&failure_tree->state)) 24308c2ecf20Sopenharmony_ci return; 24318c2ecf20Sopenharmony_ci 24328c2ecf20Sopenharmony_ci spin_lock(&failure_tree->lock); 24338c2ecf20Sopenharmony_ci state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY); 24348c2ecf20Sopenharmony_ci while (state) { 24358c2ecf20Sopenharmony_ci if (state->start > end) 24368c2ecf20Sopenharmony_ci break; 24378c2ecf20Sopenharmony_ci 24388c2ecf20Sopenharmony_ci ASSERT(state->end <= end); 24398c2ecf20Sopenharmony_ci 24408c2ecf20Sopenharmony_ci next = next_state(state); 24418c2ecf20Sopenharmony_ci 24428c2ecf20Sopenharmony_ci failrec = state->failrec; 24438c2ecf20Sopenharmony_ci free_extent_state(state); 24448c2ecf20Sopenharmony_ci kfree(failrec); 24458c2ecf20Sopenharmony_ci 24468c2ecf20Sopenharmony_ci state = next; 24478c2ecf20Sopenharmony_ci } 24488c2ecf20Sopenharmony_ci spin_unlock(&failure_tree->lock); 24498c2ecf20Sopenharmony_ci} 24508c2ecf20Sopenharmony_ci 24518c2ecf20Sopenharmony_cistatic struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode, 24528c2ecf20Sopenharmony_ci u64 start, u64 end) 24538c2ecf20Sopenharmony_ci{ 24548c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 24558c2ecf20Sopenharmony_ci struct io_failure_record *failrec; 24568c2ecf20Sopenharmony_ci struct extent_map *em; 24578c2ecf20Sopenharmony_ci struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; 24588c2ecf20Sopenharmony_ci struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 24598c2ecf20Sopenharmony_ci struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 24608c2ecf20Sopenharmony_ci int ret; 24618c2ecf20Sopenharmony_ci u64 logical; 24628c2ecf20Sopenharmony_ci 24638c2ecf20Sopenharmony_ci failrec = get_state_failrec(failure_tree, start); 24648c2ecf20Sopenharmony_ci if (!IS_ERR(failrec)) 
{ 24658c2ecf20Sopenharmony_ci btrfs_debug(fs_info, 24668c2ecf20Sopenharmony_ci "Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d", 24678c2ecf20Sopenharmony_ci failrec->logical, failrec->start, failrec->len, 24688c2ecf20Sopenharmony_ci failrec->in_validation); 24698c2ecf20Sopenharmony_ci /* 24708c2ecf20Sopenharmony_ci * when data can be on disk more than twice, add to failrec here 24718c2ecf20Sopenharmony_ci * (e.g. with a list for failed_mirror) to make 24728c2ecf20Sopenharmony_ci * clean_io_failure() clean all those errors at once. 24738c2ecf20Sopenharmony_ci */ 24748c2ecf20Sopenharmony_ci 24758c2ecf20Sopenharmony_ci return failrec; 24768c2ecf20Sopenharmony_ci } 24778c2ecf20Sopenharmony_ci 24788c2ecf20Sopenharmony_ci failrec = kzalloc(sizeof(*failrec), GFP_NOFS); 24798c2ecf20Sopenharmony_ci if (!failrec) 24808c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 24818c2ecf20Sopenharmony_ci 24828c2ecf20Sopenharmony_ci failrec->start = start; 24838c2ecf20Sopenharmony_ci failrec->len = end - start + 1; 24848c2ecf20Sopenharmony_ci failrec->this_mirror = 0; 24858c2ecf20Sopenharmony_ci failrec->bio_flags = 0; 24868c2ecf20Sopenharmony_ci failrec->in_validation = 0; 24878c2ecf20Sopenharmony_ci 24888c2ecf20Sopenharmony_ci read_lock(&em_tree->lock); 24898c2ecf20Sopenharmony_ci em = lookup_extent_mapping(em_tree, start, failrec->len); 24908c2ecf20Sopenharmony_ci if (!em) { 24918c2ecf20Sopenharmony_ci read_unlock(&em_tree->lock); 24928c2ecf20Sopenharmony_ci kfree(failrec); 24938c2ecf20Sopenharmony_ci return ERR_PTR(-EIO); 24948c2ecf20Sopenharmony_ci } 24958c2ecf20Sopenharmony_ci 24968c2ecf20Sopenharmony_ci if (em->start > start || em->start + em->len <= start) { 24978c2ecf20Sopenharmony_ci free_extent_map(em); 24988c2ecf20Sopenharmony_ci em = NULL; 24998c2ecf20Sopenharmony_ci } 25008c2ecf20Sopenharmony_ci read_unlock(&em_tree->lock); 25018c2ecf20Sopenharmony_ci if (!em) { 25028c2ecf20Sopenharmony_ci kfree(failrec); 25038c2ecf20Sopenharmony_ci return 
		       ERR_PTR(-EIO);
	}

	logical = start - em->start;
	logical = em->block_start + logical;
	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
		/* Compressed extents are addressed by the extent start. */
		logical = em->block_start;
		failrec->bio_flags = EXTENT_BIO_COMPRESSED;
		extent_set_compress_type(&failrec->bio_flags, em->compress_type);
	}

	btrfs_debug(fs_info,
		    "Get IO Failure Record: (new) logical=%llu, start=%llu, len=%llu",
		    logical, start, failrec->len);

	failrec->logical = logical;
	free_extent_map(em);

	/* Set the bits in the private failure tree */
	ret = set_extent_bits(failure_tree, start, end,
			      EXTENT_LOCKED | EXTENT_DIRTY);
	if (ret >= 0) {
		ret = set_state_failrec(failure_tree, start, failrec);
		/* Set the bits in the inode's tree */
		ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED);
	} else if (ret < 0) {
		kfree(failrec);
		return ERR_PTR(ret);
	}

	return failrec;
}

/*
 * Decide whether a failed read can be retried from another mirror and, if
 * so, select the next mirror to try (recorded in failrec->this_mirror).
 *
 * @inode:            inode the failed read belongs to
 * @needs_validation: true when the failed bio spanned more than one sector,
 *                    so each sector must be re-read and checked individually
 * @failrec:          failure record covering the failed range
 * @failed_mirror:    mirror number the failing read was issued against
 *
 * Returns false when a retry is pointless (only one copy of the data, or
 * all mirrors already tried); true when another read should be submitted.
 */
static bool btrfs_check_repairable(struct inode *inode, bool needs_validation,
				   struct io_failure_record *failrec,
				   int failed_mirror)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	int num_copies;

	num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
	if (num_copies == 1) {
		/*
		 * we only have a single copy of the data, so don't bother with
		 * all the retry and error correction code that follows. no
		 * matter what the error is, it is very likely to persist.
		 */
		btrfs_debug(fs_info,
			"Check Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
			num_copies, failrec->this_mirror, failed_mirror);
		return false;
	}

	/*
	 * there are two premises:
	 *	a) deliver good data to the caller
	 *	b) correct the bad sectors on disk
	 */
	if (needs_validation) {
		/*
		 * to fulfill b), we need to know the exact failing sectors, as
		 * we don't want to rewrite any more than the failed ones. thus,
		 * we need separate read requests for the failed bio
		 *
		 * if the following BUG_ON triggers, our validation request got
		 * merged. we need separate requests for our algorithm to work.
		 */
		BUG_ON(failrec->in_validation);
		failrec->in_validation = 1;
		failrec->this_mirror = failed_mirror;
	} else {
		/*
		 * we're ready to fulfill a) and b) alongside. get a good copy
		 * of the failed sector and if we succeed, we have setup
		 * everything for repair_io_failure to do the rest for us.
		 */
		if (failrec->in_validation) {
			/* Validation pass finished; reset for the real retry. */
			BUG_ON(failrec->this_mirror != failed_mirror);
			failrec->in_validation = 0;
			failrec->this_mirror = 0;
		}
		failrec->failed_mirror = failed_mirror;
		/* Step to the next mirror, skipping the one that failed. */
		failrec->this_mirror++;
		if (failrec->this_mirror == failed_mirror)
			failrec->this_mirror++;
	}

	if (failrec->this_mirror > num_copies) {
		/* All mirrors have been tried; give up on this range. */
		btrfs_debug(fs_info,
			"Check Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
			num_copies, failrec->this_mirror, failed_mirror);
		return false;
	}

	return true;
}

/*
 * Return true when the failed read bio covered more than one block, in which
 * case the repair code must validate each block separately to find the exact
 * bad one(s).
 */
static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio)
{
	u64 len = 0;
	const u32 blocksize = inode->i_sb->s_blocksize;

	/*
	 * If bi_status is
	 * BLK_STS_OK, then this was a checksum error, not an
	 * I/O error. In this case, we already know exactly which sector was
	 * bad, so we don't need to validate.
	 */
	if (bio->bi_status == BLK_STS_OK)
		return false;

	/*
	 * We need to validate each sector individually if the failed I/O was
	 * for multiple sectors.
	 *
	 * There are a few possible bios that can end up here:
	 * 1. A buffered read bio, which is not cloned.
	 * 2. A direct I/O read bio, which is cloned.
	 * 3. A (buffered or direct) repair bio, which is not cloned.
	 *
	 * For cloned bios (case 2), we can get the size from
	 * btrfs_io_bio->iter; for non-cloned bios (cases 1 and 3), we can get
	 * it from the bvecs.
	 */
	if (bio_flagged(bio, BIO_CLONED)) {
		if (btrfs_io_bio(bio)->iter.bi_size > blocksize)
			return true;
	} else {
		struct bio_vec *bvec;
		int i;

		/* Sum bvec lengths; more than one block means validation. */
		bio_for_each_bvec_all(bvec, bio, i) {
			len += bvec->bv_len;
			if (len > blocksize)
				return true;
		}
	}
	return false;
}

/*
 * Build and submit a repair read for the range [start, end] that failed in
 * @failed_bio, targeting the next mirror selected by btrfs_check_repairable().
 *
 * @phy_offset is the byte offset of the failed range within the original bio
 * and is used to pick the matching checksum out of the failed bio's csum
 * array.  Returns 0 (BLK_STS_OK) once the repair bio has been handed to
 * @submit_bio_hook, or an error status after freeing the failure record when
 * repair is not possible.
 */
blk_status_t btrfs_submit_read_repair(struct inode *inode,
				      struct bio *failed_bio, u64 phy_offset,
				      struct page *page, unsigned int pgoff,
				      u64 start, u64 end, int failed_mirror,
				      submit_bio_hook_t *submit_bio_hook)
{
	struct io_failure_record *failrec;
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
	struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
	struct btrfs_io_bio *failed_io_bio = btrfs_io_bio(failed_bio);
	const int icsum = phy_offset >> inode->i_sb->s_blocksize_bits;
	bool need_validation;
	struct bio *repair_bio;
	struct btrfs_io_bio *repair_io_bio;
	blk_status_t status;

	btrfs_debug(fs_info,
		   "repair read error: read error at %llu", start);
	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);

	failrec = btrfs_get_io_failure_record(inode, start, end);
	if (IS_ERR(failrec))
		return errno_to_blk_status(PTR_ERR(failrec));

	need_validation = btrfs_io_needs_validation(inode, failed_bio);

	if (!btrfs_check_repairable(inode, need_validation, failrec,
				    failed_mirror)) {
		free_io_failure(failure_tree, tree, failrec);
		return BLK_STS_IOERR;
	}

	repair_bio = btrfs_io_bio_alloc(1);
	repair_io_bio = btrfs_io_bio(repair_bio);
	repair_bio->bi_opf = REQ_OP_READ;
	/* Fail fast during validation so errors stay attributable per sector. */
	if (need_validation)
		repair_bio->bi_opf |= REQ_FAILFAST_DEV;
	repair_bio->bi_end_io = failed_bio->bi_end_io;
	repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
	repair_bio->bi_private = failed_bio->bi_private;

	if (failed_io_bio->csum) {
		const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);

		/* Copy over the expected checksum for the sector being reread. */
		repair_io_bio->csum = repair_io_bio->csum_inline;
		memcpy(repair_io_bio->csum,
		       failed_io_bio->csum + csum_size * icsum, csum_size);
	}

	bio_add_page(repair_bio, page, failrec->len, pgoff);
	repair_io_bio->logical = failrec->start;
	repair_io_bio->iter = repair_bio->bi_iter;

	btrfs_debug(btrfs_sb(inode->i_sb),
"repair read error: submitting new read to mirror %d, in_validation=%d",
		    failrec->this_mirror, failrec->in_validation);

	status = submit_bio_hook(inode, repair_bio, failrec->this_mirror,
				 failrec->bio_flags);
	if (status) {
		/* Submission failed; drop the record and the unused bio. */
		free_io_failure(failure_tree, tree, failrec);
		bio_put(repair_bio);
	}
	return status;
}

/* lots and lots of room for performance fixes in the end_bio funcs */

/*
 * Finish ordered-extent accounting for a written page range and, on error,
 * mark the page failed and record the error on its mapping.
 */
void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
{
	int uptodate = (err == 0);
	int ret = 0;

	btrfs_writepage_endio_finish_ordered(page, start, end, uptodate);

	if (!uptodate) {
		ClearPageUptodate(page);
		SetPageError(page);
		ret = err < 0 ?
		      err : -EIO;
		mapping_set_error(page->mapping, ret);
	}
}

/*
 * after a writepage IO is done, we need to:
 * clear the uptodate bits on error
 * clear the writeback bits in the extent tree for this IO
 * end_page_writeback if the page has no more pending IO
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
static void end_bio_extent_writepage(struct bio *bio)
{
	int error = blk_status_to_errno(bio->bi_status);
	struct bio_vec *bvec;
	u64 start;
	u64 end;
	struct bvec_iter_all iter_all;

	ASSERT(!bio_flagged(bio, BIO_CLONED));
	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;
		struct inode *inode = page->mapping->host;
		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);

		/* We always issue full-page reads, but if some block
		 * in a page fails to read, blk_update_request() will
		 * advance bv_offset and adjust bv_len to compensate.
		 * Print a warning for nonzero offsets, and an error
		 * if they don't add up to a full page.
		 */
		if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
			if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
				btrfs_err(fs_info,
				   "partial page write in btrfs with offset %u and length %u",
					bvec->bv_offset, bvec->bv_len);
			else
				btrfs_info(fs_info,
				   "incomplete page write in btrfs with offset %u and length %u",
					bvec->bv_offset, bvec->bv_len);
		}

		start = page_offset(page);
		end = start + bvec->bv_offset + bvec->bv_len - 1;

		end_extent_writepage(page, error, start, end);
		end_page_writeback(page);
	}

	bio_put(bio);
}

/*
 * Unlock the extent range [start, start+len) after a read completes,
 * setting EXTENT_UPTODATE first when the read succeeded and this tree
 * tracks uptodate state.
 */
static void
endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
			      int uptodate)
{
	struct extent_state *cached = NULL;
	u64 end = start + len - 1;

	if (uptodate && tree->track_uptodate)
		set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
	unlock_extent_cached_atomic(tree, start, end, &cached);
}

/*
 * after a readpage IO is done, we need to:
 * clear the uptodate bits on error
 * set the uptodate bits if things worked
 * set the page up to date if all extents in the tree are uptodate
 * clear the lock bit in the extent tree
 * unlock the page if there are no other extents locked for it
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
static void end_bio_extent_readpage(struct bio *bio)
{
	struct bio_vec *bvec;
	int uptodate = !bio->bi_status;
	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
	struct extent_io_tree *tree, *failure_tree;
	u64 offset = 0;		/* byte offset of current bvec within the bio */
	u64 start;
	u64 end;
	u64 len;
	u64 extent_start = 0;	/* run of contiguous uptodate ranges, released lazily */
	u64 extent_len = 0;
	int mirror;
	int ret;
	struct bvec_iter_all iter_all;

	ASSERT(!bio_flagged(bio, BIO_CLONED));
	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;
		struct inode *inode = page->mapping->host;
		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);

		btrfs_debug(fs_info,
			"end_bio_extent_readpage: bi_sector=%llu, err=%d, mirror=%u",
			(u64)bio->bi_iter.bi_sector, bio->bi_status,
			io_bio->mirror_num);
		tree = &BTRFS_I(inode)->io_tree;
		failure_tree = &BTRFS_I(inode)->io_failure_tree;

		/* We always issue full-page reads, but if some block
		 * in a page fails to read, blk_update_request() will
		 * advance bv_offset and adjust bv_len to compensate.
		 * Print a warning for nonzero offsets, and an error
		 * if they don't add up to a full page. */
		if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
			if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
				btrfs_err(fs_info,
					"partial page read in btrfs with offset %u and length %u",
					bvec->bv_offset, bvec->bv_len);
			else
				btrfs_info(fs_info,
					"incomplete page read in btrfs with offset %u and length %u",
					bvec->bv_offset, bvec->bv_len);
		}

		start = page_offset(page);
		end = start + bvec->bv_offset + bvec->bv_len - 1;
		len = bvec->bv_len;

		mirror = io_bio->mirror_num;
		if (likely(uptodate)) {
			/* Verify checksums (data) or the tree block (metadata). */
			if (is_data_inode(inode))
				ret = btrfs_verify_data_csum(io_bio, offset, page,
							     start, end, mirror);
			else
				ret = btrfs_validate_metadata_buffer(io_bio,
						offset, page, start, end, mirror);
			if (ret)
				uptodate = 0;
			else
				clean_io_failure(BTRFS_I(inode)->root->fs_info,
						 failure_tree, tree, start,
						 page,
						 btrfs_ino(BTRFS_I(inode)), 0);
		}

		if (likely(uptodate))
			goto readpage_ok;

		if (is_data_inode(inode)) {

			/*
			 * The generic bio_readpage_error handles errors the
			 * following way: If possible, new read requests are
			 * created and submitted and will end up in
			 * end_bio_extent_readpage as well (if we're lucky,
			 * not in the !uptodate case). In that case it returns
			 * 0 and we just go on with the next page in our bio.
			 * If it can't handle the error it will return -EIO and
			 * we remain responsible for that page.
			 */
			if (!btrfs_submit_read_repair(inode, bio, offset, page,
						      start - page_offset(page),
						      start, end, mirror,
						      btrfs_submit_data_bio)) {
				/* Repair submitted; this range is now its problem. */
				uptodate = !bio->bi_status;
				offset += len;
				continue;
			}
		} else {
			struct extent_buffer *eb;

			/* Metadata read error: flag the extent buffer. */
			eb = (struct extent_buffer *)page->private;
			set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
			eb->read_mirror = mirror;
			atomic_dec(&eb->io_pages);
			if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD,
					       &eb->bflags))
				btree_readahead_hook(eb, -EIO);
		}
readpage_ok:
		if (likely(uptodate)) {
			loff_t i_size = i_size_read(inode);
			pgoff_t end_index = i_size >> PAGE_SHIFT;
			unsigned off;

			/* Zero out the end if this page straddles i_size */
			off = offset_in_page(i_size);
			if (page->index == end_index && off)
				zero_user_segment(page, off, PAGE_SIZE);
			SetPageUptodate(page);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}
		unlock_page(page);
		offset += len;

		/*
		 * Coalesce adjacent uptodate ranges and release them in one
		 * call; failed ranges are released immediately as !uptodate.
		 */
		if (unlikely(!uptodate)) {
			if (extent_len) {
				endio_readpage_release_extent(tree,
							      extent_start,
							      extent_len, 1);
				extent_start = 0;
				extent_len = 0;
			}
			endio_readpage_release_extent(tree, start,
						      end - start + 1, 0);
		} else if (!extent_len) {
			extent_start = start;
			extent_len = end + 1 - start;
		} else if (extent_start + extent_len == start) {
			extent_len += end + 1 - start;
		} else {
			endio_readpage_release_extent(tree, extent_start,
						      extent_len, uptodate);
			extent_start = start;
			extent_len = end + 1 - start;
		}
	}

	/* Release any run still pending after the last bvec. */
	if (extent_len)
		endio_readpage_release_extent(tree, extent_start, extent_len,
					      uptodate);
	btrfs_io_bio_free_csum(io_bio);
	bio_put(bio);
}

/*
 * Initialize the members up to but not including 'bio'. Use after allocating a
 * new bio by bio_alloc_bioset as it does not initialize the bytes outside of
 * 'bio' because use of __GFP_ZERO is not supported.
 */
static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
{
	memset(btrfs_bio, 0, offsetof(struct btrfs_io_bio, bio));
}

/*
 * The following helpers allocate a bio. As it's backed by a bioset, it'll
 * never fail. We're returning a bio right now but you can call btrfs_io_bio
 * for the appropriate container_of magic
 */
struct bio *btrfs_bio_alloc(u64 first_byte)
{
	struct bio *bio;

	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
	/* Convert the byte offset to a 512-byte sector. */
	bio->bi_iter.bi_sector = first_byte >> 9;
	btrfs_io_bio_init(btrfs_io_bio(bio));
	return bio;
}

/* Clone @bio and remember the cloned iterator in the new btrfs_io_bio. */
struct bio *btrfs_bio_clone(struct bio *bio)
{
	struct btrfs_io_bio *btrfs_bio;
	struct bio *new;

	/* Bio allocation backed by a bioset does not fail */
	new = bio_clone_fast(bio, GFP_NOFS, &btrfs_bioset);
	btrfs_bio = btrfs_io_bio(new);
	btrfs_io_bio_init(btrfs_bio);
	btrfs_bio->iter = bio->bi_iter;
	return new;
}

/* Allocate a fresh btrfs bio with room for @nr_iovecs bio_vecs. */
struct bio *btrfs_io_bio_alloc(unsigned int nr_iovecs)
{
	struct bio *bio;

	/* Bio allocation backed by a bioset does not fail */
	bio = bio_alloc_bioset(GFP_NOFS, nr_iovecs, &btrfs_bioset);
	btrfs_io_bio_init(btrfs_io_bio(bio));
	return bio;
}

/*
 * Clone @orig and trim the clone down to the byte range
 * [offset, offset + size).
 */
struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
{
	struct bio *bio;
	struct btrfs_io_bio *btrfs_bio;

	/* this will never fail when it's backed by a bioset */
	bio = bio_clone_fast(orig, GFP_NOFS, &btrfs_bioset);
	ASSERT(bio);

	btrfs_bio = btrfs_io_bio(bio);
	btrfs_io_bio_init(btrfs_bio);

	/* bio_trim() takes 512-byte sector units. */
	bio_trim(bio, offset >> 9, size >> 9);
	btrfs_bio->iter = bio->bi_iter;
	return bio;
}

/*
 * @opf:	bio REQ_OP_* and REQ_* flags as one value
 * @wbc:	optional writeback control for io accounting
 * @page:	page to add to the bio
 * @pg_offset:	offset of the new bio or to check whether we are adding
 *		a contiguous page to the previous one
 * @size:	portion of page that we want to write
 * @offset:	starting offset in the page
 * @bio_ret:	must be valid pointer, newly allocated bio will be stored there
 * @end_io_func:	end_io callback for new bio
 * @mirror_num:	desired mirror to
 read/write
 * @prev_bio_flags:	flags of previous bio to see if we can merge the current one
 * @bio_flags:	flags of the current bio to see if we can merge them
 */
static int submit_extent_page(unsigned int opf,
			      struct writeback_control *wbc,
			      struct page *page, u64 offset,
			      size_t size, unsigned long pg_offset,
			      struct bio **bio_ret,
			      bio_end_io_t end_io_func,
			      int mirror_num,
			      unsigned long prev_bio_flags,
			      unsigned long bio_flags,
			      bool force_bio_submit)
{
	int ret = 0;
	struct bio *bio;
	size_t page_size = min_t(size_t, size, PAGE_SIZE);
	sector_t sector = offset >> 9;	/* byte offset -> 512B sector */
	struct extent_io_tree *tree = &BTRFS_I(page->mapping->host)->io_tree;

	ASSERT(bio_ret);

	if (*bio_ret) {
		bool contig;
		bool can_merge = true;

		bio = *bio_ret;
		/*
		 * For compressed bios contiguity is checked against the bio's
		 * starting sector; otherwise against its current end sector.
		 */
		if (prev_bio_flags & EXTENT_BIO_COMPRESSED)
			contig = bio->bi_iter.bi_sector == sector;
		else
			contig = bio_end_sector(bio) == sector;

		if (btrfs_bio_fits_in_stripe(page, page_size, bio, bio_flags))
			can_merge = false;

		/*
		 * Submit the pending bio when we cannot merge this page into
		 * it (flag mismatch, discontiguity, stripe boundary, forced
		 * submit, or the bio is full).
		 */
		if (prev_bio_flags != bio_flags || !contig || !can_merge ||
		    force_bio_submit ||
		    bio_add_page(bio, page, page_size, pg_offset) < page_size) {
			ret = submit_one_bio(bio, mirror_num, prev_bio_flags);
			if (ret < 0) {
				*bio_ret = NULL;
				return ret;
			}
			bio = NULL;
		} else {
			/* Page merged into the existing bio; account and done. */
			if (wbc)
				wbc_account_cgroup_owner(wbc, page, page_size);
			return 0;
		}
	}

	bio = btrfs_bio_alloc(offset);
	bio_add_page(bio, page, page_size, pg_offset);
	bio->bi_end_io = end_io_func;
	bio->bi_private = tree;
	bio->bi_write_hint = page->mapping->host->i_write_hint;
	bio->bi_opf = opf;
	if (wbc) {
		struct block_device *bdev;

		bdev = BTRFS_I(page->mapping->host)->root->fs_info->fs_devices->latest_bdev;
		bio_set_dev(bio, bdev);
		wbc_init_bio(wbc, bio);
		wbc_account_cgroup_owner(wbc, page, page_size);
	}

	*bio_ret = bio;

	return ret;
}

/*
 * Tie @page to extent buffer @eb via page->private; warn if the page is
 * already attached to a different buffer.
 */
static void attach_extent_buffer_page(struct extent_buffer *eb,
				      struct page *page)
{
	if (!PagePrivate(page))
		attach_page_private(page, eb);
30998c2ecf20Sopenharmony_ci else 31008c2ecf20Sopenharmony_ci WARN_ON(page->private != (unsigned long)eb); 31018c2ecf20Sopenharmony_ci} 31028c2ecf20Sopenharmony_ci 31038c2ecf20Sopenharmony_civoid set_page_extent_mapped(struct page *page) 31048c2ecf20Sopenharmony_ci{ 31058c2ecf20Sopenharmony_ci if (!PagePrivate(page)) 31068c2ecf20Sopenharmony_ci attach_page_private(page, (void *)EXTENT_PAGE_PRIVATE); 31078c2ecf20Sopenharmony_ci} 31088c2ecf20Sopenharmony_ci 31098c2ecf20Sopenharmony_cistatic struct extent_map * 31108c2ecf20Sopenharmony_ci__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, 31118c2ecf20Sopenharmony_ci u64 start, u64 len, struct extent_map **em_cached) 31128c2ecf20Sopenharmony_ci{ 31138c2ecf20Sopenharmony_ci struct extent_map *em; 31148c2ecf20Sopenharmony_ci 31158c2ecf20Sopenharmony_ci if (em_cached && *em_cached) { 31168c2ecf20Sopenharmony_ci em = *em_cached; 31178c2ecf20Sopenharmony_ci if (extent_map_in_tree(em) && start >= em->start && 31188c2ecf20Sopenharmony_ci start < extent_map_end(em)) { 31198c2ecf20Sopenharmony_ci refcount_inc(&em->refs); 31208c2ecf20Sopenharmony_ci return em; 31218c2ecf20Sopenharmony_ci } 31228c2ecf20Sopenharmony_ci 31238c2ecf20Sopenharmony_ci free_extent_map(em); 31248c2ecf20Sopenharmony_ci *em_cached = NULL; 31258c2ecf20Sopenharmony_ci } 31268c2ecf20Sopenharmony_ci 31278c2ecf20Sopenharmony_ci em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, start, len); 31288c2ecf20Sopenharmony_ci if (em_cached && !IS_ERR_OR_NULL(em)) { 31298c2ecf20Sopenharmony_ci BUG_ON(*em_cached); 31308c2ecf20Sopenharmony_ci refcount_inc(&em->refs); 31318c2ecf20Sopenharmony_ci *em_cached = em; 31328c2ecf20Sopenharmony_ci } 31338c2ecf20Sopenharmony_ci return em; 31348c2ecf20Sopenharmony_ci} 31358c2ecf20Sopenharmony_ci/* 31368c2ecf20Sopenharmony_ci * basic readpage implementation. 
Locked extent state structs are inserted 31378c2ecf20Sopenharmony_ci * into the tree that are removed when the IO is done (by the end_io 31388c2ecf20Sopenharmony_ci * handlers) 31398c2ecf20Sopenharmony_ci * XXX JDM: This needs looking at to ensure proper page locking 31408c2ecf20Sopenharmony_ci * return 0 on success, otherwise return error 31418c2ecf20Sopenharmony_ci */ 31428c2ecf20Sopenharmony_ciint btrfs_do_readpage(struct page *page, struct extent_map **em_cached, 31438c2ecf20Sopenharmony_ci struct bio **bio, unsigned long *bio_flags, 31448c2ecf20Sopenharmony_ci unsigned int read_flags, u64 *prev_em_start) 31458c2ecf20Sopenharmony_ci{ 31468c2ecf20Sopenharmony_ci struct inode *inode = page->mapping->host; 31478c2ecf20Sopenharmony_ci u64 start = page_offset(page); 31488c2ecf20Sopenharmony_ci const u64 end = start + PAGE_SIZE - 1; 31498c2ecf20Sopenharmony_ci u64 cur = start; 31508c2ecf20Sopenharmony_ci u64 extent_offset; 31518c2ecf20Sopenharmony_ci u64 last_byte = i_size_read(inode); 31528c2ecf20Sopenharmony_ci u64 block_start; 31538c2ecf20Sopenharmony_ci u64 cur_end; 31548c2ecf20Sopenharmony_ci struct extent_map *em; 31558c2ecf20Sopenharmony_ci int ret = 0; 31568c2ecf20Sopenharmony_ci int nr = 0; 31578c2ecf20Sopenharmony_ci size_t pg_offset = 0; 31588c2ecf20Sopenharmony_ci size_t iosize; 31598c2ecf20Sopenharmony_ci size_t disk_io_size; 31608c2ecf20Sopenharmony_ci size_t blocksize = inode->i_sb->s_blocksize; 31618c2ecf20Sopenharmony_ci unsigned long this_bio_flag = 0; 31628c2ecf20Sopenharmony_ci struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 31638c2ecf20Sopenharmony_ci 31648c2ecf20Sopenharmony_ci set_page_extent_mapped(page); 31658c2ecf20Sopenharmony_ci 31668c2ecf20Sopenharmony_ci if (!PageUptodate(page)) { 31678c2ecf20Sopenharmony_ci if (cleancache_get_page(page) == 0) { 31688c2ecf20Sopenharmony_ci BUG_ON(blocksize != PAGE_SIZE); 31698c2ecf20Sopenharmony_ci unlock_extent(tree, start, end); 31708c2ecf20Sopenharmony_ci goto out; 31718c2ecf20Sopenharmony_ci 
} 31728c2ecf20Sopenharmony_ci } 31738c2ecf20Sopenharmony_ci 31748c2ecf20Sopenharmony_ci if (page->index == last_byte >> PAGE_SHIFT) { 31758c2ecf20Sopenharmony_ci char *userpage; 31768c2ecf20Sopenharmony_ci size_t zero_offset = offset_in_page(last_byte); 31778c2ecf20Sopenharmony_ci 31788c2ecf20Sopenharmony_ci if (zero_offset) { 31798c2ecf20Sopenharmony_ci iosize = PAGE_SIZE - zero_offset; 31808c2ecf20Sopenharmony_ci userpage = kmap_atomic(page); 31818c2ecf20Sopenharmony_ci memset(userpage + zero_offset, 0, iosize); 31828c2ecf20Sopenharmony_ci flush_dcache_page(page); 31838c2ecf20Sopenharmony_ci kunmap_atomic(userpage); 31848c2ecf20Sopenharmony_ci } 31858c2ecf20Sopenharmony_ci } 31868c2ecf20Sopenharmony_ci while (cur <= end) { 31878c2ecf20Sopenharmony_ci bool force_bio_submit = false; 31888c2ecf20Sopenharmony_ci u64 offset; 31898c2ecf20Sopenharmony_ci 31908c2ecf20Sopenharmony_ci if (cur >= last_byte) { 31918c2ecf20Sopenharmony_ci char *userpage; 31928c2ecf20Sopenharmony_ci struct extent_state *cached = NULL; 31938c2ecf20Sopenharmony_ci 31948c2ecf20Sopenharmony_ci iosize = PAGE_SIZE - pg_offset; 31958c2ecf20Sopenharmony_ci userpage = kmap_atomic(page); 31968c2ecf20Sopenharmony_ci memset(userpage + pg_offset, 0, iosize); 31978c2ecf20Sopenharmony_ci flush_dcache_page(page); 31988c2ecf20Sopenharmony_ci kunmap_atomic(userpage); 31998c2ecf20Sopenharmony_ci set_extent_uptodate(tree, cur, cur + iosize - 1, 32008c2ecf20Sopenharmony_ci &cached, GFP_NOFS); 32018c2ecf20Sopenharmony_ci unlock_extent_cached(tree, cur, 32028c2ecf20Sopenharmony_ci cur + iosize - 1, &cached); 32038c2ecf20Sopenharmony_ci break; 32048c2ecf20Sopenharmony_ci } 32058c2ecf20Sopenharmony_ci em = __get_extent_map(inode, page, pg_offset, cur, 32068c2ecf20Sopenharmony_ci end - cur + 1, em_cached); 32078c2ecf20Sopenharmony_ci if (IS_ERR_OR_NULL(em)) { 32088c2ecf20Sopenharmony_ci SetPageError(page); 32098c2ecf20Sopenharmony_ci unlock_extent(tree, cur, end); 32108c2ecf20Sopenharmony_ci break; 
32118c2ecf20Sopenharmony_ci } 32128c2ecf20Sopenharmony_ci extent_offset = cur - em->start; 32138c2ecf20Sopenharmony_ci BUG_ON(extent_map_end(em) <= cur); 32148c2ecf20Sopenharmony_ci BUG_ON(end < cur); 32158c2ecf20Sopenharmony_ci 32168c2ecf20Sopenharmony_ci if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { 32178c2ecf20Sopenharmony_ci this_bio_flag |= EXTENT_BIO_COMPRESSED; 32188c2ecf20Sopenharmony_ci extent_set_compress_type(&this_bio_flag, 32198c2ecf20Sopenharmony_ci em->compress_type); 32208c2ecf20Sopenharmony_ci } 32218c2ecf20Sopenharmony_ci 32228c2ecf20Sopenharmony_ci iosize = min(extent_map_end(em) - cur, end - cur + 1); 32238c2ecf20Sopenharmony_ci cur_end = min(extent_map_end(em) - 1, end); 32248c2ecf20Sopenharmony_ci iosize = ALIGN(iosize, blocksize); 32258c2ecf20Sopenharmony_ci if (this_bio_flag & EXTENT_BIO_COMPRESSED) { 32268c2ecf20Sopenharmony_ci disk_io_size = em->block_len; 32278c2ecf20Sopenharmony_ci offset = em->block_start; 32288c2ecf20Sopenharmony_ci } else { 32298c2ecf20Sopenharmony_ci offset = em->block_start + extent_offset; 32308c2ecf20Sopenharmony_ci disk_io_size = iosize; 32318c2ecf20Sopenharmony_ci } 32328c2ecf20Sopenharmony_ci block_start = em->block_start; 32338c2ecf20Sopenharmony_ci if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 32348c2ecf20Sopenharmony_ci block_start = EXTENT_MAP_HOLE; 32358c2ecf20Sopenharmony_ci 32368c2ecf20Sopenharmony_ci /* 32378c2ecf20Sopenharmony_ci * If we have a file range that points to a compressed extent 32388c2ecf20Sopenharmony_ci * and it's followed by a consecutive file range that points 32398c2ecf20Sopenharmony_ci * to the same compressed extent (possibly with a different 32408c2ecf20Sopenharmony_ci * offset and/or length, so it either points to the whole extent 32418c2ecf20Sopenharmony_ci * or only part of it), we must make sure we do not submit a 32428c2ecf20Sopenharmony_ci * single bio to populate the pages for the 2 ranges because 32438c2ecf20Sopenharmony_ci * this makes the compressed extent read 
zero out the pages 32448c2ecf20Sopenharmony_ci * belonging to the 2nd range. Imagine the following scenario: 32458c2ecf20Sopenharmony_ci * 32468c2ecf20Sopenharmony_ci * File layout 32478c2ecf20Sopenharmony_ci * [0 - 8K] [8K - 24K] 32488c2ecf20Sopenharmony_ci * | | 32498c2ecf20Sopenharmony_ci * | | 32508c2ecf20Sopenharmony_ci * points to extent X, points to extent X, 32518c2ecf20Sopenharmony_ci * offset 4K, length of 8K offset 0, length 16K 32528c2ecf20Sopenharmony_ci * 32538c2ecf20Sopenharmony_ci * [extent X, compressed length = 4K uncompressed length = 16K] 32548c2ecf20Sopenharmony_ci * 32558c2ecf20Sopenharmony_ci * If the bio to read the compressed extent covers both ranges, 32568c2ecf20Sopenharmony_ci * it will decompress extent X into the pages belonging to the 32578c2ecf20Sopenharmony_ci * first range and then it will stop, zeroing out the remaining 32588c2ecf20Sopenharmony_ci * pages that belong to the other range that points to extent X. 32598c2ecf20Sopenharmony_ci * So here we make sure we submit 2 bios, one for the first 32608c2ecf20Sopenharmony_ci * range and another one for the third range. Both will target 32618c2ecf20Sopenharmony_ci * the same physical extent from disk, but we can't currently 32628c2ecf20Sopenharmony_ci * make the compressed bio endio callback populate the pages 32638c2ecf20Sopenharmony_ci * for both ranges because each compressed bio is tightly 32648c2ecf20Sopenharmony_ci * coupled with a single extent map, and each range can have 32658c2ecf20Sopenharmony_ci * an extent map with a different offset value relative to the 32668c2ecf20Sopenharmony_ci * uncompressed data of our extent and different lengths. This 32678c2ecf20Sopenharmony_ci * is a corner case so we prioritize correctness over 32688c2ecf20Sopenharmony_ci * non-optimal behavior (submitting 2 bios for the same extent). 
32698c2ecf20Sopenharmony_ci */ 32708c2ecf20Sopenharmony_ci if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) && 32718c2ecf20Sopenharmony_ci prev_em_start && *prev_em_start != (u64)-1 && 32728c2ecf20Sopenharmony_ci *prev_em_start != em->start) 32738c2ecf20Sopenharmony_ci force_bio_submit = true; 32748c2ecf20Sopenharmony_ci 32758c2ecf20Sopenharmony_ci if (prev_em_start) 32768c2ecf20Sopenharmony_ci *prev_em_start = em->start; 32778c2ecf20Sopenharmony_ci 32788c2ecf20Sopenharmony_ci free_extent_map(em); 32798c2ecf20Sopenharmony_ci em = NULL; 32808c2ecf20Sopenharmony_ci 32818c2ecf20Sopenharmony_ci /* we've found a hole, just zero and go on */ 32828c2ecf20Sopenharmony_ci if (block_start == EXTENT_MAP_HOLE) { 32838c2ecf20Sopenharmony_ci char *userpage; 32848c2ecf20Sopenharmony_ci struct extent_state *cached = NULL; 32858c2ecf20Sopenharmony_ci 32868c2ecf20Sopenharmony_ci userpage = kmap_atomic(page); 32878c2ecf20Sopenharmony_ci memset(userpage + pg_offset, 0, iosize); 32888c2ecf20Sopenharmony_ci flush_dcache_page(page); 32898c2ecf20Sopenharmony_ci kunmap_atomic(userpage); 32908c2ecf20Sopenharmony_ci 32918c2ecf20Sopenharmony_ci set_extent_uptodate(tree, cur, cur + iosize - 1, 32928c2ecf20Sopenharmony_ci &cached, GFP_NOFS); 32938c2ecf20Sopenharmony_ci unlock_extent_cached(tree, cur, 32948c2ecf20Sopenharmony_ci cur + iosize - 1, &cached); 32958c2ecf20Sopenharmony_ci cur = cur + iosize; 32968c2ecf20Sopenharmony_ci pg_offset += iosize; 32978c2ecf20Sopenharmony_ci continue; 32988c2ecf20Sopenharmony_ci } 32998c2ecf20Sopenharmony_ci /* the get_extent function already copied into the page */ 33008c2ecf20Sopenharmony_ci if (test_range_bit(tree, cur, cur_end, 33018c2ecf20Sopenharmony_ci EXTENT_UPTODATE, 1, NULL)) { 33028c2ecf20Sopenharmony_ci check_page_uptodate(tree, page); 33038c2ecf20Sopenharmony_ci unlock_extent(tree, cur, cur + iosize - 1); 33048c2ecf20Sopenharmony_ci cur = cur + iosize; 33058c2ecf20Sopenharmony_ci pg_offset += iosize; 33068c2ecf20Sopenharmony_ci continue; 
33078c2ecf20Sopenharmony_ci } 33088c2ecf20Sopenharmony_ci /* we have an inline extent but it didn't get marked up 33098c2ecf20Sopenharmony_ci * to date. Error out 33108c2ecf20Sopenharmony_ci */ 33118c2ecf20Sopenharmony_ci if (block_start == EXTENT_MAP_INLINE) { 33128c2ecf20Sopenharmony_ci SetPageError(page); 33138c2ecf20Sopenharmony_ci unlock_extent(tree, cur, cur + iosize - 1); 33148c2ecf20Sopenharmony_ci cur = cur + iosize; 33158c2ecf20Sopenharmony_ci pg_offset += iosize; 33168c2ecf20Sopenharmony_ci continue; 33178c2ecf20Sopenharmony_ci } 33188c2ecf20Sopenharmony_ci 33198c2ecf20Sopenharmony_ci ret = submit_extent_page(REQ_OP_READ | read_flags, NULL, 33208c2ecf20Sopenharmony_ci page, offset, disk_io_size, 33218c2ecf20Sopenharmony_ci pg_offset, bio, 33228c2ecf20Sopenharmony_ci end_bio_extent_readpage, 0, 33238c2ecf20Sopenharmony_ci *bio_flags, 33248c2ecf20Sopenharmony_ci this_bio_flag, 33258c2ecf20Sopenharmony_ci force_bio_submit); 33268c2ecf20Sopenharmony_ci if (!ret) { 33278c2ecf20Sopenharmony_ci nr++; 33288c2ecf20Sopenharmony_ci *bio_flags = this_bio_flag; 33298c2ecf20Sopenharmony_ci } else { 33308c2ecf20Sopenharmony_ci SetPageError(page); 33318c2ecf20Sopenharmony_ci unlock_extent(tree, cur, cur + iosize - 1); 33328c2ecf20Sopenharmony_ci goto out; 33338c2ecf20Sopenharmony_ci } 33348c2ecf20Sopenharmony_ci cur = cur + iosize; 33358c2ecf20Sopenharmony_ci pg_offset += iosize; 33368c2ecf20Sopenharmony_ci } 33378c2ecf20Sopenharmony_ciout: 33388c2ecf20Sopenharmony_ci if (!nr) { 33398c2ecf20Sopenharmony_ci if (!PageError(page)) 33408c2ecf20Sopenharmony_ci SetPageUptodate(page); 33418c2ecf20Sopenharmony_ci unlock_page(page); 33428c2ecf20Sopenharmony_ci } 33438c2ecf20Sopenharmony_ci return ret; 33448c2ecf20Sopenharmony_ci} 33458c2ecf20Sopenharmony_ci 33468c2ecf20Sopenharmony_cistatic inline void contiguous_readpages(struct page *pages[], int nr_pages, 33478c2ecf20Sopenharmony_ci u64 start, u64 end, 33488c2ecf20Sopenharmony_ci struct extent_map **em_cached, 
33498c2ecf20Sopenharmony_ci struct bio **bio, 33508c2ecf20Sopenharmony_ci unsigned long *bio_flags, 33518c2ecf20Sopenharmony_ci u64 *prev_em_start) 33528c2ecf20Sopenharmony_ci{ 33538c2ecf20Sopenharmony_ci struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host); 33548c2ecf20Sopenharmony_ci int index; 33558c2ecf20Sopenharmony_ci 33568c2ecf20Sopenharmony_ci btrfs_lock_and_flush_ordered_range(inode, start, end, NULL); 33578c2ecf20Sopenharmony_ci 33588c2ecf20Sopenharmony_ci for (index = 0; index < nr_pages; index++) { 33598c2ecf20Sopenharmony_ci btrfs_do_readpage(pages[index], em_cached, bio, bio_flags, 33608c2ecf20Sopenharmony_ci REQ_RAHEAD, prev_em_start); 33618c2ecf20Sopenharmony_ci put_page(pages[index]); 33628c2ecf20Sopenharmony_ci } 33638c2ecf20Sopenharmony_ci} 33648c2ecf20Sopenharmony_ci 33658c2ecf20Sopenharmony_cistatic void update_nr_written(struct writeback_control *wbc, 33668c2ecf20Sopenharmony_ci unsigned long nr_written) 33678c2ecf20Sopenharmony_ci{ 33688c2ecf20Sopenharmony_ci wbc->nr_to_write -= nr_written; 33698c2ecf20Sopenharmony_ci} 33708c2ecf20Sopenharmony_ci 33718c2ecf20Sopenharmony_ci/* 33728c2ecf20Sopenharmony_ci * helper for __extent_writepage, doing all of the delayed allocation setup. 33738c2ecf20Sopenharmony_ci * 33748c2ecf20Sopenharmony_ci * This returns 1 if btrfs_run_delalloc_range function did all the work required 33758c2ecf20Sopenharmony_ci * to write the page (copy into inline extent). In this case the IO has 33768c2ecf20Sopenharmony_ci * been started and the page is already unlocked. 
33778c2ecf20Sopenharmony_ci * 33788c2ecf20Sopenharmony_ci * This returns 0 if all went well (page still locked) 33798c2ecf20Sopenharmony_ci * This returns < 0 if there were errors (page still locked) 33808c2ecf20Sopenharmony_ci */ 33818c2ecf20Sopenharmony_cistatic noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, 33828c2ecf20Sopenharmony_ci struct page *page, struct writeback_control *wbc, 33838c2ecf20Sopenharmony_ci u64 delalloc_start, unsigned long *nr_written) 33848c2ecf20Sopenharmony_ci{ 33858c2ecf20Sopenharmony_ci u64 page_end = delalloc_start + PAGE_SIZE - 1; 33868c2ecf20Sopenharmony_ci bool found; 33878c2ecf20Sopenharmony_ci u64 delalloc_to_write = 0; 33888c2ecf20Sopenharmony_ci u64 delalloc_end = 0; 33898c2ecf20Sopenharmony_ci int ret; 33908c2ecf20Sopenharmony_ci int page_started = 0; 33918c2ecf20Sopenharmony_ci 33928c2ecf20Sopenharmony_ci 33938c2ecf20Sopenharmony_ci while (delalloc_end < page_end) { 33948c2ecf20Sopenharmony_ci found = find_lock_delalloc_range(&inode->vfs_inode, page, 33958c2ecf20Sopenharmony_ci &delalloc_start, 33968c2ecf20Sopenharmony_ci &delalloc_end); 33978c2ecf20Sopenharmony_ci if (!found) { 33988c2ecf20Sopenharmony_ci delalloc_start = delalloc_end + 1; 33998c2ecf20Sopenharmony_ci continue; 34008c2ecf20Sopenharmony_ci } 34018c2ecf20Sopenharmony_ci ret = btrfs_run_delalloc_range(inode, page, delalloc_start, 34028c2ecf20Sopenharmony_ci delalloc_end, &page_started, nr_written, wbc); 34038c2ecf20Sopenharmony_ci if (ret) { 34048c2ecf20Sopenharmony_ci SetPageError(page); 34058c2ecf20Sopenharmony_ci /* 34068c2ecf20Sopenharmony_ci * btrfs_run_delalloc_range should return < 0 for error 34078c2ecf20Sopenharmony_ci * but just in case, we use > 0 here meaning the IO is 34088c2ecf20Sopenharmony_ci * started, so we don't want to return > 0 unless 34098c2ecf20Sopenharmony_ci * things are going well. 34108c2ecf20Sopenharmony_ci */ 34118c2ecf20Sopenharmony_ci return ret < 0 ? 
ret : -EIO; 34128c2ecf20Sopenharmony_ci } 34138c2ecf20Sopenharmony_ci /* 34148c2ecf20Sopenharmony_ci * delalloc_end is already one less than the total length, so 34158c2ecf20Sopenharmony_ci * we don't subtract one from PAGE_SIZE 34168c2ecf20Sopenharmony_ci */ 34178c2ecf20Sopenharmony_ci delalloc_to_write += (delalloc_end - delalloc_start + 34188c2ecf20Sopenharmony_ci PAGE_SIZE) >> PAGE_SHIFT; 34198c2ecf20Sopenharmony_ci delalloc_start = delalloc_end + 1; 34208c2ecf20Sopenharmony_ci } 34218c2ecf20Sopenharmony_ci if (wbc->nr_to_write < delalloc_to_write) { 34228c2ecf20Sopenharmony_ci int thresh = 8192; 34238c2ecf20Sopenharmony_ci 34248c2ecf20Sopenharmony_ci if (delalloc_to_write < thresh * 2) 34258c2ecf20Sopenharmony_ci thresh = delalloc_to_write; 34268c2ecf20Sopenharmony_ci wbc->nr_to_write = min_t(u64, delalloc_to_write, 34278c2ecf20Sopenharmony_ci thresh); 34288c2ecf20Sopenharmony_ci } 34298c2ecf20Sopenharmony_ci 34308c2ecf20Sopenharmony_ci /* did the fill delalloc function already unlock and start 34318c2ecf20Sopenharmony_ci * the IO? 34328c2ecf20Sopenharmony_ci */ 34338c2ecf20Sopenharmony_ci if (page_started) { 34348c2ecf20Sopenharmony_ci /* 34358c2ecf20Sopenharmony_ci * we've unlocked the page, so we can't update 34368c2ecf20Sopenharmony_ci * the mapping's writeback index, just update 34378c2ecf20Sopenharmony_ci * nr_to_write. 34388c2ecf20Sopenharmony_ci */ 34398c2ecf20Sopenharmony_ci wbc->nr_to_write -= *nr_written; 34408c2ecf20Sopenharmony_ci return 1; 34418c2ecf20Sopenharmony_ci } 34428c2ecf20Sopenharmony_ci 34438c2ecf20Sopenharmony_ci return 0; 34448c2ecf20Sopenharmony_ci} 34458c2ecf20Sopenharmony_ci 34468c2ecf20Sopenharmony_ci/* 34478c2ecf20Sopenharmony_ci * helper for __extent_writepage. This calls the writepage start hooks, 34488c2ecf20Sopenharmony_ci * and does the loop to map the page into extents and bios. 
34498c2ecf20Sopenharmony_ci * 34508c2ecf20Sopenharmony_ci * We return 1 if the IO is started and the page is unlocked, 34518c2ecf20Sopenharmony_ci * 0 if all went well (page still locked) 34528c2ecf20Sopenharmony_ci * < 0 if there were errors (page still locked) 34538c2ecf20Sopenharmony_ci */ 34548c2ecf20Sopenharmony_cistatic noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, 34558c2ecf20Sopenharmony_ci struct page *page, 34568c2ecf20Sopenharmony_ci struct writeback_control *wbc, 34578c2ecf20Sopenharmony_ci struct extent_page_data *epd, 34588c2ecf20Sopenharmony_ci loff_t i_size, 34598c2ecf20Sopenharmony_ci unsigned long nr_written, 34608c2ecf20Sopenharmony_ci int *nr_ret) 34618c2ecf20Sopenharmony_ci{ 34628c2ecf20Sopenharmony_ci struct extent_io_tree *tree = &inode->io_tree; 34638c2ecf20Sopenharmony_ci u64 start = page_offset(page); 34648c2ecf20Sopenharmony_ci u64 page_end = start + PAGE_SIZE - 1; 34658c2ecf20Sopenharmony_ci u64 end; 34668c2ecf20Sopenharmony_ci u64 cur = start; 34678c2ecf20Sopenharmony_ci u64 extent_offset; 34688c2ecf20Sopenharmony_ci u64 block_start; 34698c2ecf20Sopenharmony_ci u64 iosize; 34708c2ecf20Sopenharmony_ci struct extent_map *em; 34718c2ecf20Sopenharmony_ci size_t pg_offset = 0; 34728c2ecf20Sopenharmony_ci size_t blocksize; 34738c2ecf20Sopenharmony_ci int ret = 0; 34748c2ecf20Sopenharmony_ci int nr = 0; 34758c2ecf20Sopenharmony_ci const unsigned int write_flags = wbc_to_write_flags(wbc); 34768c2ecf20Sopenharmony_ci bool compressed; 34778c2ecf20Sopenharmony_ci 34788c2ecf20Sopenharmony_ci ret = btrfs_writepage_cow_fixup(page, start, page_end); 34798c2ecf20Sopenharmony_ci if (ret) { 34808c2ecf20Sopenharmony_ci /* Fixup worker will requeue */ 34818c2ecf20Sopenharmony_ci redirty_page_for_writepage(wbc, page); 34828c2ecf20Sopenharmony_ci update_nr_written(wbc, nr_written); 34838c2ecf20Sopenharmony_ci unlock_page(page); 34848c2ecf20Sopenharmony_ci return 1; 34858c2ecf20Sopenharmony_ci } 34868c2ecf20Sopenharmony_ci 
34878c2ecf20Sopenharmony_ci /* 34888c2ecf20Sopenharmony_ci * we don't want to touch the inode after unlocking the page, 34898c2ecf20Sopenharmony_ci * so we update the mapping writeback index now 34908c2ecf20Sopenharmony_ci */ 34918c2ecf20Sopenharmony_ci update_nr_written(wbc, nr_written + 1); 34928c2ecf20Sopenharmony_ci 34938c2ecf20Sopenharmony_ci end = page_end; 34948c2ecf20Sopenharmony_ci blocksize = inode->vfs_inode.i_sb->s_blocksize; 34958c2ecf20Sopenharmony_ci 34968c2ecf20Sopenharmony_ci while (cur <= end) { 34978c2ecf20Sopenharmony_ci u64 em_end; 34988c2ecf20Sopenharmony_ci u64 offset; 34998c2ecf20Sopenharmony_ci 35008c2ecf20Sopenharmony_ci if (cur >= i_size) { 35018c2ecf20Sopenharmony_ci btrfs_writepage_endio_finish_ordered(page, cur, 35028c2ecf20Sopenharmony_ci page_end, 1); 35038c2ecf20Sopenharmony_ci break; 35048c2ecf20Sopenharmony_ci } 35058c2ecf20Sopenharmony_ci em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1); 35068c2ecf20Sopenharmony_ci if (IS_ERR_OR_NULL(em)) { 35078c2ecf20Sopenharmony_ci SetPageError(page); 35088c2ecf20Sopenharmony_ci ret = PTR_ERR_OR_ZERO(em); 35098c2ecf20Sopenharmony_ci break; 35108c2ecf20Sopenharmony_ci } 35118c2ecf20Sopenharmony_ci 35128c2ecf20Sopenharmony_ci extent_offset = cur - em->start; 35138c2ecf20Sopenharmony_ci em_end = extent_map_end(em); 35148c2ecf20Sopenharmony_ci BUG_ON(em_end <= cur); 35158c2ecf20Sopenharmony_ci BUG_ON(end < cur); 35168c2ecf20Sopenharmony_ci iosize = min(em_end - cur, end - cur + 1); 35178c2ecf20Sopenharmony_ci iosize = ALIGN(iosize, blocksize); 35188c2ecf20Sopenharmony_ci offset = em->block_start + extent_offset; 35198c2ecf20Sopenharmony_ci block_start = em->block_start; 35208c2ecf20Sopenharmony_ci compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); 35218c2ecf20Sopenharmony_ci free_extent_map(em); 35228c2ecf20Sopenharmony_ci em = NULL; 35238c2ecf20Sopenharmony_ci 35248c2ecf20Sopenharmony_ci /* 35258c2ecf20Sopenharmony_ci * compressed and inline extents are written through other 
35268c2ecf20Sopenharmony_ci * paths in the FS 35278c2ecf20Sopenharmony_ci */ 35288c2ecf20Sopenharmony_ci if (compressed || block_start == EXTENT_MAP_HOLE || 35298c2ecf20Sopenharmony_ci block_start == EXTENT_MAP_INLINE) { 35308c2ecf20Sopenharmony_ci if (compressed) 35318c2ecf20Sopenharmony_ci nr++; 35328c2ecf20Sopenharmony_ci else 35338c2ecf20Sopenharmony_ci btrfs_writepage_endio_finish_ordered(page, cur, 35348c2ecf20Sopenharmony_ci cur + iosize - 1, 1); 35358c2ecf20Sopenharmony_ci cur += iosize; 35368c2ecf20Sopenharmony_ci pg_offset += iosize; 35378c2ecf20Sopenharmony_ci continue; 35388c2ecf20Sopenharmony_ci } 35398c2ecf20Sopenharmony_ci 35408c2ecf20Sopenharmony_ci btrfs_set_range_writeback(tree, cur, cur + iosize - 1); 35418c2ecf20Sopenharmony_ci if (!PageWriteback(page)) { 35428c2ecf20Sopenharmony_ci btrfs_err(inode->root->fs_info, 35438c2ecf20Sopenharmony_ci "page %lu not writeback, cur %llu end %llu", 35448c2ecf20Sopenharmony_ci page->index, cur, end); 35458c2ecf20Sopenharmony_ci } 35468c2ecf20Sopenharmony_ci 35478c2ecf20Sopenharmony_ci ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc, 35488c2ecf20Sopenharmony_ci page, offset, iosize, pg_offset, 35498c2ecf20Sopenharmony_ci &epd->bio, 35508c2ecf20Sopenharmony_ci end_bio_extent_writepage, 35518c2ecf20Sopenharmony_ci 0, 0, 0, false); 35528c2ecf20Sopenharmony_ci if (ret) { 35538c2ecf20Sopenharmony_ci SetPageError(page); 35548c2ecf20Sopenharmony_ci if (PageWriteback(page)) 35558c2ecf20Sopenharmony_ci end_page_writeback(page); 35568c2ecf20Sopenharmony_ci } 35578c2ecf20Sopenharmony_ci 35588c2ecf20Sopenharmony_ci cur = cur + iosize; 35598c2ecf20Sopenharmony_ci pg_offset += iosize; 35608c2ecf20Sopenharmony_ci nr++; 35618c2ecf20Sopenharmony_ci } 35628c2ecf20Sopenharmony_ci *nr_ret = nr; 35638c2ecf20Sopenharmony_ci return ret; 35648c2ecf20Sopenharmony_ci} 35658c2ecf20Sopenharmony_ci 35668c2ecf20Sopenharmony_ci/* 35678c2ecf20Sopenharmony_ci * the writepage semantics are similar to regular writepage. 
extent 35688c2ecf20Sopenharmony_ci * records are inserted to lock ranges in the tree, and as dirty areas 35698c2ecf20Sopenharmony_ci * are found, they are marked writeback. Then the lock bits are removed 35708c2ecf20Sopenharmony_ci * and the end_io handler clears the writeback ranges 35718c2ecf20Sopenharmony_ci * 35728c2ecf20Sopenharmony_ci * Return 0 if everything goes well. 35738c2ecf20Sopenharmony_ci * Return <0 for error. 35748c2ecf20Sopenharmony_ci */ 35758c2ecf20Sopenharmony_cistatic int __extent_writepage(struct page *page, struct writeback_control *wbc, 35768c2ecf20Sopenharmony_ci struct extent_page_data *epd) 35778c2ecf20Sopenharmony_ci{ 35788c2ecf20Sopenharmony_ci struct inode *inode = page->mapping->host; 35798c2ecf20Sopenharmony_ci u64 start = page_offset(page); 35808c2ecf20Sopenharmony_ci u64 page_end = start + PAGE_SIZE - 1; 35818c2ecf20Sopenharmony_ci int ret; 35828c2ecf20Sopenharmony_ci int nr = 0; 35838c2ecf20Sopenharmony_ci size_t pg_offset; 35848c2ecf20Sopenharmony_ci loff_t i_size = i_size_read(inode); 35858c2ecf20Sopenharmony_ci unsigned long end_index = i_size >> PAGE_SHIFT; 35868c2ecf20Sopenharmony_ci unsigned long nr_written = 0; 35878c2ecf20Sopenharmony_ci 35888c2ecf20Sopenharmony_ci trace___extent_writepage(page, inode, wbc); 35898c2ecf20Sopenharmony_ci 35908c2ecf20Sopenharmony_ci WARN_ON(!PageLocked(page)); 35918c2ecf20Sopenharmony_ci 35928c2ecf20Sopenharmony_ci ClearPageError(page); 35938c2ecf20Sopenharmony_ci 35948c2ecf20Sopenharmony_ci pg_offset = offset_in_page(i_size); 35958c2ecf20Sopenharmony_ci if (page->index > end_index || 35968c2ecf20Sopenharmony_ci (page->index == end_index && !pg_offset)) { 35978c2ecf20Sopenharmony_ci page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); 35988c2ecf20Sopenharmony_ci unlock_page(page); 35998c2ecf20Sopenharmony_ci return 0; 36008c2ecf20Sopenharmony_ci } 36018c2ecf20Sopenharmony_ci 36028c2ecf20Sopenharmony_ci if (page->index == end_index) { 36038c2ecf20Sopenharmony_ci char *userpage; 
36048c2ecf20Sopenharmony_ci 36058c2ecf20Sopenharmony_ci userpage = kmap_atomic(page); 36068c2ecf20Sopenharmony_ci memset(userpage + pg_offset, 0, 36078c2ecf20Sopenharmony_ci PAGE_SIZE - pg_offset); 36088c2ecf20Sopenharmony_ci kunmap_atomic(userpage); 36098c2ecf20Sopenharmony_ci flush_dcache_page(page); 36108c2ecf20Sopenharmony_ci } 36118c2ecf20Sopenharmony_ci 36128c2ecf20Sopenharmony_ci set_page_extent_mapped(page); 36138c2ecf20Sopenharmony_ci 36148c2ecf20Sopenharmony_ci if (!epd->extent_locked) { 36158c2ecf20Sopenharmony_ci ret = writepage_delalloc(BTRFS_I(inode), page, wbc, start, 36168c2ecf20Sopenharmony_ci &nr_written); 36178c2ecf20Sopenharmony_ci if (ret == 1) 36188c2ecf20Sopenharmony_ci return 0; 36198c2ecf20Sopenharmony_ci if (ret) 36208c2ecf20Sopenharmony_ci goto done; 36218c2ecf20Sopenharmony_ci } 36228c2ecf20Sopenharmony_ci 36238c2ecf20Sopenharmony_ci ret = __extent_writepage_io(BTRFS_I(inode), page, wbc, epd, i_size, 36248c2ecf20Sopenharmony_ci nr_written, &nr); 36258c2ecf20Sopenharmony_ci if (ret == 1) 36268c2ecf20Sopenharmony_ci return 0; 36278c2ecf20Sopenharmony_ci 36288c2ecf20Sopenharmony_cidone: 36298c2ecf20Sopenharmony_ci if (nr == 0) { 36308c2ecf20Sopenharmony_ci /* make sure the mapping tag for page dirty gets cleared */ 36318c2ecf20Sopenharmony_ci set_page_writeback(page); 36328c2ecf20Sopenharmony_ci end_page_writeback(page); 36338c2ecf20Sopenharmony_ci } 36348c2ecf20Sopenharmony_ci if (PageError(page)) { 36358c2ecf20Sopenharmony_ci ret = ret < 0 ? 
ret : -EIO; 36368c2ecf20Sopenharmony_ci end_extent_writepage(page, ret, start, page_end); 36378c2ecf20Sopenharmony_ci } 36388c2ecf20Sopenharmony_ci unlock_page(page); 36398c2ecf20Sopenharmony_ci ASSERT(ret <= 0); 36408c2ecf20Sopenharmony_ci return ret; 36418c2ecf20Sopenharmony_ci} 36428c2ecf20Sopenharmony_ci 36438c2ecf20Sopenharmony_civoid wait_on_extent_buffer_writeback(struct extent_buffer *eb) 36448c2ecf20Sopenharmony_ci{ 36458c2ecf20Sopenharmony_ci wait_on_bit_io(&eb->bflags, EXTENT_BUFFER_WRITEBACK, 36468c2ecf20Sopenharmony_ci TASK_UNINTERRUPTIBLE); 36478c2ecf20Sopenharmony_ci} 36488c2ecf20Sopenharmony_ci 36498c2ecf20Sopenharmony_cistatic void end_extent_buffer_writeback(struct extent_buffer *eb) 36508c2ecf20Sopenharmony_ci{ 36518c2ecf20Sopenharmony_ci clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); 36528c2ecf20Sopenharmony_ci smp_mb__after_atomic(); 36538c2ecf20Sopenharmony_ci wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK); 36548c2ecf20Sopenharmony_ci} 36558c2ecf20Sopenharmony_ci 36568c2ecf20Sopenharmony_ci/* 36578c2ecf20Sopenharmony_ci * Lock eb pages and flush the bio if we can't the locks 36588c2ecf20Sopenharmony_ci * 36598c2ecf20Sopenharmony_ci * Return 0 if nothing went wrong 36608c2ecf20Sopenharmony_ci * Return >0 is same as 0, except bio is not submitted 36618c2ecf20Sopenharmony_ci * Return <0 if something went wrong, no page is locked 36628c2ecf20Sopenharmony_ci */ 36638c2ecf20Sopenharmony_cistatic noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb, 36648c2ecf20Sopenharmony_ci struct extent_page_data *epd) 36658c2ecf20Sopenharmony_ci{ 36668c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = eb->fs_info; 36678c2ecf20Sopenharmony_ci int i, num_pages, failed_page_nr; 36688c2ecf20Sopenharmony_ci int flush = 0; 36698c2ecf20Sopenharmony_ci int ret = 0; 36708c2ecf20Sopenharmony_ci 36718c2ecf20Sopenharmony_ci if (!btrfs_try_tree_write_lock(eb)) { 36728c2ecf20Sopenharmony_ci ret = flush_write_bio(epd); 36738c2ecf20Sopenharmony_ci 
		if (ret < 0)
			return ret;
		flush = 1;
		btrfs_tree_lock(eb);
	}

	if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
		btrfs_tree_unlock(eb);
		if (!epd->sync_io)
			return 0;
		if (!flush) {
			ret = flush_write_bio(epd);
			if (ret < 0)
				return ret;
			flush = 1;
		}
		while (1) {
			wait_on_extent_buffer_writeback(eb);
			btrfs_tree_lock(eb);
			if (!test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags))
				break;
			btrfs_tree_unlock(eb);
		}
	}

	/*
	 * We need to do this to prevent races in people who check if the eb is
	 * under IO since we can end up having no IO bits set for a short period
	 * of time.
	 */
	spin_lock(&eb->refs_lock);
	if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
		set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
		spin_unlock(&eb->refs_lock);
		btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
		percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
					 -eb->len,
					 fs_info->dirty_metadata_batch);
		ret = 1;
	} else {
		spin_unlock(&eb->refs_lock);
	}

	btrfs_tree_unlock(eb);

	/* ret == 0: eb was not dirty, nothing to write, caller skips it */
	if (!ret)
		return ret;

	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++) {
		struct page *p = eb->pages[i];

		if (!trylock_page(p)) {
			if (!flush) {
				int err;

				err = flush_write_bio(epd);
				if (err < 0) {
					ret = err;
					failed_page_nr = i;
					goto err_unlock;
				}
				flush = 1;
			}
			lock_page(p);
		}
	}

	return ret;
err_unlock:
	/* Unlock already locked pages */
	for (i = 0; i < failed_page_nr; i++)
		unlock_page(eb->pages[i]);
	/*
	 * Clear EXTENT_BUFFER_WRITEBACK and wake up anyone waiting on it.
	 * Also set back EXTENT_BUFFER_DIRTY so future attempts to this eb can
	 * be made and undo everything done before.
	 */
	btrfs_tree_lock(eb);
	spin_lock(&eb->refs_lock);
	set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
	end_extent_buffer_writeback(eb);
	spin_unlock(&eb->refs_lock);
	percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, eb->len,
				 fs_info->dirty_metadata_batch);
	btrfs_clear_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
	btrfs_tree_unlock(eb);
	return ret;
}

/*
 * Record a failed metadata page write.
 *
 * Marks the page with PG_error, flags the owning extent buffer with
 * EXTENT_BUFFER_WRITE_ERR (only the first caller per eb does the rest),
 * clears its uptodate bit so later reads see the error, adds the eb's
 * bytes back to dirty_metadata_bytes for consistency, and finally sets
 * the matching per-fs error bit (btree vs. log tree 1/2, selected by
 * eb->log_index) so transaction commit / log sync can detect the
 * failure — see the long comment below for why the per-fs bits are
 * needed.
 */
static void set_btree_ioerr(struct page *page)
{
	struct extent_buffer *eb = (struct extent_buffer *)page->private;
	struct btrfs_fs_info *fs_info;

	SetPageError(page);
	if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags))
		return;

	/*
	 * A read may stumble upon this buffer later, make sure that it gets an
	 * error and knows there was an error.
	 */
	clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);

	/*
	 * If we error out, we should add back the dirty_metadata_bytes
	 * to make it consistent.
	 */
	fs_info = eb->fs_info;
	percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
				 eb->len, fs_info->dirty_metadata_batch);

	/*
	 * If writeback for a btree extent that doesn't belong to a log tree
	 * failed, increment the counter transaction->eb_write_errors.
	 * We do this because while the transaction is running and before it's
	 * committing (when we call filemap_fdata[write|wait]_range against
	 * the btree inode), we might have
	 * btree_inode->i_mapping->a_ops->writepages() called by the VM - if it
	 * returns an error or an error happens during writeback, when we're
	 * committing the transaction we wouldn't know about it, since the pages
	 * can be no longer dirty nor marked anymore for writeback (if a
	 * subsequent modification to the extent buffer didn't happen before the
	 * transaction commit), which makes filemap_fdata[write|wait]_range not
	 * able to find the pages tagged with SetPageError at transaction
	 * commit time. So if this happens we must abort the transaction,
	 * otherwise we commit a super block with btree roots that point to
	 * btree nodes/leafs whose content on disk is invalid - either garbage
	 * or the content of some node/leaf from a past generation that got
	 * cowed or deleted and is no longer valid.
	 *
	 * Note: setting AS_EIO/AS_ENOSPC in the btree inode's i_mapping would
	 * not be enough - we need to distinguish between log tree extents vs
	 * non-log tree extents, and the next filemap_fdatawait_range() call
	 * will catch and clear such errors in the mapping - and that call might
	 * be from a log sync and not from a transaction commit. Also, checking
	 * for the eb flag EXTENT_BUFFER_WRITE_ERR at transaction commit time is
	 * not done and would not be reliable - the eb might have been released
	 * from memory and reading it back again means that flag would not be
	 * set (since it's a runtime flag, not persisted on disk).
	 *
	 * Using the flags below in the btree inode also makes us achieve the
	 * goal of AS_EIO/AS_ENOSPC when writepages() returns success, started
	 * writeback for all dirty pages and before filemap_fdatawait_range()
	 * is called, the writeback for all dirty pages had already finished
	 * with errors - because we were not using AS_EIO/AS_ENOSPC,
	 * filemap_fdatawait_range() would return success, as it could not know
	 * that writeback errors happened (the pages were no longer tagged for
	 * writeback).
	 */
	switch (eb->log_index) {
	case -1:
		set_bit(BTRFS_FS_BTREE_ERR, &eb->fs_info->flags);
		break;
	case 0:
		set_bit(BTRFS_FS_LOG1_ERR, &eb->fs_info->flags);
		break;
	case 1:
		set_bit(BTRFS_FS_LOG2_ERR, &eb->fs_info->flags);
		break;
	default:
		BUG(); /* unexpected, logic error */
	}
}

/*
 * Write-completion callback for extent buffer pages.
 *
 * For each segment: on bio error (or an already recorded eb write
 * error) clear the page's uptodate bit and record the failure via
 * set_btree_ioerr(), then end page writeback.  When the last page of
 * an eb completes (io_pages drops to zero) the eb's writeback state is
 * ended, waking any waiters.
 */
static void end_bio_extent_buffer_writepage(struct bio *bio)
{
	struct bio_vec *bvec;
	struct extent_buffer *eb;
	int done;
	struct bvec_iter_all iter_all;

	ASSERT(!bio_flagged(bio, BIO_CLONED));
	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;

		eb = (struct extent_buffer *)page->private;
		BUG_ON(!eb);
		done = atomic_dec_and_test(&eb->io_pages);

		if (bio->bi_status ||
		    test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
			ClearPageUptodate(page);
			set_btree_ioerr(page);
		}

		end_page_writeback(page);

		if (!done)
			continue;

		end_extent_buffer_writeback(eb);
	}

	bio_put(bio);
}

/*
 * Submit all pages of one extent buffer for writeback.
 *
 * Unused areas past nritems (key pointers in a node, item headers and
 * the gap before item data in a leaf) are zeroed first so stale memory
 * never reaches disk.  On a submission failure the failing page is
 * flagged via set_btree_ioerr(), the remaining io_pages count is
 * dropped in one go, the not-yet-submitted pages are cleaned and
 * unlocked, and -EIO is returned.
 */
static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
			struct writeback_control *wbc,
			struct extent_page_data *epd)
{
	u64 offset = eb->start;
	u32 nritems;
	int i, num_pages;
	unsigned long start, end;
	unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META;
	int ret = 0;

	clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags);
	num_pages = num_extent_pages(eb);
	atomic_set(&eb->io_pages, num_pages);

	/* set btree blocks beyond nritems with 0 to avoid stale content. */
	nritems = btrfs_header_nritems(eb);
	if (btrfs_header_level(eb) > 0) {
		end = btrfs_node_key_ptr_offset(nritems);

		memzero_extent_buffer(eb, end, eb->len - end);
	} else {
		/*
		 * leaf:
		 * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
		 */
		start = btrfs_item_nr_offset(nritems);
		end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(eb);
		memzero_extent_buffer(eb, start, end - start);
	}

	for (i = 0; i < num_pages; i++) {
		struct page *p = eb->pages[i];

		clear_page_dirty_for_io(p);
		set_page_writeback(p);
		ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc,
					 p, offset, PAGE_SIZE, 0,
					 &epd->bio,
					 end_bio_extent_buffer_writepage,
					 0, 0, 0, false);
		if (ret) {
			set_btree_ioerr(p);
			if (PageWriteback(p))
				end_page_writeback(p);
			if (atomic_sub_and_test(num_pages - i, &eb->io_pages))
				end_extent_buffer_writeback(eb);
			ret = -EIO;
			break;
		}
		offset += PAGE_SIZE;
		update_nr_written(wbc, 1);
		unlock_page(p);
	}

	if (unlikely(ret)) {
		/* Clean and unlock the pages we did not get to submit */
		for (; i < num_pages; i++) {
			struct page *p = eb->pages[i];
			clear_page_dirty_for_io(p);
			unlock_page(p);
		}
	}

	return ret;
}

/*
 * Walk the btree inode's dirty pages and write back the extent buffers
 * they belong to.  Works like write_cache_pages() but locks and submits
 * whole extent buffers (lock_extent_buffer_for_io() + write_one_eb())
 * instead of individual pages.  Returns 0 or a negative error; on error
 * (or when the fs is already in an error state) any pending write bio
 * is ended instead of submitted — see the comment before the final
 * flush for why submitting it would be dangerous.
 */
int btree_write_cache_pages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct extent_buffer *eb, *prev_eb = NULL;
	struct extent_page_data epd = {
		.bio = NULL,
		.extent_locked = 0,
		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
	};
	struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
	int ret = 0;
	int done = 0;
	int nr_to_write_done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	int scanned = 0;
	xa_mark_t tag;

	pagevec_init(&pvec);
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
		/*
		 * Start from the beginning does not need to cycle over the
		 * range, mark it as scanned.
		 */
		scanned = (index == 0);
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		scanned = 1;
	}
	if (wbc->sync_mode == WB_SYNC_ALL)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL)
		tag_pages_for_writeback(mapping, index, end);
	while (!done && !nr_to_write_done && (index <= end) &&
	       (nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
			tag))) {
		unsigned i;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (!PagePrivate(page))
				continue;

			/* Recheck under the lock that pins page->private */
			spin_lock(&mapping->private_lock);
			if (!PagePrivate(page)) {
				spin_unlock(&mapping->private_lock);
				continue;
			}

			eb = (struct extent_buffer *)page->private;

			/*
			 * Shouldn't happen and normally this would be a BUG_ON
			 * but no sense in crashing the users box for something
			 * we can survive anyway.
			 */
			if (WARN_ON(!eb)) {
				spin_unlock(&mapping->private_lock);
				continue;
			}

			/* Multiple pages of the same eb: handle it only once */
			if (eb == prev_eb) {
				spin_unlock(&mapping->private_lock);
				continue;
			}

			ret = atomic_inc_not_zero(&eb->refs);
			spin_unlock(&mapping->private_lock);
			if (!ret)
				continue;

			prev_eb = eb;
			ret = lock_extent_buffer_for_io(eb, &epd);
			if (!ret) {
				free_extent_buffer(eb);
				continue;
			} else if (ret < 0) {
				done = 1;
				free_extent_buffer(eb);
				break;
			}

			ret = write_one_eb(eb, wbc, &epd);
			if (ret) {
				done = 1;
				free_extent_buffer(eb);
				break;
			}
			free_extent_buffer(eb);

			/*
			 * The filesystem may choose to bump up nr_to_write.
			 * We have to make sure to honor the new nr_to_write
			 * at any time.
			 */
			nr_to_write_done = (wbc->sync_mode == WB_SYNC_NONE &&
					    wbc->nr_to_write <= 0);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = 1;
		index = 0;
		goto retry;
	}
	ASSERT(ret <= 0);
	if (ret < 0) {
		end_write_bio(&epd, ret);
		return ret;
	}
	/*
	 * If something went wrong, don't allow any metadata write bio to be
	 * submitted.
	 *
	 * This would prevent use-after-free if we had dirty pages not
	 * cleaned up, which can still happen by fuzzed images.
	 *
	 * - Bad extent tree
	 *   Allowing existing tree block to be allocated for other trees.
	 *
	 * - Log tree operations
	 *   Exiting tree blocks get allocated to log tree, bumps its
	 *   generation, then get cleaned in tree re-balance.
	 *   Such tree block will not be written back, since it's clean,
	 *   thus no WRITTEN flag set.
	 *   And after log writes back, this tree block is not traced by
	 *   any dirty extent_io_tree.
	 *
	 * - Offending tree block gets re-dirtied from its original owner
	 *   Since it has bumped generation, no WRITTEN flag, it can be
	 *   reused without COWing. This tree block will not be traced
	 *   by btrfs_transaction::dirty_pages.
	 *
	 * Now such dirty tree block will not be cleaned by any dirty
	 * extent io tree. Thus we don't want to submit such wild eb
	 * if the fs already has error.
	 */
	if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
		ret = flush_write_bio(&epd);
	} else {
		ret = -EROFS;
		end_write_bio(&epd, ret);
	}
	return ret;
}

/**
 * extent_write_cache_pages - walk the list of dirty pages of the given
 * address space and write all of them.
 * @mapping: address space structure to write
 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
 * @epd: holds context for the write, namely the bio being built
 *
 * If a page is already under I/O, write_cache_pages() skips it, even
 * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
 * and msync() need to guarantee that all the data which was dirty at the time
 * the call was made get new I/O started against them. If wbc->sync_mode is
 * WB_SYNC_ALL then we were called for data integrity and we must wait for
 * existing IO to complete.
 */
static int extent_write_cache_pages(struct address_space *mapping,
			     struct writeback_control *wbc,
			     struct extent_page_data *epd)
{
	struct inode *inode = mapping->host;
	int ret = 0;
	int done = 0;
	int nr_to_write_done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int range_whole = 0;
	int scanned = 0;
	xa_mark_t tag;

	/*
	 * We have to hold onto the inode so that ordered extents can do their
	 * work when the IO finishes. The alternative to this is failing to add
	 * an ordered extent if the igrab() fails there and that is a huge pain
	 * to deal with, so instead just hold onto the inode throughout the
	 * writepages operation. If it fails here we are freeing up the inode
	 * anyway and we'd rather not waste our time writing out stuff that is
	 * going to be truncated anyway.
	 */
	if (!igrab(inode))
		return 0;

	pagevec_init(&pvec);
	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
		/*
		 * Start from the beginning does not need to cycle over the
		 * range, mark it as scanned.
		 */
		scanned = (index == 0);
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		scanned = 1;
	}

	/*
	 * We do the tagged writepage as long as the snapshot flush bit is set
	 * and we are the first one who do the filemap_flush() on this inode.
	 *
	 * The nr_to_write == LONG_MAX is needed to make sure other flushers do
	 * not race in and drop the bit.
	 */
	if (range_whole && wbc->nr_to_write == LONG_MAX &&
	    test_and_clear_bit(BTRFS_INODE_SNAPSHOT_FLUSH,
			       &BTRFS_I(inode)->runtime_flags))
		wbc->tagged_writepages = 1;

	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && !nr_to_write_done && (index <= end) &&
			(nr_pages = pagevec_lookup_range_tag(&pvec, mapping,
						&index, end, tag))) {
		unsigned i;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			done_index = page->index + 1;
			/*
			 * At this point we hold neither the i_pages lock nor
			 * the page lock: the page may be truncated or
			 * invalidated (changing page->mapping to NULL),
			 * or even swizzled back from swapper_space to
			 * tmpfs file mapping
			 */
			if (!trylock_page(page)) {
				ret = flush_write_bio(epd);
				BUG_ON(ret < 0);
				lock_page(page);
			}

			if (unlikely(page->mapping != mapping)) {
				unlock_page(page);
				continue;
			}

			if (wbc->sync_mode != WB_SYNC_NONE) {
				if (PageWriteback(page)) {
					ret = flush_write_bio(epd);
					BUG_ON(ret < 0);
				}
				wait_on_page_writeback(page);
			}

			if (PageWriteback(page) ||
			    !clear_page_dirty_for_io(page)) {
				unlock_page(page);
				continue;
			}

			ret = __extent_writepage(page, wbc, epd);
			if (ret < 0) {
				done = 1;
				break;
			}

			/*
			 * the filesystem may choose to bump up nr_to_write.
			 * We have to make sure to honor the new nr_to_write
			 * at any time
			 */
			nr_to_write_done = wbc->nr_to_write <= 0;
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = 1;
		index = 0;

		/*
		 * If we're looping we could run into a page that is locked by a
		 * writer and that writer could be waiting on writeback for a
		 * page in our current bio, and thus deadlock, so flush the
		 * write bio here.
		 */
		ret = flush_write_bio(epd);
		if (!ret)
			goto retry;
	}

	if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
		mapping->writeback_index = done_index;

	btrfs_add_delayed_iput(inode);
	return ret;
}

/*
 * Write a single page and flush the resulting bio; on error the pending
 * write bio is ended instead.  Returns 0 or a negative error.
 */
int extent_write_full_page(struct page *page, struct writeback_control *wbc)
{
	int ret;
	struct extent_page_data epd = {
		.bio = NULL,
		.extent_locked = 0,
		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
	};

	ret = __extent_writepage(page, wbc, &epd);
	ASSERT(ret <= 0);
	if (ret < 0) {
		end_write_bio(&epd, ret);
		return ret;
	}

	ret = flush_write_bio(&epd);
	ASSERT(ret <= 0);
	return ret;
}

/*
 * Write out the page range [start, end].  epd.extent_locked = 1 and the
 * unlock-without-lock pattern below indicate the caller already holds
 * the page locks for this range; pages that are no longer dirty just
 * get their ordered extent finished and are unlocked.
 */
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
			      int mode)
{
	int ret = 0;
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	unsigned long nr_pages = (end - start + PAGE_SIZE) >>
		PAGE_SHIFT;

	struct extent_page_data epd = {
		.bio = NULL,
		.extent_locked = 1,
		.sync_io = mode == WB_SYNC_ALL,
	};
	struct writeback_control wbc_writepages = {
		.sync_mode	= mode,
		.nr_to_write	= nr_pages * 2,
		.range_start	= start,
		.range_end	= end + 1,
		/* We're called from an async helper function */
		.punt_to_cgroup	= 1,
		.no_cgroup_owner = 1,
	};

	wbc_attach_fdatawrite_inode(&wbc_writepages, inode);
	while (start <= end) {
		page = find_get_page(mapping, start >> PAGE_SHIFT);
		if (clear_page_dirty_for_io(page))
			ret = __extent_writepage(page, &wbc_writepages, &epd);
		else {
			btrfs_writepage_endio_finish_ordered(page, start,
						    start + PAGE_SIZE - 1, 1);
			unlock_page(page);
		}
		put_page(page);
		start += PAGE_SIZE;
	}

	ASSERT(ret <= 0);
	if (ret == 0)
		ret = flush_write_bio(&epd);
	else
		end_write_bio(&epd, ret);

	wbc_detach_inode(&wbc_writepages);
	return ret;
}

/*
 * ->writepages entry point for regular (data) inodes: run the cache
 * walk and then either flush or abort the accumulated write bio.
 */
int extent_writepages(struct address_space *mapping,
		      struct writeback_control *wbc)
{
	int ret = 0;
	struct extent_page_data epd = {
		.bio = NULL,
		.extent_locked = 0,
		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
	};

	ret = extent_write_cache_pages(mapping, wbc, &epd);
	ASSERT(ret <= 0);
	if (ret < 0) {
		end_write_bio(&epd, ret);
		return ret;
	}
	ret = flush_write_bio(&epd);
	return ret;
}

/*
 * Readahead entry point: pull pages from the readahead control in
 * batches, read each physically contiguous batch via
 * contiguous_readpages(), sharing one cached extent map and one bio
 * across batches where possible, then submit any leftover bio.
 */
void extent_readahead(struct readahead_control *rac)
{
	struct bio *bio = NULL;
	unsigned long bio_flags = 0;
	struct page *pagepool[16];
	struct extent_map *em_cached = NULL;
	u64 prev_em_start = (u64)-1;
	int nr;

	while ((nr = readahead_page_batch(rac, pagepool))) {
		u64 contig_start = page_offset(pagepool[0]);
		u64 contig_end = page_offset(pagepool[nr - 1]) + PAGE_SIZE - 1;

		ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);

		contiguous_readpages(pagepool, nr, contig_start, contig_end,
				&em_cached, &bio, &bio_flags, &prev_em_start);
	}

	if (em_cached)
		free_extent_map(em_cached);

	if (bio) {
		if (submit_one_bio(bio, 0, bio_flags))
			return;
	}
}

/*
 * basic invalidatepage code, this waits on any locked or writeback
 * ranges corresponding to the page, and then deletes any extent state
 * records from the tree
 */
int extent_invalidatepage(struct extent_io_tree *tree,
			  struct page *page, unsigned long offset)
{
	struct extent_state *cached_state = NULL;
	u64 start = page_offset(page);
	u64 end = start + PAGE_SIZE - 1;
	size_t blocksize = page->mapping->host->i_sb->s_blocksize;

	/* Only invalidate from the first fully-covered block onwards */
	start += ALIGN(offset, blocksize);
	if (start > end)
		return 0;

	lock_extent_bits(tree, start, end, &cached_state);
	wait_on_page_writeback(page);
	clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DELALLOC |
			 EXTENT_DO_ACCOUNTING, 1, 1, &cached_state);
	return 0;
}

/*
 * a helper for releasepage, this tests for areas of the page that
 * are locked or under IO and drops the related state bits if it is safe
 * to drop the page.
 */
static int try_release_extent_state(struct extent_io_tree *tree,
				    struct page *page, gfp_t mask)
{
	u64 start = page_offset(page);
	u64 end = start + PAGE_SIZE - 1;
	int ret = 1;

	if (test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
		ret = 0;
	} else {
		/*
		 * at this point we can safely clear everything except the
		 * locked bit and the nodatasum bit
		 */
		ret = __clear_extent_bit(tree, start, end,
				 ~(EXTENT_LOCKED | EXTENT_NODATASUM),
				 0, 0, NULL, mask, NULL);

		/* if clear_extent_bit failed for enomem reasons,
		 * we can't allow the release to continue.
		 */
		if (ret < 0)
			ret = 0;
		else
			ret = 1;
	}
	return ret;
}

/*
 * a helper for releasepage.
 * As long as there are no locked extents
 * in the range corresponding to the page, both state records and extent
 * map records are removed
 *
 * Returns 1 if the page may be released, 0 otherwise.
 */
int try_release_extent_mapping(struct page *page, gfp_t mask)
{
	struct extent_map *em;
	u64 start = page_offset(page);
	u64 end = start + PAGE_SIZE - 1;
	struct btrfs_inode *btrfs_inode = BTRFS_I(page->mapping->host);
	struct extent_io_tree *tree = &btrfs_inode->io_tree;
	struct extent_map_tree *map = &btrfs_inode->extent_tree;

	/* Only prune extent maps when we may block and the file is large */
	if (gfpflags_allow_blocking(mask) &&
	    page->mapping->host->i_size > SZ_16M) {
		u64 len;
		while (start <= end) {
			struct btrfs_fs_info *fs_info;
			u64 cur_gen;

			len = end - start + 1;
			write_lock(&map->lock);
			em = lookup_extent_mapping(map, start, len);
			if (!em) {
				write_unlock(&map->lock);
				break;
			}
			if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
			    em->start != start) {
				write_unlock(&map->lock);
				free_extent_map(em);
				break;
			}
			if (test_range_bit(tree, em->start,
					   extent_map_end(em) - 1,
					   EXTENT_LOCKED, 0, NULL))
				goto next;
			/*
			 * If it's not in the list of modified extents, used
			 * by a fast fsync, we can remove it. If it's being
			 * logged we can safely remove it since fsync took an
			 * extra reference on the em.
			 */
			if (list_empty(&em->list) ||
			    test_bit(EXTENT_FLAG_LOGGING, &em->flags))
				goto remove_em;
			/*
			 * If it's in the list of modified extents, remove it
			 * only if its generation is older than the current one,
			 * in which case we don't need it for a fast fsync.
			 * Otherwise don't remove it, we could be racing with an
			 * ongoing fast fsync that could miss the new extent.
			 */
			fs_info = btrfs_inode->root->fs_info;
			spin_lock(&fs_info->trans_lock);
			cur_gen = fs_info->generation;
			spin_unlock(&fs_info->trans_lock);
			if (em->generation >= cur_gen)
				goto next;
remove_em:
			/*
			 * We only remove extent maps that are not in the list of
			 * modified extents or that are in the list but with a
			 * generation lower than the current generation, so there
			 * is no need to set the full fsync flag on the inode (it
			 * hurts the fsync performance for workloads with a data
			 * size that exceeds or is close to the system's memory).
			 */
			remove_extent_mapping(map, em);
			/* once for the rb tree */
			free_extent_map(em);
next:
			start = extent_map_end(em);
			write_unlock(&map->lock);

			/* once for us */
			free_extent_map(em);

			cond_resched(); /* Allow large-extent preemption. */
		}
	}
	return try_release_extent_state(tree, page, mask);
}

/*
 * helper function for fiemap, which doesn't want to see any holes.
 * This maps until we find something past 'last'
 *
 * Returns NULL when only holes remain, or an ERR_PTR from the lookup.
 */
static struct extent_map *get_extent_skip_holes(struct btrfs_inode *inode,
						u64 offset, u64 last)
{
	u64 sectorsize = btrfs_inode_sectorsize(inode);
	struct extent_map *em;
	u64 len;

	if (offset >= last)
		return NULL;

	while (1) {
		len = last - offset;
		if (len == 0)
			break;
		len = ALIGN(len, sectorsize);
		em = btrfs_get_extent_fiemap(inode, offset, len);
		if (IS_ERR_OR_NULL(em))
			return em;

		/* if this isn't a hole return it */
		if (em->block_start != EXTENT_MAP_HOLE)
			return em;

		/* this is a hole, advance to the next extent */
		offset = extent_map_end(em);
		free_extent_map(em);
		if (offset >= last)
			break;
	}
	return NULL;
}

/*
 * To cache previous fiemap extent
 *
 * Will be used for merging fiemap extent
 */
struct fiemap_cache {
	u64 offset;	/* logical offset of the cached extent */
	u64 phys;	/* physical (disk) offset */
	u64 len;	/* length in bytes */
	u32 flags;	/* FIEMAP_EXTENT_* flags */
	bool cached;	/* true if the cache holds a valid extent */
};

/*
 * Helper to submit fiemap extent.
 *
 * Will try to merge current fiemap extent specified by @offset, @phys,
 * @len and @flags with cached one.
 * And only when we fail to merge, cached one will be submitted as
 * fiemap extent.
 *
 * Return value is the same as fiemap_fill_next_extent().
 */
static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
			      struct fiemap_cache *cache,
			      u64 offset, u64 phys, u64 len, u32 flags)
{
	int ret = 0;

	if (!cache->cached)
		goto assign;

	/*
	 * Sanity check, extent_fiemap() should have ensured that new
	 * fiemap extent won't overlap with cached one.
	 * Not recoverable.
	 *
	 * NOTE: Physical address can overlap, due to compression
	 */
	if (cache->offset + cache->len > offset) {
		WARN_ON(1);
		return -EINVAL;
	}

	/*
	 * Only merges fiemap extents if
	 * 1) Their logical addresses are continuous
	 *
	 * 2) Their physical addresses are continuous
	 *    So truly compressed (physical size smaller than logical size)
	 *    extents won't get merged with each other
	 *
	 * 3) Share same flags except FIEMAP_EXTENT_LAST
	 *    So regular extent won't get merged with prealloc extent
	 */
	if (cache->offset + cache->len == offset &&
	    cache->phys + cache->len == phys &&
	    (cache->flags & ~FIEMAP_EXTENT_LAST) ==
			(flags & ~FIEMAP_EXTENT_LAST)) {
		cache->len += len;
		cache->flags |= flags;
		goto try_submit_last;
	}

	/* Not mergeable, need to submit cached one */
	ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
				      cache->len, cache->flags);
	cache->cached = false;
	if (ret)
		return ret;
assign:
	cache->cached = true;
	cache->offset = offset;
	cache->phys = phys;
	cache->len = len;
	cache->flags = flags;
try_submit_last:
	/* The LAST flag means no further merging is possible, flush now */
	if (cache->flags & FIEMAP_EXTENT_LAST) {
		ret = fiemap_fill_next_extent(fieinfo, cache->offset,
				cache->phys, cache->len, cache->flags);
		cache->cached = false;
	}
	return ret;
}

/*
 * Emit last fiemap cache
 *
 * The last fiemap cache may still be cached in the following case:
 * 0		      4k		    8k
 * |<- Fiemap range ->|
 * |<------------  First extent ----------->|
 *
 * In this case, the first extent range will be cached but not emitted.
 * So we must emit it before ending extent_fiemap().
 */
static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
				  struct fiemap_cache *cache)
{
	int ret;

	if (!cache->cached)
		return 0;

	ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
				      cache->len, cache->flags);
	cache->cached = false;
	/* ret == 1 means the fiemap buffer is full, which is not an error */
	if (ret > 0)
		ret = 0;
	return ret;
}

/*
 * fiemap implementation: walk the extent maps of @inode over [start,
 * start + len) under the extent io lock, merge adjacent entries via the
 * fiemap_cache and report them through @fieinfo.
 *
 * Returns 0 on success or a negative errno.
 */
int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
		  u64 start, u64 len)
{
	int ret = 0;
	u64 off;
	u64 max = start + len;
	u32 flags = 0;
	u32 found_type;
	u64 last;
	u64 last_for_get_extent = 0;
	u64 disko = 0;
	u64 isize = i_size_read(&inode->vfs_inode);
	struct btrfs_key found_key;
	struct extent_map *em = NULL;
	struct extent_state *cached_state = NULL;
	struct btrfs_path *path;
	struct btrfs_root *root = inode->root;
	struct fiemap_cache cache = { 0 };
	struct ulist *roots;
	struct ulist *tmp_ulist;
	int end = 0;
	u64 em_start = 0;
	u64 em_len = 0;
	u64 em_end = 0;

	if (len == 0)
		return -EINVAL;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->leave_spinning = 1;

	/* ulists are scratch space for the shared-extent backref check */
	roots = ulist_alloc(GFP_KERNEL);
	tmp_ulist = ulist_alloc(GFP_KERNEL);
	if (!roots || !tmp_ulist) {
		ret = -ENOMEM;
		goto out_free_ulist;
	}

	/*
	 * We can't initialize that to 'start' as this could miss extents due
	 * to extent item merging
	 */
	off = 0;
	start = round_down(start, btrfs_inode_sectorsize(inode));
	len = round_up(max, btrfs_inode_sectorsize(inode)) - start;

	/*
	 * lookup the last file extent.  We're not using i_size here
	 * because there might be preallocation past i_size
	 */
	ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode), -1,
				       0);
	if (ret < 0) {
		goto out_free_ulist;
	} else {
		WARN_ON(!ret);
		if (ret == 1)
			ret = 0;
	}

	path->slots[0]--;
	btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
	found_type = found_key.type;

	/* No extents, but there might be delalloc bits */
	if (found_key.objectid != btrfs_ino(inode) ||
	    found_type != BTRFS_EXTENT_DATA_KEY) {
		/* have to trust i_size as the end */
		last = (u64)-1;
		last_for_get_extent = isize;
	} else {
		/*
		 * remember the start of the last extent.  There are a
		 * bunch of different factors that go into the length of the
		 * extent, so its much less complex to remember where it started
		 */
		last = found_key.offset;
		last_for_get_extent = last + 1;
	}
	btrfs_release_path(path);

	/*
	 * we might have some extents allocated but more delalloc past those
	 * extents.  so, we trust isize unless the start of the last extent is
	 * beyond isize
	 */
	if (last < isize) {
		last = (u64)-1;
		last_for_get_extent = isize;
	}

	lock_extent_bits(&inode->io_tree, start, start + len - 1,
			 &cached_state);

	em = get_extent_skip_holes(inode, start, last_for_get_extent);
	if (!em)
		goto out;
	if (IS_ERR(em)) {
		ret = PTR_ERR(em);
		goto out;
	}

	while (!end) {
		u64 offset_in_extent = 0;

		/* break if the extent we found is outside the range */
		if (em->start >= max || extent_map_end(em) < off)
			break;

		/*
		 * get_extent may return an extent that starts before our
		 * requested range.  We have to make sure the ranges
		 * we return to fiemap always move forward and don't
		 * overlap, so adjust the offsets here
		 */
		em_start = max(em->start, off);

		/*
		 * record the offset from the start of the extent
		 * for adjusting the disk offset below.  Only do this if the
		 * extent isn't compressed since our in ram offset may be past
		 * what we have actually allocated on disk.
		 */
		if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
			offset_in_extent = em_start - em->start;
		em_end = extent_map_end(em);
		em_len = em_end - em_start;
		flags = 0;
		if (em->block_start < EXTENT_MAP_LAST_BYTE)
			disko = em->block_start + offset_in_extent;
		else
			disko = 0;

		/*
		 * bump off for our next call to get_extent
		 */
		off = extent_map_end(em);
		if (off >= max)
			end = 1;

		if (em->block_start == EXTENT_MAP_LAST_BYTE) {
			end = 1;
			flags |= FIEMAP_EXTENT_LAST;
		} else if (em->block_start == EXTENT_MAP_INLINE) {
			flags |= (FIEMAP_EXTENT_DATA_INLINE |
				  FIEMAP_EXTENT_NOT_ALIGNED);
		} else if (em->block_start == EXTENT_MAP_DELALLOC) {
			flags |= (FIEMAP_EXTENT_DELALLOC |
				  FIEMAP_EXTENT_UNKNOWN);
		} else if (fieinfo->fi_extents_max) {
			u64 bytenr = em->block_start -
				(em->start - em->orig_start);

			/*
			 * As btrfs supports shared space, this information
			 * can be exported to userspace tools via
			 * flag FIEMAP_EXTENT_SHARED.  If fi_extents_max == 0
			 * then we're just getting a count and we can skip the
			 * lookup stuff.
			 */
			ret = btrfs_check_shared(root, btrfs_ino(inode),
						 bytenr, roots, tmp_ulist);
			if (ret < 0)
				goto out_free;
			if (ret)
				flags |= FIEMAP_EXTENT_SHARED;
			ret = 0;
		}
		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
			flags |= FIEMAP_EXTENT_ENCODED;
		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
			flags |= FIEMAP_EXTENT_UNWRITTEN;

		free_extent_map(em);
		em = NULL;
		if ((em_start >= last) || em_len == (u64)-1 ||
		   (last == (u64)-1 && isize <= em_end)) {
			flags |= FIEMAP_EXTENT_LAST;
			end = 1;
		}

		/* now scan forward to see if this is really the last extent.
		 */
		em = get_extent_skip_holes(inode, off, last_for_get_extent);
		if (IS_ERR(em)) {
			ret = PTR_ERR(em);
			goto out;
		}
		if (!em) {
			flags |= FIEMAP_EXTENT_LAST;
			end = 1;
		}
		ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
					 em_len, flags);
		if (ret) {
			/* ret == 1 means the fiemap buffer is full */
			if (ret == 1)
				ret = 0;
			goto out_free;
		}
	}
out_free:
	if (!ret)
		ret = emit_last_fiemap_cache(fieinfo, &cache);
	free_extent_map(em);
out:
	unlock_extent_cached(&inode->io_tree, start, start + len - 1,
			     &cached_state);

out_free_ulist:
	btrfs_free_path(path);
	ulist_free(roots);
	ulist_free(tmp_ulist);
	return ret;
}

/* Free only the extent_buffer structure itself, not its pages */
static void __free_extent_buffer(struct extent_buffer *eb)
{
	kmem_cache_free(extent_buffer_cache, eb);
}

/* Returns non-zero if the extent buffer has IO in flight or is dirty */
int extent_buffer_under_io(const struct extent_buffer *eb)
{
	return (atomic_read(&eb->io_pages) ||
		test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
		test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
}

/*
 * Release all pages attached to the extent buffer.
 */
static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
{
	int i;
	int num_pages;
	int mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);

	BUG_ON(extent_buffer_under_io(eb));

	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++) {
		struct page *page = eb->pages[i];

		if (!page)
			continue;
		if (mapped)
			spin_lock(&page->mapping->private_lock);
		/*
		 * We do this since we'll remove the pages after we've
		 * removed the eb from the radix tree, so we could race
		 * and have this page now attached to the new eb.  So
		 * only clear page_private if it's still connected to
		 * this eb.
		 */
		if (PagePrivate(page) &&
		    page->private == (unsigned long)eb) {
			BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
			BUG_ON(PageDirty(page));
			BUG_ON(PageWriteback(page));
			/*
			 * We need to make sure we haven't been attached
			 * to a new eb.
			 */
			detach_page_private(page);
		}

		if (mapped)
			spin_unlock(&page->mapping->private_lock);

		/* One for when we allocated the page */
		put_page(page);
	}
}

/*
 * Helper for releasing the extent buffer.
 */
static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
{
	btrfs_release_extent_buffer_pages(eb);
	btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list);
	__free_extent_buffer(eb);
}

/*
 * Allocate and initialize an extent_buffer structure (no pages attached
 * yet).  Allocation cannot fail because of __GFP_NOFAIL.
 */
static struct extent_buffer *
__alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
		      unsigned long len)
{
	struct extent_buffer *eb = NULL;

	eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS|__GFP_NOFAIL);
	eb->start = start;
	eb->len = len;
	eb->fs_info = fs_info;
	eb->bflags = 0;
	rwlock_init(&eb->lock);
	atomic_set(&eb->blocking_readers, 0);
	eb->blocking_writers = 0;
	eb->lock_recursed = false;
	init_waitqueue_head(&eb->write_lock_wq);
	init_waitqueue_head(&eb->read_lock_wq);

	btrfs_leak_debug_add(&fs_info->eb_leak_lock, &eb->leak_list,
			     &fs_info->allocated_ebs);

	spin_lock_init(&eb->refs_lock);
	atomic_set(&eb->refs, 1);
	atomic_set(&eb->io_pages, 0);

	/*
	 * Sanity checks, currently the maximum is 64k covered by 16x 4k pages
	 */
	BUILD_BUG_ON(BTRFS_MAX_METADATA_BLOCKSIZE
		> MAX_INLINE_EXTENT_BUFFER_SIZE);
	BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);

#ifdef CONFIG_BTRFS_DEBUG
	eb->spinning_writers = 0;
	atomic_set(&eb->spinning_readers, 0);
	atomic_set(&eb->read_locks, 0);
	eb->write_locks = 0;
#endif

	return eb;
}

/*
 * Make an unmapped, uptodate copy of @src, duplicating its page contents.
 * Returns NULL on allocation failure.
 */
struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
{
	int i;
	struct page *p;
	struct extent_buffer *new;
	int num_pages = num_extent_pages(src);

	new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
	if (new == NULL)
		return NULL;

	for (i = 0; i < num_pages; i++) {
		p = alloc_page(GFP_NOFS);
		if (!p) {
			/* Releases the pages allocated so far as well */
			btrfs_release_extent_buffer(new);
			return NULL;
		}
		attach_extent_buffer_page(new, p);
		WARN_ON(PageDirty(p));
		SetPageUptodate(p);
		new->pages[i] = p;
		copy_page(page_address(p), page_address(src->pages[i]));
	}

	set_bit(EXTENT_BUFFER_UPTODATE, &new->bflags);
	set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);

	return new;
}

/*
 * Allocate an unmapped extent buffer of @len bytes with freshly allocated
 * pages.  Returns NULL on page allocation failure.
 */
struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
						  u64 start, unsigned long len)
{
	struct extent_buffer *eb;
	int num_pages;
	int i;

	eb = __alloc_extent_buffer(fs_info, start, len);
	if (!eb)
		return NULL;

	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++) {
		eb->pages[i] = alloc_page(GFP_NOFS);
		if (!eb->pages[i])
			goto err;
	}
	set_extent_buffer_uptodate(eb);
	btrfs_set_header_nritems(eb, 0);
	set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);

	return eb;
err:
	/* Free the pages allocated before the failure, then the eb itself */
	for (; i > 0; i--)
		__free_page(eb->pages[i - 1]);
	__free_extent_buffer(eb);
	return NULL;
}

/* Convenience wrapper: dummy extent buffer sized to the fs nodesize */
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
						u64 start)
{
	return __alloc_dummy_extent_buffer(fs_info, start, fs_info->nodesize);
}

static void check_buffer_tree_ref(struct extent_buffer *eb)
{
	int refs;
	/*
	 * The TREE_REF bit is first set when the extent_buffer is added
	 * to the radix tree. It is also reset, if unset, when a new reference
	 * is created by find_extent_buffer.
	 *
	 * It is only cleared in two cases: freeing the last non-tree
	 * reference to the extent_buffer when its STALE bit is set or
	 * calling releasepage when the tree reference is the only reference.
	 *
	 * In both cases, care is taken to ensure that the extent_buffer's
	 * pages are not under io. However, releasepage can be concurrently
	 * called with creating new references, which is prone to race
	 * conditions between the calls to check_buffer_tree_ref in those
	 * codepaths and clearing TREE_REF in try_release_extent_buffer.
	 *
	 * The actual lifetime of the extent_buffer in the radix tree is
	 * adequately protected by the refcount, but the TREE_REF bit and
	 * its corresponding reference are not. To protect against this
	 * class of races, we call check_buffer_tree_ref from the codepaths
	 * which trigger io after they set eb->io_pages. Note that once io is
	 * initiated, TREE_REF can no longer be cleared, so that is the
	 * moment at which any such race is best fixed.
	 */
	refs = atomic_read(&eb->refs);
	/* Fast path: tree ref already held and at least one other ref */
	if (refs >= 2 && test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
		return;

	/* Take refs_lock so the flag and the refcount change atomically */
	spin_lock(&eb->refs_lock);
	if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
		atomic_inc(&eb->refs);
	spin_unlock(&eb->refs_lock);
}

/*
 * Mark all of the eb's pages accessed for page reclaim purposes, except
 * @accessed (which the caller has already touched; may be NULL).  Also
 * re-asserts the tree reference via check_buffer_tree_ref.
 */
static void mark_extent_buffer_accessed(struct extent_buffer *eb,
		struct page *accessed)
{
	int num_pages, i;

	check_buffer_tree_ref(eb);

	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++) {
		struct page *p = eb->pages[i];

		if (p != accessed)
			mark_page_accessed(p);
	}
}

/*
 * Look up an extent buffer at @start in the buffer radix tree and return
 * it with an elevated reference, or NULL if not present (or already on its
 * way to being freed, i.e. refs dropped to zero).
 */
struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
					 u64 start)
{
	struct extent_buffer *eb;

	rcu_read_lock();
	eb = radix_tree_lookup(&fs_info->buffer_radix,
			       start >> PAGE_SHIFT);
	if (eb && atomic_inc_not_zero(&eb->refs)) {
		rcu_read_unlock();
		/*
		 * Lock our eb's refs_lock to avoid races with
		 * free_extent_buffer. When we get our eb it might be flagged
		 * with EXTENT_BUFFER_STALE and another task running
		 * free_extent_buffer might have seen that flag set,
		 * eb->refs == 2, that the buffer isn't under IO (dirty and
		 * writeback flags not set) and it's still in the tree (flag
		 * EXTENT_BUFFER_TREE_REF set), therefore being in the process
		 * of decrementing the extent buffer's reference count twice.
		 * So here we could race and increment the eb's reference count,
		 * clear its stale flag, mark it as dirty and drop our reference
		 * before the other task finishes executing free_extent_buffer,
		 * which would later result in an attempt to free an extent
		 * buffer that is dirty.
		/* Raced with insertion: either take the winner or retry */
		if (exists)
			goto free_eb;
		else
			goto again;
	}
	check_buffer_tree_ref(eb);
	set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);

	return eb;
free_eb:
	btrfs_release_extent_buffer(eb);
	return exists;
}
#endif

/*
 * Find or create the extent buffer for tree block @start (nodesize bytes).
 * Returns the buffer with an elevated reference, or an ERR_PTR on
 * misaligned @start (-EINVAL) or allocation failure (-ENOMEM).
 */
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
					  u64 start)
{
	unsigned long len = fs_info->nodesize;
	int num_pages;
	int i;
	unsigned long index = start >> PAGE_SHIFT;
	struct extent_buffer *eb;
	struct extent_buffer *exists = NULL;
	struct page *p;
	struct address_space *mapping = fs_info->btree_inode->i_mapping;
	int uptodate = 1;
	int ret;

	if (!IS_ALIGNED(start, fs_info->sectorsize)) {
		btrfs_err(fs_info, "bad tree block start %llu", start);
		return ERR_PTR(-EINVAL);
	}

	eb = find_extent_buffer(fs_info, start);
	if (eb)
		return eb;

	eb = __alloc_extent_buffer(fs_info, start, len);
	if (!eb)
		return ERR_PTR(-ENOMEM);

	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++, index++) {
		/* Pages come back locked; kept locked until radix insert */
		p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
		if (!p) {
			exists = ERR_PTR(-ENOMEM);
			goto free_eb;
		}

		spin_lock(&mapping->private_lock);
		if (PagePrivate(p)) {
			/*
			 * We could have already allocated an eb for this page
			 * and attached one so lets see if we can get a ref on
			 * the existing eb, and if we can we know it's good and
			 * we can just return that one, else we know we can just
			 * overwrite page->private.
			 */
			exists = (struct extent_buffer *)p->private;
			if (atomic_inc_not_zero(&exists->refs)) {
				spin_unlock(&mapping->private_lock);
				unlock_page(p);
				put_page(p);
				mark_extent_buffer_accessed(exists, p);
				goto free_eb;
			}
			exists = NULL;

			/*
			 * Do this so attach doesn't complain and we need to
			 * drop the ref the old guy had.
			 */
			ClearPagePrivate(p);
			WARN_ON(PageDirty(p));
			put_page(p);
		}
		attach_extent_buffer_page(eb, p);
		spin_unlock(&mapping->private_lock);
		WARN_ON(PageDirty(p));
		eb->pages[i] = p;
		if (!PageUptodate(p))
			uptodate = 0;

		/*
		 * We can't unlock the pages just yet since the extent buffer
		 * hasn't been properly inserted in the radix tree, this
		 * opens a race with btree_releasepage which can free a page
		 * while we are still filling in all pages for the buffer and
		 * we could crash.
		 */
	}
	if (uptodate)
		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
again:
	ret = radix_tree_preload(GFP_NOFS);
	if (ret) {
		exists = ERR_PTR(ret);
		goto free_eb;
	}

	spin_lock(&fs_info->buffer_lock);
	ret = radix_tree_insert(&fs_info->buffer_radix,
				start >> PAGE_SHIFT, eb);
	spin_unlock(&fs_info->buffer_lock);
	radix_tree_preload_end();
	if (ret == -EEXIST) {
		/* Someone beat us to the insert; use theirs or retry */
		exists = find_extent_buffer(fs_info, start);
		if (exists)
			goto free_eb;
		else
			goto again;
	}
	/* add one reference for the tree */
	check_buffer_tree_ref(eb);
	set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);

	/*
	 * Now it's safe to unlock the pages because any calls to
	 * btree_releasepage will correctly detect that a page belongs to a
	 * live buffer and won't free them prematurely.
	 */
	for (i = 0; i < num_pages; i++)
		unlock_page(eb->pages[i]);
	return eb;

free_eb:
	WARN_ON(!atomic_dec_and_test(&eb->refs));
	for (i = 0; i < num_pages; i++) {
		if (eb->pages[i])
			unlock_page(eb->pages[i]);
	}

	btrfs_release_extent_buffer(eb);
	return exists;
}

/* RCU callback: final free of the eb structure after a grace period */
static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
{
	struct extent_buffer *eb =
			container_of(head, struct extent_buffer, rcu_head);

	__free_extent_buffer(eb);
}

/*
 * Drop one reference; if it was the last, remove the eb from the radix
 * tree, release its pages and free it (via RCU for mapped buffers).
 * Called with eb->refs_lock held, always released on return.
 * Returns 1 if the buffer was freed, 0 otherwise.
 */
static int release_extent_buffer(struct extent_buffer *eb)
	__releases(&eb->refs_lock)
{
	lockdep_assert_held(&eb->refs_lock);

	WARN_ON(atomic_read(&eb->refs) == 0);
	if (atomic_dec_and_test(&eb->refs)) {
		if (test_and_clear_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags)) {
			struct btrfs_fs_info *fs_info = eb->fs_info;

			spin_unlock(&eb->refs_lock);

			spin_lock(&fs_info->buffer_lock);
			radix_tree_delete(&fs_info->buffer_radix,
					  eb->start >> PAGE_SHIFT);
			spin_unlock(&fs_info->buffer_lock);
		} else {
			spin_unlock(&eb->refs_lock);
		}

		btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list);
		/* Should be safe to release our pages at this point */
		btrfs_release_extent_buffer_pages(eb);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
		/* Unmapped ebs never went through RCU-visible lookups */
		if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags))) {
			__free_extent_buffer(eb);
			return 1;
		}
#endif
		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
		return 1;
	}
	spin_unlock(&eb->refs_lock);

	return 0;
}

/*
 * Drop one reference to @eb (NULL is a no-op).  When the refcount is high
 * enough that this cannot be the reference that triggers teardown, the
 * decrement is done locklessly via cmpxchg; otherwise fall through to the
 * locked slow path, which also strips a STALE buffer's tree reference.
 */
void free_extent_buffer(struct extent_buffer *eb)
{
	int refs;
	int old;
	if (!eb)
		return;

	while (1) {
		refs = atomic_read(&eb->refs);
		/*
		 * Thresholds below which we must take refs_lock: <= 3 for
		 * mapped buffers (page refs + tree ref may be in play),
		 * == 1 for unmapped ones.
		 */
		if ((!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) && refs <= 3)
		    || (test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags) &&
			refs == 1))
			break;
		old = atomic_cmpxchg(&eb->refs, refs, refs - 1);
		if (old == refs)
			return;
	}

	spin_lock(&eb->refs_lock);
	/* Stale, idle buffer still holding its tree ref: drop that too */
	if (atomic_read(&eb->refs) == 2 &&
	    test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
	    !extent_buffer_under_io(eb) &&
	    test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
		atomic_dec(&eb->refs);

	/*
	 * I know this is terrible, but it's temporary until we stop tracking
	 * the uptodate bits and such for the extent buffers.
	 */
	release_extent_buffer(eb);
}

/*
 * Mark @eb stale and drop one reference, releasing the tree reference as
 * well when the buffer is idle and only the tree still holds it.
 */
void free_extent_buffer_stale(struct extent_buffer *eb)
{
	if (!eb)
		return;

	spin_lock(&eb->refs_lock);
	set_bit(EXTENT_BUFFER_STALE, &eb->bflags);

	if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
	    test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
		atomic_dec(&eb->refs);
	release_extent_buffer(eb);
}

/*
 * Clear the dirty state of every page of @eb, including the page cache
 * DIRTY tag, so the pages are no longer picked up by writeback.
 */
void clear_extent_buffer_dirty(const struct extent_buffer *eb)
{
	int i;
	int num_pages;
	struct page *page;

	num_pages = num_extent_pages(eb);

	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (!PageDirty(page))
			continue;

		lock_page(page);
		WARN_ON(!PagePrivate(page));

		clear_page_dirty_for_io(page);
		xa_lock_irq(&page->mapping->i_pages);
		/* Only clear the tag if the page really went clean */
		if (!PageDirty(page))
			__xa_clear_mark(&page->mapping->i_pages,
					page_index(page), PAGECACHE_TAG_DIRTY);
		xa_unlock_irq(&page->mapping->i_pages);
		ClearPageError(page);
		unlock_page(page);
	}
	WARN_ON(atomic_read(&eb->refs) == 0);
}

/*
 * Mark @eb dirty, setting every page dirty if the buffer wasn't dirty
 * already.  Returns the previous dirty state.
 */
bool set_extent_buffer_dirty(struct extent_buffer *eb)
{
	int i;
	int num_pages;
	bool was_dirty;

	check_buffer_tree_ref(eb);

	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);

	num_pages = num_extent_pages(eb);
	WARN_ON(atomic_read(&eb->refs) == 0);
	WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));

	if (!was_dirty)
		for (i = 0; i < num_pages; i++)
			set_page_dirty(eb->pages[i]);

#ifdef CONFIG_BTRFS_DEBUG
	for (i = 0; i < num_pages; i++)
		ASSERT(PageDirty(eb->pages[i]));
#endif

	return was_dirty;
}

/* Clear the uptodate bit on the eb and on each of its (present) pages */
void clear_extent_buffer_uptodate(struct extent_buffer *eb)
{
	int i;
	struct page *page;
	int num_pages;

	clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (page)
			ClearPageUptodate(page);
	}
}

/* Set the uptodate bit on the eb and on each of its pages */
void set_extent_buffer_uptodate(struct extent_buffer *eb)
{
	int i;
	struct page *page;
	int num_pages;

	set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		SetPageUptodate(page);
	}
}

/*
 * Read the eb's pages from disk if they are not uptodate.  @wait selects
 * WAIT_NONE (trylock, bail on contention) vs WAIT_COMPLETE (block and
 * verify).  Returns 0 on success, a negative errno on submission failure,
 * or -EIO if a page fails to become uptodate under WAIT_COMPLETE.
 */
int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
{
	int i;
	struct page *page;
	int err;
	int ret = 0;
	int locked_pages = 0;
	int all_uptodate = 1;
	int num_pages;
	unsigned long num_reads = 0;
	struct bio *bio = NULL;
	unsigned long bio_flags = 0;

	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
		return 0;

	num_pages = num_extent_pages(eb);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (wait == WAIT_NONE) {
			if (!trylock_page(page))
				goto unlock_exit;
		} else {
			lock_page(page);
		}
		locked_pages++;
	}
	/*
	 * We need to firstly lock all pages to make sure that
	 * the uptodate bit of our pages won't be affected by
	 * clear_extent_buffer_uptodate().
	 */
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		if (!PageUptodate(page)) {
			num_reads++;
			all_uptodate = 0;
		}
	}

	if (all_uptodate) {
		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
		goto unlock_exit;
	}

	clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
	eb->read_mirror = 0;
	atomic_set(&eb->io_pages, num_reads);
	/*
	 * It is possible for releasepage to clear the TREE_REF bit before we
	 * set io_pages. See check_buffer_tree_ref for a more detailed comment.
	 */
	check_buffer_tree_ref(eb);
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];

		if (!PageUptodate(page)) {
			/* A previous submission failed: skip the rest */
			if (ret) {
				atomic_dec(&eb->io_pages);
				unlock_page(page);
				continue;
			}

			ClearPageError(page);
			err = submit_extent_page(REQ_OP_READ | REQ_META, NULL,
					 page, page_offset(page), PAGE_SIZE, 0,
					 &bio, end_bio_extent_readpage,
					 mirror_num, 0, 0, false);
			if (err) {
				/*
				 * We failed to submit the bio so it's the
				 * caller's responsibility to perform cleanup
				 * i.e unlock page/set error bit.
				 */
				ret = err;
				SetPageError(page);
				unlock_page(page);
				atomic_dec(&eb->io_pages);
			}
		} else {
			unlock_page(page);
		}
	}

	if (bio) {
		err = submit_one_bio(bio, mirror_num, bio_flags);
		if (err)
			return err;
	}

	if (ret || wait != WAIT_COMPLETE)
		return ret;

	/* Wait for the read completions and check the result */
	for (i = 0; i < num_pages; i++) {
		page = eb->pages[i];
		wait_on_page_locked(page);
		if (!PageUptodate(page))
			ret = -EIO;
	}

	return ret;

unlock_exit:
	while (locked_pages > 0) {
		locked_pages--;
		page = eb->pages[locked_pages];
		unlock_page(page);
	}
	return ret;
}

/*
 * Report an out-of-range eb access.  Always returns true so callers can
 * use it directly as the error result of a range check.
 */
static bool report_eb_range(const struct extent_buffer *eb, unsigned long start,
			    unsigned long len)
{
	btrfs_warn(eb->fs_info,
	"access to eb bytenr %llu len %lu out of range start %lu len %lu",
		   eb->start, eb->len, start, len);
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); 56048c2ecf20Sopenharmony_ci 56058c2ecf20Sopenharmony_ci return true; 56068c2ecf20Sopenharmony_ci} 56078c2ecf20Sopenharmony_ci 56088c2ecf20Sopenharmony_ci/* 56098c2ecf20Sopenharmony_ci * Check if the [start, start + len) range is valid before reading/writing 56108c2ecf20Sopenharmony_ci * the eb. 56118c2ecf20Sopenharmony_ci * NOTE: @start and @len are offset inside the eb, not logical address. 56128c2ecf20Sopenharmony_ci * 56138c2ecf20Sopenharmony_ci * Caller should not touch the dst/src memory if this function returns error. 56148c2ecf20Sopenharmony_ci */ 56158c2ecf20Sopenharmony_cistatic inline int check_eb_range(const struct extent_buffer *eb, 56168c2ecf20Sopenharmony_ci unsigned long start, unsigned long len) 56178c2ecf20Sopenharmony_ci{ 56188c2ecf20Sopenharmony_ci unsigned long offset; 56198c2ecf20Sopenharmony_ci 56208c2ecf20Sopenharmony_ci /* start, start + len should not go beyond eb->len nor overflow */ 56218c2ecf20Sopenharmony_ci if (unlikely(check_add_overflow(start, len, &offset) || offset > eb->len)) 56228c2ecf20Sopenharmony_ci return report_eb_range(eb, start, len); 56238c2ecf20Sopenharmony_ci 56248c2ecf20Sopenharmony_ci return false; 56258c2ecf20Sopenharmony_ci} 56268c2ecf20Sopenharmony_ci 56278c2ecf20Sopenharmony_civoid read_extent_buffer(const struct extent_buffer *eb, void *dstv, 56288c2ecf20Sopenharmony_ci unsigned long start, unsigned long len) 56298c2ecf20Sopenharmony_ci{ 56308c2ecf20Sopenharmony_ci size_t cur; 56318c2ecf20Sopenharmony_ci size_t offset; 56328c2ecf20Sopenharmony_ci struct page *page; 56338c2ecf20Sopenharmony_ci char *kaddr; 56348c2ecf20Sopenharmony_ci char *dst = (char *)dstv; 56358c2ecf20Sopenharmony_ci unsigned long i = start >> PAGE_SHIFT; 56368c2ecf20Sopenharmony_ci 56378c2ecf20Sopenharmony_ci if (check_eb_range(eb, start, len)) { 56388c2ecf20Sopenharmony_ci /* 56398c2ecf20Sopenharmony_ci * Invalid range hit, reset the memory, so callers won't get 56408c2ecf20Sopenharmony_ci * some 
random garbage for their uninitialzed memory. 56418c2ecf20Sopenharmony_ci */ 56428c2ecf20Sopenharmony_ci memset(dstv, 0, len); 56438c2ecf20Sopenharmony_ci return; 56448c2ecf20Sopenharmony_ci } 56458c2ecf20Sopenharmony_ci 56468c2ecf20Sopenharmony_ci offset = offset_in_page(start); 56478c2ecf20Sopenharmony_ci 56488c2ecf20Sopenharmony_ci while (len > 0) { 56498c2ecf20Sopenharmony_ci page = eb->pages[i]; 56508c2ecf20Sopenharmony_ci 56518c2ecf20Sopenharmony_ci cur = min(len, (PAGE_SIZE - offset)); 56528c2ecf20Sopenharmony_ci kaddr = page_address(page); 56538c2ecf20Sopenharmony_ci memcpy(dst, kaddr + offset, cur); 56548c2ecf20Sopenharmony_ci 56558c2ecf20Sopenharmony_ci dst += cur; 56568c2ecf20Sopenharmony_ci len -= cur; 56578c2ecf20Sopenharmony_ci offset = 0; 56588c2ecf20Sopenharmony_ci i++; 56598c2ecf20Sopenharmony_ci } 56608c2ecf20Sopenharmony_ci} 56618c2ecf20Sopenharmony_ci 56628c2ecf20Sopenharmony_ciint read_extent_buffer_to_user_nofault(const struct extent_buffer *eb, 56638c2ecf20Sopenharmony_ci void __user *dstv, 56648c2ecf20Sopenharmony_ci unsigned long start, unsigned long len) 56658c2ecf20Sopenharmony_ci{ 56668c2ecf20Sopenharmony_ci size_t cur; 56678c2ecf20Sopenharmony_ci size_t offset; 56688c2ecf20Sopenharmony_ci struct page *page; 56698c2ecf20Sopenharmony_ci char *kaddr; 56708c2ecf20Sopenharmony_ci char __user *dst = (char __user *)dstv; 56718c2ecf20Sopenharmony_ci unsigned long i = start >> PAGE_SHIFT; 56728c2ecf20Sopenharmony_ci int ret = 0; 56738c2ecf20Sopenharmony_ci 56748c2ecf20Sopenharmony_ci WARN_ON(start > eb->len); 56758c2ecf20Sopenharmony_ci WARN_ON(start + len > eb->start + eb->len); 56768c2ecf20Sopenharmony_ci 56778c2ecf20Sopenharmony_ci offset = offset_in_page(start); 56788c2ecf20Sopenharmony_ci 56798c2ecf20Sopenharmony_ci while (len > 0) { 56808c2ecf20Sopenharmony_ci page = eb->pages[i]; 56818c2ecf20Sopenharmony_ci 56828c2ecf20Sopenharmony_ci cur = min(len, (PAGE_SIZE - offset)); 56838c2ecf20Sopenharmony_ci kaddr = page_address(page); 
56848c2ecf20Sopenharmony_ci if (copy_to_user_nofault(dst, kaddr + offset, cur)) { 56858c2ecf20Sopenharmony_ci ret = -EFAULT; 56868c2ecf20Sopenharmony_ci break; 56878c2ecf20Sopenharmony_ci } 56888c2ecf20Sopenharmony_ci 56898c2ecf20Sopenharmony_ci dst += cur; 56908c2ecf20Sopenharmony_ci len -= cur; 56918c2ecf20Sopenharmony_ci offset = 0; 56928c2ecf20Sopenharmony_ci i++; 56938c2ecf20Sopenharmony_ci } 56948c2ecf20Sopenharmony_ci 56958c2ecf20Sopenharmony_ci return ret; 56968c2ecf20Sopenharmony_ci} 56978c2ecf20Sopenharmony_ci 56988c2ecf20Sopenharmony_ciint memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv, 56998c2ecf20Sopenharmony_ci unsigned long start, unsigned long len) 57008c2ecf20Sopenharmony_ci{ 57018c2ecf20Sopenharmony_ci size_t cur; 57028c2ecf20Sopenharmony_ci size_t offset; 57038c2ecf20Sopenharmony_ci struct page *page; 57048c2ecf20Sopenharmony_ci char *kaddr; 57058c2ecf20Sopenharmony_ci char *ptr = (char *)ptrv; 57068c2ecf20Sopenharmony_ci unsigned long i = start >> PAGE_SHIFT; 57078c2ecf20Sopenharmony_ci int ret = 0; 57088c2ecf20Sopenharmony_ci 57098c2ecf20Sopenharmony_ci if (check_eb_range(eb, start, len)) 57108c2ecf20Sopenharmony_ci return -EINVAL; 57118c2ecf20Sopenharmony_ci 57128c2ecf20Sopenharmony_ci offset = offset_in_page(start); 57138c2ecf20Sopenharmony_ci 57148c2ecf20Sopenharmony_ci while (len > 0) { 57158c2ecf20Sopenharmony_ci page = eb->pages[i]; 57168c2ecf20Sopenharmony_ci 57178c2ecf20Sopenharmony_ci cur = min(len, (PAGE_SIZE - offset)); 57188c2ecf20Sopenharmony_ci 57198c2ecf20Sopenharmony_ci kaddr = page_address(page); 57208c2ecf20Sopenharmony_ci ret = memcmp(ptr, kaddr + offset, cur); 57218c2ecf20Sopenharmony_ci if (ret) 57228c2ecf20Sopenharmony_ci break; 57238c2ecf20Sopenharmony_ci 57248c2ecf20Sopenharmony_ci ptr += cur; 57258c2ecf20Sopenharmony_ci len -= cur; 57268c2ecf20Sopenharmony_ci offset = 0; 57278c2ecf20Sopenharmony_ci i++; 57288c2ecf20Sopenharmony_ci } 57298c2ecf20Sopenharmony_ci return ret; 
}

/*
 * Write the chunk tree UUID (BTRFS_FSID_SIZE bytes from @srcv) into the
 * btrfs_header that lives at the start of the eb's first page.
 */
void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
		const void *srcv)
{
	char *kaddr;

	WARN_ON(!PageUptodate(eb->pages[0]));
	kaddr = page_address(eb->pages[0]);
	memcpy(kaddr + offsetof(struct btrfs_header, chunk_tree_uuid), srcv,
			BTRFS_FSID_SIZE);
}

/*
 * Write the filesystem UUID (BTRFS_FSID_SIZE bytes from @srcv) into the
 * btrfs_header that lives at the start of the eb's first page.
 */
void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *srcv)
{
	char *kaddr;

	WARN_ON(!PageUptodate(eb->pages[0]));
	kaddr = page_address(eb->pages[0]);
	memcpy(kaddr + offsetof(struct btrfs_header, fsid), srcv,
			BTRFS_FSID_SIZE);
}

/*
 * Copy @len bytes from the kernel buffer @srcv into the extent buffer at
 * offset @start, page by page.  Silently returns on an out-of-range
 * [start, start + len) (check_eb_range() already warned).
 */
void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
			 unsigned long start, unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	char *src = (char *)srcv;
	unsigned long i = start >> PAGE_SHIFT;

	if (check_eb_range(eb, start, len))
		return;

	offset = offset_in_page(start);

	while (len > 0) {
		page = eb->pages[i];
		WARN_ON(!PageUptodate(page));

		/* Write at most up to the end of the current page */
		cur = min(len, PAGE_SIZE - offset);
		kaddr = page_address(page);
		memcpy(kaddr + offset, src, cur);

		src += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}

/*
 * Zero @len bytes of the extent buffer starting at offset @start.  Silently
 * returns on an out-of-range request (check_eb_range() already warned).
 */
void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
		unsigned long len)
{
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	unsigned long i = start >> PAGE_SHIFT;

	if (check_eb_range(eb, start, len))
		return;

	offset = offset_in_page(start);

	while (len > 0) {
		page = eb->pages[i];
		WARN_ON(!PageUptodate(page));

		cur = min(len, PAGE_SIZE - offset);
		kaddr = page_address(page);
		memset(kaddr + offset, 0, cur);

		len -= cur;
		offset = 0;
		i++;
	}
}

/*
 * Copy the entire content of @src into @dst by copying whole backing pages.
 * Both extent buffers must have the same length (asserted below).
 */
void copy_extent_buffer_full(const struct extent_buffer *dst,
			     const struct extent_buffer *src)
{

	int i;
	int num_pages;

	ASSERT(dst->len == src->len);

	/* Whole-page copies; no sub-page offsets are involved here. */
	num_pages = num_extent_pages(dst);
	for (i = 0; i < num_pages; i++)
		copy_page(page_address(dst->pages[i]),
				page_address(src->pages[i]));
}

/*
 * Copy @len bytes from @src at offset @src_offset into @dst at offset
 * @dst_offset.  Each destination page chunk is filled via
 * read_extent_buffer() so the source side may itself straddle pages.
 * Silently returns if either range is out of bounds (already warned).
 */
void copy_extent_buffer(const struct extent_buffer *dst,
			const struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len)
{
	u64 dst_len = dst->len;
	size_t cur;
	size_t offset;
	struct page *page;
	char *kaddr;
	unsigned long i = dst_offset >> PAGE_SHIFT;

	if (check_eb_range(dst, dst_offset, len) ||
	    check_eb_range(src, src_offset, len))
		return;

	WARN_ON(src->len != dst_len);

	offset = offset_in_page(dst_offset);

	while (len > 0) {
		page = dst->pages[i];
		WARN_ON(!PageUptodate(page));

		/* Bound the chunk by the end of the current destination page */
		cur = min(len, (unsigned long)(PAGE_SIZE - offset));

		kaddr = page_address(page);
		read_extent_buffer(src, kaddr + offset, src_offset, cur);

		src_offset += cur;
		len -= cur;
		offset = 0;
		i++;
	}
}

/*
 * eb_bitmap_offset() - calculate the page and offset of the byte containing the
 * given bit number
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @nr: bit number
 * @page_index: return index of the page in the extent buffer that contains the
 *	given bit number
 * @page_offset: return offset into the page given by page_index
 *
 * This helper hides the ugliness of finding the byte in an extent buffer which
 * contains a given bit.
 */
static inline void eb_bitmap_offset(const struct extent_buffer *eb,
				    unsigned long start, unsigned long nr,
				    unsigned long *page_index,
				    size_t *page_offset)
{
	size_t byte_offset = BIT_BYTE(nr);
	size_t offset;

	/*
	 * The byte we want is the offset of the extent buffer + the offset of
	 * the bitmap item in the extent buffer + the offset of the byte in the
	 * bitmap item.
	 */
	offset = start + byte_offset;

	*page_index = offset >> PAGE_SHIFT;
	*page_offset = offset_in_page(offset);
}

/**
 * extent_buffer_test_bit - determine whether a bit in a bitmap item is set
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @nr: bit number to test
 *
 * Return 1 if the bit is set, 0 otherwise.
 */
int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
			   unsigned long nr)
{
	u8 *kaddr;
	struct page *page;
	unsigned long i;
	size_t offset;

	eb_bitmap_offset(eb, start, nr, &i, &offset);
	page = eb->pages[i];
	WARN_ON(!PageUptodate(page));
	kaddr = page_address(page);
	/* Extract the single bit from the containing byte */
	return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
}

/**
 * extent_buffer_bitmap_set - set an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit
 * @len: number of bits to set
 */
void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
			      unsigned long pos, unsigned long len)
{
	u8 *kaddr;
	struct page *page;
	unsigned long i;
	size_t offset;
	const unsigned int size = pos + len;
	/* Bits remaining in the (possibly partial) first byte */
	int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);

	eb_bitmap_offset(eb, start, pos, &i, &offset);
	page = eb->pages[i];
	WARN_ON(!PageUptodate(page));
	kaddr = page_address(page);

	/* Set whole bytes (first byte may be masked), crossing pages as needed */
	while (len >= bits_to_set) {
		kaddr[offset] |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = BITS_PER_BYTE;
		mask_to_set = ~0;
		if (++offset >= PAGE_SIZE && len > 0) {
			/* Moved past the end of this page, advance to the next */
			offset = 0;
			page = eb->pages[++i];
			WARN_ON(!PageUptodate(page));
			kaddr = page_address(page);
		}
	}
	if (len) {
		/* Trailing partial byte: mask off bits past the end */
		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
		kaddr[offset] |= mask_to_set;
	}
}


/**
 * extent_buffer_bitmap_clear - clear an area of a bitmap
 * @eb: the extent buffer
 * @start: offset of the bitmap item in the extent buffer
 * @pos: bit number of the first bit
 * @len: number of bits to clear
 */
void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
				unsigned long start, unsigned long pos,
				unsigned long len)
{
	u8 *kaddr;
	struct page *page;
	unsigned long i;
	size_t offset;
	const unsigned int size = pos + len;
	/* Bits remaining in the (possibly partial) first byte */
	int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
	u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);

	eb_bitmap_offset(eb, start, pos, &i, &offset);
	page = eb->pages[i];
	WARN_ON(!PageUptodate(page));
	kaddr = page_address(page);

	/* Mirror image of extent_buffer_bitmap_set(), clearing instead */
	while (len >= bits_to_clear) {
		kaddr[offset] &= ~mask_to_clear;
		len -= bits_to_clear;
		bits_to_clear = BITS_PER_BYTE;
		mask_to_clear = ~0;
		if (++offset >= PAGE_SIZE && len > 0) {
			offset = 0;
			page = eb->pages[++i];
			WARN_ON(!PageUptodate(page));
			kaddr = page_address(page);
		}
	}
	if (len) {
		mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
		kaddr[offset] &= ~mask_to_clear;
	}
}

/* Return true when [src, src+len) and [dst, dst+len) overlap. */
static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
{
	unsigned long distance = (src > dst) ? src - dst : dst - src;
	return distance < len;
}

/*
 * Copy @len bytes between two (possibly identical) pages, using memmove()
 * only when the regions are in the same page and actually overlap,
 * memcpy() otherwise.
 */
static void copy_pages(struct page *dst_page, struct page *src_page,
		       unsigned long dst_off, unsigned long src_off,
		       unsigned long len)
{
	char *dst_kaddr = page_address(dst_page);
	char *src_kaddr;
	int must_memmove = 0;

	if (dst_page != src_page) {
		src_kaddr = page_address(src_page);
	} else {
		src_kaddr = dst_kaddr;
		if (areas_overlap(src_off, dst_off, len))
			must_memmove = 1;
	}

	if (must_memmove)
		memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
	else
		memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
}

/*
 * memcpy() within a single extent buffer: copy @len bytes from @src_offset
 * to @dst_offset inside @dst.  Both offsets are relative to @dst, which is
 * why both are range-checked against it.  Forward copy; use
 * memmove_extent_buffer() when the regions may overlap with dst > src.
 */
void memcpy_extent_buffer(const struct extent_buffer *dst,
			  unsigned long dst_offset, unsigned long src_offset,
			  unsigned long len)
{
	size_t cur;
	size_t dst_off_in_page;
	size_t src_off_in_page;
	unsigned long dst_i;
	unsigned long src_i;

	if (check_eb_range(dst, dst_offset, len) ||
	    check_eb_range(dst, src_offset, len))
		return;

	while (len > 0) {
		dst_off_in_page = offset_in_page(dst_offset);
		src_off_in_page = offset_in_page(src_offset);

		dst_i = dst_offset >> PAGE_SHIFT;
		src_i = src_offset >> PAGE_SHIFT;

		/* Chunk ends at whichever page boundary comes first */
		cur = min(len, (unsigned long)(PAGE_SIZE -
					       src_off_in_page));
		cur = min_t(unsigned long, cur,
			    (unsigned long)(PAGE_SIZE - dst_off_in_page));

		copy_pages(dst->pages[dst_i], dst->pages[src_i],
			   dst_off_in_page, src_off_in_page, cur);

		src_offset += cur;
		dst_offset += cur;
		len -= cur;
	}
}

/*
 * memmove() within a single extent buffer: overlap-safe copy of @len bytes
 * from @src_offset to @dst_offset inside @dst.  When dst < src a plain
 * forward memcpy_extent_buffer() suffices; otherwise copy backwards from
 * the tail so an overlapping source is not clobbered before it is read.
 */
void memmove_extent_buffer(const struct extent_buffer *dst,
			   unsigned long dst_offset, unsigned long src_offset,
			   unsigned long len)
{
	size_t cur;
	size_t dst_off_in_page;
	size_t src_off_in_page;
	unsigned long dst_end = dst_offset + len - 1;
	unsigned long src_end = src_offset + len - 1;
	unsigned long dst_i;
	unsigned long src_i;

	if (check_eb_range(dst, dst_offset, len) ||
	    check_eb_range(dst, src_offset, len))
		return;
	if (dst_offset < src_offset) {
		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
		return;
	}
	while (len > 0) {
		dst_i = dst_end >> PAGE_SHIFT;
		src_i = src_end >> PAGE_SHIFT;

		dst_off_in_page = offset_in_page(dst_end);
		src_off_in_page = offset_in_page(src_end);

		/* Chunk reaches back at most to the start of either page */
		cur = min_t(unsigned long, len, src_off_in_page + 1);
		cur = min(cur, dst_off_in_page + 1);
		copy_pages(dst->pages[dst_i], dst->pages[src_i],
			   dst_off_in_page - cur + 1,
			   src_off_in_page - cur + 1, cur);

		dst_end -= cur;
		src_end -= cur;
		len -= cur;
	}
}

/*
 * Try to detach and release the extent buffer attached to @page.
 *
 * Return 1 if the page can be released (no eb attached, or the eb was
 * successfully dropped), 0 if the eb is still in use.
 */
int try_release_extent_buffer(struct page *page)
{
	struct extent_buffer *eb;

	/*
	 * We need to make sure nobody is attaching this page to an eb right
	 * now.
	 */
	spin_lock(&page->mapping->private_lock);
	if (!PagePrivate(page)) {
		spin_unlock(&page->mapping->private_lock);
		return 1;
	}

	eb = (struct extent_buffer *)page->private;
	BUG_ON(!eb);

	/*
	 * This is a little awful but should be ok, we need to make sure that
	 * the eb doesn't disappear out from under us while we're looking at
	 * this page.
	 */
	spin_lock(&eb->refs_lock);
	if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb)) {
		spin_unlock(&eb->refs_lock);
		spin_unlock(&page->mapping->private_lock);
		return 0;
	}
	spin_unlock(&page->mapping->private_lock);

	/*
	 * If tree ref isn't set then we know the ref on this eb is a real ref,
	 * so just return, this page will likely be freed soon anyway.
	 */
	if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
		spin_unlock(&eb->refs_lock);
		return 0;
	}

	/*
	 * NOTE(review): release_extent_buffer() is defined elsewhere in this
	 * file; it presumably drops eb->refs_lock, which is still held here —
	 * confirm against its definition.
	 */
	return release_extent_buffer(eb);
}