18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (C) 2007 Oracle. All rights reserved. 48c2ecf20Sopenharmony_ci */ 58c2ecf20Sopenharmony_ci 68c2ecf20Sopenharmony_ci#include <linux/slab.h> 78c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 88c2ecf20Sopenharmony_ci#include <linux/writeback.h> 98c2ecf20Sopenharmony_ci#include <linux/sched/mm.h> 108c2ecf20Sopenharmony_ci#include "misc.h" 118c2ecf20Sopenharmony_ci#include "ctree.h" 128c2ecf20Sopenharmony_ci#include "transaction.h" 138c2ecf20Sopenharmony_ci#include "btrfs_inode.h" 148c2ecf20Sopenharmony_ci#include "extent_io.h" 158c2ecf20Sopenharmony_ci#include "disk-io.h" 168c2ecf20Sopenharmony_ci#include "compression.h" 178c2ecf20Sopenharmony_ci#include "delalloc-space.h" 188c2ecf20Sopenharmony_ci#include "qgroup.h" 198c2ecf20Sopenharmony_ci 208c2ecf20Sopenharmony_cistatic struct kmem_cache *btrfs_ordered_extent_cache; 218c2ecf20Sopenharmony_ci 228c2ecf20Sopenharmony_cistatic u64 entry_end(struct btrfs_ordered_extent *entry) 238c2ecf20Sopenharmony_ci{ 248c2ecf20Sopenharmony_ci if (entry->file_offset + entry->num_bytes < entry->file_offset) 258c2ecf20Sopenharmony_ci return (u64)-1; 268c2ecf20Sopenharmony_ci return entry->file_offset + entry->num_bytes; 278c2ecf20Sopenharmony_ci} 288c2ecf20Sopenharmony_ci 298c2ecf20Sopenharmony_ci/* returns NULL if the insertion worked, or it returns the node it did find 308c2ecf20Sopenharmony_ci * in the tree 318c2ecf20Sopenharmony_ci */ 328c2ecf20Sopenharmony_cistatic struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, 338c2ecf20Sopenharmony_ci struct rb_node *node) 348c2ecf20Sopenharmony_ci{ 358c2ecf20Sopenharmony_ci struct rb_node **p = &root->rb_node; 368c2ecf20Sopenharmony_ci struct rb_node *parent = NULL; 378c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *entry; 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_ci while (*p) { 408c2ecf20Sopenharmony_ci parent = *p; 418c2ecf20Sopenharmony_ci entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node); 428c2ecf20Sopenharmony_ci 438c2ecf20Sopenharmony_ci if (file_offset < entry->file_offset) 448c2ecf20Sopenharmony_ci p = &(*p)->rb_left; 458c2ecf20Sopenharmony_ci else if (file_offset >= entry_end(entry)) 468c2ecf20Sopenharmony_ci p = &(*p)->rb_right; 478c2ecf20Sopenharmony_ci else 488c2ecf20Sopenharmony_ci return parent; 498c2ecf20Sopenharmony_ci } 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_ci rb_link_node(node, parent, p); 528c2ecf20Sopenharmony_ci rb_insert_color(node, root); 538c2ecf20Sopenharmony_ci return NULL; 548c2ecf20Sopenharmony_ci} 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci/* 578c2ecf20Sopenharmony_ci * look for a given offset in the tree, and if it can't be found return the 588c2ecf20Sopenharmony_ci * first lesser offset 598c2ecf20Sopenharmony_ci */ 608c2ecf20Sopenharmony_cistatic struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, 618c2ecf20Sopenharmony_ci struct rb_node **prev_ret) 628c2ecf20Sopenharmony_ci{ 638c2ecf20Sopenharmony_ci struct rb_node *n = root->rb_node; 648c2ecf20Sopenharmony_ci struct rb_node *prev = NULL; 658c2ecf20Sopenharmony_ci struct rb_node *test; 668c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *entry; 678c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *prev_entry = NULL; 688c2ecf20Sopenharmony_ci 698c2ecf20Sopenharmony_ci while (n) { 708c2ecf20Sopenharmony_ci entry = rb_entry(n, struct btrfs_ordered_extent, rb_node); 718c2ecf20Sopenharmony_ci prev = n; 728c2ecf20Sopenharmony_ci prev_entry = entry; 738c2ecf20Sopenharmony_ci 748c2ecf20Sopenharmony_ci if (file_offset < entry->file_offset) 758c2ecf20Sopenharmony_ci n = n->rb_left; 768c2ecf20Sopenharmony_ci else if (file_offset >= entry_end(entry)) 778c2ecf20Sopenharmony_ci n = n->rb_right; 788c2ecf20Sopenharmony_ci else 798c2ecf20Sopenharmony_ci return n; 808c2ecf20Sopenharmony_ci } 818c2ecf20Sopenharmony_ci if (!prev_ret) 828c2ecf20Sopenharmony_ci return NULL; 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_ci while (prev && file_offset >= entry_end(prev_entry)) { 858c2ecf20Sopenharmony_ci test = rb_next(prev); 868c2ecf20Sopenharmony_ci if (!test) 878c2ecf20Sopenharmony_ci break; 888c2ecf20Sopenharmony_ci prev_entry = rb_entry(test, struct btrfs_ordered_extent, 898c2ecf20Sopenharmony_ci rb_node); 908c2ecf20Sopenharmony_ci if (file_offset < entry_end(prev_entry)) 918c2ecf20Sopenharmony_ci break; 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci prev = test; 948c2ecf20Sopenharmony_ci } 958c2ecf20Sopenharmony_ci if (prev) 968c2ecf20Sopenharmony_ci prev_entry = rb_entry(prev, struct btrfs_ordered_extent, 978c2ecf20Sopenharmony_ci rb_node); 988c2ecf20Sopenharmony_ci while (prev && file_offset < entry_end(prev_entry)) { 998c2ecf20Sopenharmony_ci test = rb_prev(prev); 1008c2ecf20Sopenharmony_ci if (!test) 1018c2ecf20Sopenharmony_ci break; 1028c2ecf20Sopenharmony_ci prev_entry = rb_entry(test, struct btrfs_ordered_extent, 1038c2ecf20Sopenharmony_ci rb_node); 1048c2ecf20Sopenharmony_ci prev = test; 1058c2ecf20Sopenharmony_ci } 1068c2ecf20Sopenharmony_ci *prev_ret = prev; 1078c2ecf20Sopenharmony_ci return NULL; 1088c2ecf20Sopenharmony_ci} 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_ci/* 1118c2ecf20Sopenharmony_ci * helper to check if a given offset is inside a given entry 1128c2ecf20Sopenharmony_ci */ 1138c2ecf20Sopenharmony_cistatic int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) 1148c2ecf20Sopenharmony_ci{ 1158c2ecf20Sopenharmony_ci if (file_offset < entry->file_offset || 1168c2ecf20Sopenharmony_ci entry->file_offset + entry->num_bytes <= file_offset) 1178c2ecf20Sopenharmony_ci return 0; 1188c2ecf20Sopenharmony_ci return 1; 1198c2ecf20Sopenharmony_ci} 1208c2ecf20Sopenharmony_ci 1218c2ecf20Sopenharmony_cistatic int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset, 1228c2ecf20Sopenharmony_ci u64 len) 1238c2ecf20Sopenharmony_ci{ 1248c2ecf20Sopenharmony_ci if (file_offset + len <= entry->file_offset || 1258c2ecf20Sopenharmony_ci entry->file_offset + entry->num_bytes <= file_offset) 1268c2ecf20Sopenharmony_ci return 0; 1278c2ecf20Sopenharmony_ci return 1; 1288c2ecf20Sopenharmony_ci} 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci/* 1318c2ecf20Sopenharmony_ci * look find the first ordered struct that has this offset, otherwise 1328c2ecf20Sopenharmony_ci * the first one less than this offset 1338c2ecf20Sopenharmony_ci */ 1348c2ecf20Sopenharmony_cistatic inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, 1358c2ecf20Sopenharmony_ci u64 file_offset) 1368c2ecf20Sopenharmony_ci{ 1378c2ecf20Sopenharmony_ci struct rb_root *root = &tree->tree; 1388c2ecf20Sopenharmony_ci struct rb_node *prev = NULL; 1398c2ecf20Sopenharmony_ci struct rb_node *ret; 1408c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *entry; 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_ci if (tree->last) { 1438c2ecf20Sopenharmony_ci entry = rb_entry(tree->last, struct btrfs_ordered_extent, 1448c2ecf20Sopenharmony_ci rb_node); 1458c2ecf20Sopenharmony_ci if (offset_in_entry(entry, file_offset)) 1468c2ecf20Sopenharmony_ci return tree->last; 1478c2ecf20Sopenharmony_ci } 1488c2ecf20Sopenharmony_ci ret = __tree_search(root, file_offset, &prev); 1498c2ecf20Sopenharmony_ci if (!ret) 1508c2ecf20Sopenharmony_ci ret = prev; 1518c2ecf20Sopenharmony_ci if (ret) 1528c2ecf20Sopenharmony_ci tree->last = ret; 1538c2ecf20Sopenharmony_ci return ret; 1548c2ecf20Sopenharmony_ci} 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci/* 1578c2ecf20Sopenharmony_ci * Allocate and add a new ordered_extent into the per-inode tree. 1588c2ecf20Sopenharmony_ci * 1598c2ecf20Sopenharmony_ci * The tree is given a single reference on the ordered extent that was 1608c2ecf20Sopenharmony_ci * inserted. 1618c2ecf20Sopenharmony_ci */ 1628c2ecf20Sopenharmony_cistatic int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset, 1638c2ecf20Sopenharmony_ci u64 disk_bytenr, u64 num_bytes, 1648c2ecf20Sopenharmony_ci u64 disk_num_bytes, int type, int dio, 1658c2ecf20Sopenharmony_ci int compress_type) 1668c2ecf20Sopenharmony_ci{ 1678c2ecf20Sopenharmony_ci struct btrfs_root *root = inode->root; 1688c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 1698c2ecf20Sopenharmony_ci struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree; 1708c2ecf20Sopenharmony_ci struct rb_node *node; 1718c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *entry; 1728c2ecf20Sopenharmony_ci int ret; 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_ci if (type == BTRFS_ORDERED_NOCOW || type == BTRFS_ORDERED_PREALLOC) { 1758c2ecf20Sopenharmony_ci /* For nocow write, we can release the qgroup rsv right now */ 1768c2ecf20Sopenharmony_ci ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes); 1778c2ecf20Sopenharmony_ci if (ret < 0) 1788c2ecf20Sopenharmony_ci return ret; 1798c2ecf20Sopenharmony_ci ret = 0; 1808c2ecf20Sopenharmony_ci } else { 1818c2ecf20Sopenharmony_ci /* 1828c2ecf20Sopenharmony_ci * The ordered extent has reserved qgroup space, release now 1838c2ecf20Sopenharmony_ci * and pass the reserved number for qgroup_record to free. 1848c2ecf20Sopenharmony_ci */ 1858c2ecf20Sopenharmony_ci ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes); 1868c2ecf20Sopenharmony_ci if (ret < 0) 1878c2ecf20Sopenharmony_ci return ret; 1888c2ecf20Sopenharmony_ci } 1898c2ecf20Sopenharmony_ci entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS); 1908c2ecf20Sopenharmony_ci if (!entry) 1918c2ecf20Sopenharmony_ci return -ENOMEM; 1928c2ecf20Sopenharmony_ci 1938c2ecf20Sopenharmony_ci entry->file_offset = file_offset; 1948c2ecf20Sopenharmony_ci entry->disk_bytenr = disk_bytenr; 1958c2ecf20Sopenharmony_ci entry->num_bytes = num_bytes; 1968c2ecf20Sopenharmony_ci entry->disk_num_bytes = disk_num_bytes; 1978c2ecf20Sopenharmony_ci entry->bytes_left = num_bytes; 1988c2ecf20Sopenharmony_ci entry->inode = igrab(&inode->vfs_inode); 1998c2ecf20Sopenharmony_ci entry->compress_type = compress_type; 2008c2ecf20Sopenharmony_ci entry->truncated_len = (u64)-1; 2018c2ecf20Sopenharmony_ci entry->qgroup_rsv = ret; 2028c2ecf20Sopenharmony_ci if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) 2038c2ecf20Sopenharmony_ci set_bit(type, &entry->flags); 2048c2ecf20Sopenharmony_ci 2058c2ecf20Sopenharmony_ci if (dio) { 2068c2ecf20Sopenharmony_ci percpu_counter_add_batch(&fs_info->dio_bytes, num_bytes, 2078c2ecf20Sopenharmony_ci fs_info->delalloc_batch); 2088c2ecf20Sopenharmony_ci set_bit(BTRFS_ORDERED_DIRECT, &entry->flags); 2098c2ecf20Sopenharmony_ci } 2108c2ecf20Sopenharmony_ci 2118c2ecf20Sopenharmony_ci /* one ref for the tree */ 2128c2ecf20Sopenharmony_ci refcount_set(&entry->refs, 1); 2138c2ecf20Sopenharmony_ci init_waitqueue_head(&entry->wait); 2148c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&entry->list); 2158c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&entry->log_list); 2168c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&entry->root_extent_list); 2178c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&entry->work_list); 2188c2ecf20Sopenharmony_ci init_completion(&entry->completion); 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_ci trace_btrfs_ordered_extent_add(inode, entry); 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_ci spin_lock_irq(&tree->lock); 2238c2ecf20Sopenharmony_ci node = tree_insert(&tree->tree, file_offset, 2248c2ecf20Sopenharmony_ci &entry->rb_node); 2258c2ecf20Sopenharmony_ci if (node) 2268c2ecf20Sopenharmony_ci btrfs_panic(fs_info, -EEXIST, 2278c2ecf20Sopenharmony_ci "inconsistency in ordered tree at offset %llu", 2288c2ecf20Sopenharmony_ci file_offset); 2298c2ecf20Sopenharmony_ci spin_unlock_irq(&tree->lock); 2308c2ecf20Sopenharmony_ci 2318c2ecf20Sopenharmony_ci spin_lock(&root->ordered_extent_lock); 2328c2ecf20Sopenharmony_ci list_add_tail(&entry->root_extent_list, 2338c2ecf20Sopenharmony_ci &root->ordered_extents); 2348c2ecf20Sopenharmony_ci root->nr_ordered_extents++; 2358c2ecf20Sopenharmony_ci if (root->nr_ordered_extents == 1) { 2368c2ecf20Sopenharmony_ci spin_lock(&fs_info->ordered_root_lock); 2378c2ecf20Sopenharmony_ci BUG_ON(!list_empty(&root->ordered_root)); 2388c2ecf20Sopenharmony_ci list_add_tail(&root->ordered_root, &fs_info->ordered_roots); 2398c2ecf20Sopenharmony_ci spin_unlock(&fs_info->ordered_root_lock); 2408c2ecf20Sopenharmony_ci } 2418c2ecf20Sopenharmony_ci spin_unlock(&root->ordered_extent_lock); 2428c2ecf20Sopenharmony_ci 2438c2ecf20Sopenharmony_ci /* 2448c2ecf20Sopenharmony_ci * We don't need the count_max_extents here, we can assume that all of 2458c2ecf20Sopenharmony_ci * that work has been done at higher layers, so this is truly the 2468c2ecf20Sopenharmony_ci * smallest the extent is going to get. 2478c2ecf20Sopenharmony_ci */ 2488c2ecf20Sopenharmony_ci spin_lock(&inode->lock); 2498c2ecf20Sopenharmony_ci btrfs_mod_outstanding_extents(inode, 1); 2508c2ecf20Sopenharmony_ci spin_unlock(&inode->lock); 2518c2ecf20Sopenharmony_ci 2528c2ecf20Sopenharmony_ci return 0; 2538c2ecf20Sopenharmony_ci} 2548c2ecf20Sopenharmony_ci 2558c2ecf20Sopenharmony_ciint btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset, 2568c2ecf20Sopenharmony_ci u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes, 2578c2ecf20Sopenharmony_ci int type) 2588c2ecf20Sopenharmony_ci{ 2598c2ecf20Sopenharmony_ci return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr, 2608c2ecf20Sopenharmony_ci num_bytes, disk_num_bytes, type, 0, 2618c2ecf20Sopenharmony_ci BTRFS_COMPRESS_NONE); 2628c2ecf20Sopenharmony_ci} 2638c2ecf20Sopenharmony_ci 2648c2ecf20Sopenharmony_ciint btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset, 2658c2ecf20Sopenharmony_ci u64 disk_bytenr, u64 num_bytes, 2668c2ecf20Sopenharmony_ci u64 disk_num_bytes, int type) 2678c2ecf20Sopenharmony_ci{ 2688c2ecf20Sopenharmony_ci return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr, 2698c2ecf20Sopenharmony_ci num_bytes, disk_num_bytes, type, 1, 2708c2ecf20Sopenharmony_ci BTRFS_COMPRESS_NONE); 2718c2ecf20Sopenharmony_ci} 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_ciint btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset, 2748c2ecf20Sopenharmony_ci u64 disk_bytenr, u64 num_bytes, 2758c2ecf20Sopenharmony_ci u64 disk_num_bytes, int type, 2768c2ecf20Sopenharmony_ci int compress_type) 2778c2ecf20Sopenharmony_ci{ 2788c2ecf20Sopenharmony_ci return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr, 2798c2ecf20Sopenharmony_ci num_bytes, disk_num_bytes, type, 0, 2808c2ecf20Sopenharmony_ci compress_type); 2818c2ecf20Sopenharmony_ci} 2828c2ecf20Sopenharmony_ci 2838c2ecf20Sopenharmony_ci/* 2848c2ecf20Sopenharmony_ci * Add a struct btrfs_ordered_sum into the list of checksums to be inserted 2858c2ecf20Sopenharmony_ci * when an ordered extent is finished. If the list covers more than one 2868c2ecf20Sopenharmony_ci * ordered extent, it is split across multiples. 2878c2ecf20Sopenharmony_ci */ 2888c2ecf20Sopenharmony_civoid btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry, 2898c2ecf20Sopenharmony_ci struct btrfs_ordered_sum *sum) 2908c2ecf20Sopenharmony_ci{ 2918c2ecf20Sopenharmony_ci struct btrfs_ordered_inode_tree *tree; 2928c2ecf20Sopenharmony_ci 2938c2ecf20Sopenharmony_ci tree = &BTRFS_I(entry->inode)->ordered_tree; 2948c2ecf20Sopenharmony_ci spin_lock_irq(&tree->lock); 2958c2ecf20Sopenharmony_ci list_add_tail(&sum->list, &entry->list); 2968c2ecf20Sopenharmony_ci spin_unlock_irq(&tree->lock); 2978c2ecf20Sopenharmony_ci} 2988c2ecf20Sopenharmony_ci 2998c2ecf20Sopenharmony_ci/* 3008c2ecf20Sopenharmony_ci * this is used to account for finished IO across a given range 3018c2ecf20Sopenharmony_ci * of the file. The IO may span ordered extents. If 3028c2ecf20Sopenharmony_ci * a given ordered_extent is completely done, 1 is returned, otherwise 3038c2ecf20Sopenharmony_ci * 0. 3048c2ecf20Sopenharmony_ci * 3058c2ecf20Sopenharmony_ci * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used 3068c2ecf20Sopenharmony_ci * to make sure this function only returns 1 once for a given ordered extent. 3078c2ecf20Sopenharmony_ci * 3088c2ecf20Sopenharmony_ci * file_offset is updated to one byte past the range that is recorded as 3098c2ecf20Sopenharmony_ci * complete. This allows you to walk forward in the file. 3108c2ecf20Sopenharmony_ci */ 3118c2ecf20Sopenharmony_ciint btrfs_dec_test_first_ordered_pending(struct btrfs_inode *inode, 3128c2ecf20Sopenharmony_ci struct btrfs_ordered_extent **cached, 3138c2ecf20Sopenharmony_ci u64 *file_offset, u64 io_size, int uptodate) 3148c2ecf20Sopenharmony_ci{ 3158c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 3168c2ecf20Sopenharmony_ci struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree; 3178c2ecf20Sopenharmony_ci struct rb_node *node; 3188c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *entry = NULL; 3198c2ecf20Sopenharmony_ci int ret; 3208c2ecf20Sopenharmony_ci unsigned long flags; 3218c2ecf20Sopenharmony_ci u64 dec_end; 3228c2ecf20Sopenharmony_ci u64 dec_start; 3238c2ecf20Sopenharmony_ci u64 to_dec; 3248c2ecf20Sopenharmony_ci 3258c2ecf20Sopenharmony_ci spin_lock_irqsave(&tree->lock, flags); 3268c2ecf20Sopenharmony_ci node = tree_search(tree, *file_offset); 3278c2ecf20Sopenharmony_ci if (!node) { 3288c2ecf20Sopenharmony_ci ret = 1; 3298c2ecf20Sopenharmony_ci goto out; 3308c2ecf20Sopenharmony_ci } 3318c2ecf20Sopenharmony_ci 3328c2ecf20Sopenharmony_ci entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 3338c2ecf20Sopenharmony_ci if (!offset_in_entry(entry, *file_offset)) { 3348c2ecf20Sopenharmony_ci ret = 1; 3358c2ecf20Sopenharmony_ci goto out; 3368c2ecf20Sopenharmony_ci } 3378c2ecf20Sopenharmony_ci 3388c2ecf20Sopenharmony_ci dec_start = max(*file_offset, entry->file_offset); 3398c2ecf20Sopenharmony_ci dec_end = min(*file_offset + io_size, 3408c2ecf20Sopenharmony_ci entry->file_offset + entry->num_bytes); 3418c2ecf20Sopenharmony_ci *file_offset = dec_end; 3428c2ecf20Sopenharmony_ci if (dec_start > dec_end) { 3438c2ecf20Sopenharmony_ci btrfs_crit(fs_info, "bad ordering dec_start %llu end %llu", 3448c2ecf20Sopenharmony_ci dec_start, dec_end); 3458c2ecf20Sopenharmony_ci } 3468c2ecf20Sopenharmony_ci to_dec = dec_end - dec_start; 3478c2ecf20Sopenharmony_ci if (to_dec > entry->bytes_left) { 3488c2ecf20Sopenharmony_ci btrfs_crit(fs_info, 3498c2ecf20Sopenharmony_ci "bad ordered accounting left %llu size %llu", 3508c2ecf20Sopenharmony_ci entry->bytes_left, to_dec); 3518c2ecf20Sopenharmony_ci } 3528c2ecf20Sopenharmony_ci entry->bytes_left -= to_dec; 3538c2ecf20Sopenharmony_ci if (!uptodate) 3548c2ecf20Sopenharmony_ci set_bit(BTRFS_ORDERED_IOERR, &entry->flags); 3558c2ecf20Sopenharmony_ci 3568c2ecf20Sopenharmony_ci if (entry->bytes_left == 0) { 3578c2ecf20Sopenharmony_ci ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); 3588c2ecf20Sopenharmony_ci /* test_and_set_bit implies a barrier */ 3598c2ecf20Sopenharmony_ci cond_wake_up_nomb(&entry->wait); 3608c2ecf20Sopenharmony_ci } else { 3618c2ecf20Sopenharmony_ci ret = 1; 3628c2ecf20Sopenharmony_ci } 3638c2ecf20Sopenharmony_ciout: 3648c2ecf20Sopenharmony_ci if (!ret && cached && entry) { 3658c2ecf20Sopenharmony_ci *cached = entry; 3668c2ecf20Sopenharmony_ci refcount_inc(&entry->refs); 3678c2ecf20Sopenharmony_ci } 3688c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&tree->lock, flags); 3698c2ecf20Sopenharmony_ci return ret == 0; 3708c2ecf20Sopenharmony_ci} 3718c2ecf20Sopenharmony_ci 3728c2ecf20Sopenharmony_ci/* 3738c2ecf20Sopenharmony_ci * this is used to account for finished IO across a given range 3748c2ecf20Sopenharmony_ci * of the file. The IO should not span ordered extents. If 3758c2ecf20Sopenharmony_ci * a given ordered_extent is completely done, 1 is returned, otherwise 3768c2ecf20Sopenharmony_ci * 0. 3778c2ecf20Sopenharmony_ci * 3788c2ecf20Sopenharmony_ci * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used 3798c2ecf20Sopenharmony_ci * to make sure this function only returns 1 once for a given ordered extent. 3808c2ecf20Sopenharmony_ci */ 3818c2ecf20Sopenharmony_ciint btrfs_dec_test_ordered_pending(struct btrfs_inode *inode, 3828c2ecf20Sopenharmony_ci struct btrfs_ordered_extent **cached, 3838c2ecf20Sopenharmony_ci u64 file_offset, u64 io_size, int uptodate) 3848c2ecf20Sopenharmony_ci{ 3858c2ecf20Sopenharmony_ci struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree; 3868c2ecf20Sopenharmony_ci struct rb_node *node; 3878c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *entry = NULL; 3888c2ecf20Sopenharmony_ci unsigned long flags; 3898c2ecf20Sopenharmony_ci int ret; 3908c2ecf20Sopenharmony_ci 3918c2ecf20Sopenharmony_ci spin_lock_irqsave(&tree->lock, flags); 3928c2ecf20Sopenharmony_ci if (cached && *cached) { 3938c2ecf20Sopenharmony_ci entry = *cached; 3948c2ecf20Sopenharmony_ci goto have_entry; 3958c2ecf20Sopenharmony_ci } 3968c2ecf20Sopenharmony_ci 3978c2ecf20Sopenharmony_ci node = tree_search(tree, file_offset); 3988c2ecf20Sopenharmony_ci if (!node) { 3998c2ecf20Sopenharmony_ci ret = 1; 4008c2ecf20Sopenharmony_ci goto out; 4018c2ecf20Sopenharmony_ci } 4028c2ecf20Sopenharmony_ci 4038c2ecf20Sopenharmony_ci entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 4048c2ecf20Sopenharmony_cihave_entry: 4058c2ecf20Sopenharmony_ci if (!offset_in_entry(entry, file_offset)) { 4068c2ecf20Sopenharmony_ci ret = 1; 4078c2ecf20Sopenharmony_ci goto out; 4088c2ecf20Sopenharmony_ci } 4098c2ecf20Sopenharmony_ci 4108c2ecf20Sopenharmony_ci if (io_size > entry->bytes_left) { 4118c2ecf20Sopenharmony_ci btrfs_crit(inode->root->fs_info, 4128c2ecf20Sopenharmony_ci "bad ordered accounting left %llu size %llu", 4138c2ecf20Sopenharmony_ci entry->bytes_left, io_size); 4148c2ecf20Sopenharmony_ci } 4158c2ecf20Sopenharmony_ci entry->bytes_left -= io_size; 4168c2ecf20Sopenharmony_ci if (!uptodate) 4178c2ecf20Sopenharmony_ci set_bit(BTRFS_ORDERED_IOERR, &entry->flags); 4188c2ecf20Sopenharmony_ci 4198c2ecf20Sopenharmony_ci if (entry->bytes_left == 0) { 4208c2ecf20Sopenharmony_ci ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); 4218c2ecf20Sopenharmony_ci /* test_and_set_bit implies a barrier */ 4228c2ecf20Sopenharmony_ci cond_wake_up_nomb(&entry->wait); 4238c2ecf20Sopenharmony_ci } else { 4248c2ecf20Sopenharmony_ci ret = 1; 4258c2ecf20Sopenharmony_ci } 4268c2ecf20Sopenharmony_ciout: 4278c2ecf20Sopenharmony_ci if (!ret && cached && entry) { 4288c2ecf20Sopenharmony_ci *cached = entry; 4298c2ecf20Sopenharmony_ci refcount_inc(&entry->refs); 4308c2ecf20Sopenharmony_ci } 4318c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&tree->lock, flags); 4328c2ecf20Sopenharmony_ci return ret == 0; 4338c2ecf20Sopenharmony_ci} 4348c2ecf20Sopenharmony_ci 4358c2ecf20Sopenharmony_ci/* 4368c2ecf20Sopenharmony_ci * used to drop a reference on an ordered extent. This will free 4378c2ecf20Sopenharmony_ci * the extent if the last reference is dropped 4388c2ecf20Sopenharmony_ci */ 4398c2ecf20Sopenharmony_civoid btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) 4408c2ecf20Sopenharmony_ci{ 4418c2ecf20Sopenharmony_ci struct list_head *cur; 4428c2ecf20Sopenharmony_ci struct btrfs_ordered_sum *sum; 4438c2ecf20Sopenharmony_ci 4448c2ecf20Sopenharmony_ci trace_btrfs_ordered_extent_put(BTRFS_I(entry->inode), entry); 4458c2ecf20Sopenharmony_ci 4468c2ecf20Sopenharmony_ci if (refcount_dec_and_test(&entry->refs)) { 4478c2ecf20Sopenharmony_ci ASSERT(list_empty(&entry->root_extent_list)); 4488c2ecf20Sopenharmony_ci ASSERT(list_empty(&entry->log_list)); 4498c2ecf20Sopenharmony_ci ASSERT(RB_EMPTY_NODE(&entry->rb_node)); 4508c2ecf20Sopenharmony_ci if (entry->inode) 4518c2ecf20Sopenharmony_ci btrfs_add_delayed_iput(entry->inode); 4528c2ecf20Sopenharmony_ci while (!list_empty(&entry->list)) { 4538c2ecf20Sopenharmony_ci cur = entry->list.next; 4548c2ecf20Sopenharmony_ci sum = list_entry(cur, struct btrfs_ordered_sum, list); 4558c2ecf20Sopenharmony_ci list_del(&sum->list); 4568c2ecf20Sopenharmony_ci kvfree(sum); 4578c2ecf20Sopenharmony_ci } 4588c2ecf20Sopenharmony_ci kmem_cache_free(btrfs_ordered_extent_cache, entry); 4598c2ecf20Sopenharmony_ci } 4608c2ecf20Sopenharmony_ci} 4618c2ecf20Sopenharmony_ci 4628c2ecf20Sopenharmony_ci/* 4638c2ecf20Sopenharmony_ci * remove an ordered extent from the tree. No references are dropped 4648c2ecf20Sopenharmony_ci * and waiters are woken up. 4658c2ecf20Sopenharmony_ci */ 4668c2ecf20Sopenharmony_civoid btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, 4678c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *entry) 4688c2ecf20Sopenharmony_ci{ 4698c2ecf20Sopenharmony_ci struct btrfs_ordered_inode_tree *tree; 4708c2ecf20Sopenharmony_ci struct btrfs_root *root = btrfs_inode->root; 4718c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 4728c2ecf20Sopenharmony_ci struct rb_node *node; 4738c2ecf20Sopenharmony_ci bool pending; 4748c2ecf20Sopenharmony_ci 4758c2ecf20Sopenharmony_ci /* This is paired with btrfs_add_ordered_extent. */ 4768c2ecf20Sopenharmony_ci spin_lock(&btrfs_inode->lock); 4778c2ecf20Sopenharmony_ci btrfs_mod_outstanding_extents(btrfs_inode, -1); 4788c2ecf20Sopenharmony_ci spin_unlock(&btrfs_inode->lock); 4798c2ecf20Sopenharmony_ci if (root != fs_info->tree_root) 4808c2ecf20Sopenharmony_ci btrfs_delalloc_release_metadata(btrfs_inode, entry->num_bytes, 4818c2ecf20Sopenharmony_ci false); 4828c2ecf20Sopenharmony_ci 4838c2ecf20Sopenharmony_ci if (test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) 4848c2ecf20Sopenharmony_ci percpu_counter_add_batch(&fs_info->dio_bytes, -entry->num_bytes, 4858c2ecf20Sopenharmony_ci fs_info->delalloc_batch); 4868c2ecf20Sopenharmony_ci 4878c2ecf20Sopenharmony_ci tree = &btrfs_inode->ordered_tree; 4888c2ecf20Sopenharmony_ci spin_lock_irq(&tree->lock); 4898c2ecf20Sopenharmony_ci node = &entry->rb_node; 4908c2ecf20Sopenharmony_ci rb_erase(node, &tree->tree); 4918c2ecf20Sopenharmony_ci RB_CLEAR_NODE(node); 4928c2ecf20Sopenharmony_ci if (tree->last == node) 4938c2ecf20Sopenharmony_ci tree->last = NULL; 4948c2ecf20Sopenharmony_ci set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); 4958c2ecf20Sopenharmony_ci pending = test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags); 4968c2ecf20Sopenharmony_ci spin_unlock_irq(&tree->lock); 4978c2ecf20Sopenharmony_ci 4988c2ecf20Sopenharmony_ci /* 4998c2ecf20Sopenharmony_ci * The current running transaction is waiting on us, we need to let it 5008c2ecf20Sopenharmony_ci * know that we're complete and wake it up. 5018c2ecf20Sopenharmony_ci */ 5028c2ecf20Sopenharmony_ci if (pending) { 5038c2ecf20Sopenharmony_ci struct btrfs_transaction *trans; 5048c2ecf20Sopenharmony_ci 5058c2ecf20Sopenharmony_ci /* 5068c2ecf20Sopenharmony_ci * The checks for trans are just a formality, it should be set, 5078c2ecf20Sopenharmony_ci * but if it isn't we don't want to deref/assert under the spin 5088c2ecf20Sopenharmony_ci * lock, so be nice and check if trans is set, but ASSERT() so 5098c2ecf20Sopenharmony_ci * if it isn't set a developer will notice. 5108c2ecf20Sopenharmony_ci */ 5118c2ecf20Sopenharmony_ci spin_lock(&fs_info->trans_lock); 5128c2ecf20Sopenharmony_ci trans = fs_info->running_transaction; 5138c2ecf20Sopenharmony_ci if (trans) 5148c2ecf20Sopenharmony_ci refcount_inc(&trans->use_count); 5158c2ecf20Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 5168c2ecf20Sopenharmony_ci 5178c2ecf20Sopenharmony_ci ASSERT(trans); 5188c2ecf20Sopenharmony_ci if (trans) { 5198c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&trans->pending_ordered)) 5208c2ecf20Sopenharmony_ci wake_up(&trans->pending_wait); 5218c2ecf20Sopenharmony_ci btrfs_put_transaction(trans); 5228c2ecf20Sopenharmony_ci } 5238c2ecf20Sopenharmony_ci } 5248c2ecf20Sopenharmony_ci 5258c2ecf20Sopenharmony_ci spin_lock(&root->ordered_extent_lock); 5268c2ecf20Sopenharmony_ci list_del_init(&entry->root_extent_list); 5278c2ecf20Sopenharmony_ci root->nr_ordered_extents--; 5288c2ecf20Sopenharmony_ci 5298c2ecf20Sopenharmony_ci trace_btrfs_ordered_extent_remove(btrfs_inode, entry); 5308c2ecf20Sopenharmony_ci 5318c2ecf20Sopenharmony_ci if (!root->nr_ordered_extents) { 5328c2ecf20Sopenharmony_ci spin_lock(&fs_info->ordered_root_lock); 5338c2ecf20Sopenharmony_ci BUG_ON(list_empty(&root->ordered_root)); 5348c2ecf20Sopenharmony_ci list_del_init(&root->ordered_root); 5358c2ecf20Sopenharmony_ci spin_unlock(&fs_info->ordered_root_lock); 5368c2ecf20Sopenharmony_ci } 5378c2ecf20Sopenharmony_ci spin_unlock(&root->ordered_extent_lock); 5388c2ecf20Sopenharmony_ci wake_up(&entry->wait); 5398c2ecf20Sopenharmony_ci} 5408c2ecf20Sopenharmony_ci 5418c2ecf20Sopenharmony_cistatic void btrfs_run_ordered_extent_work(struct btrfs_work *work) 5428c2ecf20Sopenharmony_ci{ 5438c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *ordered; 5448c2ecf20Sopenharmony_ci 5458c2ecf20Sopenharmony_ci ordered = container_of(work, struct btrfs_ordered_extent, flush_work); 5468c2ecf20Sopenharmony_ci btrfs_start_ordered_extent(ordered, 1); 5478c2ecf20Sopenharmony_ci complete(&ordered->completion); 5488c2ecf20Sopenharmony_ci} 5498c2ecf20Sopenharmony_ci 5508c2ecf20Sopenharmony_ci/* 5518c2ecf20Sopenharmony_ci * wait for all the ordered extents in a root. This is done when balancing 5528c2ecf20Sopenharmony_ci * space between drives. 5538c2ecf20Sopenharmony_ci */ 5548c2ecf20Sopenharmony_ciu64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr, 5558c2ecf20Sopenharmony_ci const u64 range_start, const u64 range_len) 5568c2ecf20Sopenharmony_ci{ 5578c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 5588c2ecf20Sopenharmony_ci LIST_HEAD(splice); 5598c2ecf20Sopenharmony_ci LIST_HEAD(skipped); 5608c2ecf20Sopenharmony_ci LIST_HEAD(works); 5618c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *ordered, *next; 5628c2ecf20Sopenharmony_ci u64 count = 0; 5638c2ecf20Sopenharmony_ci const u64 range_end = range_start + range_len; 5648c2ecf20Sopenharmony_ci 5658c2ecf20Sopenharmony_ci mutex_lock(&root->ordered_extent_mutex); 5668c2ecf20Sopenharmony_ci spin_lock(&root->ordered_extent_lock); 5678c2ecf20Sopenharmony_ci list_splice_init(&root->ordered_extents, &splice); 5688c2ecf20Sopenharmony_ci while (!list_empty(&splice) && nr) { 5698c2ecf20Sopenharmony_ci ordered = list_first_entry(&splice, struct btrfs_ordered_extent, 5708c2ecf20Sopenharmony_ci root_extent_list); 5718c2ecf20Sopenharmony_ci 5728c2ecf20Sopenharmony_ci if (range_end <= ordered->disk_bytenr || 5738c2ecf20Sopenharmony_ci ordered->disk_bytenr + ordered->disk_num_bytes <= range_start) { 5748c2ecf20Sopenharmony_ci list_move_tail(&ordered->root_extent_list, &skipped); 5758c2ecf20Sopenharmony_ci cond_resched_lock(&root->ordered_extent_lock); 5768c2ecf20Sopenharmony_ci continue; 5778c2ecf20Sopenharmony_ci } 5788c2ecf20Sopenharmony_ci 5798c2ecf20Sopenharmony_ci list_move_tail(&ordered->root_extent_list, 5808c2ecf20Sopenharmony_ci &root->ordered_extents); 5818c2ecf20Sopenharmony_ci refcount_inc(&ordered->refs); 5828c2ecf20Sopenharmony_ci spin_unlock(&root->ordered_extent_lock); 5838c2ecf20Sopenharmony_ci 5848c2ecf20Sopenharmony_ci btrfs_init_work(&ordered->flush_work, 5858c2ecf20Sopenharmony_ci btrfs_run_ordered_extent_work, NULL, NULL); 5868c2ecf20Sopenharmony_ci list_add_tail(&ordered->work_list, &works); 5878c2ecf20Sopenharmony_ci btrfs_queue_work(fs_info->flush_workers, &ordered->flush_work); 5888c2ecf20Sopenharmony_ci 5898c2ecf20Sopenharmony_ci cond_resched(); 5908c2ecf20Sopenharmony_ci spin_lock(&root->ordered_extent_lock); 5918c2ecf20Sopenharmony_ci if (nr != U64_MAX) 5928c2ecf20Sopenharmony_ci nr--; 5938c2ecf20Sopenharmony_ci count++; 5948c2ecf20Sopenharmony_ci } 5958c2ecf20Sopenharmony_ci list_splice_tail(&skipped, &root->ordered_extents); 5968c2ecf20Sopenharmony_ci list_splice_tail(&splice, &root->ordered_extents); 5978c2ecf20Sopenharmony_ci spin_unlock(&root->ordered_extent_lock); 5988c2ecf20Sopenharmony_ci 5998c2ecf20Sopenharmony_ci list_for_each_entry_safe(ordered, next, &works, work_list) { 6008c2ecf20Sopenharmony_ci list_del_init(&ordered->work_list); 6018c2ecf20Sopenharmony_ci wait_for_completion(&ordered->completion); 6028c2ecf20Sopenharmony_ci btrfs_put_ordered_extent(ordered); 6038c2ecf20Sopenharmony_ci cond_resched(); 6048c2ecf20Sopenharmony_ci } 6058c2ecf20Sopenharmony_ci mutex_unlock(&root->ordered_extent_mutex); 6068c2ecf20Sopenharmony_ci 6078c2ecf20Sopenharmony_ci return count; 6088c2ecf20Sopenharmony_ci} 6098c2ecf20Sopenharmony_ci 6108c2ecf20Sopenharmony_civoid btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr, 6118c2ecf20Sopenharmony_ci const u64 range_start, const u64 range_len) 6128c2ecf20Sopenharmony_ci{ 6138c2ecf20Sopenharmony_ci struct btrfs_root *root; 6148c2ecf20Sopenharmony_ci struct list_head splice; 6158c2ecf20Sopenharmony_ci u64 done; 6168c2ecf20Sopenharmony_ci 6178c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&splice); 6188c2ecf20Sopenharmony_ci 6198c2ecf20Sopenharmony_ci mutex_lock(&fs_info->ordered_operations_mutex); 6208c2ecf20Sopenharmony_ci spin_lock(&fs_info->ordered_root_lock); 6218c2ecf20Sopenharmony_ci list_splice_init(&fs_info->ordered_roots, &splice); 6228c2ecf20Sopenharmony_ci while (!list_empty(&splice) && nr) { 6238c2ecf20Sopenharmony_ci root = list_first_entry(&splice, struct btrfs_root, 6248c2ecf20Sopenharmony_ci ordered_root); 6258c2ecf20Sopenharmony_ci root = btrfs_grab_root(root); 6268c2ecf20Sopenharmony_ci BUG_ON(!root); 6278c2ecf20Sopenharmony_ci list_move_tail(&root->ordered_root, 6288c2ecf20Sopenharmony_ci &fs_info->ordered_roots); 6298c2ecf20Sopenharmony_ci spin_unlock(&fs_info->ordered_root_lock); 6308c2ecf20Sopenharmony_ci 6318c2ecf20Sopenharmony_ci done = btrfs_wait_ordered_extents(root, nr, 6328c2ecf20Sopenharmony_ci range_start, range_len); 6338c2ecf20Sopenharmony_ci btrfs_put_root(root); 6348c2ecf20Sopenharmony_ci 6358c2ecf20Sopenharmony_ci spin_lock(&fs_info->ordered_root_lock); 6368c2ecf20Sopenharmony_ci if (nr != U64_MAX) { 6378c2ecf20Sopenharmony_ci nr -= done; 6388c2ecf20Sopenharmony_ci } 6398c2ecf20Sopenharmony_ci } 6408c2ecf20Sopenharmony_ci list_splice_tail(&splice, &fs_info->ordered_roots); 6418c2ecf20Sopenharmony_ci spin_unlock(&fs_info->ordered_root_lock); 6428c2ecf20Sopenharmony_ci mutex_unlock(&fs_info->ordered_operations_mutex); 6438c2ecf20Sopenharmony_ci} 6448c2ecf20Sopenharmony_ci 6458c2ecf20Sopenharmony_ci/* 6468c2ecf20Sopenharmony_ci * Used to start IO or wait for a given ordered extent to finish. 6478c2ecf20Sopenharmony_ci * 6488c2ecf20Sopenharmony_ci * If wait is one, this effectively waits on page writeback for all the pages 6498c2ecf20Sopenharmony_ci * in the extent, and it waits on the io completion code to insert 6508c2ecf20Sopenharmony_ci * metadata into the btree corresponding to the extent 6518c2ecf20Sopenharmony_ci */ 6528c2ecf20Sopenharmony_civoid btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry, int wait) 6538c2ecf20Sopenharmony_ci{ 6548c2ecf20Sopenharmony_ci u64 start = entry->file_offset; 6558c2ecf20Sopenharmony_ci u64 end = start + entry->num_bytes - 1; 6568c2ecf20Sopenharmony_ci struct btrfs_inode *inode = BTRFS_I(entry->inode); 6578c2ecf20Sopenharmony_ci 6588c2ecf20Sopenharmony_ci trace_btrfs_ordered_extent_start(inode, entry); 6598c2ecf20Sopenharmony_ci 6608c2ecf20Sopenharmony_ci /* 6618c2ecf20Sopenharmony_ci * pages in the range can be dirty, clean or writeback. We 6628c2ecf20Sopenharmony_ci * start IO on any dirty ones so the wait doesn't stall waiting 6638c2ecf20Sopenharmony_ci * for the flusher thread to find them 6648c2ecf20Sopenharmony_ci */ 6658c2ecf20Sopenharmony_ci if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) 6668c2ecf20Sopenharmony_ci filemap_fdatawrite_range(inode->vfs_inode.i_mapping, start, end); 6678c2ecf20Sopenharmony_ci if (wait) { 6688c2ecf20Sopenharmony_ci wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, 6698c2ecf20Sopenharmony_ci &entry->flags)); 6708c2ecf20Sopenharmony_ci } 6718c2ecf20Sopenharmony_ci} 6728c2ecf20Sopenharmony_ci 6738c2ecf20Sopenharmony_ci/* 6748c2ecf20Sopenharmony_ci * Used to wait on ordered extents across a large range of bytes. 6758c2ecf20Sopenharmony_ci */ 6768c2ecf20Sopenharmony_ciint btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) 6778c2ecf20Sopenharmony_ci{ 6788c2ecf20Sopenharmony_ci int ret = 0; 6798c2ecf20Sopenharmony_ci int ret_wb = 0; 6808c2ecf20Sopenharmony_ci u64 end; 6818c2ecf20Sopenharmony_ci u64 orig_end; 6828c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *ordered; 6838c2ecf20Sopenharmony_ci 6848c2ecf20Sopenharmony_ci if (start + len < start) { 6858c2ecf20Sopenharmony_ci orig_end = INT_LIMIT(loff_t); 6868c2ecf20Sopenharmony_ci } else { 6878c2ecf20Sopenharmony_ci orig_end = start + len - 1; 6888c2ecf20Sopenharmony_ci if (orig_end > INT_LIMIT(loff_t)) 6898c2ecf20Sopenharmony_ci orig_end = INT_LIMIT(loff_t); 6908c2ecf20Sopenharmony_ci } 6918c2ecf20Sopenharmony_ci 6928c2ecf20Sopenharmony_ci /* start IO across the range first to instantiate any delalloc 6938c2ecf20Sopenharmony_ci * extents 6948c2ecf20Sopenharmony_ci */ 6958c2ecf20Sopenharmony_ci ret = btrfs_fdatawrite_range(inode, start, orig_end); 6968c2ecf20Sopenharmony_ci if (ret) 6978c2ecf20Sopenharmony_ci return ret; 6988c2ecf20Sopenharmony_ci 6998c2ecf20Sopenharmony_ci /* 7008c2ecf20Sopenharmony_ci * If we have a writeback error don't return immediately. Wait first 7018c2ecf20Sopenharmony_ci * for any ordered extents that haven't completed yet. This is to make 7028c2ecf20Sopenharmony_ci * sure no one can dirty the same page ranges and call writepages() 7038c2ecf20Sopenharmony_ci * before the ordered extents complete - to avoid failures (-EEXIST) 7048c2ecf20Sopenharmony_ci * when adding the new ordered extents to the ordered tree. 7058c2ecf20Sopenharmony_ci */ 7068c2ecf20Sopenharmony_ci ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end); 7078c2ecf20Sopenharmony_ci 7088c2ecf20Sopenharmony_ci end = orig_end; 7098c2ecf20Sopenharmony_ci while (1) { 7108c2ecf20Sopenharmony_ci ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode), end); 7118c2ecf20Sopenharmony_ci if (!ordered) 7128c2ecf20Sopenharmony_ci break; 7138c2ecf20Sopenharmony_ci if (ordered->file_offset > orig_end) { 7148c2ecf20Sopenharmony_ci btrfs_put_ordered_extent(ordered); 7158c2ecf20Sopenharmony_ci break; 7168c2ecf20Sopenharmony_ci } 7178c2ecf20Sopenharmony_ci if (ordered->file_offset + ordered->num_bytes <= start) { 7188c2ecf20Sopenharmony_ci btrfs_put_ordered_extent(ordered); 7198c2ecf20Sopenharmony_ci break; 7208c2ecf20Sopenharmony_ci } 7218c2ecf20Sopenharmony_ci btrfs_start_ordered_extent(ordered, 1); 7228c2ecf20Sopenharmony_ci end = ordered->file_offset; 7238c2ecf20Sopenharmony_ci /* 7248c2ecf20Sopenharmony_ci * If the ordered extent had an error save the error but don't 7258c2ecf20Sopenharmony_ci * exit without waiting first for all other ordered extents in 7268c2ecf20Sopenharmony_ci * the range to complete. 7278c2ecf20Sopenharmony_ci */ 7288c2ecf20Sopenharmony_ci if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) 7298c2ecf20Sopenharmony_ci ret = -EIO; 7308c2ecf20Sopenharmony_ci btrfs_put_ordered_extent(ordered); 7318c2ecf20Sopenharmony_ci if (end == 0 || end == start) 7328c2ecf20Sopenharmony_ci break; 7338c2ecf20Sopenharmony_ci end--; 7348c2ecf20Sopenharmony_ci } 7358c2ecf20Sopenharmony_ci return ret_wb ? ret_wb : ret; 7368c2ecf20Sopenharmony_ci} 7378c2ecf20Sopenharmony_ci 7388c2ecf20Sopenharmony_ci/* 7398c2ecf20Sopenharmony_ci * find an ordered extent corresponding to file_offset. return NULL if 7408c2ecf20Sopenharmony_ci * nothing is found, otherwise take a reference on the extent and return it 7418c2ecf20Sopenharmony_ci */ 7428c2ecf20Sopenharmony_cistruct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode, 7438c2ecf20Sopenharmony_ci u64 file_offset) 7448c2ecf20Sopenharmony_ci{ 7458c2ecf20Sopenharmony_ci struct btrfs_ordered_inode_tree *tree; 7468c2ecf20Sopenharmony_ci struct rb_node *node; 7478c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *entry = NULL; 7488c2ecf20Sopenharmony_ci 7498c2ecf20Sopenharmony_ci tree = &inode->ordered_tree; 7508c2ecf20Sopenharmony_ci spin_lock_irq(&tree->lock); 7518c2ecf20Sopenharmony_ci node = tree_search(tree, file_offset); 7528c2ecf20Sopenharmony_ci if (!node) 7538c2ecf20Sopenharmony_ci goto out; 7548c2ecf20Sopenharmony_ci 7558c2ecf20Sopenharmony_ci entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 7568c2ecf20Sopenharmony_ci if (!offset_in_entry(entry, file_offset)) 7578c2ecf20Sopenharmony_ci entry = NULL; 7588c2ecf20Sopenharmony_ci if (entry) 7598c2ecf20Sopenharmony_ci refcount_inc(&entry->refs); 7608c2ecf20Sopenharmony_ciout: 7618c2ecf20Sopenharmony_ci spin_unlock_irq(&tree->lock); 7628c2ecf20Sopenharmony_ci return entry; 7638c2ecf20Sopenharmony_ci} 7648c2ecf20Sopenharmony_ci 7658c2ecf20Sopenharmony_ci/* Since the DIO code tries to lock a wide area we need to look for any ordered 7668c2ecf20Sopenharmony_ci * extents that exist in the range, rather than just the start of the range. 7678c2ecf20Sopenharmony_ci */ 7688c2ecf20Sopenharmony_cistruct btrfs_ordered_extent *btrfs_lookup_ordered_range( 7698c2ecf20Sopenharmony_ci struct btrfs_inode *inode, u64 file_offset, u64 len) 7708c2ecf20Sopenharmony_ci{ 7718c2ecf20Sopenharmony_ci struct btrfs_ordered_inode_tree *tree; 7728c2ecf20Sopenharmony_ci struct rb_node *node; 7738c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *entry = NULL; 7748c2ecf20Sopenharmony_ci 7758c2ecf20Sopenharmony_ci tree = &inode->ordered_tree; 7768c2ecf20Sopenharmony_ci spin_lock_irq(&tree->lock); 7778c2ecf20Sopenharmony_ci node = tree_search(tree, file_offset); 7788c2ecf20Sopenharmony_ci if (!node) { 7798c2ecf20Sopenharmony_ci node = tree_search(tree, file_offset + len); 7808c2ecf20Sopenharmony_ci if (!node) 7818c2ecf20Sopenharmony_ci goto out; 7828c2ecf20Sopenharmony_ci } 7838c2ecf20Sopenharmony_ci 7848c2ecf20Sopenharmony_ci while (1) { 7858c2ecf20Sopenharmony_ci entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 7868c2ecf20Sopenharmony_ci if (range_overlaps(entry, file_offset, len)) 7878c2ecf20Sopenharmony_ci break; 7888c2ecf20Sopenharmony_ci 7898c2ecf20Sopenharmony_ci if (entry->file_offset >= file_offset + len) { 7908c2ecf20Sopenharmony_ci entry = NULL; 7918c2ecf20Sopenharmony_ci break; 7928c2ecf20Sopenharmony_ci } 7938c2ecf20Sopenharmony_ci entry = NULL; 7948c2ecf20Sopenharmony_ci node = rb_next(node); 7958c2ecf20Sopenharmony_ci if (!node) 7968c2ecf20Sopenharmony_ci break; 7978c2ecf20Sopenharmony_ci } 7988c2ecf20Sopenharmony_ciout: 7998c2ecf20Sopenharmony_ci if (entry) 8008c2ecf20Sopenharmony_ci refcount_inc(&entry->refs); 8018c2ecf20Sopenharmony_ci spin_unlock_irq(&tree->lock); 8028c2ecf20Sopenharmony_ci return entry; 8038c2ecf20Sopenharmony_ci} 8048c2ecf20Sopenharmony_ci 8058c2ecf20Sopenharmony_ci/* 8068c2ecf20Sopenharmony_ci * Adds all ordered extents to the given list. The list ends up sorted by the 8078c2ecf20Sopenharmony_ci * file_offset of the ordered extents. 8088c2ecf20Sopenharmony_ci */ 8098c2ecf20Sopenharmony_civoid btrfs_get_ordered_extents_for_logging(struct btrfs_inode *inode, 8108c2ecf20Sopenharmony_ci struct list_head *list) 8118c2ecf20Sopenharmony_ci{ 8128c2ecf20Sopenharmony_ci struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree; 8138c2ecf20Sopenharmony_ci struct rb_node *n; 8148c2ecf20Sopenharmony_ci 8158c2ecf20Sopenharmony_ci ASSERT(inode_is_locked(&inode->vfs_inode)); 8168c2ecf20Sopenharmony_ci 8178c2ecf20Sopenharmony_ci spin_lock_irq(&tree->lock); 8188c2ecf20Sopenharmony_ci for (n = rb_first(&tree->tree); n; n = rb_next(n)) { 8198c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *ordered; 8208c2ecf20Sopenharmony_ci 8218c2ecf20Sopenharmony_ci ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); 8228c2ecf20Sopenharmony_ci 8238c2ecf20Sopenharmony_ci if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) 8248c2ecf20Sopenharmony_ci continue; 8258c2ecf20Sopenharmony_ci 8268c2ecf20Sopenharmony_ci ASSERT(list_empty(&ordered->log_list)); 8278c2ecf20Sopenharmony_ci list_add_tail(&ordered->log_list, list); 8288c2ecf20Sopenharmony_ci refcount_inc(&ordered->refs); 8298c2ecf20Sopenharmony_ci } 8308c2ecf20Sopenharmony_ci spin_unlock_irq(&tree->lock); 8318c2ecf20Sopenharmony_ci} 8328c2ecf20Sopenharmony_ci 8338c2ecf20Sopenharmony_ci/* 8348c2ecf20Sopenharmony_ci * lookup and return any extent before 'file_offset'. NULL is returned 8358c2ecf20Sopenharmony_ci * if none is found 8368c2ecf20Sopenharmony_ci */ 8378c2ecf20Sopenharmony_cistruct btrfs_ordered_extent * 8388c2ecf20Sopenharmony_cibtrfs_lookup_first_ordered_extent(struct btrfs_inode *inode, u64 file_offset) 8398c2ecf20Sopenharmony_ci{ 8408c2ecf20Sopenharmony_ci struct btrfs_ordered_inode_tree *tree; 8418c2ecf20Sopenharmony_ci struct rb_node *node; 8428c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *entry = NULL; 8438c2ecf20Sopenharmony_ci 8448c2ecf20Sopenharmony_ci tree = &inode->ordered_tree; 8458c2ecf20Sopenharmony_ci spin_lock_irq(&tree->lock); 8468c2ecf20Sopenharmony_ci node = tree_search(tree, file_offset); 8478c2ecf20Sopenharmony_ci if (!node) 8488c2ecf20Sopenharmony_ci goto out; 8498c2ecf20Sopenharmony_ci 8508c2ecf20Sopenharmony_ci entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 8518c2ecf20Sopenharmony_ci refcount_inc(&entry->refs); 8528c2ecf20Sopenharmony_ciout: 8538c2ecf20Sopenharmony_ci spin_unlock_irq(&tree->lock); 8548c2ecf20Sopenharmony_ci return entry; 8558c2ecf20Sopenharmony_ci} 8568c2ecf20Sopenharmony_ci 8578c2ecf20Sopenharmony_ci/* 8588c2ecf20Sopenharmony_ci * search the ordered extents for one corresponding to 'offset' and 8598c2ecf20Sopenharmony_ci * try to find a checksum. This is used because we allow pages to 8608c2ecf20Sopenharmony_ci * be reclaimed before their checksum is actually put into the btree 8618c2ecf20Sopenharmony_ci */ 8628c2ecf20Sopenharmony_ciint btrfs_find_ordered_sum(struct btrfs_inode *inode, u64 offset, 8638c2ecf20Sopenharmony_ci u64 disk_bytenr, u8 *sum, int len) 8648c2ecf20Sopenharmony_ci{ 8658c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 8668c2ecf20Sopenharmony_ci struct btrfs_ordered_sum *ordered_sum; 8678c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *ordered; 8688c2ecf20Sopenharmony_ci struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree; 8698c2ecf20Sopenharmony_ci unsigned long num_sectors; 8708c2ecf20Sopenharmony_ci unsigned long i; 8718c2ecf20Sopenharmony_ci u32 sectorsize = btrfs_inode_sectorsize(inode); 8728c2ecf20Sopenharmony_ci const u8 blocksize_bits = inode->vfs_inode.i_sb->s_blocksize_bits; 8738c2ecf20Sopenharmony_ci const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); 8748c2ecf20Sopenharmony_ci int index = 0; 8758c2ecf20Sopenharmony_ci 8768c2ecf20Sopenharmony_ci ordered = btrfs_lookup_ordered_extent(inode, offset); 8778c2ecf20Sopenharmony_ci if (!ordered) 8788c2ecf20Sopenharmony_ci return 0; 8798c2ecf20Sopenharmony_ci 8808c2ecf20Sopenharmony_ci spin_lock_irq(&tree->lock); 8818c2ecf20Sopenharmony_ci list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { 8828c2ecf20Sopenharmony_ci if (disk_bytenr >= ordered_sum->bytenr && 8838c2ecf20Sopenharmony_ci disk_bytenr < ordered_sum->bytenr + ordered_sum->len) { 8848c2ecf20Sopenharmony_ci i = (disk_bytenr - ordered_sum->bytenr) >> blocksize_bits; 8858c2ecf20Sopenharmony_ci num_sectors = ordered_sum->len >> blocksize_bits; 8868c2ecf20Sopenharmony_ci num_sectors = min_t(int, len - index, num_sectors - i); 8878c2ecf20Sopenharmony_ci memcpy(sum + index, ordered_sum->sums + i * csum_size, 8888c2ecf20Sopenharmony_ci num_sectors * csum_size); 8898c2ecf20Sopenharmony_ci 8908c2ecf20Sopenharmony_ci index += (int)num_sectors * csum_size; 8918c2ecf20Sopenharmony_ci if (index == len) 8928c2ecf20Sopenharmony_ci goto out; 8938c2ecf20Sopenharmony_ci disk_bytenr += num_sectors * sectorsize; 8948c2ecf20Sopenharmony_ci } 8958c2ecf20Sopenharmony_ci } 8968c2ecf20Sopenharmony_ciout: 8978c2ecf20Sopenharmony_ci spin_unlock_irq(&tree->lock); 8988c2ecf20Sopenharmony_ci btrfs_put_ordered_extent(ordered); 8998c2ecf20Sopenharmony_ci return index; 9008c2ecf20Sopenharmony_ci} 9018c2ecf20Sopenharmony_ci 9028c2ecf20Sopenharmony_ci/* 9038c2ecf20Sopenharmony_ci * btrfs_flush_ordered_range - Lock the passed range and ensures all pending 9048c2ecf20Sopenharmony_ci * ordered extents in it are run to completion. 9058c2ecf20Sopenharmony_ci * 9068c2ecf20Sopenharmony_ci * @inode: Inode whose ordered tree is to be searched 9078c2ecf20Sopenharmony_ci * @start: Beginning of range to flush 9088c2ecf20Sopenharmony_ci * @end: Last byte of range to lock 9098c2ecf20Sopenharmony_ci * @cached_state: If passed, will return the extent state responsible for the 9108c2ecf20Sopenharmony_ci * locked range. It's the caller's responsibility to free the cached state. 9118c2ecf20Sopenharmony_ci * 9128c2ecf20Sopenharmony_ci * This function always returns with the given range locked, ensuring after it's 9138c2ecf20Sopenharmony_ci * called no order extent can be pending. 9148c2ecf20Sopenharmony_ci */ 9158c2ecf20Sopenharmony_civoid btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start, 9168c2ecf20Sopenharmony_ci u64 end, 9178c2ecf20Sopenharmony_ci struct extent_state **cached_state) 9188c2ecf20Sopenharmony_ci{ 9198c2ecf20Sopenharmony_ci struct btrfs_ordered_extent *ordered; 9208c2ecf20Sopenharmony_ci struct extent_state *cache = NULL; 9218c2ecf20Sopenharmony_ci struct extent_state **cachedp = &cache; 9228c2ecf20Sopenharmony_ci 9238c2ecf20Sopenharmony_ci if (cached_state) 9248c2ecf20Sopenharmony_ci cachedp = cached_state; 9258c2ecf20Sopenharmony_ci 9268c2ecf20Sopenharmony_ci while (1) { 9278c2ecf20Sopenharmony_ci lock_extent_bits(&inode->io_tree, start, end, cachedp); 9288c2ecf20Sopenharmony_ci ordered = btrfs_lookup_ordered_range(inode, start, 9298c2ecf20Sopenharmony_ci end - start + 1); 9308c2ecf20Sopenharmony_ci if (!ordered) { 9318c2ecf20Sopenharmony_ci /* 9328c2ecf20Sopenharmony_ci * If no external cached_state has been passed then 9338c2ecf20Sopenharmony_ci * decrement the extra ref taken for cachedp since we 9348c2ecf20Sopenharmony_ci * aren't exposing it outside of this function 9358c2ecf20Sopenharmony_ci */ 9368c2ecf20Sopenharmony_ci if (!cached_state) 9378c2ecf20Sopenharmony_ci refcount_dec(&cache->refs); 9388c2ecf20Sopenharmony_ci break; 9398c2ecf20Sopenharmony_ci } 9408c2ecf20Sopenharmony_ci unlock_extent_cached(&inode->io_tree, start, end, cachedp); 9418c2ecf20Sopenharmony_ci btrfs_start_ordered_extent(ordered, 1); 9428c2ecf20Sopenharmony_ci btrfs_put_ordered_extent(ordered); 9438c2ecf20Sopenharmony_ci } 9448c2ecf20Sopenharmony_ci} 9458c2ecf20Sopenharmony_ci 9468c2ecf20Sopenharmony_ciint __init ordered_data_init(void) 9478c2ecf20Sopenharmony_ci{ 9488c2ecf20Sopenharmony_ci btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent", 9498c2ecf20Sopenharmony_ci sizeof(struct btrfs_ordered_extent), 0, 9508c2ecf20Sopenharmony_ci SLAB_MEM_SPREAD, 9518c2ecf20Sopenharmony_ci NULL); 9528c2ecf20Sopenharmony_ci if (!btrfs_ordered_extent_cache) 9538c2ecf20Sopenharmony_ci return -ENOMEM; 9548c2ecf20Sopenharmony_ci 9558c2ecf20Sopenharmony_ci return 0; 9568c2ecf20Sopenharmony_ci} 9578c2ecf20Sopenharmony_ci 9588c2ecf20Sopenharmony_civoid __cold ordered_data_exit(void) 9598c2ecf20Sopenharmony_ci{ 9608c2ecf20Sopenharmony_ci kmem_cache_destroy(btrfs_ordered_extent_cache); 9618c2ecf20Sopenharmony_ci} 962