162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2007 Oracle. All rights reserved. 462306a36Sopenharmony_ci */ 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#include <linux/slab.h> 762306a36Sopenharmony_ci#include <linux/blkdev.h> 862306a36Sopenharmony_ci#include <linux/writeback.h> 962306a36Sopenharmony_ci#include <linux/sched/mm.h> 1062306a36Sopenharmony_ci#include "messages.h" 1162306a36Sopenharmony_ci#include "misc.h" 1262306a36Sopenharmony_ci#include "ctree.h" 1362306a36Sopenharmony_ci#include "transaction.h" 1462306a36Sopenharmony_ci#include "btrfs_inode.h" 1562306a36Sopenharmony_ci#include "extent_io.h" 1662306a36Sopenharmony_ci#include "disk-io.h" 1762306a36Sopenharmony_ci#include "compression.h" 1862306a36Sopenharmony_ci#include "delalloc-space.h" 1962306a36Sopenharmony_ci#include "qgroup.h" 2062306a36Sopenharmony_ci#include "subpage.h" 2162306a36Sopenharmony_ci#include "file.h" 2262306a36Sopenharmony_ci#include "super.h" 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_cistatic struct kmem_cache *btrfs_ordered_extent_cache; 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_cistatic u64 entry_end(struct btrfs_ordered_extent *entry) 2762306a36Sopenharmony_ci{ 2862306a36Sopenharmony_ci if (entry->file_offset + entry->num_bytes < entry->file_offset) 2962306a36Sopenharmony_ci return (u64)-1; 3062306a36Sopenharmony_ci return entry->file_offset + entry->num_bytes; 3162306a36Sopenharmony_ci} 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci/* returns NULL if the insertion worked, or it returns the node it did find 3462306a36Sopenharmony_ci * in the tree 3562306a36Sopenharmony_ci */ 3662306a36Sopenharmony_cistatic struct rb_node *tree_insert(struct rb_root *root, u64 file_offset, 3762306a36Sopenharmony_ci struct rb_node *node) 3862306a36Sopenharmony_ci{ 3962306a36Sopenharmony_ci struct rb_node **p = &root->rb_node; 4062306a36Sopenharmony_ci struct rb_node *parent = NULL; 4162306a36Sopenharmony_ci struct btrfs_ordered_extent *entry; 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_ci while (*p) { 4462306a36Sopenharmony_ci parent = *p; 4562306a36Sopenharmony_ci entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node); 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_ci if (file_offset < entry->file_offset) 4862306a36Sopenharmony_ci p = &(*p)->rb_left; 4962306a36Sopenharmony_ci else if (file_offset >= entry_end(entry)) 5062306a36Sopenharmony_ci p = &(*p)->rb_right; 5162306a36Sopenharmony_ci else 5262306a36Sopenharmony_ci return parent; 5362306a36Sopenharmony_ci } 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci rb_link_node(node, parent, p); 5662306a36Sopenharmony_ci rb_insert_color(node, root); 5762306a36Sopenharmony_ci return NULL; 5862306a36Sopenharmony_ci} 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci/* 6162306a36Sopenharmony_ci * look for a given offset in the tree, and if it can't be found return the 6262306a36Sopenharmony_ci * first lesser offset 6362306a36Sopenharmony_ci */ 6462306a36Sopenharmony_cistatic struct rb_node *__tree_search(struct rb_root *root, u64 file_offset, 6562306a36Sopenharmony_ci struct rb_node **prev_ret) 6662306a36Sopenharmony_ci{ 6762306a36Sopenharmony_ci struct rb_node *n = root->rb_node; 6862306a36Sopenharmony_ci struct rb_node *prev = NULL; 6962306a36Sopenharmony_ci struct rb_node *test; 7062306a36Sopenharmony_ci struct btrfs_ordered_extent *entry; 7162306a36Sopenharmony_ci struct btrfs_ordered_extent *prev_entry = NULL; 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_ci while (n) { 7462306a36Sopenharmony_ci entry = rb_entry(n, struct btrfs_ordered_extent, rb_node); 7562306a36Sopenharmony_ci prev = n; 7662306a36Sopenharmony_ci prev_entry = entry; 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci if (file_offset < entry->file_offset) 7962306a36Sopenharmony_ci n = n->rb_left; 8062306a36Sopenharmony_ci else if (file_offset >= entry_end(entry)) 8162306a36Sopenharmony_ci n = n->rb_right; 8262306a36Sopenharmony_ci else 8362306a36Sopenharmony_ci return n; 8462306a36Sopenharmony_ci } 8562306a36Sopenharmony_ci if (!prev_ret) 8662306a36Sopenharmony_ci return NULL; 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci while (prev && file_offset >= entry_end(prev_entry)) { 8962306a36Sopenharmony_ci test = rb_next(prev); 9062306a36Sopenharmony_ci if (!test) 9162306a36Sopenharmony_ci break; 9262306a36Sopenharmony_ci prev_entry = rb_entry(test, struct btrfs_ordered_extent, 9362306a36Sopenharmony_ci rb_node); 9462306a36Sopenharmony_ci if (file_offset < entry_end(prev_entry)) 9562306a36Sopenharmony_ci break; 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci prev = test; 9862306a36Sopenharmony_ci } 9962306a36Sopenharmony_ci if (prev) 10062306a36Sopenharmony_ci prev_entry = rb_entry(prev, struct btrfs_ordered_extent, 10162306a36Sopenharmony_ci rb_node); 10262306a36Sopenharmony_ci while (prev && file_offset < entry_end(prev_entry)) { 10362306a36Sopenharmony_ci test = rb_prev(prev); 10462306a36Sopenharmony_ci if (!test) 10562306a36Sopenharmony_ci break; 10662306a36Sopenharmony_ci prev_entry = rb_entry(test, struct btrfs_ordered_extent, 10762306a36Sopenharmony_ci rb_node); 10862306a36Sopenharmony_ci prev = test; 10962306a36Sopenharmony_ci } 11062306a36Sopenharmony_ci *prev_ret = prev; 11162306a36Sopenharmony_ci return NULL; 11262306a36Sopenharmony_ci} 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_cistatic int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset, 11562306a36Sopenharmony_ci u64 len) 11662306a36Sopenharmony_ci{ 11762306a36Sopenharmony_ci if (file_offset + len <= entry->file_offset || 11862306a36Sopenharmony_ci entry->file_offset + entry->num_bytes <= file_offset) 11962306a36Sopenharmony_ci return 0; 12062306a36Sopenharmony_ci return 1; 12162306a36Sopenharmony_ci} 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci/* 12462306a36Sopenharmony_ci * look find the first ordered struct that has this offset, otherwise 12562306a36Sopenharmony_ci * the first one less than this offset 12662306a36Sopenharmony_ci */ 12762306a36Sopenharmony_cistatic inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, 12862306a36Sopenharmony_ci u64 file_offset) 12962306a36Sopenharmony_ci{ 13062306a36Sopenharmony_ci struct rb_root *root = &tree->tree; 13162306a36Sopenharmony_ci struct rb_node *prev = NULL; 13262306a36Sopenharmony_ci struct rb_node *ret; 13362306a36Sopenharmony_ci struct btrfs_ordered_extent *entry; 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci if (tree->last) { 13662306a36Sopenharmony_ci entry = rb_entry(tree->last, struct btrfs_ordered_extent, 13762306a36Sopenharmony_ci rb_node); 13862306a36Sopenharmony_ci if (in_range(file_offset, entry->file_offset, entry->num_bytes)) 13962306a36Sopenharmony_ci return tree->last; 14062306a36Sopenharmony_ci } 14162306a36Sopenharmony_ci ret = __tree_search(root, file_offset, &prev); 14262306a36Sopenharmony_ci if (!ret) 14362306a36Sopenharmony_ci ret = prev; 14462306a36Sopenharmony_ci if (ret) 14562306a36Sopenharmony_ci tree->last = ret; 14662306a36Sopenharmony_ci return ret; 14762306a36Sopenharmony_ci} 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_cistatic struct btrfs_ordered_extent *alloc_ordered_extent( 15062306a36Sopenharmony_ci struct btrfs_inode *inode, u64 file_offset, u64 num_bytes, 15162306a36Sopenharmony_ci u64 ram_bytes, u64 disk_bytenr, u64 disk_num_bytes, 15262306a36Sopenharmony_ci u64 offset, unsigned long flags, int compress_type) 15362306a36Sopenharmony_ci{ 15462306a36Sopenharmony_ci struct btrfs_ordered_extent *entry; 15562306a36Sopenharmony_ci int ret; 15662306a36Sopenharmony_ci u64 qgroup_rsv = 0; 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci if (flags & 15962306a36Sopenharmony_ci ((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) { 16062306a36Sopenharmony_ci /* For nocow write, we can release the qgroup rsv right now */ 16162306a36Sopenharmony_ci ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes, &qgroup_rsv); 16262306a36Sopenharmony_ci if (ret < 0) 16362306a36Sopenharmony_ci return ERR_PTR(ret); 16462306a36Sopenharmony_ci } else { 16562306a36Sopenharmony_ci /* 16662306a36Sopenharmony_ci * The ordered extent has reserved qgroup space, release now 16762306a36Sopenharmony_ci * and pass the reserved number for qgroup_record to free. 16862306a36Sopenharmony_ci */ 16962306a36Sopenharmony_ci ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes, &qgroup_rsv); 17062306a36Sopenharmony_ci if (ret < 0) 17162306a36Sopenharmony_ci return ERR_PTR(ret); 17262306a36Sopenharmony_ci } 17362306a36Sopenharmony_ci entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS); 17462306a36Sopenharmony_ci if (!entry) 17562306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci entry->file_offset = file_offset; 17862306a36Sopenharmony_ci entry->num_bytes = num_bytes; 17962306a36Sopenharmony_ci entry->ram_bytes = ram_bytes; 18062306a36Sopenharmony_ci entry->disk_bytenr = disk_bytenr; 18162306a36Sopenharmony_ci entry->disk_num_bytes = disk_num_bytes; 18262306a36Sopenharmony_ci entry->offset = offset; 18362306a36Sopenharmony_ci entry->bytes_left = num_bytes; 18462306a36Sopenharmony_ci entry->inode = igrab(&inode->vfs_inode); 18562306a36Sopenharmony_ci entry->compress_type = compress_type; 18662306a36Sopenharmony_ci entry->truncated_len = (u64)-1; 18762306a36Sopenharmony_ci entry->qgroup_rsv = qgroup_rsv; 18862306a36Sopenharmony_ci entry->flags = flags; 18962306a36Sopenharmony_ci refcount_set(&entry->refs, 1); 19062306a36Sopenharmony_ci init_waitqueue_head(&entry->wait); 19162306a36Sopenharmony_ci INIT_LIST_HEAD(&entry->list); 19262306a36Sopenharmony_ci INIT_LIST_HEAD(&entry->log_list); 19362306a36Sopenharmony_ci INIT_LIST_HEAD(&entry->root_extent_list); 19462306a36Sopenharmony_ci INIT_LIST_HEAD(&entry->work_list); 19562306a36Sopenharmony_ci init_completion(&entry->completion); 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci /* 19862306a36Sopenharmony_ci * We don't need the count_max_extents here, we can assume that all of 19962306a36Sopenharmony_ci * that work has been done at higher layers, so this is truly the 20062306a36Sopenharmony_ci * smallest the extent is going to get. 20162306a36Sopenharmony_ci */ 20262306a36Sopenharmony_ci spin_lock(&inode->lock); 20362306a36Sopenharmony_ci btrfs_mod_outstanding_extents(inode, 1); 20462306a36Sopenharmony_ci spin_unlock(&inode->lock); 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_ci return entry; 20762306a36Sopenharmony_ci} 20862306a36Sopenharmony_ci 20962306a36Sopenharmony_cistatic void insert_ordered_extent(struct btrfs_ordered_extent *entry) 21062306a36Sopenharmony_ci{ 21162306a36Sopenharmony_ci struct btrfs_inode *inode = BTRFS_I(entry->inode); 21262306a36Sopenharmony_ci struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree; 21362306a36Sopenharmony_ci struct btrfs_root *root = inode->root; 21462306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 21562306a36Sopenharmony_ci struct rb_node *node; 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci trace_btrfs_ordered_extent_add(inode, entry); 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci percpu_counter_add_batch(&fs_info->ordered_bytes, entry->num_bytes, 22062306a36Sopenharmony_ci fs_info->delalloc_batch); 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_ci /* One ref for the tree. */ 22362306a36Sopenharmony_ci refcount_inc(&entry->refs); 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci spin_lock_irq(&tree->lock); 22662306a36Sopenharmony_ci node = tree_insert(&tree->tree, entry->file_offset, &entry->rb_node); 22762306a36Sopenharmony_ci if (node) 22862306a36Sopenharmony_ci btrfs_panic(fs_info, -EEXIST, 22962306a36Sopenharmony_ci "inconsistency in ordered tree at offset %llu", 23062306a36Sopenharmony_ci entry->file_offset); 23162306a36Sopenharmony_ci spin_unlock_irq(&tree->lock); 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci spin_lock(&root->ordered_extent_lock); 23462306a36Sopenharmony_ci list_add_tail(&entry->root_extent_list, 23562306a36Sopenharmony_ci &root->ordered_extents); 23662306a36Sopenharmony_ci root->nr_ordered_extents++; 23762306a36Sopenharmony_ci if (root->nr_ordered_extents == 1) { 23862306a36Sopenharmony_ci spin_lock(&fs_info->ordered_root_lock); 23962306a36Sopenharmony_ci BUG_ON(!list_empty(&root->ordered_root)); 24062306a36Sopenharmony_ci list_add_tail(&root->ordered_root, &fs_info->ordered_roots); 24162306a36Sopenharmony_ci spin_unlock(&fs_info->ordered_root_lock); 24262306a36Sopenharmony_ci } 24362306a36Sopenharmony_ci spin_unlock(&root->ordered_extent_lock); 24462306a36Sopenharmony_ci} 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_ci/* 24762306a36Sopenharmony_ci * Add an ordered extent to the per-inode tree. 24862306a36Sopenharmony_ci * 24962306a36Sopenharmony_ci * @inode: Inode that this extent is for. 25062306a36Sopenharmony_ci * @file_offset: Logical offset in file where the extent starts. 25162306a36Sopenharmony_ci * @num_bytes: Logical length of extent in file. 25262306a36Sopenharmony_ci * @ram_bytes: Full length of unencoded data. 25362306a36Sopenharmony_ci * @disk_bytenr: Offset of extent on disk. 25462306a36Sopenharmony_ci * @disk_num_bytes: Size of extent on disk. 25562306a36Sopenharmony_ci * @offset: Offset into unencoded data where file data starts. 25662306a36Sopenharmony_ci * @flags: Flags specifying type of extent (1 << BTRFS_ORDERED_*). 25762306a36Sopenharmony_ci * @compress_type: Compression algorithm used for data. 25862306a36Sopenharmony_ci * 25962306a36Sopenharmony_ci * Most of these parameters correspond to &struct btrfs_file_extent_item. The 26062306a36Sopenharmony_ci * tree is given a single reference on the ordered extent that was inserted, and 26162306a36Sopenharmony_ci * the returned pointer is given a second reference. 26262306a36Sopenharmony_ci * 26362306a36Sopenharmony_ci * Return: the new ordered extent or error pointer. 26462306a36Sopenharmony_ci */ 26562306a36Sopenharmony_cistruct btrfs_ordered_extent *btrfs_alloc_ordered_extent( 26662306a36Sopenharmony_ci struct btrfs_inode *inode, u64 file_offset, 26762306a36Sopenharmony_ci u64 num_bytes, u64 ram_bytes, u64 disk_bytenr, 26862306a36Sopenharmony_ci u64 disk_num_bytes, u64 offset, unsigned long flags, 26962306a36Sopenharmony_ci int compress_type) 27062306a36Sopenharmony_ci{ 27162306a36Sopenharmony_ci struct btrfs_ordered_extent *entry; 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0); 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_ci entry = alloc_ordered_extent(inode, file_offset, num_bytes, ram_bytes, 27662306a36Sopenharmony_ci disk_bytenr, disk_num_bytes, offset, flags, 27762306a36Sopenharmony_ci compress_type); 27862306a36Sopenharmony_ci if (!IS_ERR(entry)) 27962306a36Sopenharmony_ci insert_ordered_extent(entry); 28062306a36Sopenharmony_ci return entry; 28162306a36Sopenharmony_ci} 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ci/* 28462306a36Sopenharmony_ci * Add a struct btrfs_ordered_sum into the list of checksums to be inserted 28562306a36Sopenharmony_ci * when an ordered extent is finished. If the list covers more than one 28662306a36Sopenharmony_ci * ordered extent, it is split across multiples. 28762306a36Sopenharmony_ci */ 28862306a36Sopenharmony_civoid btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry, 28962306a36Sopenharmony_ci struct btrfs_ordered_sum *sum) 29062306a36Sopenharmony_ci{ 29162306a36Sopenharmony_ci struct btrfs_ordered_inode_tree *tree; 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci tree = &BTRFS_I(entry->inode)->ordered_tree; 29462306a36Sopenharmony_ci spin_lock_irq(&tree->lock); 29562306a36Sopenharmony_ci list_add_tail(&sum->list, &entry->list); 29662306a36Sopenharmony_ci spin_unlock_irq(&tree->lock); 29762306a36Sopenharmony_ci} 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_cistatic void finish_ordered_fn(struct btrfs_work *work) 30062306a36Sopenharmony_ci{ 30162306a36Sopenharmony_ci struct btrfs_ordered_extent *ordered_extent; 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci ordered_extent = container_of(work, struct btrfs_ordered_extent, work); 30462306a36Sopenharmony_ci btrfs_finish_ordered_io(ordered_extent); 30562306a36Sopenharmony_ci} 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_cistatic bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered, 30862306a36Sopenharmony_ci struct page *page, u64 file_offset, 30962306a36Sopenharmony_ci u64 len, bool uptodate) 31062306a36Sopenharmony_ci{ 31162306a36Sopenharmony_ci struct btrfs_inode *inode = BTRFS_I(ordered->inode); 31262306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_ci lockdep_assert_held(&inode->ordered_tree.lock); 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci if (page) { 31762306a36Sopenharmony_ci ASSERT(page->mapping); 31862306a36Sopenharmony_ci ASSERT(page_offset(page) <= file_offset); 31962306a36Sopenharmony_ci ASSERT(file_offset + len <= page_offset(page) + PAGE_SIZE); 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci /* 32262306a36Sopenharmony_ci * Ordered (Private2) bit indicates whether we still have 32362306a36Sopenharmony_ci * pending io unfinished for the ordered extent. 32462306a36Sopenharmony_ci * 32562306a36Sopenharmony_ci * If there's no such bit, we need to skip to next range. 32662306a36Sopenharmony_ci */ 32762306a36Sopenharmony_ci if (!btrfs_page_test_ordered(fs_info, page, file_offset, len)) 32862306a36Sopenharmony_ci return false; 32962306a36Sopenharmony_ci btrfs_page_clear_ordered(fs_info, page, file_offset, len); 33062306a36Sopenharmony_ci } 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci /* Now we're fine to update the accounting. */ 33362306a36Sopenharmony_ci if (WARN_ON_ONCE(len > ordered->bytes_left)) { 33462306a36Sopenharmony_ci btrfs_crit(fs_info, 33562306a36Sopenharmony_ci"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%llu left=%llu", 33662306a36Sopenharmony_ci inode->root->root_key.objectid, btrfs_ino(inode), 33762306a36Sopenharmony_ci ordered->file_offset, ordered->num_bytes, 33862306a36Sopenharmony_ci len, ordered->bytes_left); 33962306a36Sopenharmony_ci ordered->bytes_left = 0; 34062306a36Sopenharmony_ci } else { 34162306a36Sopenharmony_ci ordered->bytes_left -= len; 34262306a36Sopenharmony_ci } 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci if (!uptodate) 34562306a36Sopenharmony_ci set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci if (ordered->bytes_left) 34862306a36Sopenharmony_ci return false; 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_ci /* 35162306a36Sopenharmony_ci * All the IO of the ordered extent is finished, we need to queue 35262306a36Sopenharmony_ci * the finish_func to be executed. 35362306a36Sopenharmony_ci */ 35462306a36Sopenharmony_ci set_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags); 35562306a36Sopenharmony_ci cond_wake_up(&ordered->wait); 35662306a36Sopenharmony_ci refcount_inc(&ordered->refs); 35762306a36Sopenharmony_ci trace_btrfs_ordered_extent_mark_finished(inode, ordered); 35862306a36Sopenharmony_ci return true; 35962306a36Sopenharmony_ci} 36062306a36Sopenharmony_ci 36162306a36Sopenharmony_cistatic void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered) 36262306a36Sopenharmony_ci{ 36362306a36Sopenharmony_ci struct btrfs_inode *inode = BTRFS_I(ordered->inode); 36462306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 36562306a36Sopenharmony_ci struct btrfs_workqueue *wq = btrfs_is_free_space_inode(inode) ? 36662306a36Sopenharmony_ci fs_info->endio_freespace_worker : fs_info->endio_write_workers; 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); 36962306a36Sopenharmony_ci btrfs_queue_work(wq, &ordered->work); 37062306a36Sopenharmony_ci} 37162306a36Sopenharmony_ci 37262306a36Sopenharmony_cibool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered, 37362306a36Sopenharmony_ci struct page *page, u64 file_offset, u64 len, 37462306a36Sopenharmony_ci bool uptodate) 37562306a36Sopenharmony_ci{ 37662306a36Sopenharmony_ci struct btrfs_inode *inode = BTRFS_I(ordered->inode); 37762306a36Sopenharmony_ci unsigned long flags; 37862306a36Sopenharmony_ci bool ret; 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci trace_btrfs_finish_ordered_extent(inode, file_offset, len, uptodate); 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci spin_lock_irqsave(&inode->ordered_tree.lock, flags); 38362306a36Sopenharmony_ci ret = can_finish_ordered_extent(ordered, page, file_offset, len, uptodate); 38462306a36Sopenharmony_ci spin_unlock_irqrestore(&inode->ordered_tree.lock, flags); 38562306a36Sopenharmony_ci 38662306a36Sopenharmony_ci if (ret) 38762306a36Sopenharmony_ci btrfs_queue_ordered_fn(ordered); 38862306a36Sopenharmony_ci return ret; 38962306a36Sopenharmony_ci} 39062306a36Sopenharmony_ci 39162306a36Sopenharmony_ci/* 39262306a36Sopenharmony_ci * Mark all ordered extents io inside the specified range finished. 39362306a36Sopenharmony_ci * 39462306a36Sopenharmony_ci * @page: The involved page for the operation. 39562306a36Sopenharmony_ci * For uncompressed buffered IO, the page status also needs to be 39662306a36Sopenharmony_ci * updated to indicate whether the pending ordered io is finished. 39762306a36Sopenharmony_ci * Can be NULL for direct IO and compressed write. 39862306a36Sopenharmony_ci * For these cases, callers are ensured they won't execute the 39962306a36Sopenharmony_ci * endio function twice. 40062306a36Sopenharmony_ci * 40162306a36Sopenharmony_ci * This function is called for endio, thus the range must have ordered 40262306a36Sopenharmony_ci * extent(s) covering it. 40362306a36Sopenharmony_ci */ 40462306a36Sopenharmony_civoid btrfs_mark_ordered_io_finished(struct btrfs_inode *inode, 40562306a36Sopenharmony_ci struct page *page, u64 file_offset, 40662306a36Sopenharmony_ci u64 num_bytes, bool uptodate) 40762306a36Sopenharmony_ci{ 40862306a36Sopenharmony_ci struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree; 40962306a36Sopenharmony_ci struct rb_node *node; 41062306a36Sopenharmony_ci struct btrfs_ordered_extent *entry = NULL; 41162306a36Sopenharmony_ci unsigned long flags; 41262306a36Sopenharmony_ci u64 cur = file_offset; 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_ci trace_btrfs_writepage_end_io_hook(inode, file_offset, 41562306a36Sopenharmony_ci file_offset + num_bytes - 1, 41662306a36Sopenharmony_ci uptodate); 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_ci spin_lock_irqsave(&tree->lock, flags); 41962306a36Sopenharmony_ci while (cur < file_offset + num_bytes) { 42062306a36Sopenharmony_ci u64 entry_end; 42162306a36Sopenharmony_ci u64 end; 42262306a36Sopenharmony_ci u32 len; 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_ci node = tree_search(tree, cur); 42562306a36Sopenharmony_ci /* No ordered extents at all */ 42662306a36Sopenharmony_ci if (!node) 42762306a36Sopenharmony_ci break; 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 43062306a36Sopenharmony_ci entry_end = entry->file_offset + entry->num_bytes; 43162306a36Sopenharmony_ci /* 43262306a36Sopenharmony_ci * |<-- OE --->| | 43362306a36Sopenharmony_ci * cur 43462306a36Sopenharmony_ci * Go to next OE. 43562306a36Sopenharmony_ci */ 43662306a36Sopenharmony_ci if (cur >= entry_end) { 43762306a36Sopenharmony_ci node = rb_next(node); 43862306a36Sopenharmony_ci /* No more ordered extents, exit */ 43962306a36Sopenharmony_ci if (!node) 44062306a36Sopenharmony_ci break; 44162306a36Sopenharmony_ci entry = rb_entry(node, struct btrfs_ordered_extent, 44262306a36Sopenharmony_ci rb_node); 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_ci /* Go to next ordered extent and continue */ 44562306a36Sopenharmony_ci cur = entry->file_offset; 44662306a36Sopenharmony_ci continue; 44762306a36Sopenharmony_ci } 44862306a36Sopenharmony_ci /* 44962306a36Sopenharmony_ci * | |<--- OE --->| 45062306a36Sopenharmony_ci * cur 45162306a36Sopenharmony_ci * Go to the start of OE. 45262306a36Sopenharmony_ci */ 45362306a36Sopenharmony_ci if (cur < entry->file_offset) { 45462306a36Sopenharmony_ci cur = entry->file_offset; 45562306a36Sopenharmony_ci continue; 45662306a36Sopenharmony_ci } 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_ci /* 45962306a36Sopenharmony_ci * Now we are definitely inside one ordered extent. 46062306a36Sopenharmony_ci * 46162306a36Sopenharmony_ci * |<--- OE --->| 46262306a36Sopenharmony_ci * | 46362306a36Sopenharmony_ci * cur 46462306a36Sopenharmony_ci */ 46562306a36Sopenharmony_ci end = min(entry->file_offset + entry->num_bytes, 46662306a36Sopenharmony_ci file_offset + num_bytes) - 1; 46762306a36Sopenharmony_ci ASSERT(end + 1 - cur < U32_MAX); 46862306a36Sopenharmony_ci len = end + 1 - cur; 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_ci if (can_finish_ordered_extent(entry, page, cur, len, uptodate)) { 47162306a36Sopenharmony_ci spin_unlock_irqrestore(&tree->lock, flags); 47262306a36Sopenharmony_ci btrfs_queue_ordered_fn(entry); 47362306a36Sopenharmony_ci spin_lock_irqsave(&tree->lock, flags); 47462306a36Sopenharmony_ci } 47562306a36Sopenharmony_ci cur += len; 47662306a36Sopenharmony_ci } 47762306a36Sopenharmony_ci spin_unlock_irqrestore(&tree->lock, flags); 47862306a36Sopenharmony_ci} 47962306a36Sopenharmony_ci 48062306a36Sopenharmony_ci/* 48162306a36Sopenharmony_ci * Finish IO for one ordered extent across a given range. The range can only 48262306a36Sopenharmony_ci * contain one ordered extent. 48362306a36Sopenharmony_ci * 48462306a36Sopenharmony_ci * @cached: The cached ordered extent. If not NULL, we can skip the tree 48562306a36Sopenharmony_ci * search and use the ordered extent directly. 48662306a36Sopenharmony_ci * Will be also used to store the finished ordered extent. 48762306a36Sopenharmony_ci * @file_offset: File offset for the finished IO 48862306a36Sopenharmony_ci * @io_size: Length of the finish IO range 48962306a36Sopenharmony_ci * 49062306a36Sopenharmony_ci * Return true if the ordered extent is finished in the range, and update 49162306a36Sopenharmony_ci * @cached. 49262306a36Sopenharmony_ci * Return false otherwise. 49362306a36Sopenharmony_ci * 49462306a36Sopenharmony_ci * NOTE: The range can NOT cross multiple ordered extents. 49562306a36Sopenharmony_ci * Thus caller should ensure the range doesn't cross ordered extents. 49662306a36Sopenharmony_ci */ 49762306a36Sopenharmony_cibool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode, 49862306a36Sopenharmony_ci struct btrfs_ordered_extent **cached, 49962306a36Sopenharmony_ci u64 file_offset, u64 io_size) 50062306a36Sopenharmony_ci{ 50162306a36Sopenharmony_ci struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree; 50262306a36Sopenharmony_ci struct rb_node *node; 50362306a36Sopenharmony_ci struct btrfs_ordered_extent *entry = NULL; 50462306a36Sopenharmony_ci unsigned long flags; 50562306a36Sopenharmony_ci bool finished = false; 50662306a36Sopenharmony_ci 50762306a36Sopenharmony_ci spin_lock_irqsave(&tree->lock, flags); 50862306a36Sopenharmony_ci if (cached && *cached) { 50962306a36Sopenharmony_ci entry = *cached; 51062306a36Sopenharmony_ci goto have_entry; 51162306a36Sopenharmony_ci } 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_ci node = tree_search(tree, file_offset); 51462306a36Sopenharmony_ci if (!node) 51562306a36Sopenharmony_ci goto out; 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 51862306a36Sopenharmony_cihave_entry: 51962306a36Sopenharmony_ci if (!in_range(file_offset, entry->file_offset, entry->num_bytes)) 52062306a36Sopenharmony_ci goto out; 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci if (io_size > entry->bytes_left) 52362306a36Sopenharmony_ci btrfs_crit(inode->root->fs_info, 52462306a36Sopenharmony_ci "bad ordered accounting left %llu size %llu", 52562306a36Sopenharmony_ci entry->bytes_left, io_size); 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci entry->bytes_left -= io_size; 52862306a36Sopenharmony_ci 52962306a36Sopenharmony_ci if (entry->bytes_left == 0) { 53062306a36Sopenharmony_ci /* 53162306a36Sopenharmony_ci * Ensure only one caller can set the flag and finished_ret 53262306a36Sopenharmony_ci * accordingly 53362306a36Sopenharmony_ci */ 53462306a36Sopenharmony_ci finished = !test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); 53562306a36Sopenharmony_ci /* test_and_set_bit implies a barrier */ 53662306a36Sopenharmony_ci cond_wake_up_nomb(&entry->wait); 53762306a36Sopenharmony_ci } 53862306a36Sopenharmony_ciout: 53962306a36Sopenharmony_ci if (finished && cached && entry) { 54062306a36Sopenharmony_ci *cached = entry; 54162306a36Sopenharmony_ci refcount_inc(&entry->refs); 54262306a36Sopenharmony_ci trace_btrfs_ordered_extent_dec_test_pending(inode, entry); 54362306a36Sopenharmony_ci } 54462306a36Sopenharmony_ci spin_unlock_irqrestore(&tree->lock, flags); 54562306a36Sopenharmony_ci return finished; 54662306a36Sopenharmony_ci} 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_ci/* 54962306a36Sopenharmony_ci * used to drop a reference on an ordered extent. This will free 55062306a36Sopenharmony_ci * the extent if the last reference is dropped 55162306a36Sopenharmony_ci */ 55262306a36Sopenharmony_civoid btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) 55362306a36Sopenharmony_ci{ 55462306a36Sopenharmony_ci struct list_head *cur; 55562306a36Sopenharmony_ci struct btrfs_ordered_sum *sum; 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci trace_btrfs_ordered_extent_put(BTRFS_I(entry->inode), entry); 55862306a36Sopenharmony_ci 55962306a36Sopenharmony_ci if (refcount_dec_and_test(&entry->refs)) { 56062306a36Sopenharmony_ci ASSERT(list_empty(&entry->root_extent_list)); 56162306a36Sopenharmony_ci ASSERT(list_empty(&entry->log_list)); 56262306a36Sopenharmony_ci ASSERT(RB_EMPTY_NODE(&entry->rb_node)); 56362306a36Sopenharmony_ci if (entry->inode) 56462306a36Sopenharmony_ci btrfs_add_delayed_iput(BTRFS_I(entry->inode)); 56562306a36Sopenharmony_ci while (!list_empty(&entry->list)) { 56662306a36Sopenharmony_ci cur = entry->list.next; 56762306a36Sopenharmony_ci sum = list_entry(cur, struct btrfs_ordered_sum, list); 56862306a36Sopenharmony_ci list_del(&sum->list); 56962306a36Sopenharmony_ci kvfree(sum); 57062306a36Sopenharmony_ci } 57162306a36Sopenharmony_ci kmem_cache_free(btrfs_ordered_extent_cache, entry); 57262306a36Sopenharmony_ci } 57362306a36Sopenharmony_ci} 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci/* 57662306a36Sopenharmony_ci * remove an ordered extent from the tree. No references are dropped 57762306a36Sopenharmony_ci * and waiters are woken up. 57862306a36Sopenharmony_ci */ 57962306a36Sopenharmony_civoid btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, 58062306a36Sopenharmony_ci struct btrfs_ordered_extent *entry) 58162306a36Sopenharmony_ci{ 58262306a36Sopenharmony_ci struct btrfs_ordered_inode_tree *tree; 58362306a36Sopenharmony_ci struct btrfs_root *root = btrfs_inode->root; 58462306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 58562306a36Sopenharmony_ci struct rb_node *node; 58662306a36Sopenharmony_ci bool pending; 58762306a36Sopenharmony_ci bool freespace_inode; 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci /* 59062306a36Sopenharmony_ci * If this is a free space inode the thread has not acquired the ordered 59162306a36Sopenharmony_ci * extents lockdep map. 59262306a36Sopenharmony_ci */ 59362306a36Sopenharmony_ci freespace_inode = btrfs_is_free_space_inode(btrfs_inode); 59462306a36Sopenharmony_ci 59562306a36Sopenharmony_ci btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered); 59662306a36Sopenharmony_ci /* This is paired with btrfs_alloc_ordered_extent. */ 59762306a36Sopenharmony_ci spin_lock(&btrfs_inode->lock); 59862306a36Sopenharmony_ci btrfs_mod_outstanding_extents(btrfs_inode, -1); 59962306a36Sopenharmony_ci spin_unlock(&btrfs_inode->lock); 60062306a36Sopenharmony_ci if (root != fs_info->tree_root) { 60162306a36Sopenharmony_ci u64 release; 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_ci if (test_bit(BTRFS_ORDERED_ENCODED, &entry->flags)) 60462306a36Sopenharmony_ci release = entry->disk_num_bytes; 60562306a36Sopenharmony_ci else 60662306a36Sopenharmony_ci release = entry->num_bytes; 60762306a36Sopenharmony_ci btrfs_delalloc_release_metadata(btrfs_inode, release, 60862306a36Sopenharmony_ci test_bit(BTRFS_ORDERED_IOERR, 60962306a36Sopenharmony_ci &entry->flags)); 61062306a36Sopenharmony_ci } 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_ci percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes, 61362306a36Sopenharmony_ci fs_info->delalloc_batch); 61462306a36Sopenharmony_ci 61562306a36Sopenharmony_ci tree = &btrfs_inode->ordered_tree; 61662306a36Sopenharmony_ci spin_lock_irq(&tree->lock); 61762306a36Sopenharmony_ci node = &entry->rb_node; 61862306a36Sopenharmony_ci rb_erase(node, &tree->tree); 61962306a36Sopenharmony_ci RB_CLEAR_NODE(node); 62062306a36Sopenharmony_ci if (tree->last == node) 62162306a36Sopenharmony_ci tree->last = NULL; 62262306a36Sopenharmony_ci set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); 62362306a36Sopenharmony_ci pending = test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags); 62462306a36Sopenharmony_ci spin_unlock_irq(&tree->lock); 62562306a36Sopenharmony_ci 62662306a36Sopenharmony_ci /* 62762306a36Sopenharmony_ci * The current running transaction is waiting on us, we need to let it 62862306a36Sopenharmony_ci * know that we're complete and wake it up. 62962306a36Sopenharmony_ci */ 63062306a36Sopenharmony_ci if (pending) { 63162306a36Sopenharmony_ci struct btrfs_transaction *trans; 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_ci /* 63462306a36Sopenharmony_ci * The checks for trans are just a formality, it should be set, 63562306a36Sopenharmony_ci * but if it isn't we don't want to deref/assert under the spin 63662306a36Sopenharmony_ci * lock, so be nice and check if trans is set, but ASSERT() so 63762306a36Sopenharmony_ci * if it isn't set a developer will notice. 63862306a36Sopenharmony_ci */ 63962306a36Sopenharmony_ci spin_lock(&fs_info->trans_lock); 64062306a36Sopenharmony_ci trans = fs_info->running_transaction; 64162306a36Sopenharmony_ci if (trans) 64262306a36Sopenharmony_ci refcount_inc(&trans->use_count); 64362306a36Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ci ASSERT(trans || BTRFS_FS_ERROR(fs_info)); 64662306a36Sopenharmony_ci if (trans) { 64762306a36Sopenharmony_ci if (atomic_dec_and_test(&trans->pending_ordered)) 64862306a36Sopenharmony_ci wake_up(&trans->pending_wait); 64962306a36Sopenharmony_ci btrfs_put_transaction(trans); 65062306a36Sopenharmony_ci } 65162306a36Sopenharmony_ci } 65262306a36Sopenharmony_ci 65362306a36Sopenharmony_ci btrfs_lockdep_release(fs_info, btrfs_trans_pending_ordered); 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_ci spin_lock(&root->ordered_extent_lock); 65662306a36Sopenharmony_ci list_del_init(&entry->root_extent_list); 65762306a36Sopenharmony_ci root->nr_ordered_extents--; 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci trace_btrfs_ordered_extent_remove(btrfs_inode, entry); 66062306a36Sopenharmony_ci 66162306a36Sopenharmony_ci if (!root->nr_ordered_extents) { 66262306a36Sopenharmony_ci spin_lock(&fs_info->ordered_root_lock); 66362306a36Sopenharmony_ci BUG_ON(list_empty(&root->ordered_root)); 66462306a36Sopenharmony_ci list_del_init(&root->ordered_root); 66562306a36Sopenharmony_ci spin_unlock(&fs_info->ordered_root_lock); 66662306a36Sopenharmony_ci } 66762306a36Sopenharmony_ci spin_unlock(&root->ordered_extent_lock); 66862306a36Sopenharmony_ci wake_up(&entry->wait); 66962306a36Sopenharmony_ci if (!freespace_inode) 67062306a36Sopenharmony_ci btrfs_lockdep_release(fs_info, btrfs_ordered_extent); 67162306a36Sopenharmony_ci} 67262306a36Sopenharmony_ci 67362306a36Sopenharmony_cistatic void btrfs_run_ordered_extent_work(struct btrfs_work *work) 67462306a36Sopenharmony_ci{ 67562306a36Sopenharmony_ci struct btrfs_ordered_extent *ordered; 67662306a36Sopenharmony_ci 67762306a36Sopenharmony_ci ordered = container_of(work, struct btrfs_ordered_extent, flush_work); 67862306a36Sopenharmony_ci btrfs_start_ordered_extent(ordered); 67962306a36Sopenharmony_ci complete(&ordered->completion); 68062306a36Sopenharmony_ci} 68162306a36Sopenharmony_ci 68262306a36Sopenharmony_ci/* 68362306a36Sopenharmony_ci * wait for all the ordered extents in a root. This is done when balancing 68462306a36Sopenharmony_ci * space between drives. 68562306a36Sopenharmony_ci */ 68662306a36Sopenharmony_ciu64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr, 68762306a36Sopenharmony_ci const u64 range_start, const u64 range_len) 68862306a36Sopenharmony_ci{ 68962306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 69062306a36Sopenharmony_ci LIST_HEAD(splice); 69162306a36Sopenharmony_ci LIST_HEAD(skipped); 69262306a36Sopenharmony_ci LIST_HEAD(works); 69362306a36Sopenharmony_ci struct btrfs_ordered_extent *ordered, *next; 69462306a36Sopenharmony_ci u64 count = 0; 69562306a36Sopenharmony_ci const u64 range_end = range_start + range_len; 69662306a36Sopenharmony_ci 69762306a36Sopenharmony_ci mutex_lock(&root->ordered_extent_mutex); 69862306a36Sopenharmony_ci spin_lock(&root->ordered_extent_lock); 69962306a36Sopenharmony_ci list_splice_init(&root->ordered_extents, &splice); 70062306a36Sopenharmony_ci while (!list_empty(&splice) && nr) { 70162306a36Sopenharmony_ci ordered = list_first_entry(&splice, struct btrfs_ordered_extent, 70262306a36Sopenharmony_ci root_extent_list); 70362306a36Sopenharmony_ci 70462306a36Sopenharmony_ci if (range_end <= ordered->disk_bytenr || 70562306a36Sopenharmony_ci ordered->disk_bytenr + ordered->disk_num_bytes <= range_start) { 70662306a36Sopenharmony_ci list_move_tail(&ordered->root_extent_list, &skipped); 70762306a36Sopenharmony_ci cond_resched_lock(&root->ordered_extent_lock); 70862306a36Sopenharmony_ci continue; 70962306a36Sopenharmony_ci } 71062306a36Sopenharmony_ci 71162306a36Sopenharmony_ci list_move_tail(&ordered->root_extent_list, 71262306a36Sopenharmony_ci &root->ordered_extents); 71362306a36Sopenharmony_ci refcount_inc(&ordered->refs); 71462306a36Sopenharmony_ci spin_unlock(&root->ordered_extent_lock); 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ci btrfs_init_work(&ordered->flush_work, 71762306a36Sopenharmony_ci btrfs_run_ordered_extent_work, NULL, NULL); 71862306a36Sopenharmony_ci list_add_tail(&ordered->work_list, &works); 71962306a36Sopenharmony_ci btrfs_queue_work(fs_info->flush_workers, &ordered->flush_work); 72062306a36Sopenharmony_ci 72162306a36Sopenharmony_ci cond_resched(); 72262306a36Sopenharmony_ci spin_lock(&root->ordered_extent_lock); 72362306a36Sopenharmony_ci if (nr != U64_MAX) 72462306a36Sopenharmony_ci nr--; 72562306a36Sopenharmony_ci count++; 72662306a36Sopenharmony_ci } 72762306a36Sopenharmony_ci list_splice_tail(&skipped, &root->ordered_extents); 72862306a36Sopenharmony_ci list_splice_tail(&splice, &root->ordered_extents); 72962306a36Sopenharmony_ci spin_unlock(&root->ordered_extent_lock); 73062306a36Sopenharmony_ci 73162306a36Sopenharmony_ci list_for_each_entry_safe(ordered, next, &works, work_list) { 73262306a36Sopenharmony_ci list_del_init(&ordered->work_list); 73362306a36Sopenharmony_ci wait_for_completion(&ordered->completion); 73462306a36Sopenharmony_ci btrfs_put_ordered_extent(ordered); 73562306a36Sopenharmony_ci cond_resched(); 73662306a36Sopenharmony_ci } 73762306a36Sopenharmony_ci mutex_unlock(&root->ordered_extent_mutex); 73862306a36Sopenharmony_ci 73962306a36Sopenharmony_ci return count; 74062306a36Sopenharmony_ci} 74162306a36Sopenharmony_ci 74262306a36Sopenharmony_civoid btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr, 74362306a36Sopenharmony_ci const u64 range_start, const u64 range_len) 74462306a36Sopenharmony_ci{ 74562306a36Sopenharmony_ci struct btrfs_root *root; 74662306a36Sopenharmony_ci LIST_HEAD(splice); 74762306a36Sopenharmony_ci u64 done; 74862306a36Sopenharmony_ci 74962306a36Sopenharmony_ci mutex_lock(&fs_info->ordered_operations_mutex); 75062306a36Sopenharmony_ci spin_lock(&fs_info->ordered_root_lock); 75162306a36Sopenharmony_ci list_splice_init(&fs_info->ordered_roots, &splice); 75262306a36Sopenharmony_ci while (!list_empty(&splice) && nr) { 75362306a36Sopenharmony_ci root = list_first_entry(&splice, struct btrfs_root, 75462306a36Sopenharmony_ci ordered_root); 75562306a36Sopenharmony_ci root = btrfs_grab_root(root); 75662306a36Sopenharmony_ci BUG_ON(!root); 75762306a36Sopenharmony_ci list_move_tail(&root->ordered_root, 75862306a36Sopenharmony_ci &fs_info->ordered_roots); 75962306a36Sopenharmony_ci spin_unlock(&fs_info->ordered_root_lock); 76062306a36Sopenharmony_ci 76162306a36Sopenharmony_ci done = btrfs_wait_ordered_extents(root, nr, 76262306a36Sopenharmony_ci range_start, range_len); 76362306a36Sopenharmony_ci btrfs_put_root(root); 76462306a36Sopenharmony_ci 76562306a36Sopenharmony_ci spin_lock(&fs_info->ordered_root_lock); 76662306a36Sopenharmony_ci if (nr != U64_MAX) { 76762306a36Sopenharmony_ci nr -= done; 76862306a36Sopenharmony_ci } 76962306a36Sopenharmony_ci } 77062306a36Sopenharmony_ci list_splice_tail(&splice, &fs_info->ordered_roots); 77162306a36Sopenharmony_ci spin_unlock(&fs_info->ordered_root_lock); 77262306a36Sopenharmony_ci mutex_unlock(&fs_info->ordered_operations_mutex); 77362306a36Sopenharmony_ci} 77462306a36Sopenharmony_ci 77562306a36Sopenharmony_ci/* 77662306a36Sopenharmony_ci * Start IO and wait for a given ordered extent to finish. 77762306a36Sopenharmony_ci * 77862306a36Sopenharmony_ci * Wait on page writeback for all the pages in the extent and the IO completion 77962306a36Sopenharmony_ci * code to insert metadata into the btree corresponding to the extent. 78062306a36Sopenharmony_ci */ 78162306a36Sopenharmony_civoid btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry) 78262306a36Sopenharmony_ci{ 78362306a36Sopenharmony_ci u64 start = entry->file_offset; 78462306a36Sopenharmony_ci u64 end = start + entry->num_bytes - 1; 78562306a36Sopenharmony_ci struct btrfs_inode *inode = BTRFS_I(entry->inode); 78662306a36Sopenharmony_ci bool freespace_inode; 78762306a36Sopenharmony_ci 78862306a36Sopenharmony_ci trace_btrfs_ordered_extent_start(inode, entry); 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_ci /* 79162306a36Sopenharmony_ci * If this is a free space inode do not take the ordered extents lockdep 79262306a36Sopenharmony_ci * map. 79362306a36Sopenharmony_ci */ 79462306a36Sopenharmony_ci freespace_inode = btrfs_is_free_space_inode(inode); 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci /* 79762306a36Sopenharmony_ci * pages in the range can be dirty, clean or writeback. We 79862306a36Sopenharmony_ci * start IO on any dirty ones so the wait doesn't stall waiting 79962306a36Sopenharmony_ci * for the flusher thread to find them 80062306a36Sopenharmony_ci */ 80162306a36Sopenharmony_ci if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) 80262306a36Sopenharmony_ci filemap_fdatawrite_range(inode->vfs_inode.i_mapping, start, end); 80362306a36Sopenharmony_ci 80462306a36Sopenharmony_ci if (!freespace_inode) 80562306a36Sopenharmony_ci btrfs_might_wait_for_event(inode->root->fs_info, btrfs_ordered_extent); 80662306a36Sopenharmony_ci wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags)); 80762306a36Sopenharmony_ci} 80862306a36Sopenharmony_ci 80962306a36Sopenharmony_ci/* 81062306a36Sopenharmony_ci * Used to wait on ordered extents across a large range of bytes. 81162306a36Sopenharmony_ci */ 81262306a36Sopenharmony_ciint btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) 81362306a36Sopenharmony_ci{ 81462306a36Sopenharmony_ci int ret = 0; 81562306a36Sopenharmony_ci int ret_wb = 0; 81662306a36Sopenharmony_ci u64 end; 81762306a36Sopenharmony_ci u64 orig_end; 81862306a36Sopenharmony_ci struct btrfs_ordered_extent *ordered; 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci if (start + len < start) { 82162306a36Sopenharmony_ci orig_end = OFFSET_MAX; 82262306a36Sopenharmony_ci } else { 82362306a36Sopenharmony_ci orig_end = start + len - 1; 82462306a36Sopenharmony_ci if (orig_end > OFFSET_MAX) 82562306a36Sopenharmony_ci orig_end = OFFSET_MAX; 82662306a36Sopenharmony_ci } 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_ci /* start IO across the range first to instantiate any delalloc 82962306a36Sopenharmony_ci * extents 83062306a36Sopenharmony_ci */ 83162306a36Sopenharmony_ci ret = btrfs_fdatawrite_range(inode, start, orig_end); 83262306a36Sopenharmony_ci if (ret) 83362306a36Sopenharmony_ci return ret; 83462306a36Sopenharmony_ci 83562306a36Sopenharmony_ci /* 83662306a36Sopenharmony_ci * If we have a writeback error don't return immediately. Wait first 83762306a36Sopenharmony_ci * for any ordered extents that haven't completed yet. This is to make 83862306a36Sopenharmony_ci * sure no one can dirty the same page ranges and call writepages() 83962306a36Sopenharmony_ci * before the ordered extents complete - to avoid failures (-EEXIST) 84062306a36Sopenharmony_ci * when adding the new ordered extents to the ordered tree. 84162306a36Sopenharmony_ci */ 84262306a36Sopenharmony_ci ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end); 84362306a36Sopenharmony_ci 84462306a36Sopenharmony_ci end = orig_end; 84562306a36Sopenharmony_ci while (1) { 84662306a36Sopenharmony_ci ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode), end); 84762306a36Sopenharmony_ci if (!ordered) 84862306a36Sopenharmony_ci break; 84962306a36Sopenharmony_ci if (ordered->file_offset > orig_end) { 85062306a36Sopenharmony_ci btrfs_put_ordered_extent(ordered); 85162306a36Sopenharmony_ci break; 85262306a36Sopenharmony_ci } 85362306a36Sopenharmony_ci if (ordered->file_offset + ordered->num_bytes <= start) { 85462306a36Sopenharmony_ci btrfs_put_ordered_extent(ordered); 85562306a36Sopenharmony_ci break; 85662306a36Sopenharmony_ci } 85762306a36Sopenharmony_ci btrfs_start_ordered_extent(ordered); 85862306a36Sopenharmony_ci end = ordered->file_offset; 85962306a36Sopenharmony_ci /* 86062306a36Sopenharmony_ci * If the ordered extent had an error save the error but don't 86162306a36Sopenharmony_ci * exit without waiting first for all other ordered extents in 86262306a36Sopenharmony_ci * the range to complete. 86362306a36Sopenharmony_ci */ 86462306a36Sopenharmony_ci if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) 86562306a36Sopenharmony_ci ret = -EIO; 86662306a36Sopenharmony_ci btrfs_put_ordered_extent(ordered); 86762306a36Sopenharmony_ci if (end == 0 || end == start) 86862306a36Sopenharmony_ci break; 86962306a36Sopenharmony_ci end--; 87062306a36Sopenharmony_ci } 87162306a36Sopenharmony_ci return ret_wb ? ret_wb : ret; 87262306a36Sopenharmony_ci} 87362306a36Sopenharmony_ci 87462306a36Sopenharmony_ci/* 87562306a36Sopenharmony_ci * find an ordered extent corresponding to file_offset. return NULL if 87662306a36Sopenharmony_ci * nothing is found, otherwise take a reference on the extent and return it 87762306a36Sopenharmony_ci */ 87862306a36Sopenharmony_cistruct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode, 87962306a36Sopenharmony_ci u64 file_offset) 88062306a36Sopenharmony_ci{ 88162306a36Sopenharmony_ci struct btrfs_ordered_inode_tree *tree; 88262306a36Sopenharmony_ci struct rb_node *node; 88362306a36Sopenharmony_ci struct btrfs_ordered_extent *entry = NULL; 88462306a36Sopenharmony_ci unsigned long flags; 88562306a36Sopenharmony_ci 88662306a36Sopenharmony_ci tree = &inode->ordered_tree; 88762306a36Sopenharmony_ci spin_lock_irqsave(&tree->lock, flags); 88862306a36Sopenharmony_ci node = tree_search(tree, file_offset); 88962306a36Sopenharmony_ci if (!node) 89062306a36Sopenharmony_ci goto out; 89162306a36Sopenharmony_ci 89262306a36Sopenharmony_ci entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 89362306a36Sopenharmony_ci if (!in_range(file_offset, entry->file_offset, entry->num_bytes)) 89462306a36Sopenharmony_ci entry = NULL; 89562306a36Sopenharmony_ci if (entry) { 89662306a36Sopenharmony_ci refcount_inc(&entry->refs); 89762306a36Sopenharmony_ci trace_btrfs_ordered_extent_lookup(inode, entry); 89862306a36Sopenharmony_ci } 89962306a36Sopenharmony_ciout: 90062306a36Sopenharmony_ci spin_unlock_irqrestore(&tree->lock, flags); 90162306a36Sopenharmony_ci return entry; 90262306a36Sopenharmony_ci} 90362306a36Sopenharmony_ci 90462306a36Sopenharmony_ci/* Since the DIO code tries to lock a wide area we need to look for any ordered 90562306a36Sopenharmony_ci * extents that exist in the range, rather than just the start of the range. 90662306a36Sopenharmony_ci */ 90762306a36Sopenharmony_cistruct btrfs_ordered_extent *btrfs_lookup_ordered_range( 90862306a36Sopenharmony_ci struct btrfs_inode *inode, u64 file_offset, u64 len) 90962306a36Sopenharmony_ci{ 91062306a36Sopenharmony_ci struct btrfs_ordered_inode_tree *tree; 91162306a36Sopenharmony_ci struct rb_node *node; 91262306a36Sopenharmony_ci struct btrfs_ordered_extent *entry = NULL; 91362306a36Sopenharmony_ci 91462306a36Sopenharmony_ci tree = &inode->ordered_tree; 91562306a36Sopenharmony_ci spin_lock_irq(&tree->lock); 91662306a36Sopenharmony_ci node = tree_search(tree, file_offset); 91762306a36Sopenharmony_ci if (!node) { 91862306a36Sopenharmony_ci node = tree_search(tree, file_offset + len); 91962306a36Sopenharmony_ci if (!node) 92062306a36Sopenharmony_ci goto out; 92162306a36Sopenharmony_ci } 92262306a36Sopenharmony_ci 92362306a36Sopenharmony_ci while (1) { 92462306a36Sopenharmony_ci entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 92562306a36Sopenharmony_ci if (range_overlaps(entry, file_offset, len)) 92662306a36Sopenharmony_ci break; 92762306a36Sopenharmony_ci 92862306a36Sopenharmony_ci if (entry->file_offset >= file_offset + len) { 92962306a36Sopenharmony_ci entry = NULL; 93062306a36Sopenharmony_ci break; 93162306a36Sopenharmony_ci } 93262306a36Sopenharmony_ci entry = NULL; 93362306a36Sopenharmony_ci node = rb_next(node); 93462306a36Sopenharmony_ci if (!node) 93562306a36Sopenharmony_ci break; 93662306a36Sopenharmony_ci } 93762306a36Sopenharmony_ciout: 93862306a36Sopenharmony_ci if (entry) { 93962306a36Sopenharmony_ci refcount_inc(&entry->refs); 94062306a36Sopenharmony_ci trace_btrfs_ordered_extent_lookup_range(inode, entry); 94162306a36Sopenharmony_ci } 94262306a36Sopenharmony_ci spin_unlock_irq(&tree->lock); 94362306a36Sopenharmony_ci return entry; 94462306a36Sopenharmony_ci} 94562306a36Sopenharmony_ci 94662306a36Sopenharmony_ci/* 94762306a36Sopenharmony_ci * Adds all ordered extents to the given list. The list ends up sorted by the 94862306a36Sopenharmony_ci * file_offset of the ordered extents. 94962306a36Sopenharmony_ci */ 95062306a36Sopenharmony_civoid btrfs_get_ordered_extents_for_logging(struct btrfs_inode *inode, 95162306a36Sopenharmony_ci struct list_head *list) 95262306a36Sopenharmony_ci{ 95362306a36Sopenharmony_ci struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree; 95462306a36Sopenharmony_ci struct rb_node *n; 95562306a36Sopenharmony_ci 95662306a36Sopenharmony_ci ASSERT(inode_is_locked(&inode->vfs_inode)); 95762306a36Sopenharmony_ci 95862306a36Sopenharmony_ci spin_lock_irq(&tree->lock); 95962306a36Sopenharmony_ci for (n = rb_first(&tree->tree); n; n = rb_next(n)) { 96062306a36Sopenharmony_ci struct btrfs_ordered_extent *ordered; 96162306a36Sopenharmony_ci 96262306a36Sopenharmony_ci ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); 96362306a36Sopenharmony_ci 96462306a36Sopenharmony_ci if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) 96562306a36Sopenharmony_ci continue; 96662306a36Sopenharmony_ci 96762306a36Sopenharmony_ci ASSERT(list_empty(&ordered->log_list)); 96862306a36Sopenharmony_ci list_add_tail(&ordered->log_list, list); 96962306a36Sopenharmony_ci refcount_inc(&ordered->refs); 97062306a36Sopenharmony_ci trace_btrfs_ordered_extent_lookup_for_logging(inode, ordered); 97162306a36Sopenharmony_ci } 97262306a36Sopenharmony_ci spin_unlock_irq(&tree->lock); 97362306a36Sopenharmony_ci} 97462306a36Sopenharmony_ci 97562306a36Sopenharmony_ci/* 97662306a36Sopenharmony_ci * lookup and return any extent before 'file_offset'. NULL is returned 97762306a36Sopenharmony_ci * if none is found 97862306a36Sopenharmony_ci */ 97962306a36Sopenharmony_cistruct btrfs_ordered_extent * 98062306a36Sopenharmony_cibtrfs_lookup_first_ordered_extent(struct btrfs_inode *inode, u64 file_offset) 98162306a36Sopenharmony_ci{ 98262306a36Sopenharmony_ci struct btrfs_ordered_inode_tree *tree; 98362306a36Sopenharmony_ci struct rb_node *node; 98462306a36Sopenharmony_ci struct btrfs_ordered_extent *entry = NULL; 98562306a36Sopenharmony_ci 98662306a36Sopenharmony_ci tree = &inode->ordered_tree; 98762306a36Sopenharmony_ci spin_lock_irq(&tree->lock); 98862306a36Sopenharmony_ci node = tree_search(tree, file_offset); 98962306a36Sopenharmony_ci if (!node) 99062306a36Sopenharmony_ci goto out; 99162306a36Sopenharmony_ci 99262306a36Sopenharmony_ci entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 99362306a36Sopenharmony_ci refcount_inc(&entry->refs); 99462306a36Sopenharmony_ci trace_btrfs_ordered_extent_lookup_first(inode, entry); 99562306a36Sopenharmony_ciout: 99662306a36Sopenharmony_ci spin_unlock_irq(&tree->lock); 99762306a36Sopenharmony_ci return entry; 99862306a36Sopenharmony_ci} 99962306a36Sopenharmony_ci 100062306a36Sopenharmony_ci/* 100162306a36Sopenharmony_ci * Lookup the first ordered extent that overlaps the range 100262306a36Sopenharmony_ci * [@file_offset, @file_offset + @len). 100362306a36Sopenharmony_ci * 100462306a36Sopenharmony_ci * The difference between this and btrfs_lookup_first_ordered_extent() is 100562306a36Sopenharmony_ci * that this one won't return any ordered extent that does not overlap the range. 100662306a36Sopenharmony_ci * And the difference against btrfs_lookup_ordered_extent() is, this function 100762306a36Sopenharmony_ci * ensures the first ordered extent gets returned. 100862306a36Sopenharmony_ci */ 100962306a36Sopenharmony_cistruct btrfs_ordered_extent *btrfs_lookup_first_ordered_range( 101062306a36Sopenharmony_ci struct btrfs_inode *inode, u64 file_offset, u64 len) 101162306a36Sopenharmony_ci{ 101262306a36Sopenharmony_ci struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree; 101362306a36Sopenharmony_ci struct rb_node *node; 101462306a36Sopenharmony_ci struct rb_node *cur; 101562306a36Sopenharmony_ci struct rb_node *prev; 101662306a36Sopenharmony_ci struct rb_node *next; 101762306a36Sopenharmony_ci struct btrfs_ordered_extent *entry = NULL; 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci spin_lock_irq(&tree->lock); 102062306a36Sopenharmony_ci node = tree->tree.rb_node; 102162306a36Sopenharmony_ci /* 102262306a36Sopenharmony_ci * Here we don't want to use tree_search() which will use tree->last 102362306a36Sopenharmony_ci * and screw up the search order. 102462306a36Sopenharmony_ci * And __tree_search() can't return the adjacent ordered extents 102562306a36Sopenharmony_ci * either, thus here we do our own search. 102662306a36Sopenharmony_ci */ 102762306a36Sopenharmony_ci while (node) { 102862306a36Sopenharmony_ci entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 102962306a36Sopenharmony_ci 103062306a36Sopenharmony_ci if (file_offset < entry->file_offset) { 103162306a36Sopenharmony_ci node = node->rb_left; 103262306a36Sopenharmony_ci } else if (file_offset >= entry_end(entry)) { 103362306a36Sopenharmony_ci node = node->rb_right; 103462306a36Sopenharmony_ci } else { 103562306a36Sopenharmony_ci /* 103662306a36Sopenharmony_ci * Direct hit, got an ordered extent that starts at 103762306a36Sopenharmony_ci * @file_offset 103862306a36Sopenharmony_ci */ 103962306a36Sopenharmony_ci goto out; 104062306a36Sopenharmony_ci } 104162306a36Sopenharmony_ci } 104262306a36Sopenharmony_ci if (!entry) { 104362306a36Sopenharmony_ci /* Empty tree */ 104462306a36Sopenharmony_ci goto out; 104562306a36Sopenharmony_ci } 104662306a36Sopenharmony_ci 104762306a36Sopenharmony_ci cur = &entry->rb_node; 104862306a36Sopenharmony_ci /* We got an entry around @file_offset, check adjacent entries */ 104962306a36Sopenharmony_ci if (entry->file_offset < file_offset) { 105062306a36Sopenharmony_ci prev = cur; 105162306a36Sopenharmony_ci next = rb_next(cur); 105262306a36Sopenharmony_ci } else { 105362306a36Sopenharmony_ci prev = rb_prev(cur); 105462306a36Sopenharmony_ci next = cur; 105562306a36Sopenharmony_ci } 105662306a36Sopenharmony_ci if (prev) { 105762306a36Sopenharmony_ci entry = rb_entry(prev, struct btrfs_ordered_extent, rb_node); 105862306a36Sopenharmony_ci if (range_overlaps(entry, file_offset, len)) 105962306a36Sopenharmony_ci goto out; 106062306a36Sopenharmony_ci } 106162306a36Sopenharmony_ci if (next) { 106262306a36Sopenharmony_ci entry = rb_entry(next, struct btrfs_ordered_extent, rb_node); 106362306a36Sopenharmony_ci if (range_overlaps(entry, file_offset, len)) 106462306a36Sopenharmony_ci goto out; 106562306a36Sopenharmony_ci } 106662306a36Sopenharmony_ci /* No ordered extent in the range */ 106762306a36Sopenharmony_ci entry = NULL; 106862306a36Sopenharmony_ciout: 106962306a36Sopenharmony_ci if (entry) { 107062306a36Sopenharmony_ci refcount_inc(&entry->refs); 107162306a36Sopenharmony_ci trace_btrfs_ordered_extent_lookup_first_range(inode, entry); 107262306a36Sopenharmony_ci } 107362306a36Sopenharmony_ci 107462306a36Sopenharmony_ci spin_unlock_irq(&tree->lock); 107562306a36Sopenharmony_ci return entry; 107662306a36Sopenharmony_ci} 107762306a36Sopenharmony_ci 107862306a36Sopenharmony_ci/* 107962306a36Sopenharmony_ci * Lock the passed range and ensures all pending ordered extents in it are run 108062306a36Sopenharmony_ci * to completion. 108162306a36Sopenharmony_ci * 108262306a36Sopenharmony_ci * @inode: Inode whose ordered tree is to be searched 108362306a36Sopenharmony_ci * @start: Beginning of range to flush 108462306a36Sopenharmony_ci * @end: Last byte of range to lock 108562306a36Sopenharmony_ci * @cached_state: If passed, will return the extent state responsible for the 108662306a36Sopenharmony_ci * locked range. It's the caller's responsibility to free the 108762306a36Sopenharmony_ci * cached state. 108862306a36Sopenharmony_ci * 108962306a36Sopenharmony_ci * Always return with the given range locked, ensuring after it's called no 109062306a36Sopenharmony_ci * order extent can be pending. 109162306a36Sopenharmony_ci */ 109262306a36Sopenharmony_civoid btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start, 109362306a36Sopenharmony_ci u64 end, 109462306a36Sopenharmony_ci struct extent_state **cached_state) 109562306a36Sopenharmony_ci{ 109662306a36Sopenharmony_ci struct btrfs_ordered_extent *ordered; 109762306a36Sopenharmony_ci struct extent_state *cache = NULL; 109862306a36Sopenharmony_ci struct extent_state **cachedp = &cache; 109962306a36Sopenharmony_ci 110062306a36Sopenharmony_ci if (cached_state) 110162306a36Sopenharmony_ci cachedp = cached_state; 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci while (1) { 110462306a36Sopenharmony_ci lock_extent(&inode->io_tree, start, end, cachedp); 110562306a36Sopenharmony_ci ordered = btrfs_lookup_ordered_range(inode, start, 110662306a36Sopenharmony_ci end - start + 1); 110762306a36Sopenharmony_ci if (!ordered) { 110862306a36Sopenharmony_ci /* 110962306a36Sopenharmony_ci * If no external cached_state has been passed then 111062306a36Sopenharmony_ci * decrement the extra ref taken for cachedp since we 111162306a36Sopenharmony_ci * aren't exposing it outside of this function 111262306a36Sopenharmony_ci */ 111362306a36Sopenharmony_ci if (!cached_state) 111462306a36Sopenharmony_ci refcount_dec(&cache->refs); 111562306a36Sopenharmony_ci break; 111662306a36Sopenharmony_ci } 111762306a36Sopenharmony_ci unlock_extent(&inode->io_tree, start, end, cachedp); 111862306a36Sopenharmony_ci btrfs_start_ordered_extent(ordered); 111962306a36Sopenharmony_ci btrfs_put_ordered_extent(ordered); 112062306a36Sopenharmony_ci } 112162306a36Sopenharmony_ci} 112262306a36Sopenharmony_ci 112362306a36Sopenharmony_ci/* 112462306a36Sopenharmony_ci * Lock the passed range and ensure all pending ordered extents in it are run 112562306a36Sopenharmony_ci * to completion in nowait mode. 112662306a36Sopenharmony_ci * 112762306a36Sopenharmony_ci * Return true if btrfs_lock_ordered_range does not return any extents, 112862306a36Sopenharmony_ci * otherwise false. 112962306a36Sopenharmony_ci */ 113062306a36Sopenharmony_cibool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end, 113162306a36Sopenharmony_ci struct extent_state **cached_state) 113262306a36Sopenharmony_ci{ 113362306a36Sopenharmony_ci struct btrfs_ordered_extent *ordered; 113462306a36Sopenharmony_ci 113562306a36Sopenharmony_ci if (!try_lock_extent(&inode->io_tree, start, end, cached_state)) 113662306a36Sopenharmony_ci return false; 113762306a36Sopenharmony_ci 113862306a36Sopenharmony_ci ordered = btrfs_lookup_ordered_range(inode, start, end - start + 1); 113962306a36Sopenharmony_ci if (!ordered) 114062306a36Sopenharmony_ci return true; 114162306a36Sopenharmony_ci 114262306a36Sopenharmony_ci btrfs_put_ordered_extent(ordered); 114362306a36Sopenharmony_ci unlock_extent(&inode->io_tree, start, end, cached_state); 114462306a36Sopenharmony_ci 114562306a36Sopenharmony_ci return false; 114662306a36Sopenharmony_ci} 114762306a36Sopenharmony_ci 114862306a36Sopenharmony_ci/* Split out a new ordered extent for this first @len bytes of @ordered. */ 114962306a36Sopenharmony_cistruct btrfs_ordered_extent *btrfs_split_ordered_extent( 115062306a36Sopenharmony_ci struct btrfs_ordered_extent *ordered, u64 len) 115162306a36Sopenharmony_ci{ 115262306a36Sopenharmony_ci struct btrfs_inode *inode = BTRFS_I(ordered->inode); 115362306a36Sopenharmony_ci struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree; 115462306a36Sopenharmony_ci struct btrfs_root *root = inode->root; 115562306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 115662306a36Sopenharmony_ci u64 file_offset = ordered->file_offset; 115762306a36Sopenharmony_ci u64 disk_bytenr = ordered->disk_bytenr; 115862306a36Sopenharmony_ci unsigned long flags = ordered->flags; 115962306a36Sopenharmony_ci struct btrfs_ordered_sum *sum, *tmpsum; 116062306a36Sopenharmony_ci struct btrfs_ordered_extent *new; 116162306a36Sopenharmony_ci struct rb_node *node; 116262306a36Sopenharmony_ci u64 offset = 0; 116362306a36Sopenharmony_ci 116462306a36Sopenharmony_ci trace_btrfs_ordered_extent_split(inode, ordered); 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_ci ASSERT(!(flags & (1U << BTRFS_ORDERED_COMPRESSED))); 116762306a36Sopenharmony_ci 116862306a36Sopenharmony_ci /* 116962306a36Sopenharmony_ci * The entire bio must be covered by the ordered extent, but we can't 117062306a36Sopenharmony_ci * reduce the original extent to a zero length either. 117162306a36Sopenharmony_ci */ 117262306a36Sopenharmony_ci if (WARN_ON_ONCE(len >= ordered->num_bytes)) 117362306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 117462306a36Sopenharmony_ci /* We cannot split partially completed ordered extents. */ 117562306a36Sopenharmony_ci if (ordered->bytes_left) { 117662306a36Sopenharmony_ci ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS)); 117762306a36Sopenharmony_ci if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes)) 117862306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 117962306a36Sopenharmony_ci } 118062306a36Sopenharmony_ci /* We cannot split a compressed ordered extent. */ 118162306a36Sopenharmony_ci if (WARN_ON_ONCE(ordered->disk_num_bytes != ordered->num_bytes)) 118262306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 118362306a36Sopenharmony_ci 118462306a36Sopenharmony_ci new = alloc_ordered_extent(inode, file_offset, len, len, disk_bytenr, 118562306a36Sopenharmony_ci len, 0, flags, ordered->compress_type); 118662306a36Sopenharmony_ci if (IS_ERR(new)) 118762306a36Sopenharmony_ci return new; 118862306a36Sopenharmony_ci 118962306a36Sopenharmony_ci /* One ref for the tree. */ 119062306a36Sopenharmony_ci refcount_inc(&new->refs); 119162306a36Sopenharmony_ci 119262306a36Sopenharmony_ci spin_lock_irq(&root->ordered_extent_lock); 119362306a36Sopenharmony_ci spin_lock(&tree->lock); 119462306a36Sopenharmony_ci /* Remove from tree once */ 119562306a36Sopenharmony_ci node = &ordered->rb_node; 119662306a36Sopenharmony_ci rb_erase(node, &tree->tree); 119762306a36Sopenharmony_ci RB_CLEAR_NODE(node); 119862306a36Sopenharmony_ci if (tree->last == node) 119962306a36Sopenharmony_ci tree->last = NULL; 120062306a36Sopenharmony_ci 120162306a36Sopenharmony_ci ordered->file_offset += len; 120262306a36Sopenharmony_ci ordered->disk_bytenr += len; 120362306a36Sopenharmony_ci ordered->num_bytes -= len; 120462306a36Sopenharmony_ci ordered->disk_num_bytes -= len; 120562306a36Sopenharmony_ci 120662306a36Sopenharmony_ci if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) { 120762306a36Sopenharmony_ci ASSERT(ordered->bytes_left == 0); 120862306a36Sopenharmony_ci new->bytes_left = 0; 120962306a36Sopenharmony_ci } else { 121062306a36Sopenharmony_ci ordered->bytes_left -= len; 121162306a36Sopenharmony_ci } 121262306a36Sopenharmony_ci 121362306a36Sopenharmony_ci if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags)) { 121462306a36Sopenharmony_ci if (ordered->truncated_len > len) { 121562306a36Sopenharmony_ci ordered->truncated_len -= len; 121662306a36Sopenharmony_ci } else { 121762306a36Sopenharmony_ci new->truncated_len = ordered->truncated_len; 121862306a36Sopenharmony_ci ordered->truncated_len = 0; 121962306a36Sopenharmony_ci } 122062306a36Sopenharmony_ci } 122162306a36Sopenharmony_ci 122262306a36Sopenharmony_ci list_for_each_entry_safe(sum, tmpsum, &ordered->list, list) { 122362306a36Sopenharmony_ci if (offset == len) 122462306a36Sopenharmony_ci break; 122562306a36Sopenharmony_ci list_move_tail(&sum->list, &new->list); 122662306a36Sopenharmony_ci offset += sum->len; 122762306a36Sopenharmony_ci } 122862306a36Sopenharmony_ci 122962306a36Sopenharmony_ci /* Re-insert the node */ 123062306a36Sopenharmony_ci node = tree_insert(&tree->tree, ordered->file_offset, &ordered->rb_node); 123162306a36Sopenharmony_ci if (node) 123262306a36Sopenharmony_ci btrfs_panic(fs_info, -EEXIST, 123362306a36Sopenharmony_ci "zoned: inconsistency in ordered tree at offset %llu", 123462306a36Sopenharmony_ci ordered->file_offset); 123562306a36Sopenharmony_ci 123662306a36Sopenharmony_ci node = tree_insert(&tree->tree, new->file_offset, &new->rb_node); 123762306a36Sopenharmony_ci if (node) 123862306a36Sopenharmony_ci btrfs_panic(fs_info, -EEXIST, 123962306a36Sopenharmony_ci "zoned: inconsistency in ordered tree at offset %llu", 124062306a36Sopenharmony_ci new->file_offset); 124162306a36Sopenharmony_ci spin_unlock(&tree->lock); 124262306a36Sopenharmony_ci 124362306a36Sopenharmony_ci list_add_tail(&new->root_extent_list, &root->ordered_extents); 124462306a36Sopenharmony_ci root->nr_ordered_extents++; 124562306a36Sopenharmony_ci spin_unlock_irq(&root->ordered_extent_lock); 124662306a36Sopenharmony_ci return new; 124762306a36Sopenharmony_ci} 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_ciint __init ordered_data_init(void) 125062306a36Sopenharmony_ci{ 125162306a36Sopenharmony_ci btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent", 125262306a36Sopenharmony_ci sizeof(struct btrfs_ordered_extent), 0, 125362306a36Sopenharmony_ci SLAB_MEM_SPREAD, 125462306a36Sopenharmony_ci NULL); 125562306a36Sopenharmony_ci if (!btrfs_ordered_extent_cache) 125662306a36Sopenharmony_ci return -ENOMEM; 125762306a36Sopenharmony_ci 125862306a36Sopenharmony_ci return 0; 125962306a36Sopenharmony_ci} 126062306a36Sopenharmony_ci 126162306a36Sopenharmony_civoid __cold ordered_data_exit(void) 126262306a36Sopenharmony_ci{ 126362306a36Sopenharmony_ci kmem_cache_destroy(btrfs_ordered_extent_cache); 126462306a36Sopenharmony_ci} 1265