162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2007 Oracle.  All rights reserved.
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/slab.h>
762306a36Sopenharmony_ci#include <linux/blkdev.h>
862306a36Sopenharmony_ci#include <linux/writeback.h>
962306a36Sopenharmony_ci#include <linux/sched/mm.h>
1062306a36Sopenharmony_ci#include "messages.h"
1162306a36Sopenharmony_ci#include "misc.h"
1262306a36Sopenharmony_ci#include "ctree.h"
1362306a36Sopenharmony_ci#include "transaction.h"
1462306a36Sopenharmony_ci#include "btrfs_inode.h"
1562306a36Sopenharmony_ci#include "extent_io.h"
1662306a36Sopenharmony_ci#include "disk-io.h"
1762306a36Sopenharmony_ci#include "compression.h"
1862306a36Sopenharmony_ci#include "delalloc-space.h"
1962306a36Sopenharmony_ci#include "qgroup.h"
2062306a36Sopenharmony_ci#include "subpage.h"
2162306a36Sopenharmony_ci#include "file.h"
2262306a36Sopenharmony_ci#include "super.h"
2362306a36Sopenharmony_ci
2462306a36Sopenharmony_cistatic struct kmem_cache *btrfs_ordered_extent_cache;
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_cistatic u64 entry_end(struct btrfs_ordered_extent *entry)
2762306a36Sopenharmony_ci{
2862306a36Sopenharmony_ci	if (entry->file_offset + entry->num_bytes < entry->file_offset)
2962306a36Sopenharmony_ci		return (u64)-1;
3062306a36Sopenharmony_ci	return entry->file_offset + entry->num_bytes;
3162306a36Sopenharmony_ci}
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci/* returns NULL if the insertion worked, or it returns the node it did find
3462306a36Sopenharmony_ci * in the tree
3562306a36Sopenharmony_ci */
3662306a36Sopenharmony_cistatic struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
3762306a36Sopenharmony_ci				   struct rb_node *node)
3862306a36Sopenharmony_ci{
3962306a36Sopenharmony_ci	struct rb_node **p = &root->rb_node;
4062306a36Sopenharmony_ci	struct rb_node *parent = NULL;
4162306a36Sopenharmony_ci	struct btrfs_ordered_extent *entry;
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_ci	while (*p) {
4462306a36Sopenharmony_ci		parent = *p;
4562306a36Sopenharmony_ci		entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node);
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_ci		if (file_offset < entry->file_offset)
4862306a36Sopenharmony_ci			p = &(*p)->rb_left;
4962306a36Sopenharmony_ci		else if (file_offset >= entry_end(entry))
5062306a36Sopenharmony_ci			p = &(*p)->rb_right;
5162306a36Sopenharmony_ci		else
5262306a36Sopenharmony_ci			return parent;
5362306a36Sopenharmony_ci	}
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_ci	rb_link_node(node, parent, p);
5662306a36Sopenharmony_ci	rb_insert_color(node, root);
5762306a36Sopenharmony_ci	return NULL;
5862306a36Sopenharmony_ci}
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci/*
6162306a36Sopenharmony_ci * look for a given offset in the tree, and if it can't be found return the
6262306a36Sopenharmony_ci * first lesser offset
6362306a36Sopenharmony_ci */
6462306a36Sopenharmony_cistatic struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
6562306a36Sopenharmony_ci				     struct rb_node **prev_ret)
6662306a36Sopenharmony_ci{
6762306a36Sopenharmony_ci	struct rb_node *n = root->rb_node;
6862306a36Sopenharmony_ci	struct rb_node *prev = NULL;
6962306a36Sopenharmony_ci	struct rb_node *test;
7062306a36Sopenharmony_ci	struct btrfs_ordered_extent *entry;
7162306a36Sopenharmony_ci	struct btrfs_ordered_extent *prev_entry = NULL;
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci	while (n) {
7462306a36Sopenharmony_ci		entry = rb_entry(n, struct btrfs_ordered_extent, rb_node);
7562306a36Sopenharmony_ci		prev = n;
7662306a36Sopenharmony_ci		prev_entry = entry;
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci		if (file_offset < entry->file_offset)
7962306a36Sopenharmony_ci			n = n->rb_left;
8062306a36Sopenharmony_ci		else if (file_offset >= entry_end(entry))
8162306a36Sopenharmony_ci			n = n->rb_right;
8262306a36Sopenharmony_ci		else
8362306a36Sopenharmony_ci			return n;
8462306a36Sopenharmony_ci	}
8562306a36Sopenharmony_ci	if (!prev_ret)
8662306a36Sopenharmony_ci		return NULL;
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	while (prev && file_offset >= entry_end(prev_entry)) {
8962306a36Sopenharmony_ci		test = rb_next(prev);
9062306a36Sopenharmony_ci		if (!test)
9162306a36Sopenharmony_ci			break;
9262306a36Sopenharmony_ci		prev_entry = rb_entry(test, struct btrfs_ordered_extent,
9362306a36Sopenharmony_ci				      rb_node);
9462306a36Sopenharmony_ci		if (file_offset < entry_end(prev_entry))
9562306a36Sopenharmony_ci			break;
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci		prev = test;
9862306a36Sopenharmony_ci	}
9962306a36Sopenharmony_ci	if (prev)
10062306a36Sopenharmony_ci		prev_entry = rb_entry(prev, struct btrfs_ordered_extent,
10162306a36Sopenharmony_ci				      rb_node);
10262306a36Sopenharmony_ci	while (prev && file_offset < entry_end(prev_entry)) {
10362306a36Sopenharmony_ci		test = rb_prev(prev);
10462306a36Sopenharmony_ci		if (!test)
10562306a36Sopenharmony_ci			break;
10662306a36Sopenharmony_ci		prev_entry = rb_entry(test, struct btrfs_ordered_extent,
10762306a36Sopenharmony_ci				      rb_node);
10862306a36Sopenharmony_ci		prev = test;
10962306a36Sopenharmony_ci	}
11062306a36Sopenharmony_ci	*prev_ret = prev;
11162306a36Sopenharmony_ci	return NULL;
11262306a36Sopenharmony_ci}
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_cistatic int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset,
11562306a36Sopenharmony_ci			  u64 len)
11662306a36Sopenharmony_ci{
11762306a36Sopenharmony_ci	if (file_offset + len <= entry->file_offset ||
11862306a36Sopenharmony_ci	    entry->file_offset + entry->num_bytes <= file_offset)
11962306a36Sopenharmony_ci		return 0;
12062306a36Sopenharmony_ci	return 1;
12162306a36Sopenharmony_ci}
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci/*
12462306a36Sopenharmony_ci * look find the first ordered struct that has this offset, otherwise
12562306a36Sopenharmony_ci * the first one less than this offset
12662306a36Sopenharmony_ci */
12762306a36Sopenharmony_cistatic inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
12862306a36Sopenharmony_ci					  u64 file_offset)
12962306a36Sopenharmony_ci{
13062306a36Sopenharmony_ci	struct rb_root *root = &tree->tree;
13162306a36Sopenharmony_ci	struct rb_node *prev = NULL;
13262306a36Sopenharmony_ci	struct rb_node *ret;
13362306a36Sopenharmony_ci	struct btrfs_ordered_extent *entry;
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci	if (tree->last) {
13662306a36Sopenharmony_ci		entry = rb_entry(tree->last, struct btrfs_ordered_extent,
13762306a36Sopenharmony_ci				 rb_node);
13862306a36Sopenharmony_ci		if (in_range(file_offset, entry->file_offset, entry->num_bytes))
13962306a36Sopenharmony_ci			return tree->last;
14062306a36Sopenharmony_ci	}
14162306a36Sopenharmony_ci	ret = __tree_search(root, file_offset, &prev);
14262306a36Sopenharmony_ci	if (!ret)
14362306a36Sopenharmony_ci		ret = prev;
14462306a36Sopenharmony_ci	if (ret)
14562306a36Sopenharmony_ci		tree->last = ret;
14662306a36Sopenharmony_ci	return ret;
14762306a36Sopenharmony_ci}
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_cistatic struct btrfs_ordered_extent *alloc_ordered_extent(
15062306a36Sopenharmony_ci			struct btrfs_inode *inode, u64 file_offset, u64 num_bytes,
15162306a36Sopenharmony_ci			u64 ram_bytes, u64 disk_bytenr, u64 disk_num_bytes,
15262306a36Sopenharmony_ci			u64 offset, unsigned long flags, int compress_type)
15362306a36Sopenharmony_ci{
15462306a36Sopenharmony_ci	struct btrfs_ordered_extent *entry;
15562306a36Sopenharmony_ci	int ret;
15662306a36Sopenharmony_ci	u64 qgroup_rsv = 0;
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci	if (flags &
15962306a36Sopenharmony_ci	    ((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) {
16062306a36Sopenharmony_ci		/* For nocow write, we can release the qgroup rsv right now */
16162306a36Sopenharmony_ci		ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes, &qgroup_rsv);
16262306a36Sopenharmony_ci		if (ret < 0)
16362306a36Sopenharmony_ci			return ERR_PTR(ret);
16462306a36Sopenharmony_ci	} else {
16562306a36Sopenharmony_ci		/*
16662306a36Sopenharmony_ci		 * The ordered extent has reserved qgroup space, release now
16762306a36Sopenharmony_ci		 * and pass the reserved number for qgroup_record to free.
16862306a36Sopenharmony_ci		 */
16962306a36Sopenharmony_ci		ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes, &qgroup_rsv);
17062306a36Sopenharmony_ci		if (ret < 0)
17162306a36Sopenharmony_ci			return ERR_PTR(ret);
17262306a36Sopenharmony_ci	}
17362306a36Sopenharmony_ci	entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS);
17462306a36Sopenharmony_ci	if (!entry)
17562306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_ci	entry->file_offset = file_offset;
17862306a36Sopenharmony_ci	entry->num_bytes = num_bytes;
17962306a36Sopenharmony_ci	entry->ram_bytes = ram_bytes;
18062306a36Sopenharmony_ci	entry->disk_bytenr = disk_bytenr;
18162306a36Sopenharmony_ci	entry->disk_num_bytes = disk_num_bytes;
18262306a36Sopenharmony_ci	entry->offset = offset;
18362306a36Sopenharmony_ci	entry->bytes_left = num_bytes;
18462306a36Sopenharmony_ci	entry->inode = igrab(&inode->vfs_inode);
18562306a36Sopenharmony_ci	entry->compress_type = compress_type;
18662306a36Sopenharmony_ci	entry->truncated_len = (u64)-1;
18762306a36Sopenharmony_ci	entry->qgroup_rsv = qgroup_rsv;
18862306a36Sopenharmony_ci	entry->flags = flags;
18962306a36Sopenharmony_ci	refcount_set(&entry->refs, 1);
19062306a36Sopenharmony_ci	init_waitqueue_head(&entry->wait);
19162306a36Sopenharmony_ci	INIT_LIST_HEAD(&entry->list);
19262306a36Sopenharmony_ci	INIT_LIST_HEAD(&entry->log_list);
19362306a36Sopenharmony_ci	INIT_LIST_HEAD(&entry->root_extent_list);
19462306a36Sopenharmony_ci	INIT_LIST_HEAD(&entry->work_list);
19562306a36Sopenharmony_ci	init_completion(&entry->completion);
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_ci	/*
19862306a36Sopenharmony_ci	 * We don't need the count_max_extents here, we can assume that all of
19962306a36Sopenharmony_ci	 * that work has been done at higher layers, so this is truly the
20062306a36Sopenharmony_ci	 * smallest the extent is going to get.
20162306a36Sopenharmony_ci	 */
20262306a36Sopenharmony_ci	spin_lock(&inode->lock);
20362306a36Sopenharmony_ci	btrfs_mod_outstanding_extents(inode, 1);
20462306a36Sopenharmony_ci	spin_unlock(&inode->lock);
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_ci	return entry;
20762306a36Sopenharmony_ci}
20862306a36Sopenharmony_ci
20962306a36Sopenharmony_cistatic void insert_ordered_extent(struct btrfs_ordered_extent *entry)
21062306a36Sopenharmony_ci{
21162306a36Sopenharmony_ci	struct btrfs_inode *inode = BTRFS_I(entry->inode);
21262306a36Sopenharmony_ci	struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
21362306a36Sopenharmony_ci	struct btrfs_root *root = inode->root;
21462306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
21562306a36Sopenharmony_ci	struct rb_node *node;
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci	trace_btrfs_ordered_extent_add(inode, entry);
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci	percpu_counter_add_batch(&fs_info->ordered_bytes, entry->num_bytes,
22062306a36Sopenharmony_ci				 fs_info->delalloc_batch);
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci	/* One ref for the tree. */
22362306a36Sopenharmony_ci	refcount_inc(&entry->refs);
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci	spin_lock_irq(&tree->lock);
22662306a36Sopenharmony_ci	node = tree_insert(&tree->tree, entry->file_offset, &entry->rb_node);
22762306a36Sopenharmony_ci	if (node)
22862306a36Sopenharmony_ci		btrfs_panic(fs_info, -EEXIST,
22962306a36Sopenharmony_ci				"inconsistency in ordered tree at offset %llu",
23062306a36Sopenharmony_ci				entry->file_offset);
23162306a36Sopenharmony_ci	spin_unlock_irq(&tree->lock);
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci	spin_lock(&root->ordered_extent_lock);
23462306a36Sopenharmony_ci	list_add_tail(&entry->root_extent_list,
23562306a36Sopenharmony_ci		      &root->ordered_extents);
23662306a36Sopenharmony_ci	root->nr_ordered_extents++;
23762306a36Sopenharmony_ci	if (root->nr_ordered_extents == 1) {
23862306a36Sopenharmony_ci		spin_lock(&fs_info->ordered_root_lock);
23962306a36Sopenharmony_ci		BUG_ON(!list_empty(&root->ordered_root));
24062306a36Sopenharmony_ci		list_add_tail(&root->ordered_root, &fs_info->ordered_roots);
24162306a36Sopenharmony_ci		spin_unlock(&fs_info->ordered_root_lock);
24262306a36Sopenharmony_ci	}
24362306a36Sopenharmony_ci	spin_unlock(&root->ordered_extent_lock);
24462306a36Sopenharmony_ci}
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_ci/*
24762306a36Sopenharmony_ci * Add an ordered extent to the per-inode tree.
24862306a36Sopenharmony_ci *
24962306a36Sopenharmony_ci * @inode:           Inode that this extent is for.
25062306a36Sopenharmony_ci * @file_offset:     Logical offset in file where the extent starts.
25162306a36Sopenharmony_ci * @num_bytes:       Logical length of extent in file.
25262306a36Sopenharmony_ci * @ram_bytes:       Full length of unencoded data.
25362306a36Sopenharmony_ci * @disk_bytenr:     Offset of extent on disk.
25462306a36Sopenharmony_ci * @disk_num_bytes:  Size of extent on disk.
25562306a36Sopenharmony_ci * @offset:          Offset into unencoded data where file data starts.
25662306a36Sopenharmony_ci * @flags:           Flags specifying type of extent (1 << BTRFS_ORDERED_*).
25762306a36Sopenharmony_ci * @compress_type:   Compression algorithm used for data.
25862306a36Sopenharmony_ci *
25962306a36Sopenharmony_ci * Most of these parameters correspond to &struct btrfs_file_extent_item. The
26062306a36Sopenharmony_ci * tree is given a single reference on the ordered extent that was inserted, and
26162306a36Sopenharmony_ci * the returned pointer is given a second reference.
26262306a36Sopenharmony_ci *
26362306a36Sopenharmony_ci * Return: the new ordered extent or error pointer.
26462306a36Sopenharmony_ci */
26562306a36Sopenharmony_cistruct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
26662306a36Sopenharmony_ci			struct btrfs_inode *inode, u64 file_offset,
26762306a36Sopenharmony_ci			u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
26862306a36Sopenharmony_ci			u64 disk_num_bytes, u64 offset, unsigned long flags,
26962306a36Sopenharmony_ci			int compress_type)
27062306a36Sopenharmony_ci{
27162306a36Sopenharmony_ci	struct btrfs_ordered_extent *entry;
27262306a36Sopenharmony_ci
27362306a36Sopenharmony_ci	ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
27462306a36Sopenharmony_ci
27562306a36Sopenharmony_ci	entry = alloc_ordered_extent(inode, file_offset, num_bytes, ram_bytes,
27662306a36Sopenharmony_ci				     disk_bytenr, disk_num_bytes, offset, flags,
27762306a36Sopenharmony_ci				     compress_type);
27862306a36Sopenharmony_ci	if (!IS_ERR(entry))
27962306a36Sopenharmony_ci		insert_ordered_extent(entry);
28062306a36Sopenharmony_ci	return entry;
28162306a36Sopenharmony_ci}
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci/*
28462306a36Sopenharmony_ci * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
28562306a36Sopenharmony_ci * when an ordered extent is finished.  If the list covers more than one
28662306a36Sopenharmony_ci * ordered extent, it is split across multiples.
28762306a36Sopenharmony_ci */
28862306a36Sopenharmony_civoid btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
28962306a36Sopenharmony_ci			   struct btrfs_ordered_sum *sum)
29062306a36Sopenharmony_ci{
29162306a36Sopenharmony_ci	struct btrfs_ordered_inode_tree *tree;
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_ci	tree = &BTRFS_I(entry->inode)->ordered_tree;
29462306a36Sopenharmony_ci	spin_lock_irq(&tree->lock);
29562306a36Sopenharmony_ci	list_add_tail(&sum->list, &entry->list);
29662306a36Sopenharmony_ci	spin_unlock_irq(&tree->lock);
29762306a36Sopenharmony_ci}
29862306a36Sopenharmony_ci
29962306a36Sopenharmony_cistatic void finish_ordered_fn(struct btrfs_work *work)
30062306a36Sopenharmony_ci{
30162306a36Sopenharmony_ci	struct btrfs_ordered_extent *ordered_extent;
30262306a36Sopenharmony_ci
30362306a36Sopenharmony_ci	ordered_extent = container_of(work, struct btrfs_ordered_extent, work);
30462306a36Sopenharmony_ci	btrfs_finish_ordered_io(ordered_extent);
30562306a36Sopenharmony_ci}
30662306a36Sopenharmony_ci
30762306a36Sopenharmony_cistatic bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
30862306a36Sopenharmony_ci				      struct page *page, u64 file_offset,
30962306a36Sopenharmony_ci				      u64 len, bool uptodate)
31062306a36Sopenharmony_ci{
31162306a36Sopenharmony_ci	struct btrfs_inode *inode = BTRFS_I(ordered->inode);
31262306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = inode->root->fs_info;
31362306a36Sopenharmony_ci
31462306a36Sopenharmony_ci	lockdep_assert_held(&inode->ordered_tree.lock);
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	if (page) {
31762306a36Sopenharmony_ci		ASSERT(page->mapping);
31862306a36Sopenharmony_ci		ASSERT(page_offset(page) <= file_offset);
31962306a36Sopenharmony_ci		ASSERT(file_offset + len <= page_offset(page) + PAGE_SIZE);
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci		/*
32262306a36Sopenharmony_ci		 * Ordered (Private2) bit indicates whether we still have
32362306a36Sopenharmony_ci		 * pending io unfinished for the ordered extent.
32462306a36Sopenharmony_ci		 *
32562306a36Sopenharmony_ci		 * If there's no such bit, we need to skip to next range.
32662306a36Sopenharmony_ci		 */
32762306a36Sopenharmony_ci		if (!btrfs_page_test_ordered(fs_info, page, file_offset, len))
32862306a36Sopenharmony_ci			return false;
32962306a36Sopenharmony_ci		btrfs_page_clear_ordered(fs_info, page, file_offset, len);
33062306a36Sopenharmony_ci	}
33162306a36Sopenharmony_ci
33262306a36Sopenharmony_ci	/* Now we're fine to update the accounting. */
33362306a36Sopenharmony_ci	if (WARN_ON_ONCE(len > ordered->bytes_left)) {
33462306a36Sopenharmony_ci		btrfs_crit(fs_info,
33562306a36Sopenharmony_ci"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%llu left=%llu",
33662306a36Sopenharmony_ci			   inode->root->root_key.objectid, btrfs_ino(inode),
33762306a36Sopenharmony_ci			   ordered->file_offset, ordered->num_bytes,
33862306a36Sopenharmony_ci			   len, ordered->bytes_left);
33962306a36Sopenharmony_ci		ordered->bytes_left = 0;
34062306a36Sopenharmony_ci	} else {
34162306a36Sopenharmony_ci		ordered->bytes_left -= len;
34262306a36Sopenharmony_ci	}
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ci	if (!uptodate)
34562306a36Sopenharmony_ci		set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
34662306a36Sopenharmony_ci
34762306a36Sopenharmony_ci	if (ordered->bytes_left)
34862306a36Sopenharmony_ci		return false;
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_ci	/*
35162306a36Sopenharmony_ci	 * All the IO of the ordered extent is finished, we need to queue
35262306a36Sopenharmony_ci	 * the finish_func to be executed.
35362306a36Sopenharmony_ci	 */
35462306a36Sopenharmony_ci	set_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags);
35562306a36Sopenharmony_ci	cond_wake_up(&ordered->wait);
35662306a36Sopenharmony_ci	refcount_inc(&ordered->refs);
35762306a36Sopenharmony_ci	trace_btrfs_ordered_extent_mark_finished(inode, ordered);
35862306a36Sopenharmony_ci	return true;
35962306a36Sopenharmony_ci}
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_cistatic void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered)
36262306a36Sopenharmony_ci{
36362306a36Sopenharmony_ci	struct btrfs_inode *inode = BTRFS_I(ordered->inode);
36462306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = inode->root->fs_info;
36562306a36Sopenharmony_ci	struct btrfs_workqueue *wq = btrfs_is_free_space_inode(inode) ?
36662306a36Sopenharmony_ci		fs_info->endio_freespace_worker : fs_info->endio_write_workers;
36762306a36Sopenharmony_ci
36862306a36Sopenharmony_ci	btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL);
36962306a36Sopenharmony_ci	btrfs_queue_work(wq, &ordered->work);
37062306a36Sopenharmony_ci}
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_cibool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
37362306a36Sopenharmony_ci				 struct page *page, u64 file_offset, u64 len,
37462306a36Sopenharmony_ci				 bool uptodate)
37562306a36Sopenharmony_ci{
37662306a36Sopenharmony_ci	struct btrfs_inode *inode = BTRFS_I(ordered->inode);
37762306a36Sopenharmony_ci	unsigned long flags;
37862306a36Sopenharmony_ci	bool ret;
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_ci	trace_btrfs_finish_ordered_extent(inode, file_offset, len, uptodate);
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_ci	spin_lock_irqsave(&inode->ordered_tree.lock, flags);
38362306a36Sopenharmony_ci	ret = can_finish_ordered_extent(ordered, page, file_offset, len, uptodate);
38462306a36Sopenharmony_ci	spin_unlock_irqrestore(&inode->ordered_tree.lock, flags);
38562306a36Sopenharmony_ci
38662306a36Sopenharmony_ci	if (ret)
38762306a36Sopenharmony_ci		btrfs_queue_ordered_fn(ordered);
38862306a36Sopenharmony_ci	return ret;
38962306a36Sopenharmony_ci}
39062306a36Sopenharmony_ci
39162306a36Sopenharmony_ci/*
39262306a36Sopenharmony_ci * Mark all ordered extents io inside the specified range finished.
39362306a36Sopenharmony_ci *
39462306a36Sopenharmony_ci * @page:	 The involved page for the operation.
39562306a36Sopenharmony_ci *		 For uncompressed buffered IO, the page status also needs to be
39662306a36Sopenharmony_ci *		 updated to indicate whether the pending ordered io is finished.
39762306a36Sopenharmony_ci *		 Can be NULL for direct IO and compressed write.
39862306a36Sopenharmony_ci *		 For these cases, callers are ensured they won't execute the
39962306a36Sopenharmony_ci *		 endio function twice.
40062306a36Sopenharmony_ci *
40162306a36Sopenharmony_ci * This function is called for endio, thus the range must have ordered
40262306a36Sopenharmony_ci * extent(s) covering it.
40362306a36Sopenharmony_ci */
40462306a36Sopenharmony_civoid btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
40562306a36Sopenharmony_ci				    struct page *page, u64 file_offset,
40662306a36Sopenharmony_ci				    u64 num_bytes, bool uptodate)
40762306a36Sopenharmony_ci{
40862306a36Sopenharmony_ci	struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
40962306a36Sopenharmony_ci	struct rb_node *node;
41062306a36Sopenharmony_ci	struct btrfs_ordered_extent *entry = NULL;
41162306a36Sopenharmony_ci	unsigned long flags;
41262306a36Sopenharmony_ci	u64 cur = file_offset;
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_ci	trace_btrfs_writepage_end_io_hook(inode, file_offset,
41562306a36Sopenharmony_ci					  file_offset + num_bytes - 1,
41662306a36Sopenharmony_ci					  uptodate);
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_ci	spin_lock_irqsave(&tree->lock, flags);
41962306a36Sopenharmony_ci	while (cur < file_offset + num_bytes) {
42062306a36Sopenharmony_ci		u64 entry_end;
42162306a36Sopenharmony_ci		u64 end;
42262306a36Sopenharmony_ci		u32 len;
42362306a36Sopenharmony_ci
42462306a36Sopenharmony_ci		node = tree_search(tree, cur);
42562306a36Sopenharmony_ci		/* No ordered extents at all */
42662306a36Sopenharmony_ci		if (!node)
42762306a36Sopenharmony_ci			break;
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci		entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
43062306a36Sopenharmony_ci		entry_end = entry->file_offset + entry->num_bytes;
43162306a36Sopenharmony_ci		/*
43262306a36Sopenharmony_ci		 * |<-- OE --->|  |
43362306a36Sopenharmony_ci		 *		  cur
43462306a36Sopenharmony_ci		 * Go to next OE.
43562306a36Sopenharmony_ci		 */
43662306a36Sopenharmony_ci		if (cur >= entry_end) {
43762306a36Sopenharmony_ci			node = rb_next(node);
43862306a36Sopenharmony_ci			/* No more ordered extents, exit */
43962306a36Sopenharmony_ci			if (!node)
44062306a36Sopenharmony_ci				break;
44162306a36Sopenharmony_ci			entry = rb_entry(node, struct btrfs_ordered_extent,
44262306a36Sopenharmony_ci					 rb_node);
44362306a36Sopenharmony_ci
44462306a36Sopenharmony_ci			/* Go to next ordered extent and continue */
44562306a36Sopenharmony_ci			cur = entry->file_offset;
44662306a36Sopenharmony_ci			continue;
44762306a36Sopenharmony_ci		}
44862306a36Sopenharmony_ci		/*
44962306a36Sopenharmony_ci		 * |	|<--- OE --->|
45062306a36Sopenharmony_ci		 * cur
45162306a36Sopenharmony_ci		 * Go to the start of OE.
45262306a36Sopenharmony_ci		 */
45362306a36Sopenharmony_ci		if (cur < entry->file_offset) {
45462306a36Sopenharmony_ci			cur = entry->file_offset;
45562306a36Sopenharmony_ci			continue;
45662306a36Sopenharmony_ci		}
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_ci		/*
45962306a36Sopenharmony_ci		 * Now we are definitely inside one ordered extent.
46062306a36Sopenharmony_ci		 *
46162306a36Sopenharmony_ci		 * |<--- OE --->|
46262306a36Sopenharmony_ci		 *	|
46362306a36Sopenharmony_ci		 *	cur
46462306a36Sopenharmony_ci		 */
46562306a36Sopenharmony_ci		end = min(entry->file_offset + entry->num_bytes,
46662306a36Sopenharmony_ci			  file_offset + num_bytes) - 1;
46762306a36Sopenharmony_ci		ASSERT(end + 1 - cur < U32_MAX);
46862306a36Sopenharmony_ci		len = end + 1 - cur;
46962306a36Sopenharmony_ci
47062306a36Sopenharmony_ci		if (can_finish_ordered_extent(entry, page, cur, len, uptodate)) {
47162306a36Sopenharmony_ci			spin_unlock_irqrestore(&tree->lock, flags);
47262306a36Sopenharmony_ci			btrfs_queue_ordered_fn(entry);
47362306a36Sopenharmony_ci			spin_lock_irqsave(&tree->lock, flags);
47462306a36Sopenharmony_ci		}
47562306a36Sopenharmony_ci		cur += len;
47662306a36Sopenharmony_ci	}
47762306a36Sopenharmony_ci	spin_unlock_irqrestore(&tree->lock, flags);
47862306a36Sopenharmony_ci}
47962306a36Sopenharmony_ci
48062306a36Sopenharmony_ci/*
48162306a36Sopenharmony_ci * Finish IO for one ordered extent across a given range.  The range can only
48262306a36Sopenharmony_ci * contain one ordered extent.
48362306a36Sopenharmony_ci *
48462306a36Sopenharmony_ci * @cached:	 The cached ordered extent. If not NULL, we can skip the tree
48562306a36Sopenharmony_ci *               search and use the ordered extent directly.
48662306a36Sopenharmony_ci * 		 Will be also used to store the finished ordered extent.
48762306a36Sopenharmony_ci * @file_offset: File offset for the finished IO
48862306a36Sopenharmony_ci * @io_size:	 Length of the finish IO range
48962306a36Sopenharmony_ci *
49062306a36Sopenharmony_ci * Return true if the ordered extent is finished in the range, and update
49162306a36Sopenharmony_ci * @cached.
49262306a36Sopenharmony_ci * Return false otherwise.
49362306a36Sopenharmony_ci *
49462306a36Sopenharmony_ci * NOTE: The range can NOT cross multiple ordered extents.
49562306a36Sopenharmony_ci * Thus caller should ensure the range doesn't cross ordered extents.
49662306a36Sopenharmony_ci */
49762306a36Sopenharmony_cibool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
49862306a36Sopenharmony_ci				    struct btrfs_ordered_extent **cached,
49962306a36Sopenharmony_ci				    u64 file_offset, u64 io_size)
50062306a36Sopenharmony_ci{
50162306a36Sopenharmony_ci	struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
50262306a36Sopenharmony_ci	struct rb_node *node;
50362306a36Sopenharmony_ci	struct btrfs_ordered_extent *entry = NULL;
50462306a36Sopenharmony_ci	unsigned long flags;
50562306a36Sopenharmony_ci	bool finished = false;
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci	spin_lock_irqsave(&tree->lock, flags);
50862306a36Sopenharmony_ci	if (cached && *cached) {
50962306a36Sopenharmony_ci		entry = *cached;
51062306a36Sopenharmony_ci		goto have_entry;
51162306a36Sopenharmony_ci	}
51262306a36Sopenharmony_ci
51362306a36Sopenharmony_ci	node = tree_search(tree, file_offset);
51462306a36Sopenharmony_ci	if (!node)
51562306a36Sopenharmony_ci		goto out;
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
51862306a36Sopenharmony_cihave_entry:
51962306a36Sopenharmony_ci	if (!in_range(file_offset, entry->file_offset, entry->num_bytes))
52062306a36Sopenharmony_ci		goto out;
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci	if (io_size > entry->bytes_left)
52362306a36Sopenharmony_ci		btrfs_crit(inode->root->fs_info,
52462306a36Sopenharmony_ci			   "bad ordered accounting left %llu size %llu",
52562306a36Sopenharmony_ci		       entry->bytes_left, io_size);
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci	entry->bytes_left -= io_size;
52862306a36Sopenharmony_ci
52962306a36Sopenharmony_ci	if (entry->bytes_left == 0) {
53062306a36Sopenharmony_ci		/*
53162306a36Sopenharmony_ci		 * Ensure only one caller can set the flag and finished_ret
53262306a36Sopenharmony_ci		 * accordingly
53362306a36Sopenharmony_ci		 */
53462306a36Sopenharmony_ci		finished = !test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
53562306a36Sopenharmony_ci		/* test_and_set_bit implies a barrier */
53662306a36Sopenharmony_ci		cond_wake_up_nomb(&entry->wait);
53762306a36Sopenharmony_ci	}
53862306a36Sopenharmony_ciout:
53962306a36Sopenharmony_ci	if (finished && cached && entry) {
54062306a36Sopenharmony_ci		*cached = entry;
54162306a36Sopenharmony_ci		refcount_inc(&entry->refs);
54262306a36Sopenharmony_ci		trace_btrfs_ordered_extent_dec_test_pending(inode, entry);
54362306a36Sopenharmony_ci	}
54462306a36Sopenharmony_ci	spin_unlock_irqrestore(&tree->lock, flags);
54562306a36Sopenharmony_ci	return finished;
54662306a36Sopenharmony_ci}
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_ci/*
54962306a36Sopenharmony_ci * used to drop a reference on an ordered extent.  This will free
55062306a36Sopenharmony_ci * the extent if the last reference is dropped
55162306a36Sopenharmony_ci */
55262306a36Sopenharmony_civoid btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
55362306a36Sopenharmony_ci{
55462306a36Sopenharmony_ci	struct list_head *cur;
55562306a36Sopenharmony_ci	struct btrfs_ordered_sum *sum;
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci	trace_btrfs_ordered_extent_put(BTRFS_I(entry->inode), entry);
55862306a36Sopenharmony_ci
55962306a36Sopenharmony_ci	if (refcount_dec_and_test(&entry->refs)) {
56062306a36Sopenharmony_ci		ASSERT(list_empty(&entry->root_extent_list));
56162306a36Sopenharmony_ci		ASSERT(list_empty(&entry->log_list));
56262306a36Sopenharmony_ci		ASSERT(RB_EMPTY_NODE(&entry->rb_node));
56362306a36Sopenharmony_ci		if (entry->inode)
56462306a36Sopenharmony_ci			btrfs_add_delayed_iput(BTRFS_I(entry->inode));
56562306a36Sopenharmony_ci		while (!list_empty(&entry->list)) {
56662306a36Sopenharmony_ci			cur = entry->list.next;
56762306a36Sopenharmony_ci			sum = list_entry(cur, struct btrfs_ordered_sum, list);
56862306a36Sopenharmony_ci			list_del(&sum->list);
56962306a36Sopenharmony_ci			kvfree(sum);
57062306a36Sopenharmony_ci		}
57162306a36Sopenharmony_ci		kmem_cache_free(btrfs_ordered_extent_cache, entry);
57262306a36Sopenharmony_ci	}
57362306a36Sopenharmony_ci}
57462306a36Sopenharmony_ci
57562306a36Sopenharmony_ci/*
57662306a36Sopenharmony_ci * remove an ordered extent from the tree.  No references are dropped
57762306a36Sopenharmony_ci * and waiters are woken up.
57862306a36Sopenharmony_ci */
57962306a36Sopenharmony_civoid btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
58062306a36Sopenharmony_ci				 struct btrfs_ordered_extent *entry)
58162306a36Sopenharmony_ci{
58262306a36Sopenharmony_ci	struct btrfs_ordered_inode_tree *tree;
58362306a36Sopenharmony_ci	struct btrfs_root *root = btrfs_inode->root;
58462306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
58562306a36Sopenharmony_ci	struct rb_node *node;
58662306a36Sopenharmony_ci	bool pending;
58762306a36Sopenharmony_ci	bool freespace_inode;
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ci	/*
59062306a36Sopenharmony_ci	 * If this is a free space inode the thread has not acquired the ordered
59162306a36Sopenharmony_ci	 * extents lockdep map.
59262306a36Sopenharmony_ci	 */
59362306a36Sopenharmony_ci	freespace_inode = btrfs_is_free_space_inode(btrfs_inode);
59462306a36Sopenharmony_ci
59562306a36Sopenharmony_ci	btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered);
59662306a36Sopenharmony_ci	/* This is paired with btrfs_alloc_ordered_extent. */
59762306a36Sopenharmony_ci	spin_lock(&btrfs_inode->lock);
59862306a36Sopenharmony_ci	btrfs_mod_outstanding_extents(btrfs_inode, -1);
59962306a36Sopenharmony_ci	spin_unlock(&btrfs_inode->lock);
60062306a36Sopenharmony_ci	if (root != fs_info->tree_root) {
60162306a36Sopenharmony_ci		u64 release;
60262306a36Sopenharmony_ci
60362306a36Sopenharmony_ci		if (test_bit(BTRFS_ORDERED_ENCODED, &entry->flags))
60462306a36Sopenharmony_ci			release = entry->disk_num_bytes;
60562306a36Sopenharmony_ci		else
60662306a36Sopenharmony_ci			release = entry->num_bytes;
60762306a36Sopenharmony_ci		btrfs_delalloc_release_metadata(btrfs_inode, release,
60862306a36Sopenharmony_ci						test_bit(BTRFS_ORDERED_IOERR,
60962306a36Sopenharmony_ci							 &entry->flags));
61062306a36Sopenharmony_ci	}
61162306a36Sopenharmony_ci
61262306a36Sopenharmony_ci	percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes,
61362306a36Sopenharmony_ci				 fs_info->delalloc_batch);
61462306a36Sopenharmony_ci
61562306a36Sopenharmony_ci	tree = &btrfs_inode->ordered_tree;
61662306a36Sopenharmony_ci	spin_lock_irq(&tree->lock);
61762306a36Sopenharmony_ci	node = &entry->rb_node;
61862306a36Sopenharmony_ci	rb_erase(node, &tree->tree);
61962306a36Sopenharmony_ci	RB_CLEAR_NODE(node);
62062306a36Sopenharmony_ci	if (tree->last == node)
62162306a36Sopenharmony_ci		tree->last = NULL;
62262306a36Sopenharmony_ci	set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
62362306a36Sopenharmony_ci	pending = test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags);
62462306a36Sopenharmony_ci	spin_unlock_irq(&tree->lock);
62562306a36Sopenharmony_ci
62662306a36Sopenharmony_ci	/*
62762306a36Sopenharmony_ci	 * The current running transaction is waiting on us, we need to let it
62862306a36Sopenharmony_ci	 * know that we're complete and wake it up.
62962306a36Sopenharmony_ci	 */
63062306a36Sopenharmony_ci	if (pending) {
63162306a36Sopenharmony_ci		struct btrfs_transaction *trans;
63262306a36Sopenharmony_ci
63362306a36Sopenharmony_ci		/*
63462306a36Sopenharmony_ci		 * The checks for trans are just a formality, it should be set,
63562306a36Sopenharmony_ci		 * but if it isn't we don't want to deref/assert under the spin
63662306a36Sopenharmony_ci		 * lock, so be nice and check if trans is set, but ASSERT() so
63762306a36Sopenharmony_ci		 * if it isn't set a developer will notice.
63862306a36Sopenharmony_ci		 */
63962306a36Sopenharmony_ci		spin_lock(&fs_info->trans_lock);
64062306a36Sopenharmony_ci		trans = fs_info->running_transaction;
64162306a36Sopenharmony_ci		if (trans)
64262306a36Sopenharmony_ci			refcount_inc(&trans->use_count);
64362306a36Sopenharmony_ci		spin_unlock(&fs_info->trans_lock);
64462306a36Sopenharmony_ci
64562306a36Sopenharmony_ci		ASSERT(trans || BTRFS_FS_ERROR(fs_info));
64662306a36Sopenharmony_ci		if (trans) {
64762306a36Sopenharmony_ci			if (atomic_dec_and_test(&trans->pending_ordered))
64862306a36Sopenharmony_ci				wake_up(&trans->pending_wait);
64962306a36Sopenharmony_ci			btrfs_put_transaction(trans);
65062306a36Sopenharmony_ci		}
65162306a36Sopenharmony_ci	}
65262306a36Sopenharmony_ci
65362306a36Sopenharmony_ci	btrfs_lockdep_release(fs_info, btrfs_trans_pending_ordered);
65462306a36Sopenharmony_ci
65562306a36Sopenharmony_ci	spin_lock(&root->ordered_extent_lock);
65662306a36Sopenharmony_ci	list_del_init(&entry->root_extent_list);
65762306a36Sopenharmony_ci	root->nr_ordered_extents--;
65862306a36Sopenharmony_ci
65962306a36Sopenharmony_ci	trace_btrfs_ordered_extent_remove(btrfs_inode, entry);
66062306a36Sopenharmony_ci
66162306a36Sopenharmony_ci	if (!root->nr_ordered_extents) {
66262306a36Sopenharmony_ci		spin_lock(&fs_info->ordered_root_lock);
66362306a36Sopenharmony_ci		BUG_ON(list_empty(&root->ordered_root));
66462306a36Sopenharmony_ci		list_del_init(&root->ordered_root);
66562306a36Sopenharmony_ci		spin_unlock(&fs_info->ordered_root_lock);
66662306a36Sopenharmony_ci	}
66762306a36Sopenharmony_ci	spin_unlock(&root->ordered_extent_lock);
66862306a36Sopenharmony_ci	wake_up(&entry->wait);
66962306a36Sopenharmony_ci	if (!freespace_inode)
67062306a36Sopenharmony_ci		btrfs_lockdep_release(fs_info, btrfs_ordered_extent);
67162306a36Sopenharmony_ci}
67262306a36Sopenharmony_ci
67362306a36Sopenharmony_cistatic void btrfs_run_ordered_extent_work(struct btrfs_work *work)
67462306a36Sopenharmony_ci{
67562306a36Sopenharmony_ci	struct btrfs_ordered_extent *ordered;
67662306a36Sopenharmony_ci
67762306a36Sopenharmony_ci	ordered = container_of(work, struct btrfs_ordered_extent, flush_work);
67862306a36Sopenharmony_ci	btrfs_start_ordered_extent(ordered);
67962306a36Sopenharmony_ci	complete(&ordered->completion);
68062306a36Sopenharmony_ci}
68162306a36Sopenharmony_ci
68262306a36Sopenharmony_ci/*
68362306a36Sopenharmony_ci * wait for all the ordered extents in a root.  This is done when balancing
68462306a36Sopenharmony_ci * space between drives.
68562306a36Sopenharmony_ci */
68662306a36Sopenharmony_ciu64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
68762306a36Sopenharmony_ci			       const u64 range_start, const u64 range_len)
68862306a36Sopenharmony_ci{
68962306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
69062306a36Sopenharmony_ci	LIST_HEAD(splice);
69162306a36Sopenharmony_ci	LIST_HEAD(skipped);
69262306a36Sopenharmony_ci	LIST_HEAD(works);
69362306a36Sopenharmony_ci	struct btrfs_ordered_extent *ordered, *next;
69462306a36Sopenharmony_ci	u64 count = 0;
69562306a36Sopenharmony_ci	const u64 range_end = range_start + range_len;
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ci	mutex_lock(&root->ordered_extent_mutex);
69862306a36Sopenharmony_ci	spin_lock(&root->ordered_extent_lock);
69962306a36Sopenharmony_ci	list_splice_init(&root->ordered_extents, &splice);
70062306a36Sopenharmony_ci	while (!list_empty(&splice) && nr) {
70162306a36Sopenharmony_ci		ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
70262306a36Sopenharmony_ci					   root_extent_list);
70362306a36Sopenharmony_ci
70462306a36Sopenharmony_ci		if (range_end <= ordered->disk_bytenr ||
70562306a36Sopenharmony_ci		    ordered->disk_bytenr + ordered->disk_num_bytes <= range_start) {
70662306a36Sopenharmony_ci			list_move_tail(&ordered->root_extent_list, &skipped);
70762306a36Sopenharmony_ci			cond_resched_lock(&root->ordered_extent_lock);
70862306a36Sopenharmony_ci			continue;
70962306a36Sopenharmony_ci		}
71062306a36Sopenharmony_ci
71162306a36Sopenharmony_ci		list_move_tail(&ordered->root_extent_list,
71262306a36Sopenharmony_ci			       &root->ordered_extents);
71362306a36Sopenharmony_ci		refcount_inc(&ordered->refs);
71462306a36Sopenharmony_ci		spin_unlock(&root->ordered_extent_lock);
71562306a36Sopenharmony_ci
71662306a36Sopenharmony_ci		btrfs_init_work(&ordered->flush_work,
71762306a36Sopenharmony_ci				btrfs_run_ordered_extent_work, NULL, NULL);
71862306a36Sopenharmony_ci		list_add_tail(&ordered->work_list, &works);
71962306a36Sopenharmony_ci		btrfs_queue_work(fs_info->flush_workers, &ordered->flush_work);
72062306a36Sopenharmony_ci
72162306a36Sopenharmony_ci		cond_resched();
72262306a36Sopenharmony_ci		spin_lock(&root->ordered_extent_lock);
72362306a36Sopenharmony_ci		if (nr != U64_MAX)
72462306a36Sopenharmony_ci			nr--;
72562306a36Sopenharmony_ci		count++;
72662306a36Sopenharmony_ci	}
72762306a36Sopenharmony_ci	list_splice_tail(&skipped, &root->ordered_extents);
72862306a36Sopenharmony_ci	list_splice_tail(&splice, &root->ordered_extents);
72962306a36Sopenharmony_ci	spin_unlock(&root->ordered_extent_lock);
73062306a36Sopenharmony_ci
73162306a36Sopenharmony_ci	list_for_each_entry_safe(ordered, next, &works, work_list) {
73262306a36Sopenharmony_ci		list_del_init(&ordered->work_list);
73362306a36Sopenharmony_ci		wait_for_completion(&ordered->completion);
73462306a36Sopenharmony_ci		btrfs_put_ordered_extent(ordered);
73562306a36Sopenharmony_ci		cond_resched();
73662306a36Sopenharmony_ci	}
73762306a36Sopenharmony_ci	mutex_unlock(&root->ordered_extent_mutex);
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_ci	return count;
74062306a36Sopenharmony_ci}
74162306a36Sopenharmony_ci
74262306a36Sopenharmony_civoid btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
74362306a36Sopenharmony_ci			     const u64 range_start, const u64 range_len)
74462306a36Sopenharmony_ci{
74562306a36Sopenharmony_ci	struct btrfs_root *root;
74662306a36Sopenharmony_ci	LIST_HEAD(splice);
74762306a36Sopenharmony_ci	u64 done;
74862306a36Sopenharmony_ci
74962306a36Sopenharmony_ci	mutex_lock(&fs_info->ordered_operations_mutex);
75062306a36Sopenharmony_ci	spin_lock(&fs_info->ordered_root_lock);
75162306a36Sopenharmony_ci	list_splice_init(&fs_info->ordered_roots, &splice);
75262306a36Sopenharmony_ci	while (!list_empty(&splice) && nr) {
75362306a36Sopenharmony_ci		root = list_first_entry(&splice, struct btrfs_root,
75462306a36Sopenharmony_ci					ordered_root);
75562306a36Sopenharmony_ci		root = btrfs_grab_root(root);
75662306a36Sopenharmony_ci		BUG_ON(!root);
75762306a36Sopenharmony_ci		list_move_tail(&root->ordered_root,
75862306a36Sopenharmony_ci			       &fs_info->ordered_roots);
75962306a36Sopenharmony_ci		spin_unlock(&fs_info->ordered_root_lock);
76062306a36Sopenharmony_ci
76162306a36Sopenharmony_ci		done = btrfs_wait_ordered_extents(root, nr,
76262306a36Sopenharmony_ci						  range_start, range_len);
76362306a36Sopenharmony_ci		btrfs_put_root(root);
76462306a36Sopenharmony_ci
76562306a36Sopenharmony_ci		spin_lock(&fs_info->ordered_root_lock);
76662306a36Sopenharmony_ci		if (nr != U64_MAX) {
76762306a36Sopenharmony_ci			nr -= done;
76862306a36Sopenharmony_ci		}
76962306a36Sopenharmony_ci	}
77062306a36Sopenharmony_ci	list_splice_tail(&splice, &fs_info->ordered_roots);
77162306a36Sopenharmony_ci	spin_unlock(&fs_info->ordered_root_lock);
77262306a36Sopenharmony_ci	mutex_unlock(&fs_info->ordered_operations_mutex);
77362306a36Sopenharmony_ci}
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_ci/*
77662306a36Sopenharmony_ci * Start IO and wait for a given ordered extent to finish.
77762306a36Sopenharmony_ci *
77862306a36Sopenharmony_ci * Wait on page writeback for all the pages in the extent and the IO completion
77962306a36Sopenharmony_ci * code to insert metadata into the btree corresponding to the extent.
78062306a36Sopenharmony_ci */
78162306a36Sopenharmony_civoid btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry)
78262306a36Sopenharmony_ci{
78362306a36Sopenharmony_ci	u64 start = entry->file_offset;
78462306a36Sopenharmony_ci	u64 end = start + entry->num_bytes - 1;
78562306a36Sopenharmony_ci	struct btrfs_inode *inode = BTRFS_I(entry->inode);
78662306a36Sopenharmony_ci	bool freespace_inode;
78762306a36Sopenharmony_ci
78862306a36Sopenharmony_ci	trace_btrfs_ordered_extent_start(inode, entry);
78962306a36Sopenharmony_ci
79062306a36Sopenharmony_ci	/*
79162306a36Sopenharmony_ci	 * If this is a free space inode do not take the ordered extents lockdep
79262306a36Sopenharmony_ci	 * map.
79362306a36Sopenharmony_ci	 */
79462306a36Sopenharmony_ci	freespace_inode = btrfs_is_free_space_inode(inode);
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci	/*
79762306a36Sopenharmony_ci	 * pages in the range can be dirty, clean or writeback.  We
79862306a36Sopenharmony_ci	 * start IO on any dirty ones so the wait doesn't stall waiting
79962306a36Sopenharmony_ci	 * for the flusher thread to find them
80062306a36Sopenharmony_ci	 */
80162306a36Sopenharmony_ci	if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
80262306a36Sopenharmony_ci		filemap_fdatawrite_range(inode->vfs_inode.i_mapping, start, end);
80362306a36Sopenharmony_ci
80462306a36Sopenharmony_ci	if (!freespace_inode)
80562306a36Sopenharmony_ci		btrfs_might_wait_for_event(inode->root->fs_info, btrfs_ordered_extent);
80662306a36Sopenharmony_ci	wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags));
80762306a36Sopenharmony_ci}
80862306a36Sopenharmony_ci
80962306a36Sopenharmony_ci/*
81062306a36Sopenharmony_ci * Used to wait on ordered extents across a large range of bytes.
81162306a36Sopenharmony_ci */
81262306a36Sopenharmony_ciint btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
81362306a36Sopenharmony_ci{
81462306a36Sopenharmony_ci	int ret = 0;
81562306a36Sopenharmony_ci	int ret_wb = 0;
81662306a36Sopenharmony_ci	u64 end;
81762306a36Sopenharmony_ci	u64 orig_end;
81862306a36Sopenharmony_ci	struct btrfs_ordered_extent *ordered;
81962306a36Sopenharmony_ci
82062306a36Sopenharmony_ci	if (start + len < start) {
82162306a36Sopenharmony_ci		orig_end = OFFSET_MAX;
82262306a36Sopenharmony_ci	} else {
82362306a36Sopenharmony_ci		orig_end = start + len - 1;
82462306a36Sopenharmony_ci		if (orig_end > OFFSET_MAX)
82562306a36Sopenharmony_ci			orig_end = OFFSET_MAX;
82662306a36Sopenharmony_ci	}
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_ci	/* start IO across the range first to instantiate any delalloc
82962306a36Sopenharmony_ci	 * extents
83062306a36Sopenharmony_ci	 */
83162306a36Sopenharmony_ci	ret = btrfs_fdatawrite_range(inode, start, orig_end);
83262306a36Sopenharmony_ci	if (ret)
83362306a36Sopenharmony_ci		return ret;
83462306a36Sopenharmony_ci
83562306a36Sopenharmony_ci	/*
83662306a36Sopenharmony_ci	 * If we have a writeback error don't return immediately. Wait first
83762306a36Sopenharmony_ci	 * for any ordered extents that haven't completed yet. This is to make
83862306a36Sopenharmony_ci	 * sure no one can dirty the same page ranges and call writepages()
83962306a36Sopenharmony_ci	 * before the ordered extents complete - to avoid failures (-EEXIST)
84062306a36Sopenharmony_ci	 * when adding the new ordered extents to the ordered tree.
84162306a36Sopenharmony_ci	 */
84262306a36Sopenharmony_ci	ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
84362306a36Sopenharmony_ci
84462306a36Sopenharmony_ci	end = orig_end;
84562306a36Sopenharmony_ci	while (1) {
84662306a36Sopenharmony_ci		ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode), end);
84762306a36Sopenharmony_ci		if (!ordered)
84862306a36Sopenharmony_ci			break;
84962306a36Sopenharmony_ci		if (ordered->file_offset > orig_end) {
85062306a36Sopenharmony_ci			btrfs_put_ordered_extent(ordered);
85162306a36Sopenharmony_ci			break;
85262306a36Sopenharmony_ci		}
85362306a36Sopenharmony_ci		if (ordered->file_offset + ordered->num_bytes <= start) {
85462306a36Sopenharmony_ci			btrfs_put_ordered_extent(ordered);
85562306a36Sopenharmony_ci			break;
85662306a36Sopenharmony_ci		}
85762306a36Sopenharmony_ci		btrfs_start_ordered_extent(ordered);
85862306a36Sopenharmony_ci		end = ordered->file_offset;
85962306a36Sopenharmony_ci		/*
86062306a36Sopenharmony_ci		 * If the ordered extent had an error save the error but don't
86162306a36Sopenharmony_ci		 * exit without waiting first for all other ordered extents in
86262306a36Sopenharmony_ci		 * the range to complete.
86362306a36Sopenharmony_ci		 */
86462306a36Sopenharmony_ci		if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
86562306a36Sopenharmony_ci			ret = -EIO;
86662306a36Sopenharmony_ci		btrfs_put_ordered_extent(ordered);
86762306a36Sopenharmony_ci		if (end == 0 || end == start)
86862306a36Sopenharmony_ci			break;
86962306a36Sopenharmony_ci		end--;
87062306a36Sopenharmony_ci	}
87162306a36Sopenharmony_ci	return ret_wb ? ret_wb : ret;
87262306a36Sopenharmony_ci}
87362306a36Sopenharmony_ci
87462306a36Sopenharmony_ci/*
87562306a36Sopenharmony_ci * find an ordered extent corresponding to file_offset.  return NULL if
87662306a36Sopenharmony_ci * nothing is found, otherwise take a reference on the extent and return it
87762306a36Sopenharmony_ci */
87862306a36Sopenharmony_cistruct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
87962306a36Sopenharmony_ci							 u64 file_offset)
88062306a36Sopenharmony_ci{
88162306a36Sopenharmony_ci	struct btrfs_ordered_inode_tree *tree;
88262306a36Sopenharmony_ci	struct rb_node *node;
88362306a36Sopenharmony_ci	struct btrfs_ordered_extent *entry = NULL;
88462306a36Sopenharmony_ci	unsigned long flags;
88562306a36Sopenharmony_ci
88662306a36Sopenharmony_ci	tree = &inode->ordered_tree;
88762306a36Sopenharmony_ci	spin_lock_irqsave(&tree->lock, flags);
88862306a36Sopenharmony_ci	node = tree_search(tree, file_offset);
88962306a36Sopenharmony_ci	if (!node)
89062306a36Sopenharmony_ci		goto out;
89162306a36Sopenharmony_ci
89262306a36Sopenharmony_ci	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
89362306a36Sopenharmony_ci	if (!in_range(file_offset, entry->file_offset, entry->num_bytes))
89462306a36Sopenharmony_ci		entry = NULL;
89562306a36Sopenharmony_ci	if (entry) {
89662306a36Sopenharmony_ci		refcount_inc(&entry->refs);
89762306a36Sopenharmony_ci		trace_btrfs_ordered_extent_lookup(inode, entry);
89862306a36Sopenharmony_ci	}
89962306a36Sopenharmony_ciout:
90062306a36Sopenharmony_ci	spin_unlock_irqrestore(&tree->lock, flags);
90162306a36Sopenharmony_ci	return entry;
90262306a36Sopenharmony_ci}
90362306a36Sopenharmony_ci
90462306a36Sopenharmony_ci/* Since the DIO code tries to lock a wide area we need to look for any ordered
90562306a36Sopenharmony_ci * extents that exist in the range, rather than just the start of the range.
90662306a36Sopenharmony_ci */
90762306a36Sopenharmony_cistruct btrfs_ordered_extent *btrfs_lookup_ordered_range(
90862306a36Sopenharmony_ci		struct btrfs_inode *inode, u64 file_offset, u64 len)
90962306a36Sopenharmony_ci{
91062306a36Sopenharmony_ci	struct btrfs_ordered_inode_tree *tree;
91162306a36Sopenharmony_ci	struct rb_node *node;
91262306a36Sopenharmony_ci	struct btrfs_ordered_extent *entry = NULL;
91362306a36Sopenharmony_ci
91462306a36Sopenharmony_ci	tree = &inode->ordered_tree;
91562306a36Sopenharmony_ci	spin_lock_irq(&tree->lock);
91662306a36Sopenharmony_ci	node = tree_search(tree, file_offset);
91762306a36Sopenharmony_ci	if (!node) {
91862306a36Sopenharmony_ci		node = tree_search(tree, file_offset + len);
91962306a36Sopenharmony_ci		if (!node)
92062306a36Sopenharmony_ci			goto out;
92162306a36Sopenharmony_ci	}
92262306a36Sopenharmony_ci
92362306a36Sopenharmony_ci	while (1) {
92462306a36Sopenharmony_ci		entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
92562306a36Sopenharmony_ci		if (range_overlaps(entry, file_offset, len))
92662306a36Sopenharmony_ci			break;
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_ci		if (entry->file_offset >= file_offset + len) {
92962306a36Sopenharmony_ci			entry = NULL;
93062306a36Sopenharmony_ci			break;
93162306a36Sopenharmony_ci		}
93262306a36Sopenharmony_ci		entry = NULL;
93362306a36Sopenharmony_ci		node = rb_next(node);
93462306a36Sopenharmony_ci		if (!node)
93562306a36Sopenharmony_ci			break;
93662306a36Sopenharmony_ci	}
93762306a36Sopenharmony_ciout:
93862306a36Sopenharmony_ci	if (entry) {
93962306a36Sopenharmony_ci		refcount_inc(&entry->refs);
94062306a36Sopenharmony_ci		trace_btrfs_ordered_extent_lookup_range(inode, entry);
94162306a36Sopenharmony_ci	}
94262306a36Sopenharmony_ci	spin_unlock_irq(&tree->lock);
94362306a36Sopenharmony_ci	return entry;
94462306a36Sopenharmony_ci}
94562306a36Sopenharmony_ci
94662306a36Sopenharmony_ci/*
94762306a36Sopenharmony_ci * Adds all ordered extents to the given list. The list ends up sorted by the
94862306a36Sopenharmony_ci * file_offset of the ordered extents.
94962306a36Sopenharmony_ci */
95062306a36Sopenharmony_civoid btrfs_get_ordered_extents_for_logging(struct btrfs_inode *inode,
95162306a36Sopenharmony_ci					   struct list_head *list)
95262306a36Sopenharmony_ci{
95362306a36Sopenharmony_ci	struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
95462306a36Sopenharmony_ci	struct rb_node *n;
95562306a36Sopenharmony_ci
95662306a36Sopenharmony_ci	ASSERT(inode_is_locked(&inode->vfs_inode));
95762306a36Sopenharmony_ci
95862306a36Sopenharmony_ci	spin_lock_irq(&tree->lock);
95962306a36Sopenharmony_ci	for (n = rb_first(&tree->tree); n; n = rb_next(n)) {
96062306a36Sopenharmony_ci		struct btrfs_ordered_extent *ordered;
96162306a36Sopenharmony_ci
96262306a36Sopenharmony_ci		ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
96362306a36Sopenharmony_ci
96462306a36Sopenharmony_ci		if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
96562306a36Sopenharmony_ci			continue;
96662306a36Sopenharmony_ci
96762306a36Sopenharmony_ci		ASSERT(list_empty(&ordered->log_list));
96862306a36Sopenharmony_ci		list_add_tail(&ordered->log_list, list);
96962306a36Sopenharmony_ci		refcount_inc(&ordered->refs);
97062306a36Sopenharmony_ci		trace_btrfs_ordered_extent_lookup_for_logging(inode, ordered);
97162306a36Sopenharmony_ci	}
97262306a36Sopenharmony_ci	spin_unlock_irq(&tree->lock);
97362306a36Sopenharmony_ci}
97462306a36Sopenharmony_ci
97562306a36Sopenharmony_ci/*
97662306a36Sopenharmony_ci * lookup and return any extent before 'file_offset'.  NULL is returned
97762306a36Sopenharmony_ci * if none is found
97862306a36Sopenharmony_ci */
97962306a36Sopenharmony_cistruct btrfs_ordered_extent *
98062306a36Sopenharmony_cibtrfs_lookup_first_ordered_extent(struct btrfs_inode *inode, u64 file_offset)
98162306a36Sopenharmony_ci{
98262306a36Sopenharmony_ci	struct btrfs_ordered_inode_tree *tree;
98362306a36Sopenharmony_ci	struct rb_node *node;
98462306a36Sopenharmony_ci	struct btrfs_ordered_extent *entry = NULL;
98562306a36Sopenharmony_ci
98662306a36Sopenharmony_ci	tree = &inode->ordered_tree;
98762306a36Sopenharmony_ci	spin_lock_irq(&tree->lock);
98862306a36Sopenharmony_ci	node = tree_search(tree, file_offset);
98962306a36Sopenharmony_ci	if (!node)
99062306a36Sopenharmony_ci		goto out;
99162306a36Sopenharmony_ci
99262306a36Sopenharmony_ci	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
99362306a36Sopenharmony_ci	refcount_inc(&entry->refs);
99462306a36Sopenharmony_ci	trace_btrfs_ordered_extent_lookup_first(inode, entry);
99562306a36Sopenharmony_ciout:
99662306a36Sopenharmony_ci	spin_unlock_irq(&tree->lock);
99762306a36Sopenharmony_ci	return entry;
99862306a36Sopenharmony_ci}
99962306a36Sopenharmony_ci
100062306a36Sopenharmony_ci/*
100162306a36Sopenharmony_ci * Lookup the first ordered extent that overlaps the range
100262306a36Sopenharmony_ci * [@file_offset, @file_offset + @len).
100362306a36Sopenharmony_ci *
100462306a36Sopenharmony_ci * The difference between this and btrfs_lookup_first_ordered_extent() is
100562306a36Sopenharmony_ci * that this one won't return any ordered extent that does not overlap the range.
100662306a36Sopenharmony_ci * And the difference against btrfs_lookup_ordered_extent() is, this function
100762306a36Sopenharmony_ci * ensures the first ordered extent gets returned.
100862306a36Sopenharmony_ci */
100962306a36Sopenharmony_cistruct btrfs_ordered_extent *btrfs_lookup_first_ordered_range(
101062306a36Sopenharmony_ci			struct btrfs_inode *inode, u64 file_offset, u64 len)
101162306a36Sopenharmony_ci{
101262306a36Sopenharmony_ci	struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
101362306a36Sopenharmony_ci	struct rb_node *node;
101462306a36Sopenharmony_ci	struct rb_node *cur;
101562306a36Sopenharmony_ci	struct rb_node *prev;
101662306a36Sopenharmony_ci	struct rb_node *next;
101762306a36Sopenharmony_ci	struct btrfs_ordered_extent *entry = NULL;
101862306a36Sopenharmony_ci
101962306a36Sopenharmony_ci	spin_lock_irq(&tree->lock);
102062306a36Sopenharmony_ci	node = tree->tree.rb_node;
102162306a36Sopenharmony_ci	/*
102262306a36Sopenharmony_ci	 * Here we don't want to use tree_search() which will use tree->last
102362306a36Sopenharmony_ci	 * and screw up the search order.
102462306a36Sopenharmony_ci	 * And __tree_search() can't return the adjacent ordered extents
102562306a36Sopenharmony_ci	 * either, thus here we do our own search.
102662306a36Sopenharmony_ci	 */
102762306a36Sopenharmony_ci	while (node) {
102862306a36Sopenharmony_ci		entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
102962306a36Sopenharmony_ci
103062306a36Sopenharmony_ci		if (file_offset < entry->file_offset) {
103162306a36Sopenharmony_ci			node = node->rb_left;
103262306a36Sopenharmony_ci		} else if (file_offset >= entry_end(entry)) {
103362306a36Sopenharmony_ci			node = node->rb_right;
103462306a36Sopenharmony_ci		} else {
103562306a36Sopenharmony_ci			/*
103662306a36Sopenharmony_ci			 * Direct hit, got an ordered extent that starts at
103762306a36Sopenharmony_ci			 * @file_offset
103862306a36Sopenharmony_ci			 */
103962306a36Sopenharmony_ci			goto out;
104062306a36Sopenharmony_ci		}
104162306a36Sopenharmony_ci	}
104262306a36Sopenharmony_ci	if (!entry) {
104362306a36Sopenharmony_ci		/* Empty tree */
104462306a36Sopenharmony_ci		goto out;
104562306a36Sopenharmony_ci	}
104662306a36Sopenharmony_ci
104762306a36Sopenharmony_ci	cur = &entry->rb_node;
104862306a36Sopenharmony_ci	/* We got an entry around @file_offset, check adjacent entries */
104962306a36Sopenharmony_ci	if (entry->file_offset < file_offset) {
105062306a36Sopenharmony_ci		prev = cur;
105162306a36Sopenharmony_ci		next = rb_next(cur);
105262306a36Sopenharmony_ci	} else {
105362306a36Sopenharmony_ci		prev = rb_prev(cur);
105462306a36Sopenharmony_ci		next = cur;
105562306a36Sopenharmony_ci	}
105662306a36Sopenharmony_ci	if (prev) {
105762306a36Sopenharmony_ci		entry = rb_entry(prev, struct btrfs_ordered_extent, rb_node);
105862306a36Sopenharmony_ci		if (range_overlaps(entry, file_offset, len))
105962306a36Sopenharmony_ci			goto out;
106062306a36Sopenharmony_ci	}
106162306a36Sopenharmony_ci	if (next) {
106262306a36Sopenharmony_ci		entry = rb_entry(next, struct btrfs_ordered_extent, rb_node);
106362306a36Sopenharmony_ci		if (range_overlaps(entry, file_offset, len))
106462306a36Sopenharmony_ci			goto out;
106562306a36Sopenharmony_ci	}
106662306a36Sopenharmony_ci	/* No ordered extent in the range */
106762306a36Sopenharmony_ci	entry = NULL;
106862306a36Sopenharmony_ciout:
106962306a36Sopenharmony_ci	if (entry) {
107062306a36Sopenharmony_ci		refcount_inc(&entry->refs);
107162306a36Sopenharmony_ci		trace_btrfs_ordered_extent_lookup_first_range(inode, entry);
107262306a36Sopenharmony_ci	}
107362306a36Sopenharmony_ci
107462306a36Sopenharmony_ci	spin_unlock_irq(&tree->lock);
107562306a36Sopenharmony_ci	return entry;
107662306a36Sopenharmony_ci}
107762306a36Sopenharmony_ci
107862306a36Sopenharmony_ci/*
107962306a36Sopenharmony_ci * Lock the passed range and ensures all pending ordered extents in it are run
108062306a36Sopenharmony_ci * to completion.
108162306a36Sopenharmony_ci *
108262306a36Sopenharmony_ci * @inode:        Inode whose ordered tree is to be searched
108362306a36Sopenharmony_ci * @start:        Beginning of range to flush
108462306a36Sopenharmony_ci * @end:          Last byte of range to lock
108562306a36Sopenharmony_ci * @cached_state: If passed, will return the extent state responsible for the
108662306a36Sopenharmony_ci *                locked range. It's the caller's responsibility to free the
108762306a36Sopenharmony_ci *                cached state.
108862306a36Sopenharmony_ci *
108962306a36Sopenharmony_ci * Always return with the given range locked, ensuring after it's called no
109062306a36Sopenharmony_ci * order extent can be pending.
109162306a36Sopenharmony_ci */
109262306a36Sopenharmony_civoid btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
109362306a36Sopenharmony_ci					u64 end,
109462306a36Sopenharmony_ci					struct extent_state **cached_state)
109562306a36Sopenharmony_ci{
109662306a36Sopenharmony_ci	struct btrfs_ordered_extent *ordered;
109762306a36Sopenharmony_ci	struct extent_state *cache = NULL;
109862306a36Sopenharmony_ci	struct extent_state **cachedp = &cache;
109962306a36Sopenharmony_ci
110062306a36Sopenharmony_ci	if (cached_state)
110162306a36Sopenharmony_ci		cachedp = cached_state;
110262306a36Sopenharmony_ci
110362306a36Sopenharmony_ci	while (1) {
110462306a36Sopenharmony_ci		lock_extent(&inode->io_tree, start, end, cachedp);
110562306a36Sopenharmony_ci		ordered = btrfs_lookup_ordered_range(inode, start,
110662306a36Sopenharmony_ci						     end - start + 1);
110762306a36Sopenharmony_ci		if (!ordered) {
110862306a36Sopenharmony_ci			/*
110962306a36Sopenharmony_ci			 * If no external cached_state has been passed then
111062306a36Sopenharmony_ci			 * decrement the extra ref taken for cachedp since we
111162306a36Sopenharmony_ci			 * aren't exposing it outside of this function
111262306a36Sopenharmony_ci			 */
111362306a36Sopenharmony_ci			if (!cached_state)
111462306a36Sopenharmony_ci				refcount_dec(&cache->refs);
111562306a36Sopenharmony_ci			break;
111662306a36Sopenharmony_ci		}
111762306a36Sopenharmony_ci		unlock_extent(&inode->io_tree, start, end, cachedp);
111862306a36Sopenharmony_ci		btrfs_start_ordered_extent(ordered);
111962306a36Sopenharmony_ci		btrfs_put_ordered_extent(ordered);
112062306a36Sopenharmony_ci	}
112162306a36Sopenharmony_ci}
112262306a36Sopenharmony_ci
112362306a36Sopenharmony_ci/*
112462306a36Sopenharmony_ci * Lock the passed range and ensure all pending ordered extents in it are run
112562306a36Sopenharmony_ci * to completion in nowait mode.
112662306a36Sopenharmony_ci *
112762306a36Sopenharmony_ci * Return true if btrfs_lock_ordered_range does not return any extents,
112862306a36Sopenharmony_ci * otherwise false.
112962306a36Sopenharmony_ci */
113062306a36Sopenharmony_cibool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
113162306a36Sopenharmony_ci				  struct extent_state **cached_state)
113262306a36Sopenharmony_ci{
113362306a36Sopenharmony_ci	struct btrfs_ordered_extent *ordered;
113462306a36Sopenharmony_ci
113562306a36Sopenharmony_ci	if (!try_lock_extent(&inode->io_tree, start, end, cached_state))
113662306a36Sopenharmony_ci		return false;
113762306a36Sopenharmony_ci
113862306a36Sopenharmony_ci	ordered = btrfs_lookup_ordered_range(inode, start, end - start + 1);
113962306a36Sopenharmony_ci	if (!ordered)
114062306a36Sopenharmony_ci		return true;
114162306a36Sopenharmony_ci
114262306a36Sopenharmony_ci	btrfs_put_ordered_extent(ordered);
114362306a36Sopenharmony_ci	unlock_extent(&inode->io_tree, start, end, cached_state);
114462306a36Sopenharmony_ci
114562306a36Sopenharmony_ci	return false;
114662306a36Sopenharmony_ci}
114762306a36Sopenharmony_ci
114862306a36Sopenharmony_ci/* Split out a new ordered extent for this first @len bytes of @ordered. */
114962306a36Sopenharmony_cistruct btrfs_ordered_extent *btrfs_split_ordered_extent(
115062306a36Sopenharmony_ci			struct btrfs_ordered_extent *ordered, u64 len)
115162306a36Sopenharmony_ci{
115262306a36Sopenharmony_ci	struct btrfs_inode *inode = BTRFS_I(ordered->inode);
115362306a36Sopenharmony_ci	struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
115462306a36Sopenharmony_ci	struct btrfs_root *root = inode->root;
115562306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
115662306a36Sopenharmony_ci	u64 file_offset = ordered->file_offset;
115762306a36Sopenharmony_ci	u64 disk_bytenr = ordered->disk_bytenr;
115862306a36Sopenharmony_ci	unsigned long flags = ordered->flags;
115962306a36Sopenharmony_ci	struct btrfs_ordered_sum *sum, *tmpsum;
116062306a36Sopenharmony_ci	struct btrfs_ordered_extent *new;
116162306a36Sopenharmony_ci	struct rb_node *node;
116262306a36Sopenharmony_ci	u64 offset = 0;
116362306a36Sopenharmony_ci
116462306a36Sopenharmony_ci	trace_btrfs_ordered_extent_split(inode, ordered);
116562306a36Sopenharmony_ci
116662306a36Sopenharmony_ci	ASSERT(!(flags & (1U << BTRFS_ORDERED_COMPRESSED)));
116762306a36Sopenharmony_ci
116862306a36Sopenharmony_ci	/*
116962306a36Sopenharmony_ci	 * The entire bio must be covered by the ordered extent, but we can't
117062306a36Sopenharmony_ci	 * reduce the original extent to a zero length either.
117162306a36Sopenharmony_ci	 */
117262306a36Sopenharmony_ci	if (WARN_ON_ONCE(len >= ordered->num_bytes))
117362306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
117462306a36Sopenharmony_ci	/* We cannot split partially completed ordered extents. */
117562306a36Sopenharmony_ci	if (ordered->bytes_left) {
117662306a36Sopenharmony_ci		ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS));
117762306a36Sopenharmony_ci		if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes))
117862306a36Sopenharmony_ci			return ERR_PTR(-EINVAL);
117962306a36Sopenharmony_ci	}
118062306a36Sopenharmony_ci	/* We cannot split a compressed ordered extent. */
118162306a36Sopenharmony_ci	if (WARN_ON_ONCE(ordered->disk_num_bytes != ordered->num_bytes))
118262306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
118362306a36Sopenharmony_ci
118462306a36Sopenharmony_ci	new = alloc_ordered_extent(inode, file_offset, len, len, disk_bytenr,
118562306a36Sopenharmony_ci				   len, 0, flags, ordered->compress_type);
118662306a36Sopenharmony_ci	if (IS_ERR(new))
118762306a36Sopenharmony_ci		return new;
118862306a36Sopenharmony_ci
118962306a36Sopenharmony_ci	/* One ref for the tree. */
119062306a36Sopenharmony_ci	refcount_inc(&new->refs);
119162306a36Sopenharmony_ci
119262306a36Sopenharmony_ci	spin_lock_irq(&root->ordered_extent_lock);
119362306a36Sopenharmony_ci	spin_lock(&tree->lock);
119462306a36Sopenharmony_ci	/* Remove from tree once */
119562306a36Sopenharmony_ci	node = &ordered->rb_node;
119662306a36Sopenharmony_ci	rb_erase(node, &tree->tree);
119762306a36Sopenharmony_ci	RB_CLEAR_NODE(node);
119862306a36Sopenharmony_ci	if (tree->last == node)
119962306a36Sopenharmony_ci		tree->last = NULL;
120062306a36Sopenharmony_ci
120162306a36Sopenharmony_ci	ordered->file_offset += len;
120262306a36Sopenharmony_ci	ordered->disk_bytenr += len;
120362306a36Sopenharmony_ci	ordered->num_bytes -= len;
120462306a36Sopenharmony_ci	ordered->disk_num_bytes -= len;
120562306a36Sopenharmony_ci
120662306a36Sopenharmony_ci	if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) {
120762306a36Sopenharmony_ci		ASSERT(ordered->bytes_left == 0);
120862306a36Sopenharmony_ci		new->bytes_left = 0;
120962306a36Sopenharmony_ci	} else {
121062306a36Sopenharmony_ci		ordered->bytes_left -= len;
121162306a36Sopenharmony_ci	}
121262306a36Sopenharmony_ci
121362306a36Sopenharmony_ci	if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags)) {
121462306a36Sopenharmony_ci		if (ordered->truncated_len > len) {
121562306a36Sopenharmony_ci			ordered->truncated_len -= len;
121662306a36Sopenharmony_ci		} else {
121762306a36Sopenharmony_ci			new->truncated_len = ordered->truncated_len;
121862306a36Sopenharmony_ci			ordered->truncated_len = 0;
121962306a36Sopenharmony_ci		}
122062306a36Sopenharmony_ci	}
122162306a36Sopenharmony_ci
122262306a36Sopenharmony_ci	list_for_each_entry_safe(sum, tmpsum, &ordered->list, list) {
122362306a36Sopenharmony_ci		if (offset == len)
122462306a36Sopenharmony_ci			break;
122562306a36Sopenharmony_ci		list_move_tail(&sum->list, &new->list);
122662306a36Sopenharmony_ci		offset += sum->len;
122762306a36Sopenharmony_ci	}
122862306a36Sopenharmony_ci
122962306a36Sopenharmony_ci	/* Re-insert the node */
123062306a36Sopenharmony_ci	node = tree_insert(&tree->tree, ordered->file_offset, &ordered->rb_node);
123162306a36Sopenharmony_ci	if (node)
123262306a36Sopenharmony_ci		btrfs_panic(fs_info, -EEXIST,
123362306a36Sopenharmony_ci			"zoned: inconsistency in ordered tree at offset %llu",
123462306a36Sopenharmony_ci			ordered->file_offset);
123562306a36Sopenharmony_ci
123662306a36Sopenharmony_ci	node = tree_insert(&tree->tree, new->file_offset, &new->rb_node);
123762306a36Sopenharmony_ci	if (node)
123862306a36Sopenharmony_ci		btrfs_panic(fs_info, -EEXIST,
123962306a36Sopenharmony_ci			"zoned: inconsistency in ordered tree at offset %llu",
124062306a36Sopenharmony_ci			new->file_offset);
124162306a36Sopenharmony_ci	spin_unlock(&tree->lock);
124262306a36Sopenharmony_ci
124362306a36Sopenharmony_ci	list_add_tail(&new->root_extent_list, &root->ordered_extents);
124462306a36Sopenharmony_ci	root->nr_ordered_extents++;
124562306a36Sopenharmony_ci	spin_unlock_irq(&root->ordered_extent_lock);
124662306a36Sopenharmony_ci	return new;
124762306a36Sopenharmony_ci}
124862306a36Sopenharmony_ci
124962306a36Sopenharmony_ciint __init ordered_data_init(void)
125062306a36Sopenharmony_ci{
125162306a36Sopenharmony_ci	btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
125262306a36Sopenharmony_ci				     sizeof(struct btrfs_ordered_extent), 0,
125362306a36Sopenharmony_ci				     SLAB_MEM_SPREAD,
125462306a36Sopenharmony_ci				     NULL);
125562306a36Sopenharmony_ci	if (!btrfs_ordered_extent_cache)
125662306a36Sopenharmony_ci		return -ENOMEM;
125762306a36Sopenharmony_ci
125862306a36Sopenharmony_ci	return 0;
125962306a36Sopenharmony_ci}
126062306a36Sopenharmony_ci
126162306a36Sopenharmony_civoid __cold ordered_data_exit(void)
126262306a36Sopenharmony_ci{
126362306a36Sopenharmony_ci	kmem_cache_destroy(btrfs_ordered_extent_cache);
126462306a36Sopenharmony_ci}
1265