162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2007 Oracle.  All rights reserved.
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/sched.h>
762306a36Sopenharmony_ci#include <linux/sched/signal.h>
862306a36Sopenharmony_ci#include <linux/pagemap.h>
962306a36Sopenharmony_ci#include <linux/writeback.h>
1062306a36Sopenharmony_ci#include <linux/blkdev.h>
1162306a36Sopenharmony_ci#include <linux/sort.h>
1262306a36Sopenharmony_ci#include <linux/rcupdate.h>
1362306a36Sopenharmony_ci#include <linux/kthread.h>
1462306a36Sopenharmony_ci#include <linux/slab.h>
1562306a36Sopenharmony_ci#include <linux/ratelimit.h>
1662306a36Sopenharmony_ci#include <linux/percpu_counter.h>
1762306a36Sopenharmony_ci#include <linux/lockdep.h>
1862306a36Sopenharmony_ci#include <linux/crc32c.h>
1962306a36Sopenharmony_ci#include "ctree.h"
2062306a36Sopenharmony_ci#include "extent-tree.h"
2162306a36Sopenharmony_ci#include "tree-log.h"
2262306a36Sopenharmony_ci#include "disk-io.h"
2362306a36Sopenharmony_ci#include "print-tree.h"
2462306a36Sopenharmony_ci#include "volumes.h"
2562306a36Sopenharmony_ci#include "raid56.h"
2662306a36Sopenharmony_ci#include "locking.h"
2762306a36Sopenharmony_ci#include "free-space-cache.h"
2862306a36Sopenharmony_ci#include "free-space-tree.h"
2962306a36Sopenharmony_ci#include "sysfs.h"
3062306a36Sopenharmony_ci#include "qgroup.h"
3162306a36Sopenharmony_ci#include "ref-verify.h"
3262306a36Sopenharmony_ci#include "space-info.h"
3362306a36Sopenharmony_ci#include "block-rsv.h"
3462306a36Sopenharmony_ci#include "delalloc-space.h"
3562306a36Sopenharmony_ci#include "discard.h"
3662306a36Sopenharmony_ci#include "rcu-string.h"
3762306a36Sopenharmony_ci#include "zoned.h"
3862306a36Sopenharmony_ci#include "dev-replace.h"
3962306a36Sopenharmony_ci#include "fs.h"
4062306a36Sopenharmony_ci#include "accessors.h"
4162306a36Sopenharmony_ci#include "root-tree.h"
4262306a36Sopenharmony_ci#include "file-item.h"
4362306a36Sopenharmony_ci#include "orphan.h"
4462306a36Sopenharmony_ci#include "tree-checker.h"
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_ci#undef SCRAMBLE_DELAYED_REFS
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_cistatic int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5062306a36Sopenharmony_ci			       struct btrfs_delayed_ref_node *node, u64 parent,
5162306a36Sopenharmony_ci			       u64 root_objectid, u64 owner_objectid,
5262306a36Sopenharmony_ci			       u64 owner_offset, int refs_to_drop,
5362306a36Sopenharmony_ci			       struct btrfs_delayed_extent_op *extra_op);
5462306a36Sopenharmony_cistatic void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
5562306a36Sopenharmony_ci				    struct extent_buffer *leaf,
5662306a36Sopenharmony_ci				    struct btrfs_extent_item *ei);
5762306a36Sopenharmony_cistatic int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
5862306a36Sopenharmony_ci				      u64 parent, u64 root_objectid,
5962306a36Sopenharmony_ci				      u64 flags, u64 owner, u64 offset,
6062306a36Sopenharmony_ci				      struct btrfs_key *ins, int ref_mod);
6162306a36Sopenharmony_cistatic int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6262306a36Sopenharmony_ci				     struct btrfs_delayed_ref_node *node,
6362306a36Sopenharmony_ci				     struct btrfs_delayed_extent_op *extent_op);
6462306a36Sopenharmony_cistatic int find_next_key(struct btrfs_path *path, int level,
6562306a36Sopenharmony_ci			 struct btrfs_key *key);
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_cistatic int block_group_bits(struct btrfs_block_group *cache, u64 bits)
6862306a36Sopenharmony_ci{
6962306a36Sopenharmony_ci	return (cache->flags & bits) == bits;
7062306a36Sopenharmony_ci}
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci/* simple helper to search for an existing data extent at a given offset */
7362306a36Sopenharmony_ciint btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
7462306a36Sopenharmony_ci{
7562306a36Sopenharmony_ci	struct btrfs_root *root = btrfs_extent_root(fs_info, start);
7662306a36Sopenharmony_ci	int ret;
7762306a36Sopenharmony_ci	struct btrfs_key key;
7862306a36Sopenharmony_ci	struct btrfs_path *path;
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci	path = btrfs_alloc_path();
8162306a36Sopenharmony_ci	if (!path)
8262306a36Sopenharmony_ci		return -ENOMEM;
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci	key.objectid = start;
8562306a36Sopenharmony_ci	key.offset = len;
8662306a36Sopenharmony_ci	key.type = BTRFS_EXTENT_ITEM_KEY;
8762306a36Sopenharmony_ci	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
8862306a36Sopenharmony_ci	btrfs_free_path(path);
8962306a36Sopenharmony_ci	return ret;
9062306a36Sopenharmony_ci}
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_ci/*
9362306a36Sopenharmony_ci * helper function to lookup reference count and flags of a tree block.
9462306a36Sopenharmony_ci *
9562306a36Sopenharmony_ci * the head node for delayed ref is used to store the sum of all the
9662306a36Sopenharmony_ci * reference count modifications queued up in the rbtree. the head
9762306a36Sopenharmony_ci * node may also store the extent flags to set. This way you can check
9862306a36Sopenharmony_ci * to see what the reference count and extent flags would be if all of
9962306a36Sopenharmony_ci * the delayed refs are not processed.
10062306a36Sopenharmony_ci */
10162306a36Sopenharmony_ciint btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
10262306a36Sopenharmony_ci			     struct btrfs_fs_info *fs_info, u64 bytenr,
10362306a36Sopenharmony_ci			     u64 offset, int metadata, u64 *refs, u64 *flags)
10462306a36Sopenharmony_ci{
10562306a36Sopenharmony_ci	struct btrfs_root *extent_root;
10662306a36Sopenharmony_ci	struct btrfs_delayed_ref_head *head;
10762306a36Sopenharmony_ci	struct btrfs_delayed_ref_root *delayed_refs;
10862306a36Sopenharmony_ci	struct btrfs_path *path;
10962306a36Sopenharmony_ci	struct btrfs_extent_item *ei;
11062306a36Sopenharmony_ci	struct extent_buffer *leaf;
11162306a36Sopenharmony_ci	struct btrfs_key key;
11262306a36Sopenharmony_ci	u32 item_size;
11362306a36Sopenharmony_ci	u64 num_refs;
11462306a36Sopenharmony_ci	u64 extent_flags;
11562306a36Sopenharmony_ci	int ret;
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci	/*
11862306a36Sopenharmony_ci	 * If we don't have skinny metadata, don't bother doing anything
11962306a36Sopenharmony_ci	 * different
12062306a36Sopenharmony_ci	 */
12162306a36Sopenharmony_ci	if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
12262306a36Sopenharmony_ci		offset = fs_info->nodesize;
12362306a36Sopenharmony_ci		metadata = 0;
12462306a36Sopenharmony_ci	}
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci	path = btrfs_alloc_path();
12762306a36Sopenharmony_ci	if (!path)
12862306a36Sopenharmony_ci		return -ENOMEM;
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	if (!trans) {
13162306a36Sopenharmony_ci		path->skip_locking = 1;
13262306a36Sopenharmony_ci		path->search_commit_root = 1;
13362306a36Sopenharmony_ci	}
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_cisearch_again:
13662306a36Sopenharmony_ci	key.objectid = bytenr;
13762306a36Sopenharmony_ci	key.offset = offset;
13862306a36Sopenharmony_ci	if (metadata)
13962306a36Sopenharmony_ci		key.type = BTRFS_METADATA_ITEM_KEY;
14062306a36Sopenharmony_ci	else
14162306a36Sopenharmony_ci		key.type = BTRFS_EXTENT_ITEM_KEY;
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci	extent_root = btrfs_extent_root(fs_info, bytenr);
14462306a36Sopenharmony_ci	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
14562306a36Sopenharmony_ci	if (ret < 0)
14662306a36Sopenharmony_ci		goto out_free;
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
14962306a36Sopenharmony_ci		if (path->slots[0]) {
15062306a36Sopenharmony_ci			path->slots[0]--;
15162306a36Sopenharmony_ci			btrfs_item_key_to_cpu(path->nodes[0], &key,
15262306a36Sopenharmony_ci					      path->slots[0]);
15362306a36Sopenharmony_ci			if (key.objectid == bytenr &&
15462306a36Sopenharmony_ci			    key.type == BTRFS_EXTENT_ITEM_KEY &&
15562306a36Sopenharmony_ci			    key.offset == fs_info->nodesize)
15662306a36Sopenharmony_ci				ret = 0;
15762306a36Sopenharmony_ci		}
15862306a36Sopenharmony_ci	}
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	if (ret == 0) {
16162306a36Sopenharmony_ci		leaf = path->nodes[0];
16262306a36Sopenharmony_ci		item_size = btrfs_item_size(leaf, path->slots[0]);
16362306a36Sopenharmony_ci		if (item_size >= sizeof(*ei)) {
16462306a36Sopenharmony_ci			ei = btrfs_item_ptr(leaf, path->slots[0],
16562306a36Sopenharmony_ci					    struct btrfs_extent_item);
16662306a36Sopenharmony_ci			num_refs = btrfs_extent_refs(leaf, ei);
16762306a36Sopenharmony_ci			extent_flags = btrfs_extent_flags(leaf, ei);
16862306a36Sopenharmony_ci		} else {
16962306a36Sopenharmony_ci			ret = -EUCLEAN;
17062306a36Sopenharmony_ci			btrfs_err(fs_info,
17162306a36Sopenharmony_ci			"unexpected extent item size, has %u expect >= %zu",
17262306a36Sopenharmony_ci				  item_size, sizeof(*ei));
17362306a36Sopenharmony_ci			if (trans)
17462306a36Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
17562306a36Sopenharmony_ci			else
17662306a36Sopenharmony_ci				btrfs_handle_fs_error(fs_info, ret, NULL);
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci			goto out_free;
17962306a36Sopenharmony_ci		}
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci		BUG_ON(num_refs == 0);
18262306a36Sopenharmony_ci	} else {
18362306a36Sopenharmony_ci		num_refs = 0;
18462306a36Sopenharmony_ci		extent_flags = 0;
18562306a36Sopenharmony_ci		ret = 0;
18662306a36Sopenharmony_ci	}
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	if (!trans)
18962306a36Sopenharmony_ci		goto out;
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ci	delayed_refs = &trans->transaction->delayed_refs;
19262306a36Sopenharmony_ci	spin_lock(&delayed_refs->lock);
19362306a36Sopenharmony_ci	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
19462306a36Sopenharmony_ci	if (head) {
19562306a36Sopenharmony_ci		if (!mutex_trylock(&head->mutex)) {
19662306a36Sopenharmony_ci			refcount_inc(&head->refs);
19762306a36Sopenharmony_ci			spin_unlock(&delayed_refs->lock);
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci			btrfs_release_path(path);
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci			/*
20262306a36Sopenharmony_ci			 * Mutex was contended, block until it's released and try
20362306a36Sopenharmony_ci			 * again
20462306a36Sopenharmony_ci			 */
20562306a36Sopenharmony_ci			mutex_lock(&head->mutex);
20662306a36Sopenharmony_ci			mutex_unlock(&head->mutex);
20762306a36Sopenharmony_ci			btrfs_put_delayed_ref_head(head);
20862306a36Sopenharmony_ci			goto search_again;
20962306a36Sopenharmony_ci		}
21062306a36Sopenharmony_ci		spin_lock(&head->lock);
21162306a36Sopenharmony_ci		if (head->extent_op && head->extent_op->update_flags)
21262306a36Sopenharmony_ci			extent_flags |= head->extent_op->flags_to_set;
21362306a36Sopenharmony_ci		else
21462306a36Sopenharmony_ci			BUG_ON(num_refs == 0);
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci		num_refs += head->ref_mod;
21762306a36Sopenharmony_ci		spin_unlock(&head->lock);
21862306a36Sopenharmony_ci		mutex_unlock(&head->mutex);
21962306a36Sopenharmony_ci	}
22062306a36Sopenharmony_ci	spin_unlock(&delayed_refs->lock);
22162306a36Sopenharmony_ciout:
22262306a36Sopenharmony_ci	WARN_ON(num_refs == 0);
22362306a36Sopenharmony_ci	if (refs)
22462306a36Sopenharmony_ci		*refs = num_refs;
22562306a36Sopenharmony_ci	if (flags)
22662306a36Sopenharmony_ci		*flags = extent_flags;
22762306a36Sopenharmony_ciout_free:
22862306a36Sopenharmony_ci	btrfs_free_path(path);
22962306a36Sopenharmony_ci	return ret;
23062306a36Sopenharmony_ci}
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ci/*
23362306a36Sopenharmony_ci * Back reference rules.  Back refs have three main goals:
23462306a36Sopenharmony_ci *
23562306a36Sopenharmony_ci * 1) differentiate between all holders of references to an extent so that
23662306a36Sopenharmony_ci *    when a reference is dropped we can make sure it was a valid reference
23762306a36Sopenharmony_ci *    before freeing the extent.
23862306a36Sopenharmony_ci *
23962306a36Sopenharmony_ci * 2) Provide enough information to quickly find the holders of an extent
24062306a36Sopenharmony_ci *    if we notice a given block is corrupted or bad.
24162306a36Sopenharmony_ci *
24262306a36Sopenharmony_ci * 3) Make it easy to migrate blocks for FS shrinking or storage pool
24362306a36Sopenharmony_ci *    maintenance.  This is actually the same as #2, but with a slightly
24462306a36Sopenharmony_ci *    different use case.
24562306a36Sopenharmony_ci *
24662306a36Sopenharmony_ci * There are two kinds of back refs. The implicit back refs is optimized
24762306a36Sopenharmony_ci * for pointers in non-shared tree blocks. For a given pointer in a block,
24862306a36Sopenharmony_ci * back refs of this kind provide information about the block's owner tree
24962306a36Sopenharmony_ci * and the pointer's key. These information allow us to find the block by
25062306a36Sopenharmony_ci * b-tree searching. The full back refs is for pointers in tree blocks not
25162306a36Sopenharmony_ci * referenced by their owner trees. The location of tree block is recorded
25262306a36Sopenharmony_ci * in the back refs. Actually the full back refs is generic, and can be
25362306a36Sopenharmony_ci * used in all cases the implicit back refs is used. The major shortcoming
25462306a36Sopenharmony_ci * of the full back refs is its overhead. Every time a tree block gets
25562306a36Sopenharmony_ci * COWed, we have to update back refs entry for all pointers in it.
25662306a36Sopenharmony_ci *
25762306a36Sopenharmony_ci * For a newly allocated tree block, we use implicit back refs for
25862306a36Sopenharmony_ci * pointers in it. This means most tree related operations only involve
25962306a36Sopenharmony_ci * implicit back refs. For a tree block created in old transaction, the
26062306a36Sopenharmony_ci * only way to drop a reference to it is COW it. So we can detect the
26162306a36Sopenharmony_ci * event that tree block loses its owner tree's reference and do the
26262306a36Sopenharmony_ci * back refs conversion.
26362306a36Sopenharmony_ci *
26462306a36Sopenharmony_ci * When a tree block is COWed through a tree, there are four cases:
26562306a36Sopenharmony_ci *
26662306a36Sopenharmony_ci * The reference count of the block is one and the tree is the block's
26762306a36Sopenharmony_ci * owner tree. Nothing to do in this case.
26862306a36Sopenharmony_ci *
26962306a36Sopenharmony_ci * The reference count of the block is one and the tree is not the
27062306a36Sopenharmony_ci * block's owner tree. In this case, full back refs is used for pointers
27162306a36Sopenharmony_ci * in the block. Remove these full back refs, add implicit back refs for
27262306a36Sopenharmony_ci * every pointers in the new block.
27362306a36Sopenharmony_ci *
27462306a36Sopenharmony_ci * The reference count of the block is greater than one and the tree is
27562306a36Sopenharmony_ci * the block's owner tree. In this case, implicit back refs is used for
27662306a36Sopenharmony_ci * pointers in the block. Add full back refs for every pointers in the
27762306a36Sopenharmony_ci * block, increase lower level extents' reference counts. The original
27862306a36Sopenharmony_ci * implicit back refs are entailed to the new block.
27962306a36Sopenharmony_ci *
28062306a36Sopenharmony_ci * The reference count of the block is greater than one and the tree is
28162306a36Sopenharmony_ci * not the block's owner tree. Add implicit back refs for every pointer in
28262306a36Sopenharmony_ci * the new block, increase lower level extents' reference count.
28362306a36Sopenharmony_ci *
28462306a36Sopenharmony_ci * Back Reference Key composing:
28562306a36Sopenharmony_ci *
28662306a36Sopenharmony_ci * The key objectid corresponds to the first byte in the extent,
28762306a36Sopenharmony_ci * The key type is used to differentiate between types of back refs.
28862306a36Sopenharmony_ci * There are different meanings of the key offset for different types
28962306a36Sopenharmony_ci * of back refs.
29062306a36Sopenharmony_ci *
29162306a36Sopenharmony_ci * File extents can be referenced by:
29262306a36Sopenharmony_ci *
29362306a36Sopenharmony_ci * - multiple snapshots, subvolumes, or different generations in one subvol
29462306a36Sopenharmony_ci * - different files inside a single subvolume
29562306a36Sopenharmony_ci * - different offsets inside a file (bookend extents in file.c)
29662306a36Sopenharmony_ci *
29762306a36Sopenharmony_ci * The extent ref structure for the implicit back refs has fields for:
29862306a36Sopenharmony_ci *
29962306a36Sopenharmony_ci * - Objectid of the subvolume root
30062306a36Sopenharmony_ci * - objectid of the file holding the reference
30162306a36Sopenharmony_ci * - original offset in the file
30262306a36Sopenharmony_ci * - how many bookend extents
30362306a36Sopenharmony_ci *
30462306a36Sopenharmony_ci * The key offset for the implicit back refs is hash of the first
30562306a36Sopenharmony_ci * three fields.
30662306a36Sopenharmony_ci *
30762306a36Sopenharmony_ci * The extent ref structure for the full back refs has field for:
30862306a36Sopenharmony_ci *
30962306a36Sopenharmony_ci * - number of pointers in the tree leaf
31062306a36Sopenharmony_ci *
 * The key offset for the full back refs is the first byte of
 * the tree leaf
31362306a36Sopenharmony_ci *
 * When a file extent is allocated, the implicit back refs are used and
 * the fields are filled in:
31662306a36Sopenharmony_ci *
31762306a36Sopenharmony_ci *     (root_key.objectid, inode objectid, offset in file, 1)
31862306a36Sopenharmony_ci *
 * When a file extent is removed by file truncation, we find the
 * corresponding implicit back refs and check the following fields:
32162306a36Sopenharmony_ci *
32262306a36Sopenharmony_ci *     (btrfs_header_owner(leaf), inode objectid, offset in file)
32362306a36Sopenharmony_ci *
32462306a36Sopenharmony_ci * Btree extents can be referenced by:
32562306a36Sopenharmony_ci *
32662306a36Sopenharmony_ci * - Different subvolumes
32762306a36Sopenharmony_ci *
32862306a36Sopenharmony_ci * Both the implicit back refs and the full back refs for tree blocks
32962306a36Sopenharmony_ci * only consist of key. The key offset for the implicit back refs is
33062306a36Sopenharmony_ci * objectid of block's owner tree. The key offset for the full back refs
33162306a36Sopenharmony_ci * is the first byte of parent block.
33262306a36Sopenharmony_ci *
33362306a36Sopenharmony_ci * When implicit back refs is used, information about the lowest key and
33462306a36Sopenharmony_ci * level of the tree block are required. These information are stored in
33562306a36Sopenharmony_ci * tree block info structure.
33662306a36Sopenharmony_ci */
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci/*
33962306a36Sopenharmony_ci * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
34062306a36Sopenharmony_ci * is_data == BTRFS_REF_TYPE_DATA, data type is requiried,
34162306a36Sopenharmony_ci * is_data == BTRFS_REF_TYPE_ANY, either type is OK.
34262306a36Sopenharmony_ci */
34362306a36Sopenharmony_ciint btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
34462306a36Sopenharmony_ci				     struct btrfs_extent_inline_ref *iref,
34562306a36Sopenharmony_ci				     enum btrfs_inline_ref_type is_data)
34662306a36Sopenharmony_ci{
34762306a36Sopenharmony_ci	int type = btrfs_extent_inline_ref_type(eb, iref);
34862306a36Sopenharmony_ci	u64 offset = btrfs_extent_inline_ref_offset(eb, iref);
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_ci	if (type == BTRFS_TREE_BLOCK_REF_KEY ||
35162306a36Sopenharmony_ci	    type == BTRFS_SHARED_BLOCK_REF_KEY ||
35262306a36Sopenharmony_ci	    type == BTRFS_SHARED_DATA_REF_KEY ||
35362306a36Sopenharmony_ci	    type == BTRFS_EXTENT_DATA_REF_KEY) {
35462306a36Sopenharmony_ci		if (is_data == BTRFS_REF_TYPE_BLOCK) {
35562306a36Sopenharmony_ci			if (type == BTRFS_TREE_BLOCK_REF_KEY)
35662306a36Sopenharmony_ci				return type;
35762306a36Sopenharmony_ci			if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
35862306a36Sopenharmony_ci				ASSERT(eb->fs_info);
35962306a36Sopenharmony_ci				/*
36062306a36Sopenharmony_ci				 * Every shared one has parent tree block,
36162306a36Sopenharmony_ci				 * which must be aligned to sector size.
36262306a36Sopenharmony_ci				 */
36362306a36Sopenharmony_ci				if (offset &&
36462306a36Sopenharmony_ci				    IS_ALIGNED(offset, eb->fs_info->sectorsize))
36562306a36Sopenharmony_ci					return type;
36662306a36Sopenharmony_ci			}
36762306a36Sopenharmony_ci		} else if (is_data == BTRFS_REF_TYPE_DATA) {
36862306a36Sopenharmony_ci			if (type == BTRFS_EXTENT_DATA_REF_KEY)
36962306a36Sopenharmony_ci				return type;
37062306a36Sopenharmony_ci			if (type == BTRFS_SHARED_DATA_REF_KEY) {
37162306a36Sopenharmony_ci				ASSERT(eb->fs_info);
37262306a36Sopenharmony_ci				/*
37362306a36Sopenharmony_ci				 * Every shared one has parent tree block,
37462306a36Sopenharmony_ci				 * which must be aligned to sector size.
37562306a36Sopenharmony_ci				 */
37662306a36Sopenharmony_ci				if (offset &&
37762306a36Sopenharmony_ci				    IS_ALIGNED(offset, eb->fs_info->sectorsize))
37862306a36Sopenharmony_ci					return type;
37962306a36Sopenharmony_ci			}
38062306a36Sopenharmony_ci		} else {
38162306a36Sopenharmony_ci			ASSERT(is_data == BTRFS_REF_TYPE_ANY);
38262306a36Sopenharmony_ci			return type;
38362306a36Sopenharmony_ci		}
38462306a36Sopenharmony_ci	}
38562306a36Sopenharmony_ci
38662306a36Sopenharmony_ci	WARN_ON(1);
38762306a36Sopenharmony_ci	btrfs_print_leaf(eb);
38862306a36Sopenharmony_ci	btrfs_err(eb->fs_info,
38962306a36Sopenharmony_ci		  "eb %llu iref 0x%lx invalid extent inline ref type %d",
39062306a36Sopenharmony_ci		  eb->start, (unsigned long)iref, type);
39162306a36Sopenharmony_ci
39262306a36Sopenharmony_ci	return BTRFS_REF_TYPE_INVALID;
39362306a36Sopenharmony_ci}
39462306a36Sopenharmony_ci
39562306a36Sopenharmony_ciu64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
39662306a36Sopenharmony_ci{
39762306a36Sopenharmony_ci	u32 high_crc = ~(u32)0;
39862306a36Sopenharmony_ci	u32 low_crc = ~(u32)0;
39962306a36Sopenharmony_ci	__le64 lenum;
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_ci	lenum = cpu_to_le64(root_objectid);
40262306a36Sopenharmony_ci	high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
40362306a36Sopenharmony_ci	lenum = cpu_to_le64(owner);
40462306a36Sopenharmony_ci	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
40562306a36Sopenharmony_ci	lenum = cpu_to_le64(offset);
40662306a36Sopenharmony_ci	low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
40762306a36Sopenharmony_ci
40862306a36Sopenharmony_ci	return ((u64)high_crc << 31) ^ (u64)low_crc;
40962306a36Sopenharmony_ci}
41062306a36Sopenharmony_ci
41162306a36Sopenharmony_cistatic u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
41262306a36Sopenharmony_ci				     struct btrfs_extent_data_ref *ref)
41362306a36Sopenharmony_ci{
41462306a36Sopenharmony_ci	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
41562306a36Sopenharmony_ci				    btrfs_extent_data_ref_objectid(leaf, ref),
41662306a36Sopenharmony_ci				    btrfs_extent_data_ref_offset(leaf, ref));
41762306a36Sopenharmony_ci}
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_cistatic int match_extent_data_ref(struct extent_buffer *leaf,
42062306a36Sopenharmony_ci				 struct btrfs_extent_data_ref *ref,
42162306a36Sopenharmony_ci				 u64 root_objectid, u64 owner, u64 offset)
42262306a36Sopenharmony_ci{
42362306a36Sopenharmony_ci	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
42462306a36Sopenharmony_ci	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
42562306a36Sopenharmony_ci	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
42662306a36Sopenharmony_ci		return 0;
42762306a36Sopenharmony_ci	return 1;
42862306a36Sopenharmony_ci}
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_cistatic noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
43162306a36Sopenharmony_ci					   struct btrfs_path *path,
43262306a36Sopenharmony_ci					   u64 bytenr, u64 parent,
43362306a36Sopenharmony_ci					   u64 root_objectid,
43462306a36Sopenharmony_ci					   u64 owner, u64 offset)
43562306a36Sopenharmony_ci{
43662306a36Sopenharmony_ci	struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
43762306a36Sopenharmony_ci	struct btrfs_key key;
43862306a36Sopenharmony_ci	struct btrfs_extent_data_ref *ref;
43962306a36Sopenharmony_ci	struct extent_buffer *leaf;
44062306a36Sopenharmony_ci	u32 nritems;
44162306a36Sopenharmony_ci	int ret;
44262306a36Sopenharmony_ci	int recow;
44362306a36Sopenharmony_ci	int err = -ENOENT;
44462306a36Sopenharmony_ci
44562306a36Sopenharmony_ci	key.objectid = bytenr;
44662306a36Sopenharmony_ci	if (parent) {
44762306a36Sopenharmony_ci		key.type = BTRFS_SHARED_DATA_REF_KEY;
44862306a36Sopenharmony_ci		key.offset = parent;
44962306a36Sopenharmony_ci	} else {
45062306a36Sopenharmony_ci		key.type = BTRFS_EXTENT_DATA_REF_KEY;
45162306a36Sopenharmony_ci		key.offset = hash_extent_data_ref(root_objectid,
45262306a36Sopenharmony_ci						  owner, offset);
45362306a36Sopenharmony_ci	}
45462306a36Sopenharmony_ciagain:
45562306a36Sopenharmony_ci	recow = 0;
45662306a36Sopenharmony_ci	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
45762306a36Sopenharmony_ci	if (ret < 0) {
45862306a36Sopenharmony_ci		err = ret;
45962306a36Sopenharmony_ci		goto fail;
46062306a36Sopenharmony_ci	}
46162306a36Sopenharmony_ci
46262306a36Sopenharmony_ci	if (parent) {
46362306a36Sopenharmony_ci		if (!ret)
46462306a36Sopenharmony_ci			return 0;
46562306a36Sopenharmony_ci		goto fail;
46662306a36Sopenharmony_ci	}
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci	leaf = path->nodes[0];
46962306a36Sopenharmony_ci	nritems = btrfs_header_nritems(leaf);
47062306a36Sopenharmony_ci	while (1) {
47162306a36Sopenharmony_ci		if (path->slots[0] >= nritems) {
47262306a36Sopenharmony_ci			ret = btrfs_next_leaf(root, path);
47362306a36Sopenharmony_ci			if (ret < 0)
47462306a36Sopenharmony_ci				err = ret;
47562306a36Sopenharmony_ci			if (ret)
47662306a36Sopenharmony_ci				goto fail;
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_ci			leaf = path->nodes[0];
47962306a36Sopenharmony_ci			nritems = btrfs_header_nritems(leaf);
48062306a36Sopenharmony_ci			recow = 1;
48162306a36Sopenharmony_ci		}
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_ci		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
48462306a36Sopenharmony_ci		if (key.objectid != bytenr ||
48562306a36Sopenharmony_ci		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
48662306a36Sopenharmony_ci			goto fail;
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci		ref = btrfs_item_ptr(leaf, path->slots[0],
48962306a36Sopenharmony_ci				     struct btrfs_extent_data_ref);
49062306a36Sopenharmony_ci
49162306a36Sopenharmony_ci		if (match_extent_data_ref(leaf, ref, root_objectid,
49262306a36Sopenharmony_ci					  owner, offset)) {
49362306a36Sopenharmony_ci			if (recow) {
49462306a36Sopenharmony_ci				btrfs_release_path(path);
49562306a36Sopenharmony_ci				goto again;
49662306a36Sopenharmony_ci			}
49762306a36Sopenharmony_ci			err = 0;
49862306a36Sopenharmony_ci			break;
49962306a36Sopenharmony_ci		}
50062306a36Sopenharmony_ci		path->slots[0]++;
50162306a36Sopenharmony_ci	}
50262306a36Sopenharmony_cifail:
50362306a36Sopenharmony_ci	return err;
50462306a36Sopenharmony_ci}
50562306a36Sopenharmony_ci
50662306a36Sopenharmony_cistatic noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
50762306a36Sopenharmony_ci					   struct btrfs_path *path,
50862306a36Sopenharmony_ci					   u64 bytenr, u64 parent,
50962306a36Sopenharmony_ci					   u64 root_objectid, u64 owner,
51062306a36Sopenharmony_ci					   u64 offset, int refs_to_add)
51162306a36Sopenharmony_ci{
51262306a36Sopenharmony_ci	struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
51362306a36Sopenharmony_ci	struct btrfs_key key;
51462306a36Sopenharmony_ci	struct extent_buffer *leaf;
51562306a36Sopenharmony_ci	u32 size;
51662306a36Sopenharmony_ci	u32 num_refs;
51762306a36Sopenharmony_ci	int ret;
51862306a36Sopenharmony_ci
51962306a36Sopenharmony_ci	key.objectid = bytenr;
52062306a36Sopenharmony_ci	if (parent) {
52162306a36Sopenharmony_ci		key.type = BTRFS_SHARED_DATA_REF_KEY;
52262306a36Sopenharmony_ci		key.offset = parent;
52362306a36Sopenharmony_ci		size = sizeof(struct btrfs_shared_data_ref);
52462306a36Sopenharmony_ci	} else {
52562306a36Sopenharmony_ci		key.type = BTRFS_EXTENT_DATA_REF_KEY;
52662306a36Sopenharmony_ci		key.offset = hash_extent_data_ref(root_objectid,
52762306a36Sopenharmony_ci						  owner, offset);
52862306a36Sopenharmony_ci		size = sizeof(struct btrfs_extent_data_ref);
52962306a36Sopenharmony_ci	}
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_ci	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
53262306a36Sopenharmony_ci	if (ret && ret != -EEXIST)
53362306a36Sopenharmony_ci		goto fail;
53462306a36Sopenharmony_ci
53562306a36Sopenharmony_ci	leaf = path->nodes[0];
53662306a36Sopenharmony_ci	if (parent) {
53762306a36Sopenharmony_ci		struct btrfs_shared_data_ref *ref;
53862306a36Sopenharmony_ci		ref = btrfs_item_ptr(leaf, path->slots[0],
53962306a36Sopenharmony_ci				     struct btrfs_shared_data_ref);
54062306a36Sopenharmony_ci		if (ret == 0) {
54162306a36Sopenharmony_ci			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
54262306a36Sopenharmony_ci		} else {
54362306a36Sopenharmony_ci			num_refs = btrfs_shared_data_ref_count(leaf, ref);
54462306a36Sopenharmony_ci			num_refs += refs_to_add;
54562306a36Sopenharmony_ci			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
54662306a36Sopenharmony_ci		}
54762306a36Sopenharmony_ci	} else {
54862306a36Sopenharmony_ci		struct btrfs_extent_data_ref *ref;
54962306a36Sopenharmony_ci		while (ret == -EEXIST) {
55062306a36Sopenharmony_ci			ref = btrfs_item_ptr(leaf, path->slots[0],
55162306a36Sopenharmony_ci					     struct btrfs_extent_data_ref);
55262306a36Sopenharmony_ci			if (match_extent_data_ref(leaf, ref, root_objectid,
55362306a36Sopenharmony_ci						  owner, offset))
55462306a36Sopenharmony_ci				break;
55562306a36Sopenharmony_ci			btrfs_release_path(path);
55662306a36Sopenharmony_ci			key.offset++;
55762306a36Sopenharmony_ci			ret = btrfs_insert_empty_item(trans, root, path, &key,
55862306a36Sopenharmony_ci						      size);
55962306a36Sopenharmony_ci			if (ret && ret != -EEXIST)
56062306a36Sopenharmony_ci				goto fail;
56162306a36Sopenharmony_ci
56262306a36Sopenharmony_ci			leaf = path->nodes[0];
56362306a36Sopenharmony_ci		}
56462306a36Sopenharmony_ci		ref = btrfs_item_ptr(leaf, path->slots[0],
56562306a36Sopenharmony_ci				     struct btrfs_extent_data_ref);
56662306a36Sopenharmony_ci		if (ret == 0) {
56762306a36Sopenharmony_ci			btrfs_set_extent_data_ref_root(leaf, ref,
56862306a36Sopenharmony_ci						       root_objectid);
56962306a36Sopenharmony_ci			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
57062306a36Sopenharmony_ci			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
57162306a36Sopenharmony_ci			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
57262306a36Sopenharmony_ci		} else {
57362306a36Sopenharmony_ci			num_refs = btrfs_extent_data_ref_count(leaf, ref);
57462306a36Sopenharmony_ci			num_refs += refs_to_add;
57562306a36Sopenharmony_ci			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
57662306a36Sopenharmony_ci		}
57762306a36Sopenharmony_ci	}
57862306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, leaf);
57962306a36Sopenharmony_ci	ret = 0;
58062306a36Sopenharmony_cifail:
58162306a36Sopenharmony_ci	btrfs_release_path(path);
58262306a36Sopenharmony_ci	return ret;
58362306a36Sopenharmony_ci}
58462306a36Sopenharmony_ci
58562306a36Sopenharmony_cistatic noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
58662306a36Sopenharmony_ci					   struct btrfs_root *root,
58762306a36Sopenharmony_ci					   struct btrfs_path *path,
58862306a36Sopenharmony_ci					   int refs_to_drop)
58962306a36Sopenharmony_ci{
59062306a36Sopenharmony_ci	struct btrfs_key key;
59162306a36Sopenharmony_ci	struct btrfs_extent_data_ref *ref1 = NULL;
59262306a36Sopenharmony_ci	struct btrfs_shared_data_ref *ref2 = NULL;
59362306a36Sopenharmony_ci	struct extent_buffer *leaf;
59462306a36Sopenharmony_ci	u32 num_refs = 0;
59562306a36Sopenharmony_ci	int ret = 0;
59662306a36Sopenharmony_ci
59762306a36Sopenharmony_ci	leaf = path->nodes[0];
59862306a36Sopenharmony_ci	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
59962306a36Sopenharmony_ci
60062306a36Sopenharmony_ci	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
60162306a36Sopenharmony_ci		ref1 = btrfs_item_ptr(leaf, path->slots[0],
60262306a36Sopenharmony_ci				      struct btrfs_extent_data_ref);
60362306a36Sopenharmony_ci		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
60462306a36Sopenharmony_ci	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
60562306a36Sopenharmony_ci		ref2 = btrfs_item_ptr(leaf, path->slots[0],
60662306a36Sopenharmony_ci				      struct btrfs_shared_data_ref);
60762306a36Sopenharmony_ci		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
60862306a36Sopenharmony_ci	} else {
60962306a36Sopenharmony_ci		btrfs_err(trans->fs_info,
61062306a36Sopenharmony_ci			  "unrecognized backref key (%llu %u %llu)",
61162306a36Sopenharmony_ci			  key.objectid, key.type, key.offset);
61262306a36Sopenharmony_ci		btrfs_abort_transaction(trans, -EUCLEAN);
61362306a36Sopenharmony_ci		return -EUCLEAN;
61462306a36Sopenharmony_ci	}
61562306a36Sopenharmony_ci
61662306a36Sopenharmony_ci	BUG_ON(num_refs < refs_to_drop);
61762306a36Sopenharmony_ci	num_refs -= refs_to_drop;
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_ci	if (num_refs == 0) {
62062306a36Sopenharmony_ci		ret = btrfs_del_item(trans, root, path);
62162306a36Sopenharmony_ci	} else {
62262306a36Sopenharmony_ci		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
62362306a36Sopenharmony_ci			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
62462306a36Sopenharmony_ci		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
62562306a36Sopenharmony_ci			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
62662306a36Sopenharmony_ci		btrfs_mark_buffer_dirty(trans, leaf);
62762306a36Sopenharmony_ci	}
62862306a36Sopenharmony_ci	return ret;
62962306a36Sopenharmony_ci}
63062306a36Sopenharmony_ci
63162306a36Sopenharmony_cistatic noinline u32 extent_data_ref_count(struct btrfs_path *path,
63262306a36Sopenharmony_ci					  struct btrfs_extent_inline_ref *iref)
63362306a36Sopenharmony_ci{
63462306a36Sopenharmony_ci	struct btrfs_key key;
63562306a36Sopenharmony_ci	struct extent_buffer *leaf;
63662306a36Sopenharmony_ci	struct btrfs_extent_data_ref *ref1;
63762306a36Sopenharmony_ci	struct btrfs_shared_data_ref *ref2;
63862306a36Sopenharmony_ci	u32 num_refs = 0;
63962306a36Sopenharmony_ci	int type;
64062306a36Sopenharmony_ci
64162306a36Sopenharmony_ci	leaf = path->nodes[0];
64262306a36Sopenharmony_ci	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
64362306a36Sopenharmony_ci
64462306a36Sopenharmony_ci	if (iref) {
64562306a36Sopenharmony_ci		/*
64662306a36Sopenharmony_ci		 * If type is invalid, we should have bailed out earlier than
64762306a36Sopenharmony_ci		 * this call.
64862306a36Sopenharmony_ci		 */
64962306a36Sopenharmony_ci		type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
65062306a36Sopenharmony_ci		ASSERT(type != BTRFS_REF_TYPE_INVALID);
65162306a36Sopenharmony_ci		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
65262306a36Sopenharmony_ci			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
65362306a36Sopenharmony_ci			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
65462306a36Sopenharmony_ci		} else {
65562306a36Sopenharmony_ci			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
65662306a36Sopenharmony_ci			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
65762306a36Sopenharmony_ci		}
65862306a36Sopenharmony_ci	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
65962306a36Sopenharmony_ci		ref1 = btrfs_item_ptr(leaf, path->slots[0],
66062306a36Sopenharmony_ci				      struct btrfs_extent_data_ref);
66162306a36Sopenharmony_ci		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
66262306a36Sopenharmony_ci	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
66362306a36Sopenharmony_ci		ref2 = btrfs_item_ptr(leaf, path->slots[0],
66462306a36Sopenharmony_ci				      struct btrfs_shared_data_ref);
66562306a36Sopenharmony_ci		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
66662306a36Sopenharmony_ci	} else {
66762306a36Sopenharmony_ci		WARN_ON(1);
66862306a36Sopenharmony_ci	}
66962306a36Sopenharmony_ci	return num_refs;
67062306a36Sopenharmony_ci}
67162306a36Sopenharmony_ci
67262306a36Sopenharmony_cistatic noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
67362306a36Sopenharmony_ci					  struct btrfs_path *path,
67462306a36Sopenharmony_ci					  u64 bytenr, u64 parent,
67562306a36Sopenharmony_ci					  u64 root_objectid)
67662306a36Sopenharmony_ci{
67762306a36Sopenharmony_ci	struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
67862306a36Sopenharmony_ci	struct btrfs_key key;
67962306a36Sopenharmony_ci	int ret;
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci	key.objectid = bytenr;
68262306a36Sopenharmony_ci	if (parent) {
68362306a36Sopenharmony_ci		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
68462306a36Sopenharmony_ci		key.offset = parent;
68562306a36Sopenharmony_ci	} else {
68662306a36Sopenharmony_ci		key.type = BTRFS_TREE_BLOCK_REF_KEY;
68762306a36Sopenharmony_ci		key.offset = root_objectid;
68862306a36Sopenharmony_ci	}
68962306a36Sopenharmony_ci
69062306a36Sopenharmony_ci	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
69162306a36Sopenharmony_ci	if (ret > 0)
69262306a36Sopenharmony_ci		ret = -ENOENT;
69362306a36Sopenharmony_ci	return ret;
69462306a36Sopenharmony_ci}
69562306a36Sopenharmony_ci
69662306a36Sopenharmony_cistatic noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
69762306a36Sopenharmony_ci					  struct btrfs_path *path,
69862306a36Sopenharmony_ci					  u64 bytenr, u64 parent,
69962306a36Sopenharmony_ci					  u64 root_objectid)
70062306a36Sopenharmony_ci{
70162306a36Sopenharmony_ci	struct btrfs_root *root = btrfs_extent_root(trans->fs_info, bytenr);
70262306a36Sopenharmony_ci	struct btrfs_key key;
70362306a36Sopenharmony_ci	int ret;
70462306a36Sopenharmony_ci
70562306a36Sopenharmony_ci	key.objectid = bytenr;
70662306a36Sopenharmony_ci	if (parent) {
70762306a36Sopenharmony_ci		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
70862306a36Sopenharmony_ci		key.offset = parent;
70962306a36Sopenharmony_ci	} else {
71062306a36Sopenharmony_ci		key.type = BTRFS_TREE_BLOCK_REF_KEY;
71162306a36Sopenharmony_ci		key.offset = root_objectid;
71262306a36Sopenharmony_ci	}
71362306a36Sopenharmony_ci
71462306a36Sopenharmony_ci	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
71562306a36Sopenharmony_ci	btrfs_release_path(path);
71662306a36Sopenharmony_ci	return ret;
71762306a36Sopenharmony_ci}
71862306a36Sopenharmony_ci
71962306a36Sopenharmony_cistatic inline int extent_ref_type(u64 parent, u64 owner)
72062306a36Sopenharmony_ci{
72162306a36Sopenharmony_ci	int type;
72262306a36Sopenharmony_ci	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
72362306a36Sopenharmony_ci		if (parent > 0)
72462306a36Sopenharmony_ci			type = BTRFS_SHARED_BLOCK_REF_KEY;
72562306a36Sopenharmony_ci		else
72662306a36Sopenharmony_ci			type = BTRFS_TREE_BLOCK_REF_KEY;
72762306a36Sopenharmony_ci	} else {
72862306a36Sopenharmony_ci		if (parent > 0)
72962306a36Sopenharmony_ci			type = BTRFS_SHARED_DATA_REF_KEY;
73062306a36Sopenharmony_ci		else
73162306a36Sopenharmony_ci			type = BTRFS_EXTENT_DATA_REF_KEY;
73262306a36Sopenharmony_ci	}
73362306a36Sopenharmony_ci	return type;
73462306a36Sopenharmony_ci}
73562306a36Sopenharmony_ci
73662306a36Sopenharmony_cistatic int find_next_key(struct btrfs_path *path, int level,
73762306a36Sopenharmony_ci			 struct btrfs_key *key)
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_ci{
74062306a36Sopenharmony_ci	for (; level < BTRFS_MAX_LEVEL; level++) {
74162306a36Sopenharmony_ci		if (!path->nodes[level])
74262306a36Sopenharmony_ci			break;
74362306a36Sopenharmony_ci		if (path->slots[level] + 1 >=
74462306a36Sopenharmony_ci		    btrfs_header_nritems(path->nodes[level]))
74562306a36Sopenharmony_ci			continue;
74662306a36Sopenharmony_ci		if (level == 0)
74762306a36Sopenharmony_ci			btrfs_item_key_to_cpu(path->nodes[level], key,
74862306a36Sopenharmony_ci					      path->slots[level] + 1);
74962306a36Sopenharmony_ci		else
75062306a36Sopenharmony_ci			btrfs_node_key_to_cpu(path->nodes[level], key,
75162306a36Sopenharmony_ci					      path->slots[level] + 1);
75262306a36Sopenharmony_ci		return 0;
75362306a36Sopenharmony_ci	}
75462306a36Sopenharmony_ci	return 1;
75562306a36Sopenharmony_ci}
75662306a36Sopenharmony_ci
75762306a36Sopenharmony_ci/*
75862306a36Sopenharmony_ci * look for inline back ref. if back ref is found, *ref_ret is set
75962306a36Sopenharmony_ci * to the address of inline back ref, and 0 is returned.
76062306a36Sopenharmony_ci *
76162306a36Sopenharmony_ci * if back ref isn't found, *ref_ret is set to the address where it
76262306a36Sopenharmony_ci * should be inserted, and -ENOENT is returned.
76362306a36Sopenharmony_ci *
76462306a36Sopenharmony_ci * if insert is true and there are too many inline back refs, the path
76562306a36Sopenharmony_ci * points to the extent item, and -EAGAIN is returned.
76662306a36Sopenharmony_ci *
76762306a36Sopenharmony_ci * NOTE: inline back refs are ordered in the same way that back ref
76862306a36Sopenharmony_ci *	 items in the tree are ordered.
76962306a36Sopenharmony_ci */
77062306a36Sopenharmony_cistatic noinline_for_stack
77162306a36Sopenharmony_ciint lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
77262306a36Sopenharmony_ci				 struct btrfs_path *path,
77362306a36Sopenharmony_ci				 struct btrfs_extent_inline_ref **ref_ret,
77462306a36Sopenharmony_ci				 u64 bytenr, u64 num_bytes,
77562306a36Sopenharmony_ci				 u64 parent, u64 root_objectid,
77662306a36Sopenharmony_ci				 u64 owner, u64 offset, int insert)
77762306a36Sopenharmony_ci{
77862306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
77962306a36Sopenharmony_ci	struct btrfs_root *root = btrfs_extent_root(fs_info, bytenr);
78062306a36Sopenharmony_ci	struct btrfs_key key;
78162306a36Sopenharmony_ci	struct extent_buffer *leaf;
78262306a36Sopenharmony_ci	struct btrfs_extent_item *ei;
78362306a36Sopenharmony_ci	struct btrfs_extent_inline_ref *iref;
78462306a36Sopenharmony_ci	u64 flags;
78562306a36Sopenharmony_ci	u64 item_size;
78662306a36Sopenharmony_ci	unsigned long ptr;
78762306a36Sopenharmony_ci	unsigned long end;
78862306a36Sopenharmony_ci	int extra_size;
78962306a36Sopenharmony_ci	int type;
79062306a36Sopenharmony_ci	int want;
79162306a36Sopenharmony_ci	int ret;
79262306a36Sopenharmony_ci	int err = 0;
79362306a36Sopenharmony_ci	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
79462306a36Sopenharmony_ci	int needed;
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci	key.objectid = bytenr;
79762306a36Sopenharmony_ci	key.type = BTRFS_EXTENT_ITEM_KEY;
79862306a36Sopenharmony_ci	key.offset = num_bytes;
79962306a36Sopenharmony_ci
80062306a36Sopenharmony_ci	want = extent_ref_type(parent, owner);
80162306a36Sopenharmony_ci	if (insert) {
80262306a36Sopenharmony_ci		extra_size = btrfs_extent_inline_ref_size(want);
80362306a36Sopenharmony_ci		path->search_for_extension = 1;
80462306a36Sopenharmony_ci		path->keep_locks = 1;
80562306a36Sopenharmony_ci	} else
80662306a36Sopenharmony_ci		extra_size = -1;
80762306a36Sopenharmony_ci
80862306a36Sopenharmony_ci	/*
80962306a36Sopenharmony_ci	 * Owner is our level, so we can just add one to get the level for the
81062306a36Sopenharmony_ci	 * block we are interested in.
81162306a36Sopenharmony_ci	 */
81262306a36Sopenharmony_ci	if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
81362306a36Sopenharmony_ci		key.type = BTRFS_METADATA_ITEM_KEY;
81462306a36Sopenharmony_ci		key.offset = owner;
81562306a36Sopenharmony_ci	}
81662306a36Sopenharmony_ci
81762306a36Sopenharmony_ciagain:
81862306a36Sopenharmony_ci	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
81962306a36Sopenharmony_ci	if (ret < 0) {
82062306a36Sopenharmony_ci		err = ret;
82162306a36Sopenharmony_ci		goto out;
82262306a36Sopenharmony_ci	}
82362306a36Sopenharmony_ci
82462306a36Sopenharmony_ci	/*
82562306a36Sopenharmony_ci	 * We may be a newly converted file system which still has the old fat
82662306a36Sopenharmony_ci	 * extent entries for metadata, so try and see if we have one of those.
82762306a36Sopenharmony_ci	 */
82862306a36Sopenharmony_ci	if (ret > 0 && skinny_metadata) {
82962306a36Sopenharmony_ci		skinny_metadata = false;
83062306a36Sopenharmony_ci		if (path->slots[0]) {
83162306a36Sopenharmony_ci			path->slots[0]--;
83262306a36Sopenharmony_ci			btrfs_item_key_to_cpu(path->nodes[0], &key,
83362306a36Sopenharmony_ci					      path->slots[0]);
83462306a36Sopenharmony_ci			if (key.objectid == bytenr &&
83562306a36Sopenharmony_ci			    key.type == BTRFS_EXTENT_ITEM_KEY &&
83662306a36Sopenharmony_ci			    key.offset == num_bytes)
83762306a36Sopenharmony_ci				ret = 0;
83862306a36Sopenharmony_ci		}
83962306a36Sopenharmony_ci		if (ret) {
84062306a36Sopenharmony_ci			key.objectid = bytenr;
84162306a36Sopenharmony_ci			key.type = BTRFS_EXTENT_ITEM_KEY;
84262306a36Sopenharmony_ci			key.offset = num_bytes;
84362306a36Sopenharmony_ci			btrfs_release_path(path);
84462306a36Sopenharmony_ci			goto again;
84562306a36Sopenharmony_ci		}
84662306a36Sopenharmony_ci	}
84762306a36Sopenharmony_ci
84862306a36Sopenharmony_ci	if (ret && !insert) {
84962306a36Sopenharmony_ci		err = -ENOENT;
85062306a36Sopenharmony_ci		goto out;
85162306a36Sopenharmony_ci	} else if (WARN_ON(ret)) {
85262306a36Sopenharmony_ci		btrfs_print_leaf(path->nodes[0]);
85362306a36Sopenharmony_ci		btrfs_err(fs_info,
85462306a36Sopenharmony_ci"extent item not found for insert, bytenr %llu num_bytes %llu parent %llu root_objectid %llu owner %llu offset %llu",
85562306a36Sopenharmony_ci			  bytenr, num_bytes, parent, root_objectid, owner,
85662306a36Sopenharmony_ci			  offset);
85762306a36Sopenharmony_ci		err = -EIO;
85862306a36Sopenharmony_ci		goto out;
85962306a36Sopenharmony_ci	}
86062306a36Sopenharmony_ci
86162306a36Sopenharmony_ci	leaf = path->nodes[0];
86262306a36Sopenharmony_ci	item_size = btrfs_item_size(leaf, path->slots[0]);
86362306a36Sopenharmony_ci	if (unlikely(item_size < sizeof(*ei))) {
86462306a36Sopenharmony_ci		err = -EUCLEAN;
86562306a36Sopenharmony_ci		btrfs_err(fs_info,
86662306a36Sopenharmony_ci			  "unexpected extent item size, has %llu expect >= %zu",
86762306a36Sopenharmony_ci			  item_size, sizeof(*ei));
86862306a36Sopenharmony_ci		btrfs_abort_transaction(trans, err);
86962306a36Sopenharmony_ci		goto out;
87062306a36Sopenharmony_ci	}
87162306a36Sopenharmony_ci
87262306a36Sopenharmony_ci	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
87362306a36Sopenharmony_ci	flags = btrfs_extent_flags(leaf, ei);
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_ci	ptr = (unsigned long)(ei + 1);
87662306a36Sopenharmony_ci	end = (unsigned long)ei + item_size;
87762306a36Sopenharmony_ci
87862306a36Sopenharmony_ci	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
87962306a36Sopenharmony_ci		ptr += sizeof(struct btrfs_tree_block_info);
88062306a36Sopenharmony_ci		BUG_ON(ptr > end);
88162306a36Sopenharmony_ci	}
88262306a36Sopenharmony_ci
88362306a36Sopenharmony_ci	if (owner >= BTRFS_FIRST_FREE_OBJECTID)
88462306a36Sopenharmony_ci		needed = BTRFS_REF_TYPE_DATA;
88562306a36Sopenharmony_ci	else
88662306a36Sopenharmony_ci		needed = BTRFS_REF_TYPE_BLOCK;
88762306a36Sopenharmony_ci
88862306a36Sopenharmony_ci	err = -ENOENT;
88962306a36Sopenharmony_ci	while (1) {
89062306a36Sopenharmony_ci		if (ptr >= end) {
89162306a36Sopenharmony_ci			if (ptr > end) {
89262306a36Sopenharmony_ci				err = -EUCLEAN;
89362306a36Sopenharmony_ci				btrfs_print_leaf(path->nodes[0]);
89462306a36Sopenharmony_ci				btrfs_crit(fs_info,
89562306a36Sopenharmony_ci"overrun extent record at slot %d while looking for inline extent for root %llu owner %llu offset %llu parent %llu",
89662306a36Sopenharmony_ci					path->slots[0], root_objectid, owner, offset, parent);
89762306a36Sopenharmony_ci			}
89862306a36Sopenharmony_ci			break;
89962306a36Sopenharmony_ci		}
90062306a36Sopenharmony_ci		iref = (struct btrfs_extent_inline_ref *)ptr;
90162306a36Sopenharmony_ci		type = btrfs_get_extent_inline_ref_type(leaf, iref, needed);
90262306a36Sopenharmony_ci		if (type == BTRFS_REF_TYPE_INVALID) {
90362306a36Sopenharmony_ci			err = -EUCLEAN;
90462306a36Sopenharmony_ci			goto out;
90562306a36Sopenharmony_ci		}
90662306a36Sopenharmony_ci
90762306a36Sopenharmony_ci		if (want < type)
90862306a36Sopenharmony_ci			break;
90962306a36Sopenharmony_ci		if (want > type) {
91062306a36Sopenharmony_ci			ptr += btrfs_extent_inline_ref_size(type);
91162306a36Sopenharmony_ci			continue;
91262306a36Sopenharmony_ci		}
91362306a36Sopenharmony_ci
91462306a36Sopenharmony_ci		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
91562306a36Sopenharmony_ci			struct btrfs_extent_data_ref *dref;
91662306a36Sopenharmony_ci			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
91762306a36Sopenharmony_ci			if (match_extent_data_ref(leaf, dref, root_objectid,
91862306a36Sopenharmony_ci						  owner, offset)) {
91962306a36Sopenharmony_ci				err = 0;
92062306a36Sopenharmony_ci				break;
92162306a36Sopenharmony_ci			}
92262306a36Sopenharmony_ci			if (hash_extent_data_ref_item(leaf, dref) <
92362306a36Sopenharmony_ci			    hash_extent_data_ref(root_objectid, owner, offset))
92462306a36Sopenharmony_ci				break;
92562306a36Sopenharmony_ci		} else {
92662306a36Sopenharmony_ci			u64 ref_offset;
92762306a36Sopenharmony_ci			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
92862306a36Sopenharmony_ci			if (parent > 0) {
92962306a36Sopenharmony_ci				if (parent == ref_offset) {
93062306a36Sopenharmony_ci					err = 0;
93162306a36Sopenharmony_ci					break;
93262306a36Sopenharmony_ci				}
93362306a36Sopenharmony_ci				if (ref_offset < parent)
93462306a36Sopenharmony_ci					break;
93562306a36Sopenharmony_ci			} else {
93662306a36Sopenharmony_ci				if (root_objectid == ref_offset) {
93762306a36Sopenharmony_ci					err = 0;
93862306a36Sopenharmony_ci					break;
93962306a36Sopenharmony_ci				}
94062306a36Sopenharmony_ci				if (ref_offset < root_objectid)
94162306a36Sopenharmony_ci					break;
94262306a36Sopenharmony_ci			}
94362306a36Sopenharmony_ci		}
94462306a36Sopenharmony_ci		ptr += btrfs_extent_inline_ref_size(type);
94562306a36Sopenharmony_ci	}
94662306a36Sopenharmony_ci	if (err == -ENOENT && insert) {
94762306a36Sopenharmony_ci		if (item_size + extra_size >=
94862306a36Sopenharmony_ci		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
94962306a36Sopenharmony_ci			err = -EAGAIN;
95062306a36Sopenharmony_ci			goto out;
95162306a36Sopenharmony_ci		}
95262306a36Sopenharmony_ci		/*
95362306a36Sopenharmony_ci		 * To add new inline back ref, we have to make sure
95462306a36Sopenharmony_ci		 * there is no corresponding back ref item.
95562306a36Sopenharmony_ci		 * For simplicity, we just do not add new inline back
95662306a36Sopenharmony_ci		 * ref if there is any kind of item for this block
95762306a36Sopenharmony_ci		 */
95862306a36Sopenharmony_ci		if (find_next_key(path, 0, &key) == 0 &&
95962306a36Sopenharmony_ci		    key.objectid == bytenr &&
96062306a36Sopenharmony_ci		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
96162306a36Sopenharmony_ci			err = -EAGAIN;
96262306a36Sopenharmony_ci			goto out;
96362306a36Sopenharmony_ci		}
96462306a36Sopenharmony_ci	}
96562306a36Sopenharmony_ci	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
96662306a36Sopenharmony_ciout:
96762306a36Sopenharmony_ci	if (insert) {
96862306a36Sopenharmony_ci		path->keep_locks = 0;
96962306a36Sopenharmony_ci		path->search_for_extension = 0;
97062306a36Sopenharmony_ci		btrfs_unlock_up_safe(path, 1);
97162306a36Sopenharmony_ci	}
97262306a36Sopenharmony_ci	return err;
97362306a36Sopenharmony_ci}
97462306a36Sopenharmony_ci
97562306a36Sopenharmony_ci/*
97662306a36Sopenharmony_ci * helper to add new inline back ref
97762306a36Sopenharmony_ci */
97862306a36Sopenharmony_cistatic noinline_for_stack
97962306a36Sopenharmony_civoid setup_inline_extent_backref(struct btrfs_trans_handle *trans,
98062306a36Sopenharmony_ci				 struct btrfs_path *path,
98162306a36Sopenharmony_ci				 struct btrfs_extent_inline_ref *iref,
98262306a36Sopenharmony_ci				 u64 parent, u64 root_objectid,
98362306a36Sopenharmony_ci				 u64 owner, u64 offset, int refs_to_add,
98462306a36Sopenharmony_ci				 struct btrfs_delayed_extent_op *extent_op)
98562306a36Sopenharmony_ci{
98662306a36Sopenharmony_ci	struct extent_buffer *leaf;
98762306a36Sopenharmony_ci	struct btrfs_extent_item *ei;
98862306a36Sopenharmony_ci	unsigned long ptr;
98962306a36Sopenharmony_ci	unsigned long end;
99062306a36Sopenharmony_ci	unsigned long item_offset;
99162306a36Sopenharmony_ci	u64 refs;
99262306a36Sopenharmony_ci	int size;
99362306a36Sopenharmony_ci	int type;
99462306a36Sopenharmony_ci
99562306a36Sopenharmony_ci	leaf = path->nodes[0];
99662306a36Sopenharmony_ci	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
99762306a36Sopenharmony_ci	item_offset = (unsigned long)iref - (unsigned long)ei;
99862306a36Sopenharmony_ci
99962306a36Sopenharmony_ci	type = extent_ref_type(parent, owner);
100062306a36Sopenharmony_ci	size = btrfs_extent_inline_ref_size(type);
100162306a36Sopenharmony_ci
100262306a36Sopenharmony_ci	btrfs_extend_item(trans, path, size);
100362306a36Sopenharmony_ci
100462306a36Sopenharmony_ci	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
100562306a36Sopenharmony_ci	refs = btrfs_extent_refs(leaf, ei);
100662306a36Sopenharmony_ci	refs += refs_to_add;
100762306a36Sopenharmony_ci	btrfs_set_extent_refs(leaf, ei, refs);
100862306a36Sopenharmony_ci	if (extent_op)
100962306a36Sopenharmony_ci		__run_delayed_extent_op(extent_op, leaf, ei);
101062306a36Sopenharmony_ci
101162306a36Sopenharmony_ci	ptr = (unsigned long)ei + item_offset;
101262306a36Sopenharmony_ci	end = (unsigned long)ei + btrfs_item_size(leaf, path->slots[0]);
101362306a36Sopenharmony_ci	if (ptr < end - size)
101462306a36Sopenharmony_ci		memmove_extent_buffer(leaf, ptr + size, ptr,
101562306a36Sopenharmony_ci				      end - size - ptr);
101662306a36Sopenharmony_ci
101762306a36Sopenharmony_ci	iref = (struct btrfs_extent_inline_ref *)ptr;
101862306a36Sopenharmony_ci	btrfs_set_extent_inline_ref_type(leaf, iref, type);
101962306a36Sopenharmony_ci	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
102062306a36Sopenharmony_ci		struct btrfs_extent_data_ref *dref;
102162306a36Sopenharmony_ci		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
102262306a36Sopenharmony_ci		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
102362306a36Sopenharmony_ci		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
102462306a36Sopenharmony_ci		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
102562306a36Sopenharmony_ci		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
102662306a36Sopenharmony_ci	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
102762306a36Sopenharmony_ci		struct btrfs_shared_data_ref *sref;
102862306a36Sopenharmony_ci		sref = (struct btrfs_shared_data_ref *)(iref + 1);
102962306a36Sopenharmony_ci		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
103062306a36Sopenharmony_ci		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
103162306a36Sopenharmony_ci	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
103262306a36Sopenharmony_ci		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
103362306a36Sopenharmony_ci	} else {
103462306a36Sopenharmony_ci		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
103562306a36Sopenharmony_ci	}
103662306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, leaf);
103762306a36Sopenharmony_ci}
103862306a36Sopenharmony_ci
103962306a36Sopenharmony_cistatic int lookup_extent_backref(struct btrfs_trans_handle *trans,
104062306a36Sopenharmony_ci				 struct btrfs_path *path,
104162306a36Sopenharmony_ci				 struct btrfs_extent_inline_ref **ref_ret,
104262306a36Sopenharmony_ci				 u64 bytenr, u64 num_bytes, u64 parent,
104362306a36Sopenharmony_ci				 u64 root_objectid, u64 owner, u64 offset)
104462306a36Sopenharmony_ci{
104562306a36Sopenharmony_ci	int ret;
104662306a36Sopenharmony_ci
104762306a36Sopenharmony_ci	ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr,
104862306a36Sopenharmony_ci					   num_bytes, parent, root_objectid,
104962306a36Sopenharmony_ci					   owner, offset, 0);
105062306a36Sopenharmony_ci	if (ret != -ENOENT)
105162306a36Sopenharmony_ci		return ret;
105262306a36Sopenharmony_ci
105362306a36Sopenharmony_ci	btrfs_release_path(path);
105462306a36Sopenharmony_ci	*ref_ret = NULL;
105562306a36Sopenharmony_ci
105662306a36Sopenharmony_ci	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
105762306a36Sopenharmony_ci		ret = lookup_tree_block_ref(trans, path, bytenr, parent,
105862306a36Sopenharmony_ci					    root_objectid);
105962306a36Sopenharmony_ci	} else {
106062306a36Sopenharmony_ci		ret = lookup_extent_data_ref(trans, path, bytenr, parent,
106162306a36Sopenharmony_ci					     root_objectid, owner, offset);
106262306a36Sopenharmony_ci	}
106362306a36Sopenharmony_ci	return ret;
106462306a36Sopenharmony_ci}
106562306a36Sopenharmony_ci
106662306a36Sopenharmony_ci/*
106762306a36Sopenharmony_ci * helper to update/remove inline back ref
106862306a36Sopenharmony_ci */
106962306a36Sopenharmony_cistatic noinline_for_stack int update_inline_extent_backref(
107062306a36Sopenharmony_ci				  struct btrfs_trans_handle *trans,
107162306a36Sopenharmony_ci				  struct btrfs_path *path,
107262306a36Sopenharmony_ci				  struct btrfs_extent_inline_ref *iref,
107362306a36Sopenharmony_ci				  int refs_to_mod,
107462306a36Sopenharmony_ci				  struct btrfs_delayed_extent_op *extent_op)
107562306a36Sopenharmony_ci{
107662306a36Sopenharmony_ci	struct extent_buffer *leaf = path->nodes[0];
107762306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = leaf->fs_info;
107862306a36Sopenharmony_ci	struct btrfs_extent_item *ei;
107962306a36Sopenharmony_ci	struct btrfs_extent_data_ref *dref = NULL;
108062306a36Sopenharmony_ci	struct btrfs_shared_data_ref *sref = NULL;
108162306a36Sopenharmony_ci	unsigned long ptr;
108262306a36Sopenharmony_ci	unsigned long end;
108362306a36Sopenharmony_ci	u32 item_size;
108462306a36Sopenharmony_ci	int size;
108562306a36Sopenharmony_ci	int type;
108662306a36Sopenharmony_ci	u64 refs;
108762306a36Sopenharmony_ci
108862306a36Sopenharmony_ci	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
108962306a36Sopenharmony_ci	refs = btrfs_extent_refs(leaf, ei);
109062306a36Sopenharmony_ci	if (unlikely(refs_to_mod < 0 && refs + refs_to_mod <= 0)) {
109162306a36Sopenharmony_ci		struct btrfs_key key;
109262306a36Sopenharmony_ci		u32 extent_size;
109362306a36Sopenharmony_ci
109462306a36Sopenharmony_ci		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
109562306a36Sopenharmony_ci		if (key.type == BTRFS_METADATA_ITEM_KEY)
109662306a36Sopenharmony_ci			extent_size = fs_info->nodesize;
109762306a36Sopenharmony_ci		else
109862306a36Sopenharmony_ci			extent_size = key.offset;
109962306a36Sopenharmony_ci		btrfs_print_leaf(leaf);
110062306a36Sopenharmony_ci		btrfs_err(fs_info,
110162306a36Sopenharmony_ci	"invalid refs_to_mod for extent %llu num_bytes %u, has %d expect >= -%llu",
110262306a36Sopenharmony_ci			  key.objectid, extent_size, refs_to_mod, refs);
110362306a36Sopenharmony_ci		return -EUCLEAN;
110462306a36Sopenharmony_ci	}
110562306a36Sopenharmony_ci	refs += refs_to_mod;
110662306a36Sopenharmony_ci	btrfs_set_extent_refs(leaf, ei, refs);
110762306a36Sopenharmony_ci	if (extent_op)
110862306a36Sopenharmony_ci		__run_delayed_extent_op(extent_op, leaf, ei);
110962306a36Sopenharmony_ci
111062306a36Sopenharmony_ci	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
111162306a36Sopenharmony_ci	/*
111262306a36Sopenharmony_ci	 * Function btrfs_get_extent_inline_ref_type() has already printed
111362306a36Sopenharmony_ci	 * error messages.
111462306a36Sopenharmony_ci	 */
111562306a36Sopenharmony_ci	if (unlikely(type == BTRFS_REF_TYPE_INVALID))
111662306a36Sopenharmony_ci		return -EUCLEAN;
111762306a36Sopenharmony_ci
111862306a36Sopenharmony_ci	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
111962306a36Sopenharmony_ci		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
112062306a36Sopenharmony_ci		refs = btrfs_extent_data_ref_count(leaf, dref);
112162306a36Sopenharmony_ci	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
112262306a36Sopenharmony_ci		sref = (struct btrfs_shared_data_ref *)(iref + 1);
112362306a36Sopenharmony_ci		refs = btrfs_shared_data_ref_count(leaf, sref);
112462306a36Sopenharmony_ci	} else {
112562306a36Sopenharmony_ci		refs = 1;
112662306a36Sopenharmony_ci		/*
112762306a36Sopenharmony_ci		 * For tree blocks we can only drop one ref for it, and tree
112862306a36Sopenharmony_ci		 * blocks should not have refs > 1.
112962306a36Sopenharmony_ci		 *
113062306a36Sopenharmony_ci		 * Furthermore if we're inserting a new inline backref, we
113162306a36Sopenharmony_ci		 * won't reach this path either. That would be
113262306a36Sopenharmony_ci		 * setup_inline_extent_backref().
113362306a36Sopenharmony_ci		 */
113462306a36Sopenharmony_ci		if (unlikely(refs_to_mod != -1)) {
113562306a36Sopenharmony_ci			struct btrfs_key key;
113662306a36Sopenharmony_ci
113762306a36Sopenharmony_ci			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
113862306a36Sopenharmony_ci
113962306a36Sopenharmony_ci			btrfs_print_leaf(leaf);
114062306a36Sopenharmony_ci			btrfs_err(fs_info,
114162306a36Sopenharmony_ci			"invalid refs_to_mod for tree block %llu, has %d expect -1",
114262306a36Sopenharmony_ci				  key.objectid, refs_to_mod);
114362306a36Sopenharmony_ci			return -EUCLEAN;
114462306a36Sopenharmony_ci		}
114562306a36Sopenharmony_ci	}
114662306a36Sopenharmony_ci
114762306a36Sopenharmony_ci	if (unlikely(refs_to_mod < 0 && refs < -refs_to_mod)) {
114862306a36Sopenharmony_ci		struct btrfs_key key;
114962306a36Sopenharmony_ci		u32 extent_size;
115062306a36Sopenharmony_ci
115162306a36Sopenharmony_ci		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
115262306a36Sopenharmony_ci		if (key.type == BTRFS_METADATA_ITEM_KEY)
115362306a36Sopenharmony_ci			extent_size = fs_info->nodesize;
115462306a36Sopenharmony_ci		else
115562306a36Sopenharmony_ci			extent_size = key.offset;
115662306a36Sopenharmony_ci		btrfs_print_leaf(leaf);
115762306a36Sopenharmony_ci		btrfs_err(fs_info,
115862306a36Sopenharmony_ci"invalid refs_to_mod for backref entry, iref %lu extent %llu num_bytes %u, has %d expect >= -%llu",
115962306a36Sopenharmony_ci			  (unsigned long)iref, key.objectid, extent_size,
116062306a36Sopenharmony_ci			  refs_to_mod, refs);
116162306a36Sopenharmony_ci		return -EUCLEAN;
116262306a36Sopenharmony_ci	}
116362306a36Sopenharmony_ci	refs += refs_to_mod;
116462306a36Sopenharmony_ci
116562306a36Sopenharmony_ci	if (refs > 0) {
116662306a36Sopenharmony_ci		if (type == BTRFS_EXTENT_DATA_REF_KEY)
116762306a36Sopenharmony_ci			btrfs_set_extent_data_ref_count(leaf, dref, refs);
116862306a36Sopenharmony_ci		else
116962306a36Sopenharmony_ci			btrfs_set_shared_data_ref_count(leaf, sref, refs);
117062306a36Sopenharmony_ci	} else {
117162306a36Sopenharmony_ci		size =  btrfs_extent_inline_ref_size(type);
117262306a36Sopenharmony_ci		item_size = btrfs_item_size(leaf, path->slots[0]);
117362306a36Sopenharmony_ci		ptr = (unsigned long)iref;
117462306a36Sopenharmony_ci		end = (unsigned long)ei + item_size;
117562306a36Sopenharmony_ci		if (ptr + size < end)
117662306a36Sopenharmony_ci			memmove_extent_buffer(leaf, ptr, ptr + size,
117762306a36Sopenharmony_ci					      end - ptr - size);
117862306a36Sopenharmony_ci		item_size -= size;
117962306a36Sopenharmony_ci		btrfs_truncate_item(trans, path, item_size, 1);
118062306a36Sopenharmony_ci	}
118162306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, leaf);
118262306a36Sopenharmony_ci	return 0;
118362306a36Sopenharmony_ci}
118462306a36Sopenharmony_ci
118562306a36Sopenharmony_cistatic noinline_for_stack
118662306a36Sopenharmony_ciint insert_inline_extent_backref(struct btrfs_trans_handle *trans,
118762306a36Sopenharmony_ci				 struct btrfs_path *path,
118862306a36Sopenharmony_ci				 u64 bytenr, u64 num_bytes, u64 parent,
118962306a36Sopenharmony_ci				 u64 root_objectid, u64 owner,
119062306a36Sopenharmony_ci				 u64 offset, int refs_to_add,
119162306a36Sopenharmony_ci				 struct btrfs_delayed_extent_op *extent_op)
119262306a36Sopenharmony_ci{
119362306a36Sopenharmony_ci	struct btrfs_extent_inline_ref *iref;
119462306a36Sopenharmony_ci	int ret;
119562306a36Sopenharmony_ci
119662306a36Sopenharmony_ci	ret = lookup_inline_extent_backref(trans, path, &iref, bytenr,
119762306a36Sopenharmony_ci					   num_bytes, parent, root_objectid,
119862306a36Sopenharmony_ci					   owner, offset, 1);
119962306a36Sopenharmony_ci	if (ret == 0) {
120062306a36Sopenharmony_ci		/*
120162306a36Sopenharmony_ci		 * We're adding refs to a tree block we already own, this
120262306a36Sopenharmony_ci		 * should not happen at all.
120362306a36Sopenharmony_ci		 */
120462306a36Sopenharmony_ci		if (owner < BTRFS_FIRST_FREE_OBJECTID) {
120562306a36Sopenharmony_ci			btrfs_print_leaf(path->nodes[0]);
120662306a36Sopenharmony_ci			btrfs_crit(trans->fs_info,
120762306a36Sopenharmony_ci"adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu slot %u",
120862306a36Sopenharmony_ci				   bytenr, num_bytes, root_objectid, path->slots[0]);
120962306a36Sopenharmony_ci			return -EUCLEAN;
121062306a36Sopenharmony_ci		}
121162306a36Sopenharmony_ci		ret = update_inline_extent_backref(trans, path, iref,
121262306a36Sopenharmony_ci						   refs_to_add, extent_op);
121362306a36Sopenharmony_ci	} else if (ret == -ENOENT) {
121462306a36Sopenharmony_ci		setup_inline_extent_backref(trans, path, iref, parent,
121562306a36Sopenharmony_ci					    root_objectid, owner, offset,
121662306a36Sopenharmony_ci					    refs_to_add, extent_op);
121762306a36Sopenharmony_ci		ret = 0;
121862306a36Sopenharmony_ci	}
121962306a36Sopenharmony_ci	return ret;
122062306a36Sopenharmony_ci}
122162306a36Sopenharmony_ci
122262306a36Sopenharmony_cistatic int remove_extent_backref(struct btrfs_trans_handle *trans,
122362306a36Sopenharmony_ci				 struct btrfs_root *root,
122462306a36Sopenharmony_ci				 struct btrfs_path *path,
122562306a36Sopenharmony_ci				 struct btrfs_extent_inline_ref *iref,
122662306a36Sopenharmony_ci				 int refs_to_drop, int is_data)
122762306a36Sopenharmony_ci{
122862306a36Sopenharmony_ci	int ret = 0;
122962306a36Sopenharmony_ci
123062306a36Sopenharmony_ci	BUG_ON(!is_data && refs_to_drop != 1);
123162306a36Sopenharmony_ci	if (iref)
123262306a36Sopenharmony_ci		ret = update_inline_extent_backref(trans, path, iref,
123362306a36Sopenharmony_ci						   -refs_to_drop, NULL);
123462306a36Sopenharmony_ci	else if (is_data)
123562306a36Sopenharmony_ci		ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
123662306a36Sopenharmony_ci	else
123762306a36Sopenharmony_ci		ret = btrfs_del_item(trans, root, path);
123862306a36Sopenharmony_ci	return ret;
123962306a36Sopenharmony_ci}
124062306a36Sopenharmony_ci
124162306a36Sopenharmony_cistatic int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
124262306a36Sopenharmony_ci			       u64 *discarded_bytes)
124362306a36Sopenharmony_ci{
124462306a36Sopenharmony_ci	int j, ret = 0;
124562306a36Sopenharmony_ci	u64 bytes_left, end;
124662306a36Sopenharmony_ci	u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT);
124762306a36Sopenharmony_ci
124862306a36Sopenharmony_ci	/* Adjust the range to be aligned to 512B sectors if necessary. */
124962306a36Sopenharmony_ci	if (start != aligned_start) {
125062306a36Sopenharmony_ci		len -= aligned_start - start;
125162306a36Sopenharmony_ci		len = round_down(len, 1 << SECTOR_SHIFT);
125262306a36Sopenharmony_ci		start = aligned_start;
125362306a36Sopenharmony_ci	}
125462306a36Sopenharmony_ci
125562306a36Sopenharmony_ci	*discarded_bytes = 0;
125662306a36Sopenharmony_ci
125762306a36Sopenharmony_ci	if (!len)
125862306a36Sopenharmony_ci		return 0;
125962306a36Sopenharmony_ci
126062306a36Sopenharmony_ci	end = start + len;
126162306a36Sopenharmony_ci	bytes_left = len;
126262306a36Sopenharmony_ci
126362306a36Sopenharmony_ci	/* Skip any superblocks on this device. */
126462306a36Sopenharmony_ci	for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
126562306a36Sopenharmony_ci		u64 sb_start = btrfs_sb_offset(j);
126662306a36Sopenharmony_ci		u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
126762306a36Sopenharmony_ci		u64 size = sb_start - start;
126862306a36Sopenharmony_ci
126962306a36Sopenharmony_ci		if (!in_range(sb_start, start, bytes_left) &&
127062306a36Sopenharmony_ci		    !in_range(sb_end, start, bytes_left) &&
127162306a36Sopenharmony_ci		    !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
127262306a36Sopenharmony_ci			continue;
127362306a36Sopenharmony_ci
127462306a36Sopenharmony_ci		/*
127562306a36Sopenharmony_ci		 * Superblock spans beginning of range.  Adjust start and
127662306a36Sopenharmony_ci		 * try again.
127762306a36Sopenharmony_ci		 */
127862306a36Sopenharmony_ci		if (sb_start <= start) {
127962306a36Sopenharmony_ci			start += sb_end - start;
128062306a36Sopenharmony_ci			if (start > end) {
128162306a36Sopenharmony_ci				bytes_left = 0;
128262306a36Sopenharmony_ci				break;
128362306a36Sopenharmony_ci			}
128462306a36Sopenharmony_ci			bytes_left = end - start;
128562306a36Sopenharmony_ci			continue;
128662306a36Sopenharmony_ci		}
128762306a36Sopenharmony_ci
128862306a36Sopenharmony_ci		if (size) {
128962306a36Sopenharmony_ci			ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
129062306a36Sopenharmony_ci						   size >> SECTOR_SHIFT,
129162306a36Sopenharmony_ci						   GFP_NOFS);
129262306a36Sopenharmony_ci			if (!ret)
129362306a36Sopenharmony_ci				*discarded_bytes += size;
129462306a36Sopenharmony_ci			else if (ret != -EOPNOTSUPP)
129562306a36Sopenharmony_ci				return ret;
129662306a36Sopenharmony_ci		}
129762306a36Sopenharmony_ci
129862306a36Sopenharmony_ci		start = sb_end;
129962306a36Sopenharmony_ci		if (start > end) {
130062306a36Sopenharmony_ci			bytes_left = 0;
130162306a36Sopenharmony_ci			break;
130262306a36Sopenharmony_ci		}
130362306a36Sopenharmony_ci		bytes_left = end - start;
130462306a36Sopenharmony_ci	}
130562306a36Sopenharmony_ci
130662306a36Sopenharmony_ci	if (bytes_left) {
130762306a36Sopenharmony_ci		ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
130862306a36Sopenharmony_ci					   bytes_left >> SECTOR_SHIFT,
130962306a36Sopenharmony_ci					   GFP_NOFS);
131062306a36Sopenharmony_ci		if (!ret)
131162306a36Sopenharmony_ci			*discarded_bytes += bytes_left;
131262306a36Sopenharmony_ci	}
131362306a36Sopenharmony_ci	return ret;
131462306a36Sopenharmony_ci}
131562306a36Sopenharmony_ci
131662306a36Sopenharmony_cistatic int do_discard_extent(struct btrfs_discard_stripe *stripe, u64 *bytes)
131762306a36Sopenharmony_ci{
131862306a36Sopenharmony_ci	struct btrfs_device *dev = stripe->dev;
131962306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = dev->fs_info;
132062306a36Sopenharmony_ci	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
132162306a36Sopenharmony_ci	u64 phys = stripe->physical;
132262306a36Sopenharmony_ci	u64 len = stripe->length;
132362306a36Sopenharmony_ci	u64 discarded = 0;
132462306a36Sopenharmony_ci	int ret = 0;
132562306a36Sopenharmony_ci
132662306a36Sopenharmony_ci	/* Zone reset on a zoned filesystem */
132762306a36Sopenharmony_ci	if (btrfs_can_zone_reset(dev, phys, len)) {
132862306a36Sopenharmony_ci		u64 src_disc;
132962306a36Sopenharmony_ci
133062306a36Sopenharmony_ci		ret = btrfs_reset_device_zone(dev, phys, len, &discarded);
133162306a36Sopenharmony_ci		if (ret)
133262306a36Sopenharmony_ci			goto out;
133362306a36Sopenharmony_ci
133462306a36Sopenharmony_ci		if (!btrfs_dev_replace_is_ongoing(dev_replace) ||
133562306a36Sopenharmony_ci		    dev != dev_replace->srcdev)
133662306a36Sopenharmony_ci			goto out;
133762306a36Sopenharmony_ci
133862306a36Sopenharmony_ci		src_disc = discarded;
133962306a36Sopenharmony_ci
134062306a36Sopenharmony_ci		/* Send to replace target as well */
134162306a36Sopenharmony_ci		ret = btrfs_reset_device_zone(dev_replace->tgtdev, phys, len,
134262306a36Sopenharmony_ci					      &discarded);
134362306a36Sopenharmony_ci		discarded += src_disc;
134462306a36Sopenharmony_ci	} else if (bdev_max_discard_sectors(stripe->dev->bdev)) {
134562306a36Sopenharmony_ci		ret = btrfs_issue_discard(dev->bdev, phys, len, &discarded);
134662306a36Sopenharmony_ci	} else {
134762306a36Sopenharmony_ci		ret = 0;
134862306a36Sopenharmony_ci		*bytes = 0;
134962306a36Sopenharmony_ci	}
135062306a36Sopenharmony_ci
135162306a36Sopenharmony_ciout:
135262306a36Sopenharmony_ci	*bytes = discarded;
135362306a36Sopenharmony_ci	return ret;
135462306a36Sopenharmony_ci}
135562306a36Sopenharmony_ci
135662306a36Sopenharmony_ciint btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
135762306a36Sopenharmony_ci			 u64 num_bytes, u64 *actual_bytes)
135862306a36Sopenharmony_ci{
135962306a36Sopenharmony_ci	int ret = 0;
136062306a36Sopenharmony_ci	u64 discarded_bytes = 0;
136162306a36Sopenharmony_ci	u64 end = bytenr + num_bytes;
136262306a36Sopenharmony_ci	u64 cur = bytenr;
136362306a36Sopenharmony_ci
136462306a36Sopenharmony_ci	/*
136562306a36Sopenharmony_ci	 * Avoid races with device replace and make sure the devices in the
136662306a36Sopenharmony_ci	 * stripes don't go away while we are discarding.
136762306a36Sopenharmony_ci	 */
136862306a36Sopenharmony_ci	btrfs_bio_counter_inc_blocked(fs_info);
136962306a36Sopenharmony_ci	while (cur < end) {
137062306a36Sopenharmony_ci		struct btrfs_discard_stripe *stripes;
137162306a36Sopenharmony_ci		unsigned int num_stripes;
137262306a36Sopenharmony_ci		int i;
137362306a36Sopenharmony_ci
137462306a36Sopenharmony_ci		num_bytes = end - cur;
137562306a36Sopenharmony_ci		stripes = btrfs_map_discard(fs_info, cur, &num_bytes, &num_stripes);
137662306a36Sopenharmony_ci		if (IS_ERR(stripes)) {
137762306a36Sopenharmony_ci			ret = PTR_ERR(stripes);
137862306a36Sopenharmony_ci			if (ret == -EOPNOTSUPP)
137962306a36Sopenharmony_ci				ret = 0;
138062306a36Sopenharmony_ci			break;
138162306a36Sopenharmony_ci		}
138262306a36Sopenharmony_ci
138362306a36Sopenharmony_ci		for (i = 0; i < num_stripes; i++) {
138462306a36Sopenharmony_ci			struct btrfs_discard_stripe *stripe = stripes + i;
138562306a36Sopenharmony_ci			u64 bytes;
138662306a36Sopenharmony_ci
138762306a36Sopenharmony_ci			if (!stripe->dev->bdev) {
138862306a36Sopenharmony_ci				ASSERT(btrfs_test_opt(fs_info, DEGRADED));
138962306a36Sopenharmony_ci				continue;
139062306a36Sopenharmony_ci			}
139162306a36Sopenharmony_ci
139262306a36Sopenharmony_ci			if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
139362306a36Sopenharmony_ci					&stripe->dev->dev_state))
139462306a36Sopenharmony_ci				continue;
139562306a36Sopenharmony_ci
139662306a36Sopenharmony_ci			ret = do_discard_extent(stripe, &bytes);
139762306a36Sopenharmony_ci			if (ret) {
139862306a36Sopenharmony_ci				/*
139962306a36Sopenharmony_ci				 * Keep going if discard is not supported by the
140062306a36Sopenharmony_ci				 * device.
140162306a36Sopenharmony_ci				 */
140262306a36Sopenharmony_ci				if (ret != -EOPNOTSUPP)
140362306a36Sopenharmony_ci					break;
140462306a36Sopenharmony_ci				ret = 0;
140562306a36Sopenharmony_ci			} else {
140662306a36Sopenharmony_ci				discarded_bytes += bytes;
140762306a36Sopenharmony_ci			}
140862306a36Sopenharmony_ci		}
140962306a36Sopenharmony_ci		kfree(stripes);
141062306a36Sopenharmony_ci		if (ret)
141162306a36Sopenharmony_ci			break;
141262306a36Sopenharmony_ci		cur += num_bytes;
141362306a36Sopenharmony_ci	}
141462306a36Sopenharmony_ci	btrfs_bio_counter_dec(fs_info);
141562306a36Sopenharmony_ci	if (actual_bytes)
141662306a36Sopenharmony_ci		*actual_bytes = discarded_bytes;
141762306a36Sopenharmony_ci	return ret;
141862306a36Sopenharmony_ci}
141962306a36Sopenharmony_ci
142062306a36Sopenharmony_ci/* Can return -ENOMEM */
142162306a36Sopenharmony_ciint btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
142262306a36Sopenharmony_ci			 struct btrfs_ref *generic_ref)
142362306a36Sopenharmony_ci{
142462306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
142562306a36Sopenharmony_ci	int ret;
142662306a36Sopenharmony_ci
142762306a36Sopenharmony_ci	ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
142862306a36Sopenharmony_ci	       generic_ref->action);
142962306a36Sopenharmony_ci	BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
143062306a36Sopenharmony_ci	       generic_ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID);
143162306a36Sopenharmony_ci
143262306a36Sopenharmony_ci	if (generic_ref->type == BTRFS_REF_METADATA)
143362306a36Sopenharmony_ci		ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL);
143462306a36Sopenharmony_ci	else
143562306a36Sopenharmony_ci		ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0);
143662306a36Sopenharmony_ci
143762306a36Sopenharmony_ci	btrfs_ref_tree_mod(fs_info, generic_ref);
143862306a36Sopenharmony_ci
143962306a36Sopenharmony_ci	return ret;
144062306a36Sopenharmony_ci}
144162306a36Sopenharmony_ci
144262306a36Sopenharmony_ci/*
144362306a36Sopenharmony_ci * __btrfs_inc_extent_ref - insert backreference for a given extent
144462306a36Sopenharmony_ci *
144562306a36Sopenharmony_ci * The counterpart is in __btrfs_free_extent(), with examples and more details
144662306a36Sopenharmony_ci * how it works.
144762306a36Sopenharmony_ci *
144862306a36Sopenharmony_ci * @trans:	    Handle of transaction
144962306a36Sopenharmony_ci *
145062306a36Sopenharmony_ci * @node:	    The delayed ref node used to get the bytenr/length for
145162306a36Sopenharmony_ci *		    extent whose references are incremented.
145262306a36Sopenharmony_ci *
145362306a36Sopenharmony_ci * @parent:	    If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/
145462306a36Sopenharmony_ci *		    BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical
145562306a36Sopenharmony_ci *		    bytenr of the parent block. Since new extents are always
145662306a36Sopenharmony_ci *		    created with indirect references, this will only be the case
145762306a36Sopenharmony_ci *		    when relocating a shared extent. In that case, root_objectid
145862306a36Sopenharmony_ci *		    will be BTRFS_TREE_RELOC_OBJECTID. Otherwise, parent must
145962306a36Sopenharmony_ci *		    be 0
146062306a36Sopenharmony_ci *
146162306a36Sopenharmony_ci * @root_objectid:  The id of the root where this modification has originated,
146262306a36Sopenharmony_ci *		    this can be either one of the well-known metadata trees or
146362306a36Sopenharmony_ci *		    the subvolume id which references this extent.
146462306a36Sopenharmony_ci *
146562306a36Sopenharmony_ci * @owner:	    For data extents it is the inode number of the owning file.
146662306a36Sopenharmony_ci *		    For metadata extents this parameter holds the level in the
146762306a36Sopenharmony_ci *		    tree of the extent.
146862306a36Sopenharmony_ci *
146962306a36Sopenharmony_ci * @offset:	    For metadata extents the offset is ignored and is currently
147062306a36Sopenharmony_ci *		    always passed as 0. For data extents it is the fileoffset
147162306a36Sopenharmony_ci *		    this extent belongs to.
147262306a36Sopenharmony_ci *
147362306a36Sopenharmony_ci * @refs_to_add     Number of references to add
147462306a36Sopenharmony_ci *
147562306a36Sopenharmony_ci * @extent_op       Pointer to a structure, holding information necessary when
147662306a36Sopenharmony_ci *                  updating a tree block's flags
147762306a36Sopenharmony_ci *
147862306a36Sopenharmony_ci */
147962306a36Sopenharmony_cistatic int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
148062306a36Sopenharmony_ci				  struct btrfs_delayed_ref_node *node,
148162306a36Sopenharmony_ci				  u64 parent, u64 root_objectid,
148262306a36Sopenharmony_ci				  u64 owner, u64 offset, int refs_to_add,
148362306a36Sopenharmony_ci				  struct btrfs_delayed_extent_op *extent_op)
148462306a36Sopenharmony_ci{
148562306a36Sopenharmony_ci	struct btrfs_path *path;
148662306a36Sopenharmony_ci	struct extent_buffer *leaf;
148762306a36Sopenharmony_ci	struct btrfs_extent_item *item;
148862306a36Sopenharmony_ci	struct btrfs_key key;
148962306a36Sopenharmony_ci	u64 bytenr = node->bytenr;
149062306a36Sopenharmony_ci	u64 num_bytes = node->num_bytes;
149162306a36Sopenharmony_ci	u64 refs;
149262306a36Sopenharmony_ci	int ret;
149362306a36Sopenharmony_ci
149462306a36Sopenharmony_ci	path = btrfs_alloc_path();
149562306a36Sopenharmony_ci	if (!path)
149662306a36Sopenharmony_ci		return -ENOMEM;
149762306a36Sopenharmony_ci
149862306a36Sopenharmony_ci	/* this will setup the path even if it fails to insert the back ref */
149962306a36Sopenharmony_ci	ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
150062306a36Sopenharmony_ci					   parent, root_objectid, owner,
150162306a36Sopenharmony_ci					   offset, refs_to_add, extent_op);
150262306a36Sopenharmony_ci	if ((ret < 0 && ret != -EAGAIN) || !ret)
150362306a36Sopenharmony_ci		goto out;
150462306a36Sopenharmony_ci
150562306a36Sopenharmony_ci	/*
150662306a36Sopenharmony_ci	 * Ok we had -EAGAIN which means we didn't have space to insert and
150762306a36Sopenharmony_ci	 * inline extent ref, so just update the reference count and add a
150862306a36Sopenharmony_ci	 * normal backref.
150962306a36Sopenharmony_ci	 */
151062306a36Sopenharmony_ci	leaf = path->nodes[0];
151162306a36Sopenharmony_ci	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
151262306a36Sopenharmony_ci	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
151362306a36Sopenharmony_ci	refs = btrfs_extent_refs(leaf, item);
151462306a36Sopenharmony_ci	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
151562306a36Sopenharmony_ci	if (extent_op)
151662306a36Sopenharmony_ci		__run_delayed_extent_op(extent_op, leaf, item);
151762306a36Sopenharmony_ci
151862306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, leaf);
151962306a36Sopenharmony_ci	btrfs_release_path(path);
152062306a36Sopenharmony_ci
152162306a36Sopenharmony_ci	/* now insert the actual backref */
152262306a36Sopenharmony_ci	if (owner < BTRFS_FIRST_FREE_OBJECTID)
152362306a36Sopenharmony_ci		ret = insert_tree_block_ref(trans, path, bytenr, parent,
152462306a36Sopenharmony_ci					    root_objectid);
152562306a36Sopenharmony_ci	else
152662306a36Sopenharmony_ci		ret = insert_extent_data_ref(trans, path, bytenr, parent,
152762306a36Sopenharmony_ci					     root_objectid, owner, offset,
152862306a36Sopenharmony_ci					     refs_to_add);
152962306a36Sopenharmony_ci
153062306a36Sopenharmony_ci	if (ret)
153162306a36Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
153262306a36Sopenharmony_ciout:
153362306a36Sopenharmony_ci	btrfs_free_path(path);
153462306a36Sopenharmony_ci	return ret;
153562306a36Sopenharmony_ci}
153662306a36Sopenharmony_ci
153762306a36Sopenharmony_cistatic int run_delayed_data_ref(struct btrfs_trans_handle *trans,
153862306a36Sopenharmony_ci				struct btrfs_delayed_ref_node *node,
153962306a36Sopenharmony_ci				struct btrfs_delayed_extent_op *extent_op,
154062306a36Sopenharmony_ci				bool insert_reserved)
154162306a36Sopenharmony_ci{
154262306a36Sopenharmony_ci	int ret = 0;
154362306a36Sopenharmony_ci	struct btrfs_delayed_data_ref *ref;
154462306a36Sopenharmony_ci	struct btrfs_key ins;
154562306a36Sopenharmony_ci	u64 parent = 0;
154662306a36Sopenharmony_ci	u64 ref_root = 0;
154762306a36Sopenharmony_ci	u64 flags = 0;
154862306a36Sopenharmony_ci
154962306a36Sopenharmony_ci	ins.objectid = node->bytenr;
155062306a36Sopenharmony_ci	ins.offset = node->num_bytes;
155162306a36Sopenharmony_ci	ins.type = BTRFS_EXTENT_ITEM_KEY;
155262306a36Sopenharmony_ci
155362306a36Sopenharmony_ci	ref = btrfs_delayed_node_to_data_ref(node);
155462306a36Sopenharmony_ci	trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);
155562306a36Sopenharmony_ci
155662306a36Sopenharmony_ci	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
155762306a36Sopenharmony_ci		parent = ref->parent;
155862306a36Sopenharmony_ci	ref_root = ref->root;
155962306a36Sopenharmony_ci
156062306a36Sopenharmony_ci	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
156162306a36Sopenharmony_ci		if (extent_op)
156262306a36Sopenharmony_ci			flags |= extent_op->flags_to_set;
156362306a36Sopenharmony_ci		ret = alloc_reserved_file_extent(trans, parent, ref_root,
156462306a36Sopenharmony_ci						 flags, ref->objectid,
156562306a36Sopenharmony_ci						 ref->offset, &ins,
156662306a36Sopenharmony_ci						 node->ref_mod);
156762306a36Sopenharmony_ci	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
156862306a36Sopenharmony_ci		ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
156962306a36Sopenharmony_ci					     ref->objectid, ref->offset,
157062306a36Sopenharmony_ci					     node->ref_mod, extent_op);
157162306a36Sopenharmony_ci	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
157262306a36Sopenharmony_ci		ret = __btrfs_free_extent(trans, node, parent,
157362306a36Sopenharmony_ci					  ref_root, ref->objectid,
157462306a36Sopenharmony_ci					  ref->offset, node->ref_mod,
157562306a36Sopenharmony_ci					  extent_op);
157662306a36Sopenharmony_ci	} else {
157762306a36Sopenharmony_ci		BUG();
157862306a36Sopenharmony_ci	}
157962306a36Sopenharmony_ci	return ret;
158062306a36Sopenharmony_ci}
158162306a36Sopenharmony_ci
158262306a36Sopenharmony_cistatic void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
158362306a36Sopenharmony_ci				    struct extent_buffer *leaf,
158462306a36Sopenharmony_ci				    struct btrfs_extent_item *ei)
158562306a36Sopenharmony_ci{
158662306a36Sopenharmony_ci	u64 flags = btrfs_extent_flags(leaf, ei);
158762306a36Sopenharmony_ci	if (extent_op->update_flags) {
158862306a36Sopenharmony_ci		flags |= extent_op->flags_to_set;
158962306a36Sopenharmony_ci		btrfs_set_extent_flags(leaf, ei, flags);
159062306a36Sopenharmony_ci	}
159162306a36Sopenharmony_ci
159262306a36Sopenharmony_ci	if (extent_op->update_key) {
159362306a36Sopenharmony_ci		struct btrfs_tree_block_info *bi;
159462306a36Sopenharmony_ci		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
159562306a36Sopenharmony_ci		bi = (struct btrfs_tree_block_info *)(ei + 1);
159662306a36Sopenharmony_ci		btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
159762306a36Sopenharmony_ci	}
159862306a36Sopenharmony_ci}
159962306a36Sopenharmony_ci
160062306a36Sopenharmony_cistatic int run_delayed_extent_op(struct btrfs_trans_handle *trans,
160162306a36Sopenharmony_ci				 struct btrfs_delayed_ref_head *head,
160262306a36Sopenharmony_ci				 struct btrfs_delayed_extent_op *extent_op)
160362306a36Sopenharmony_ci{
160462306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
160562306a36Sopenharmony_ci	struct btrfs_root *root;
160662306a36Sopenharmony_ci	struct btrfs_key key;
160762306a36Sopenharmony_ci	struct btrfs_path *path;
160862306a36Sopenharmony_ci	struct btrfs_extent_item *ei;
160962306a36Sopenharmony_ci	struct extent_buffer *leaf;
161062306a36Sopenharmony_ci	u32 item_size;
161162306a36Sopenharmony_ci	int ret;
161262306a36Sopenharmony_ci	int err = 0;
161362306a36Sopenharmony_ci	int metadata = 1;
161462306a36Sopenharmony_ci
161562306a36Sopenharmony_ci	if (TRANS_ABORTED(trans))
161662306a36Sopenharmony_ci		return 0;
161762306a36Sopenharmony_ci
161862306a36Sopenharmony_ci	if (!btrfs_fs_incompat(fs_info, SKINNY_METADATA))
161962306a36Sopenharmony_ci		metadata = 0;
162062306a36Sopenharmony_ci
162162306a36Sopenharmony_ci	path = btrfs_alloc_path();
162262306a36Sopenharmony_ci	if (!path)
162362306a36Sopenharmony_ci		return -ENOMEM;
162462306a36Sopenharmony_ci
162562306a36Sopenharmony_ci	key.objectid = head->bytenr;
162662306a36Sopenharmony_ci
162762306a36Sopenharmony_ci	if (metadata) {
162862306a36Sopenharmony_ci		key.type = BTRFS_METADATA_ITEM_KEY;
162962306a36Sopenharmony_ci		key.offset = extent_op->level;
163062306a36Sopenharmony_ci	} else {
163162306a36Sopenharmony_ci		key.type = BTRFS_EXTENT_ITEM_KEY;
163262306a36Sopenharmony_ci		key.offset = head->num_bytes;
163362306a36Sopenharmony_ci	}
163462306a36Sopenharmony_ci
163562306a36Sopenharmony_ci	root = btrfs_extent_root(fs_info, key.objectid);
163662306a36Sopenharmony_ciagain:
163762306a36Sopenharmony_ci	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
163862306a36Sopenharmony_ci	if (ret < 0) {
163962306a36Sopenharmony_ci		err = ret;
164062306a36Sopenharmony_ci		goto out;
164162306a36Sopenharmony_ci	}
164262306a36Sopenharmony_ci	if (ret > 0) {
164362306a36Sopenharmony_ci		if (metadata) {
164462306a36Sopenharmony_ci			if (path->slots[0] > 0) {
164562306a36Sopenharmony_ci				path->slots[0]--;
164662306a36Sopenharmony_ci				btrfs_item_key_to_cpu(path->nodes[0], &key,
164762306a36Sopenharmony_ci						      path->slots[0]);
164862306a36Sopenharmony_ci				if (key.objectid == head->bytenr &&
164962306a36Sopenharmony_ci				    key.type == BTRFS_EXTENT_ITEM_KEY &&
165062306a36Sopenharmony_ci				    key.offset == head->num_bytes)
165162306a36Sopenharmony_ci					ret = 0;
165262306a36Sopenharmony_ci			}
165362306a36Sopenharmony_ci			if (ret > 0) {
165462306a36Sopenharmony_ci				btrfs_release_path(path);
165562306a36Sopenharmony_ci				metadata = 0;
165662306a36Sopenharmony_ci
165762306a36Sopenharmony_ci				key.objectid = head->bytenr;
165862306a36Sopenharmony_ci				key.offset = head->num_bytes;
165962306a36Sopenharmony_ci				key.type = BTRFS_EXTENT_ITEM_KEY;
166062306a36Sopenharmony_ci				goto again;
166162306a36Sopenharmony_ci			}
166262306a36Sopenharmony_ci		} else {
166362306a36Sopenharmony_ci			err = -EUCLEAN;
166462306a36Sopenharmony_ci			btrfs_err(fs_info,
166562306a36Sopenharmony_ci		  "missing extent item for extent %llu num_bytes %llu level %d",
166662306a36Sopenharmony_ci				  head->bytenr, head->num_bytes, extent_op->level);
166762306a36Sopenharmony_ci			goto out;
166862306a36Sopenharmony_ci		}
166962306a36Sopenharmony_ci	}
167062306a36Sopenharmony_ci
167162306a36Sopenharmony_ci	leaf = path->nodes[0];
167262306a36Sopenharmony_ci	item_size = btrfs_item_size(leaf, path->slots[0]);
167362306a36Sopenharmony_ci
167462306a36Sopenharmony_ci	if (unlikely(item_size < sizeof(*ei))) {
167562306a36Sopenharmony_ci		err = -EUCLEAN;
167662306a36Sopenharmony_ci		btrfs_err(fs_info,
167762306a36Sopenharmony_ci			  "unexpected extent item size, has %u expect >= %zu",
167862306a36Sopenharmony_ci			  item_size, sizeof(*ei));
167962306a36Sopenharmony_ci		btrfs_abort_transaction(trans, err);
168062306a36Sopenharmony_ci		goto out;
168162306a36Sopenharmony_ci	}
168262306a36Sopenharmony_ci
168362306a36Sopenharmony_ci	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
168462306a36Sopenharmony_ci	__run_delayed_extent_op(extent_op, leaf, ei);
168562306a36Sopenharmony_ci
168662306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, leaf);
168762306a36Sopenharmony_ciout:
168862306a36Sopenharmony_ci	btrfs_free_path(path);
168962306a36Sopenharmony_ci	return err;
169062306a36Sopenharmony_ci}
169162306a36Sopenharmony_ci
169262306a36Sopenharmony_cistatic int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
169362306a36Sopenharmony_ci				struct btrfs_delayed_ref_node *node,
169462306a36Sopenharmony_ci				struct btrfs_delayed_extent_op *extent_op,
169562306a36Sopenharmony_ci				bool insert_reserved)
169662306a36Sopenharmony_ci{
169762306a36Sopenharmony_ci	int ret = 0;
169862306a36Sopenharmony_ci	struct btrfs_delayed_tree_ref *ref;
169962306a36Sopenharmony_ci	u64 parent = 0;
170062306a36Sopenharmony_ci	u64 ref_root = 0;
170162306a36Sopenharmony_ci
170262306a36Sopenharmony_ci	ref = btrfs_delayed_node_to_tree_ref(node);
170362306a36Sopenharmony_ci	trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action);
170462306a36Sopenharmony_ci
170562306a36Sopenharmony_ci	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
170662306a36Sopenharmony_ci		parent = ref->parent;
170762306a36Sopenharmony_ci	ref_root = ref->root;
170862306a36Sopenharmony_ci
170962306a36Sopenharmony_ci	if (unlikely(node->ref_mod != 1)) {
171062306a36Sopenharmony_ci		btrfs_err(trans->fs_info,
171162306a36Sopenharmony_ci	"btree block %llu has %d references rather than 1: action %d ref_root %llu parent %llu",
171262306a36Sopenharmony_ci			  node->bytenr, node->ref_mod, node->action, ref_root,
171362306a36Sopenharmony_ci			  parent);
171462306a36Sopenharmony_ci		return -EUCLEAN;
171562306a36Sopenharmony_ci	}
171662306a36Sopenharmony_ci	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
171762306a36Sopenharmony_ci		BUG_ON(!extent_op || !extent_op->update_flags);
171862306a36Sopenharmony_ci		ret = alloc_reserved_tree_block(trans, node, extent_op);
171962306a36Sopenharmony_ci	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
172062306a36Sopenharmony_ci		ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root,
172162306a36Sopenharmony_ci					     ref->level, 0, 1, extent_op);
172262306a36Sopenharmony_ci	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
172362306a36Sopenharmony_ci		ret = __btrfs_free_extent(trans, node, parent, ref_root,
172462306a36Sopenharmony_ci					  ref->level, 0, 1, extent_op);
172562306a36Sopenharmony_ci	} else {
172662306a36Sopenharmony_ci		BUG();
172762306a36Sopenharmony_ci	}
172862306a36Sopenharmony_ci	return ret;
172962306a36Sopenharmony_ci}
173062306a36Sopenharmony_ci
173162306a36Sopenharmony_ci/* helper function to actually process a single delayed ref entry */
173262306a36Sopenharmony_cistatic int run_one_delayed_ref(struct btrfs_trans_handle *trans,
173362306a36Sopenharmony_ci			       struct btrfs_delayed_ref_node *node,
173462306a36Sopenharmony_ci			       struct btrfs_delayed_extent_op *extent_op,
173562306a36Sopenharmony_ci			       bool insert_reserved)
173662306a36Sopenharmony_ci{
173762306a36Sopenharmony_ci	int ret = 0;
173862306a36Sopenharmony_ci
173962306a36Sopenharmony_ci	if (TRANS_ABORTED(trans)) {
174062306a36Sopenharmony_ci		if (insert_reserved)
174162306a36Sopenharmony_ci			btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
174262306a36Sopenharmony_ci		return 0;
174362306a36Sopenharmony_ci	}
174462306a36Sopenharmony_ci
174562306a36Sopenharmony_ci	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
174662306a36Sopenharmony_ci	    node->type == BTRFS_SHARED_BLOCK_REF_KEY)
174762306a36Sopenharmony_ci		ret = run_delayed_tree_ref(trans, node, extent_op,
174862306a36Sopenharmony_ci					   insert_reserved);
174962306a36Sopenharmony_ci	else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
175062306a36Sopenharmony_ci		 node->type == BTRFS_SHARED_DATA_REF_KEY)
175162306a36Sopenharmony_ci		ret = run_delayed_data_ref(trans, node, extent_op,
175262306a36Sopenharmony_ci					   insert_reserved);
175362306a36Sopenharmony_ci	else
175462306a36Sopenharmony_ci		BUG();
175562306a36Sopenharmony_ci	if (ret && insert_reserved)
175662306a36Sopenharmony_ci		btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
175762306a36Sopenharmony_ci	if (ret < 0)
175862306a36Sopenharmony_ci		btrfs_err(trans->fs_info,
175962306a36Sopenharmony_ci"failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d",
176062306a36Sopenharmony_ci			  node->bytenr, node->num_bytes, node->type,
176162306a36Sopenharmony_ci			  node->action, node->ref_mod, ret);
176262306a36Sopenharmony_ci	return ret;
176362306a36Sopenharmony_ci}
176462306a36Sopenharmony_ci
176562306a36Sopenharmony_cistatic inline struct btrfs_delayed_ref_node *
176662306a36Sopenharmony_ciselect_delayed_ref(struct btrfs_delayed_ref_head *head)
176762306a36Sopenharmony_ci{
176862306a36Sopenharmony_ci	struct btrfs_delayed_ref_node *ref;
176962306a36Sopenharmony_ci
177062306a36Sopenharmony_ci	if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
177162306a36Sopenharmony_ci		return NULL;
177262306a36Sopenharmony_ci
177362306a36Sopenharmony_ci	/*
177462306a36Sopenharmony_ci	 * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
177562306a36Sopenharmony_ci	 * This is to prevent a ref count from going down to zero, which deletes
177662306a36Sopenharmony_ci	 * the extent item from the extent tree, when there still are references
177762306a36Sopenharmony_ci	 * to add, which would fail because they would not find the extent item.
177862306a36Sopenharmony_ci	 */
177962306a36Sopenharmony_ci	if (!list_empty(&head->ref_add_list))
178062306a36Sopenharmony_ci		return list_first_entry(&head->ref_add_list,
178162306a36Sopenharmony_ci				struct btrfs_delayed_ref_node, add_list);
178262306a36Sopenharmony_ci
178362306a36Sopenharmony_ci	ref = rb_entry(rb_first_cached(&head->ref_tree),
178462306a36Sopenharmony_ci		       struct btrfs_delayed_ref_node, ref_node);
178562306a36Sopenharmony_ci	ASSERT(list_empty(&ref->add_list));
178662306a36Sopenharmony_ci	return ref;
178762306a36Sopenharmony_ci}
178862306a36Sopenharmony_ci
178962306a36Sopenharmony_cistatic void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
179062306a36Sopenharmony_ci				      struct btrfs_delayed_ref_head *head)
179162306a36Sopenharmony_ci{
179262306a36Sopenharmony_ci	spin_lock(&delayed_refs->lock);
179362306a36Sopenharmony_ci	head->processing = false;
179462306a36Sopenharmony_ci	delayed_refs->num_heads_ready++;
179562306a36Sopenharmony_ci	spin_unlock(&delayed_refs->lock);
179662306a36Sopenharmony_ci	btrfs_delayed_ref_unlock(head);
179762306a36Sopenharmony_ci}
179862306a36Sopenharmony_ci
179962306a36Sopenharmony_cistatic struct btrfs_delayed_extent_op *cleanup_extent_op(
180062306a36Sopenharmony_ci				struct btrfs_delayed_ref_head *head)
180162306a36Sopenharmony_ci{
180262306a36Sopenharmony_ci	struct btrfs_delayed_extent_op *extent_op = head->extent_op;
180362306a36Sopenharmony_ci
180462306a36Sopenharmony_ci	if (!extent_op)
180562306a36Sopenharmony_ci		return NULL;
180662306a36Sopenharmony_ci
180762306a36Sopenharmony_ci	if (head->must_insert_reserved) {
180862306a36Sopenharmony_ci		head->extent_op = NULL;
180962306a36Sopenharmony_ci		btrfs_free_delayed_extent_op(extent_op);
181062306a36Sopenharmony_ci		return NULL;
181162306a36Sopenharmony_ci	}
181262306a36Sopenharmony_ci	return extent_op;
181362306a36Sopenharmony_ci}
181462306a36Sopenharmony_ci
181562306a36Sopenharmony_cistatic int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans,
181662306a36Sopenharmony_ci				     struct btrfs_delayed_ref_head *head)
181762306a36Sopenharmony_ci{
181862306a36Sopenharmony_ci	struct btrfs_delayed_extent_op *extent_op;
181962306a36Sopenharmony_ci	int ret;
182062306a36Sopenharmony_ci
182162306a36Sopenharmony_ci	extent_op = cleanup_extent_op(head);
182262306a36Sopenharmony_ci	if (!extent_op)
182362306a36Sopenharmony_ci		return 0;
182462306a36Sopenharmony_ci	head->extent_op = NULL;
182562306a36Sopenharmony_ci	spin_unlock(&head->lock);
182662306a36Sopenharmony_ci	ret = run_delayed_extent_op(trans, head, extent_op);
182762306a36Sopenharmony_ci	btrfs_free_delayed_extent_op(extent_op);
182862306a36Sopenharmony_ci	return ret ? ret : 1;
182962306a36Sopenharmony_ci}
183062306a36Sopenharmony_ci
183162306a36Sopenharmony_civoid btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
183262306a36Sopenharmony_ci				  struct btrfs_delayed_ref_root *delayed_refs,
183362306a36Sopenharmony_ci				  struct btrfs_delayed_ref_head *head)
183462306a36Sopenharmony_ci{
183562306a36Sopenharmony_ci	int nr_items = 1;	/* Dropping this ref head update. */
183662306a36Sopenharmony_ci
183762306a36Sopenharmony_ci	/*
183862306a36Sopenharmony_ci	 * We had csum deletions accounted for in our delayed refs rsv, we need
183962306a36Sopenharmony_ci	 * to drop the csum leaves for this update from our delayed_refs_rsv.
184062306a36Sopenharmony_ci	 */
184162306a36Sopenharmony_ci	if (head->total_ref_mod < 0 && head->is_data) {
184262306a36Sopenharmony_ci		spin_lock(&delayed_refs->lock);
184362306a36Sopenharmony_ci		delayed_refs->pending_csums -= head->num_bytes;
184462306a36Sopenharmony_ci		spin_unlock(&delayed_refs->lock);
184562306a36Sopenharmony_ci		nr_items += btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes);
184662306a36Sopenharmony_ci	}
184762306a36Sopenharmony_ci
184862306a36Sopenharmony_ci	btrfs_delayed_refs_rsv_release(fs_info, nr_items);
184962306a36Sopenharmony_ci}
185062306a36Sopenharmony_ci
185162306a36Sopenharmony_cistatic int cleanup_ref_head(struct btrfs_trans_handle *trans,
185262306a36Sopenharmony_ci			    struct btrfs_delayed_ref_head *head)
185362306a36Sopenharmony_ci{
185462306a36Sopenharmony_ci
185562306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
185662306a36Sopenharmony_ci	struct btrfs_delayed_ref_root *delayed_refs;
185762306a36Sopenharmony_ci	int ret;
185862306a36Sopenharmony_ci
185962306a36Sopenharmony_ci	delayed_refs = &trans->transaction->delayed_refs;
186062306a36Sopenharmony_ci
186162306a36Sopenharmony_ci	ret = run_and_cleanup_extent_op(trans, head);
186262306a36Sopenharmony_ci	if (ret < 0) {
186362306a36Sopenharmony_ci		unselect_delayed_ref_head(delayed_refs, head);
186462306a36Sopenharmony_ci		btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
186562306a36Sopenharmony_ci		return ret;
186662306a36Sopenharmony_ci	} else if (ret) {
186762306a36Sopenharmony_ci		return ret;
186862306a36Sopenharmony_ci	}
186962306a36Sopenharmony_ci
187062306a36Sopenharmony_ci	/*
187162306a36Sopenharmony_ci	 * Need to drop our head ref lock and re-acquire the delayed ref lock
187262306a36Sopenharmony_ci	 * and then re-check to make sure nobody got added.
187362306a36Sopenharmony_ci	 */
187462306a36Sopenharmony_ci	spin_unlock(&head->lock);
187562306a36Sopenharmony_ci	spin_lock(&delayed_refs->lock);
187662306a36Sopenharmony_ci	spin_lock(&head->lock);
187762306a36Sopenharmony_ci	if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) {
187862306a36Sopenharmony_ci		spin_unlock(&head->lock);
187962306a36Sopenharmony_ci		spin_unlock(&delayed_refs->lock);
188062306a36Sopenharmony_ci		return 1;
188162306a36Sopenharmony_ci	}
188262306a36Sopenharmony_ci	btrfs_delete_ref_head(delayed_refs, head);
188362306a36Sopenharmony_ci	spin_unlock(&head->lock);
188462306a36Sopenharmony_ci	spin_unlock(&delayed_refs->lock);
188562306a36Sopenharmony_ci
188662306a36Sopenharmony_ci	if (head->must_insert_reserved) {
188762306a36Sopenharmony_ci		btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1);
188862306a36Sopenharmony_ci		if (head->is_data) {
188962306a36Sopenharmony_ci			struct btrfs_root *csum_root;
189062306a36Sopenharmony_ci
189162306a36Sopenharmony_ci			csum_root = btrfs_csum_root(fs_info, head->bytenr);
189262306a36Sopenharmony_ci			ret = btrfs_del_csums(trans, csum_root, head->bytenr,
189362306a36Sopenharmony_ci					      head->num_bytes);
189462306a36Sopenharmony_ci		}
189562306a36Sopenharmony_ci	}
189662306a36Sopenharmony_ci
189762306a36Sopenharmony_ci	btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
189862306a36Sopenharmony_ci
189962306a36Sopenharmony_ci	trace_run_delayed_ref_head(fs_info, head, 0);
190062306a36Sopenharmony_ci	btrfs_delayed_ref_unlock(head);
190162306a36Sopenharmony_ci	btrfs_put_delayed_ref_head(head);
190262306a36Sopenharmony_ci	return ret;
190362306a36Sopenharmony_ci}
190462306a36Sopenharmony_ci
190562306a36Sopenharmony_cistatic struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
190662306a36Sopenharmony_ci					struct btrfs_trans_handle *trans)
190762306a36Sopenharmony_ci{
190862306a36Sopenharmony_ci	struct btrfs_delayed_ref_root *delayed_refs =
190962306a36Sopenharmony_ci		&trans->transaction->delayed_refs;
191062306a36Sopenharmony_ci	struct btrfs_delayed_ref_head *head = NULL;
191162306a36Sopenharmony_ci	int ret;
191262306a36Sopenharmony_ci
191362306a36Sopenharmony_ci	spin_lock(&delayed_refs->lock);
191462306a36Sopenharmony_ci	head = btrfs_select_ref_head(delayed_refs);
191562306a36Sopenharmony_ci	if (!head) {
191662306a36Sopenharmony_ci		spin_unlock(&delayed_refs->lock);
191762306a36Sopenharmony_ci		return head;
191862306a36Sopenharmony_ci	}
191962306a36Sopenharmony_ci
192062306a36Sopenharmony_ci	/*
192162306a36Sopenharmony_ci	 * Grab the lock that says we are going to process all the refs for
192262306a36Sopenharmony_ci	 * this head
192362306a36Sopenharmony_ci	 */
192462306a36Sopenharmony_ci	ret = btrfs_delayed_ref_lock(delayed_refs, head);
192562306a36Sopenharmony_ci	spin_unlock(&delayed_refs->lock);
192662306a36Sopenharmony_ci
192762306a36Sopenharmony_ci	/*
192862306a36Sopenharmony_ci	 * We may have dropped the spin lock to get the head mutex lock, and
192962306a36Sopenharmony_ci	 * that might have given someone else time to free the head.  If that's
193062306a36Sopenharmony_ci	 * true, it has been removed from our list and we can move on.
193162306a36Sopenharmony_ci	 */
193262306a36Sopenharmony_ci	if (ret == -EAGAIN)
193362306a36Sopenharmony_ci		head = ERR_PTR(-EAGAIN);
193462306a36Sopenharmony_ci
193562306a36Sopenharmony_ci	return head;
193662306a36Sopenharmony_ci}
193762306a36Sopenharmony_ci
193862306a36Sopenharmony_cistatic int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
193962306a36Sopenharmony_ci					   struct btrfs_delayed_ref_head *locked_ref)
194062306a36Sopenharmony_ci{
194162306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
194262306a36Sopenharmony_ci	struct btrfs_delayed_ref_root *delayed_refs;
194362306a36Sopenharmony_ci	struct btrfs_delayed_extent_op *extent_op;
194462306a36Sopenharmony_ci	struct btrfs_delayed_ref_node *ref;
194562306a36Sopenharmony_ci	bool must_insert_reserved;
194662306a36Sopenharmony_ci	int ret;
194762306a36Sopenharmony_ci
194862306a36Sopenharmony_ci	delayed_refs = &trans->transaction->delayed_refs;
194962306a36Sopenharmony_ci
195062306a36Sopenharmony_ci	lockdep_assert_held(&locked_ref->mutex);
195162306a36Sopenharmony_ci	lockdep_assert_held(&locked_ref->lock);
195262306a36Sopenharmony_ci
195362306a36Sopenharmony_ci	while ((ref = select_delayed_ref(locked_ref))) {
195462306a36Sopenharmony_ci		if (ref->seq &&
195562306a36Sopenharmony_ci		    btrfs_check_delayed_seq(fs_info, ref->seq)) {
195662306a36Sopenharmony_ci			spin_unlock(&locked_ref->lock);
195762306a36Sopenharmony_ci			unselect_delayed_ref_head(delayed_refs, locked_ref);
195862306a36Sopenharmony_ci			return -EAGAIN;
195962306a36Sopenharmony_ci		}
196062306a36Sopenharmony_ci
196162306a36Sopenharmony_ci		rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
196262306a36Sopenharmony_ci		RB_CLEAR_NODE(&ref->ref_node);
196362306a36Sopenharmony_ci		if (!list_empty(&ref->add_list))
196462306a36Sopenharmony_ci			list_del(&ref->add_list);
196562306a36Sopenharmony_ci		/*
196662306a36Sopenharmony_ci		 * When we play the delayed ref, also correct the ref_mod on
196762306a36Sopenharmony_ci		 * head
196862306a36Sopenharmony_ci		 */
196962306a36Sopenharmony_ci		switch (ref->action) {
197062306a36Sopenharmony_ci		case BTRFS_ADD_DELAYED_REF:
197162306a36Sopenharmony_ci		case BTRFS_ADD_DELAYED_EXTENT:
197262306a36Sopenharmony_ci			locked_ref->ref_mod -= ref->ref_mod;
197362306a36Sopenharmony_ci			break;
197462306a36Sopenharmony_ci		case BTRFS_DROP_DELAYED_REF:
197562306a36Sopenharmony_ci			locked_ref->ref_mod += ref->ref_mod;
197662306a36Sopenharmony_ci			break;
197762306a36Sopenharmony_ci		default:
197862306a36Sopenharmony_ci			WARN_ON(1);
197962306a36Sopenharmony_ci		}
198062306a36Sopenharmony_ci		atomic_dec(&delayed_refs->num_entries);
198162306a36Sopenharmony_ci
198262306a36Sopenharmony_ci		/*
198362306a36Sopenharmony_ci		 * Record the must_insert_reserved flag before we drop the
198462306a36Sopenharmony_ci		 * spin lock.
198562306a36Sopenharmony_ci		 */
198662306a36Sopenharmony_ci		must_insert_reserved = locked_ref->must_insert_reserved;
198762306a36Sopenharmony_ci		locked_ref->must_insert_reserved = false;
198862306a36Sopenharmony_ci
198962306a36Sopenharmony_ci		extent_op = locked_ref->extent_op;
199062306a36Sopenharmony_ci		locked_ref->extent_op = NULL;
199162306a36Sopenharmony_ci		spin_unlock(&locked_ref->lock);
199262306a36Sopenharmony_ci
199362306a36Sopenharmony_ci		ret = run_one_delayed_ref(trans, ref, extent_op,
199462306a36Sopenharmony_ci					  must_insert_reserved);
199562306a36Sopenharmony_ci
199662306a36Sopenharmony_ci		btrfs_free_delayed_extent_op(extent_op);
199762306a36Sopenharmony_ci		if (ret) {
199862306a36Sopenharmony_ci			unselect_delayed_ref_head(delayed_refs, locked_ref);
199962306a36Sopenharmony_ci			btrfs_put_delayed_ref(ref);
200062306a36Sopenharmony_ci			return ret;
200162306a36Sopenharmony_ci		}
200262306a36Sopenharmony_ci
200362306a36Sopenharmony_ci		btrfs_put_delayed_ref(ref);
200462306a36Sopenharmony_ci		cond_resched();
200562306a36Sopenharmony_ci
200662306a36Sopenharmony_ci		spin_lock(&locked_ref->lock);
200762306a36Sopenharmony_ci		btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref);
200862306a36Sopenharmony_ci	}
200962306a36Sopenharmony_ci
201062306a36Sopenharmony_ci	return 0;
201162306a36Sopenharmony_ci}
201262306a36Sopenharmony_ci
201362306a36Sopenharmony_ci/*
201462306a36Sopenharmony_ci * Returns 0 on success or if called with an already aborted transaction.
201562306a36Sopenharmony_ci * Returns -ENOMEM or -EIO on failure and will abort the transaction.
201662306a36Sopenharmony_ci */
201762306a36Sopenharmony_cistatic noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
201862306a36Sopenharmony_ci					     unsigned long nr)
201962306a36Sopenharmony_ci{
202062306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
202162306a36Sopenharmony_ci	struct btrfs_delayed_ref_root *delayed_refs;
202262306a36Sopenharmony_ci	struct btrfs_delayed_ref_head *locked_ref = NULL;
202362306a36Sopenharmony_ci	int ret;
202462306a36Sopenharmony_ci	unsigned long count = 0;
202562306a36Sopenharmony_ci
202662306a36Sopenharmony_ci	delayed_refs = &trans->transaction->delayed_refs;
202762306a36Sopenharmony_ci	do {
202862306a36Sopenharmony_ci		if (!locked_ref) {
202962306a36Sopenharmony_ci			locked_ref = btrfs_obtain_ref_head(trans);
203062306a36Sopenharmony_ci			if (IS_ERR_OR_NULL(locked_ref)) {
203162306a36Sopenharmony_ci				if (PTR_ERR(locked_ref) == -EAGAIN) {
203262306a36Sopenharmony_ci					continue;
203362306a36Sopenharmony_ci				} else {
203462306a36Sopenharmony_ci					break;
203562306a36Sopenharmony_ci				}
203662306a36Sopenharmony_ci			}
203762306a36Sopenharmony_ci			count++;
203862306a36Sopenharmony_ci		}
203962306a36Sopenharmony_ci		/*
204062306a36Sopenharmony_ci		 * We need to try and merge add/drops of the same ref since we
204162306a36Sopenharmony_ci		 * can run into issues with relocate dropping the implicit ref
204262306a36Sopenharmony_ci		 * and then it being added back again before the drop can
204362306a36Sopenharmony_ci		 * finish.  If we merged anything we need to re-loop so we can
204462306a36Sopenharmony_ci		 * get a good ref.
204562306a36Sopenharmony_ci		 * Or we can get node references of the same type that weren't
204662306a36Sopenharmony_ci		 * merged when created due to bumps in the tree mod seq, and
204762306a36Sopenharmony_ci		 * we need to merge them to prevent adding an inline extent
204862306a36Sopenharmony_ci		 * backref before dropping it (triggering a BUG_ON at
204962306a36Sopenharmony_ci		 * insert_inline_extent_backref()).
205062306a36Sopenharmony_ci		 */
205162306a36Sopenharmony_ci		spin_lock(&locked_ref->lock);
205262306a36Sopenharmony_ci		btrfs_merge_delayed_refs(fs_info, delayed_refs, locked_ref);
205362306a36Sopenharmony_ci
205462306a36Sopenharmony_ci		ret = btrfs_run_delayed_refs_for_head(trans, locked_ref);
205562306a36Sopenharmony_ci		if (ret < 0 && ret != -EAGAIN) {
205662306a36Sopenharmony_ci			/*
205762306a36Sopenharmony_ci			 * Error, btrfs_run_delayed_refs_for_head already
205862306a36Sopenharmony_ci			 * unlocked everything so just bail out
205962306a36Sopenharmony_ci			 */
206062306a36Sopenharmony_ci			return ret;
206162306a36Sopenharmony_ci		} else if (!ret) {
206262306a36Sopenharmony_ci			/*
206362306a36Sopenharmony_ci			 * Success, perform the usual cleanup of a processed
206462306a36Sopenharmony_ci			 * head
206562306a36Sopenharmony_ci			 */
206662306a36Sopenharmony_ci			ret = cleanup_ref_head(trans, locked_ref);
206762306a36Sopenharmony_ci			if (ret > 0 ) {
206862306a36Sopenharmony_ci				/* We dropped our lock, we need to loop. */
206962306a36Sopenharmony_ci				ret = 0;
207062306a36Sopenharmony_ci				continue;
207162306a36Sopenharmony_ci			} else if (ret) {
207262306a36Sopenharmony_ci				return ret;
207362306a36Sopenharmony_ci			}
207462306a36Sopenharmony_ci		}
207562306a36Sopenharmony_ci
207662306a36Sopenharmony_ci		/*
207762306a36Sopenharmony_ci		 * Either success case or btrfs_run_delayed_refs_for_head
207862306a36Sopenharmony_ci		 * returned -EAGAIN, meaning we need to select another head
207962306a36Sopenharmony_ci		 */
208062306a36Sopenharmony_ci
208162306a36Sopenharmony_ci		locked_ref = NULL;
208262306a36Sopenharmony_ci		cond_resched();
208362306a36Sopenharmony_ci	} while ((nr != -1 && count < nr) || locked_ref);
208462306a36Sopenharmony_ci
208562306a36Sopenharmony_ci	return 0;
208662306a36Sopenharmony_ci}
208762306a36Sopenharmony_ci
#ifdef SCRAMBLE_DELAYED_REFS
/*
 * Normally delayed refs get processed in ascending bytenr order. This
 * correlates in most cases to the order added. To expose dependencies on this
 * order, we start to process the tree in the middle instead of the beginning.
 *
 * Walks down from the root of @root, alternating between the left and the
 * right child (left first), and returns the bytenr of the last entry visited.
 * Returns 0 for an empty tree.
 */
static u64 find_middle(struct rb_root *root)
{
	struct rb_node *n = root->rb_node;
	struct btrfs_delayed_ref_node *entry;
	bool go_left = true;
	/* Defined result for an empty tree, where the loop never runs. */
	u64 middle = 0;

	while (n) {
		entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
		WARN_ON(!entry->in_tree);

		middle = entry->bytenr;

		n = go_left ? n->rb_left : n->rb_right;
		go_left = !go_left;
	}
	return middle;
}
#endif
213062306a36Sopenharmony_ci
213162306a36Sopenharmony_ci/*
213262306a36Sopenharmony_ci * this starts processing the delayed reference count updates and
213362306a36Sopenharmony_ci * extent insertions we have queued up so far.  count can be
213462306a36Sopenharmony_ci * 0, which means to process everything in the tree at the start
213562306a36Sopenharmony_ci * of the run (but not newly added entries), or it can be some target
213662306a36Sopenharmony_ci * number you'd like to process.
213762306a36Sopenharmony_ci *
213862306a36Sopenharmony_ci * Returns 0 on success or if called with an aborted transaction
213962306a36Sopenharmony_ci * Returns <0 on error and aborts the transaction
214062306a36Sopenharmony_ci */
214162306a36Sopenharmony_ciint btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
214262306a36Sopenharmony_ci			   unsigned long count)
214362306a36Sopenharmony_ci{
214462306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
214562306a36Sopenharmony_ci	struct rb_node *node;
214662306a36Sopenharmony_ci	struct btrfs_delayed_ref_root *delayed_refs;
214762306a36Sopenharmony_ci	struct btrfs_delayed_ref_head *head;
214862306a36Sopenharmony_ci	int ret;
214962306a36Sopenharmony_ci	int run_all = count == (unsigned long)-1;
215062306a36Sopenharmony_ci
215162306a36Sopenharmony_ci	/* We'll clean this up in btrfs_cleanup_transaction */
215262306a36Sopenharmony_ci	if (TRANS_ABORTED(trans))
215362306a36Sopenharmony_ci		return 0;
215462306a36Sopenharmony_ci
215562306a36Sopenharmony_ci	if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
215662306a36Sopenharmony_ci		return 0;
215762306a36Sopenharmony_ci
215862306a36Sopenharmony_ci	delayed_refs = &trans->transaction->delayed_refs;
215962306a36Sopenharmony_ci	if (count == 0)
216062306a36Sopenharmony_ci		count = delayed_refs->num_heads_ready;
216162306a36Sopenharmony_ci
216262306a36Sopenharmony_ciagain:
216362306a36Sopenharmony_ci#ifdef SCRAMBLE_DELAYED_REFS
216462306a36Sopenharmony_ci	delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
216562306a36Sopenharmony_ci#endif
216662306a36Sopenharmony_ci	ret = __btrfs_run_delayed_refs(trans, count);
216762306a36Sopenharmony_ci	if (ret < 0) {
216862306a36Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
216962306a36Sopenharmony_ci		return ret;
217062306a36Sopenharmony_ci	}
217162306a36Sopenharmony_ci
217262306a36Sopenharmony_ci	if (run_all) {
217362306a36Sopenharmony_ci		btrfs_create_pending_block_groups(trans);
217462306a36Sopenharmony_ci
217562306a36Sopenharmony_ci		spin_lock(&delayed_refs->lock);
217662306a36Sopenharmony_ci		node = rb_first_cached(&delayed_refs->href_root);
217762306a36Sopenharmony_ci		if (!node) {
217862306a36Sopenharmony_ci			spin_unlock(&delayed_refs->lock);
217962306a36Sopenharmony_ci			goto out;
218062306a36Sopenharmony_ci		}
218162306a36Sopenharmony_ci		head = rb_entry(node, struct btrfs_delayed_ref_head,
218262306a36Sopenharmony_ci				href_node);
218362306a36Sopenharmony_ci		refcount_inc(&head->refs);
218462306a36Sopenharmony_ci		spin_unlock(&delayed_refs->lock);
218562306a36Sopenharmony_ci
218662306a36Sopenharmony_ci		/* Mutex was contended, block until it's released and retry. */
218762306a36Sopenharmony_ci		mutex_lock(&head->mutex);
218862306a36Sopenharmony_ci		mutex_unlock(&head->mutex);
218962306a36Sopenharmony_ci
219062306a36Sopenharmony_ci		btrfs_put_delayed_ref_head(head);
219162306a36Sopenharmony_ci		cond_resched();
219262306a36Sopenharmony_ci		goto again;
219362306a36Sopenharmony_ci	}
219462306a36Sopenharmony_ciout:
219562306a36Sopenharmony_ci	return 0;
219662306a36Sopenharmony_ci}
219762306a36Sopenharmony_ci
219862306a36Sopenharmony_ciint btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
219962306a36Sopenharmony_ci				struct extent_buffer *eb, u64 flags)
220062306a36Sopenharmony_ci{
220162306a36Sopenharmony_ci	struct btrfs_delayed_extent_op *extent_op;
220262306a36Sopenharmony_ci	int level = btrfs_header_level(eb);
220362306a36Sopenharmony_ci	int ret;
220462306a36Sopenharmony_ci
220562306a36Sopenharmony_ci	extent_op = btrfs_alloc_delayed_extent_op();
220662306a36Sopenharmony_ci	if (!extent_op)
220762306a36Sopenharmony_ci		return -ENOMEM;
220862306a36Sopenharmony_ci
220962306a36Sopenharmony_ci	extent_op->flags_to_set = flags;
221062306a36Sopenharmony_ci	extent_op->update_flags = true;
221162306a36Sopenharmony_ci	extent_op->update_key = false;
221262306a36Sopenharmony_ci	extent_op->level = level;
221362306a36Sopenharmony_ci
221462306a36Sopenharmony_ci	ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op);
221562306a36Sopenharmony_ci	if (ret)
221662306a36Sopenharmony_ci		btrfs_free_delayed_extent_op(extent_op);
221762306a36Sopenharmony_ci	return ret;
221862306a36Sopenharmony_ci}
221962306a36Sopenharmony_ci
222062306a36Sopenharmony_cistatic noinline int check_delayed_ref(struct btrfs_root *root,
222162306a36Sopenharmony_ci				      struct btrfs_path *path,
222262306a36Sopenharmony_ci				      u64 objectid, u64 offset, u64 bytenr)
222362306a36Sopenharmony_ci{
222462306a36Sopenharmony_ci	struct btrfs_delayed_ref_head *head;
222562306a36Sopenharmony_ci	struct btrfs_delayed_ref_node *ref;
222662306a36Sopenharmony_ci	struct btrfs_delayed_data_ref *data_ref;
222762306a36Sopenharmony_ci	struct btrfs_delayed_ref_root *delayed_refs;
222862306a36Sopenharmony_ci	struct btrfs_transaction *cur_trans;
222962306a36Sopenharmony_ci	struct rb_node *node;
223062306a36Sopenharmony_ci	int ret = 0;
223162306a36Sopenharmony_ci
223262306a36Sopenharmony_ci	spin_lock(&root->fs_info->trans_lock);
223362306a36Sopenharmony_ci	cur_trans = root->fs_info->running_transaction;
223462306a36Sopenharmony_ci	if (cur_trans)
223562306a36Sopenharmony_ci		refcount_inc(&cur_trans->use_count);
223662306a36Sopenharmony_ci	spin_unlock(&root->fs_info->trans_lock);
223762306a36Sopenharmony_ci	if (!cur_trans)
223862306a36Sopenharmony_ci		return 0;
223962306a36Sopenharmony_ci
224062306a36Sopenharmony_ci	delayed_refs = &cur_trans->delayed_refs;
224162306a36Sopenharmony_ci	spin_lock(&delayed_refs->lock);
224262306a36Sopenharmony_ci	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
224362306a36Sopenharmony_ci	if (!head) {
224462306a36Sopenharmony_ci		spin_unlock(&delayed_refs->lock);
224562306a36Sopenharmony_ci		btrfs_put_transaction(cur_trans);
224662306a36Sopenharmony_ci		return 0;
224762306a36Sopenharmony_ci	}
224862306a36Sopenharmony_ci
224962306a36Sopenharmony_ci	if (!mutex_trylock(&head->mutex)) {
225062306a36Sopenharmony_ci		if (path->nowait) {
225162306a36Sopenharmony_ci			spin_unlock(&delayed_refs->lock);
225262306a36Sopenharmony_ci			btrfs_put_transaction(cur_trans);
225362306a36Sopenharmony_ci			return -EAGAIN;
225462306a36Sopenharmony_ci		}
225562306a36Sopenharmony_ci
225662306a36Sopenharmony_ci		refcount_inc(&head->refs);
225762306a36Sopenharmony_ci		spin_unlock(&delayed_refs->lock);
225862306a36Sopenharmony_ci
225962306a36Sopenharmony_ci		btrfs_release_path(path);
226062306a36Sopenharmony_ci
226162306a36Sopenharmony_ci		/*
226262306a36Sopenharmony_ci		 * Mutex was contended, block until it's released and let
226362306a36Sopenharmony_ci		 * caller try again
226462306a36Sopenharmony_ci		 */
226562306a36Sopenharmony_ci		mutex_lock(&head->mutex);
226662306a36Sopenharmony_ci		mutex_unlock(&head->mutex);
226762306a36Sopenharmony_ci		btrfs_put_delayed_ref_head(head);
226862306a36Sopenharmony_ci		btrfs_put_transaction(cur_trans);
226962306a36Sopenharmony_ci		return -EAGAIN;
227062306a36Sopenharmony_ci	}
227162306a36Sopenharmony_ci	spin_unlock(&delayed_refs->lock);
227262306a36Sopenharmony_ci
227362306a36Sopenharmony_ci	spin_lock(&head->lock);
227462306a36Sopenharmony_ci	/*
227562306a36Sopenharmony_ci	 * XXX: We should replace this with a proper search function in the
227662306a36Sopenharmony_ci	 * future.
227762306a36Sopenharmony_ci	 */
227862306a36Sopenharmony_ci	for (node = rb_first_cached(&head->ref_tree); node;
227962306a36Sopenharmony_ci	     node = rb_next(node)) {
228062306a36Sopenharmony_ci		ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node);
228162306a36Sopenharmony_ci		/* If it's a shared ref we know a cross reference exists */
228262306a36Sopenharmony_ci		if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
228362306a36Sopenharmony_ci			ret = 1;
228462306a36Sopenharmony_ci			break;
228562306a36Sopenharmony_ci		}
228662306a36Sopenharmony_ci
228762306a36Sopenharmony_ci		data_ref = btrfs_delayed_node_to_data_ref(ref);
228862306a36Sopenharmony_ci
228962306a36Sopenharmony_ci		/*
229062306a36Sopenharmony_ci		 * If our ref doesn't match the one we're currently looking at
229162306a36Sopenharmony_ci		 * then we have a cross reference.
229262306a36Sopenharmony_ci		 */
229362306a36Sopenharmony_ci		if (data_ref->root != root->root_key.objectid ||
229462306a36Sopenharmony_ci		    data_ref->objectid != objectid ||
229562306a36Sopenharmony_ci		    data_ref->offset != offset) {
229662306a36Sopenharmony_ci			ret = 1;
229762306a36Sopenharmony_ci			break;
229862306a36Sopenharmony_ci		}
229962306a36Sopenharmony_ci	}
230062306a36Sopenharmony_ci	spin_unlock(&head->lock);
230162306a36Sopenharmony_ci	mutex_unlock(&head->mutex);
230262306a36Sopenharmony_ci	btrfs_put_transaction(cur_trans);
230362306a36Sopenharmony_ci	return ret;
230462306a36Sopenharmony_ci}
230562306a36Sopenharmony_ci
230662306a36Sopenharmony_cistatic noinline int check_committed_ref(struct btrfs_root *root,
230762306a36Sopenharmony_ci					struct btrfs_path *path,
230862306a36Sopenharmony_ci					u64 objectid, u64 offset, u64 bytenr,
230962306a36Sopenharmony_ci					bool strict)
231062306a36Sopenharmony_ci{
231162306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
231262306a36Sopenharmony_ci	struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr);
231362306a36Sopenharmony_ci	struct extent_buffer *leaf;
231462306a36Sopenharmony_ci	struct btrfs_extent_data_ref *ref;
231562306a36Sopenharmony_ci	struct btrfs_extent_inline_ref *iref;
231662306a36Sopenharmony_ci	struct btrfs_extent_item *ei;
231762306a36Sopenharmony_ci	struct btrfs_key key;
231862306a36Sopenharmony_ci	u32 item_size;
231962306a36Sopenharmony_ci	int type;
232062306a36Sopenharmony_ci	int ret;
232162306a36Sopenharmony_ci
232262306a36Sopenharmony_ci	key.objectid = bytenr;
232362306a36Sopenharmony_ci	key.offset = (u64)-1;
232462306a36Sopenharmony_ci	key.type = BTRFS_EXTENT_ITEM_KEY;
232562306a36Sopenharmony_ci
232662306a36Sopenharmony_ci	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
232762306a36Sopenharmony_ci	if (ret < 0)
232862306a36Sopenharmony_ci		goto out;
232962306a36Sopenharmony_ci	BUG_ON(ret == 0); /* Corruption */
233062306a36Sopenharmony_ci
233162306a36Sopenharmony_ci	ret = -ENOENT;
233262306a36Sopenharmony_ci	if (path->slots[0] == 0)
233362306a36Sopenharmony_ci		goto out;
233462306a36Sopenharmony_ci
233562306a36Sopenharmony_ci	path->slots[0]--;
233662306a36Sopenharmony_ci	leaf = path->nodes[0];
233762306a36Sopenharmony_ci	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
233862306a36Sopenharmony_ci
233962306a36Sopenharmony_ci	if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
234062306a36Sopenharmony_ci		goto out;
234162306a36Sopenharmony_ci
234262306a36Sopenharmony_ci	ret = 1;
234362306a36Sopenharmony_ci	item_size = btrfs_item_size(leaf, path->slots[0]);
234462306a36Sopenharmony_ci	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
234562306a36Sopenharmony_ci
234662306a36Sopenharmony_ci	/* If extent item has more than 1 inline ref then it's shared */
234762306a36Sopenharmony_ci	if (item_size != sizeof(*ei) +
234862306a36Sopenharmony_ci	    btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
234962306a36Sopenharmony_ci		goto out;
235062306a36Sopenharmony_ci
235162306a36Sopenharmony_ci	/*
235262306a36Sopenharmony_ci	 * If extent created before last snapshot => it's shared unless the
235362306a36Sopenharmony_ci	 * snapshot has been deleted. Use the heuristic if strict is false.
235462306a36Sopenharmony_ci	 */
235562306a36Sopenharmony_ci	if (!strict &&
235662306a36Sopenharmony_ci	    (btrfs_extent_generation(leaf, ei) <=
235762306a36Sopenharmony_ci	     btrfs_root_last_snapshot(&root->root_item)))
235862306a36Sopenharmony_ci		goto out;
235962306a36Sopenharmony_ci
236062306a36Sopenharmony_ci	iref = (struct btrfs_extent_inline_ref *)(ei + 1);
236162306a36Sopenharmony_ci
236262306a36Sopenharmony_ci	/* If this extent has SHARED_DATA_REF then it's shared */
236362306a36Sopenharmony_ci	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
236462306a36Sopenharmony_ci	if (type != BTRFS_EXTENT_DATA_REF_KEY)
236562306a36Sopenharmony_ci		goto out;
236662306a36Sopenharmony_ci
236762306a36Sopenharmony_ci	ref = (struct btrfs_extent_data_ref *)(&iref->offset);
236862306a36Sopenharmony_ci	if (btrfs_extent_refs(leaf, ei) !=
236962306a36Sopenharmony_ci	    btrfs_extent_data_ref_count(leaf, ref) ||
237062306a36Sopenharmony_ci	    btrfs_extent_data_ref_root(leaf, ref) !=
237162306a36Sopenharmony_ci	    root->root_key.objectid ||
237262306a36Sopenharmony_ci	    btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
237362306a36Sopenharmony_ci	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
237462306a36Sopenharmony_ci		goto out;
237562306a36Sopenharmony_ci
237662306a36Sopenharmony_ci	ret = 0;
237762306a36Sopenharmony_ciout:
237862306a36Sopenharmony_ci	return ret;
237962306a36Sopenharmony_ci}
238062306a36Sopenharmony_ci
238162306a36Sopenharmony_ciint btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
238262306a36Sopenharmony_ci			  u64 bytenr, bool strict, struct btrfs_path *path)
238362306a36Sopenharmony_ci{
238462306a36Sopenharmony_ci	int ret;
238562306a36Sopenharmony_ci
238662306a36Sopenharmony_ci	do {
238762306a36Sopenharmony_ci		ret = check_committed_ref(root, path, objectid,
238862306a36Sopenharmony_ci					  offset, bytenr, strict);
238962306a36Sopenharmony_ci		if (ret && ret != -ENOENT)
239062306a36Sopenharmony_ci			goto out;
239162306a36Sopenharmony_ci
239262306a36Sopenharmony_ci		ret = check_delayed_ref(root, path, objectid, offset, bytenr);
239362306a36Sopenharmony_ci	} while (ret == -EAGAIN);
239462306a36Sopenharmony_ci
239562306a36Sopenharmony_ciout:
239662306a36Sopenharmony_ci	btrfs_release_path(path);
239762306a36Sopenharmony_ci	if (btrfs_is_data_reloc_root(root))
239862306a36Sopenharmony_ci		WARN_ON(ret > 0);
239962306a36Sopenharmony_ci	return ret;
240062306a36Sopenharmony_ci}
240162306a36Sopenharmony_ci
240262306a36Sopenharmony_cistatic int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
240362306a36Sopenharmony_ci			   struct btrfs_root *root,
240462306a36Sopenharmony_ci			   struct extent_buffer *buf,
240562306a36Sopenharmony_ci			   int full_backref, int inc)
240662306a36Sopenharmony_ci{
240762306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
240862306a36Sopenharmony_ci	u64 bytenr;
240962306a36Sopenharmony_ci	u64 num_bytes;
241062306a36Sopenharmony_ci	u64 parent;
241162306a36Sopenharmony_ci	u64 ref_root;
241262306a36Sopenharmony_ci	u32 nritems;
241362306a36Sopenharmony_ci	struct btrfs_key key;
241462306a36Sopenharmony_ci	struct btrfs_file_extent_item *fi;
241562306a36Sopenharmony_ci	struct btrfs_ref generic_ref = { 0 };
241662306a36Sopenharmony_ci	bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC);
241762306a36Sopenharmony_ci	int i;
241862306a36Sopenharmony_ci	int action;
241962306a36Sopenharmony_ci	int level;
242062306a36Sopenharmony_ci	int ret = 0;
242162306a36Sopenharmony_ci
242262306a36Sopenharmony_ci	if (btrfs_is_testing(fs_info))
242362306a36Sopenharmony_ci		return 0;
242462306a36Sopenharmony_ci
242562306a36Sopenharmony_ci	ref_root = btrfs_header_owner(buf);
242662306a36Sopenharmony_ci	nritems = btrfs_header_nritems(buf);
242762306a36Sopenharmony_ci	level = btrfs_header_level(buf);
242862306a36Sopenharmony_ci
242962306a36Sopenharmony_ci	if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && level == 0)
243062306a36Sopenharmony_ci		return 0;
243162306a36Sopenharmony_ci
243262306a36Sopenharmony_ci	if (full_backref)
243362306a36Sopenharmony_ci		parent = buf->start;
243462306a36Sopenharmony_ci	else
243562306a36Sopenharmony_ci		parent = 0;
243662306a36Sopenharmony_ci	if (inc)
243762306a36Sopenharmony_ci		action = BTRFS_ADD_DELAYED_REF;
243862306a36Sopenharmony_ci	else
243962306a36Sopenharmony_ci		action = BTRFS_DROP_DELAYED_REF;
244062306a36Sopenharmony_ci
244162306a36Sopenharmony_ci	for (i = 0; i < nritems; i++) {
244262306a36Sopenharmony_ci		if (level == 0) {
244362306a36Sopenharmony_ci			btrfs_item_key_to_cpu(buf, &key, i);
244462306a36Sopenharmony_ci			if (key.type != BTRFS_EXTENT_DATA_KEY)
244562306a36Sopenharmony_ci				continue;
244662306a36Sopenharmony_ci			fi = btrfs_item_ptr(buf, i,
244762306a36Sopenharmony_ci					    struct btrfs_file_extent_item);
244862306a36Sopenharmony_ci			if (btrfs_file_extent_type(buf, fi) ==
244962306a36Sopenharmony_ci			    BTRFS_FILE_EXTENT_INLINE)
245062306a36Sopenharmony_ci				continue;
245162306a36Sopenharmony_ci			bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
245262306a36Sopenharmony_ci			if (bytenr == 0)
245362306a36Sopenharmony_ci				continue;
245462306a36Sopenharmony_ci
245562306a36Sopenharmony_ci			num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi);
245662306a36Sopenharmony_ci			key.offset -= btrfs_file_extent_offset(buf, fi);
245762306a36Sopenharmony_ci			btrfs_init_generic_ref(&generic_ref, action, bytenr,
245862306a36Sopenharmony_ci					       num_bytes, parent);
245962306a36Sopenharmony_ci			btrfs_init_data_ref(&generic_ref, ref_root, key.objectid,
246062306a36Sopenharmony_ci					    key.offset, root->root_key.objectid,
246162306a36Sopenharmony_ci					    for_reloc);
246262306a36Sopenharmony_ci			if (inc)
246362306a36Sopenharmony_ci				ret = btrfs_inc_extent_ref(trans, &generic_ref);
246462306a36Sopenharmony_ci			else
246562306a36Sopenharmony_ci				ret = btrfs_free_extent(trans, &generic_ref);
246662306a36Sopenharmony_ci			if (ret)
246762306a36Sopenharmony_ci				goto fail;
246862306a36Sopenharmony_ci		} else {
246962306a36Sopenharmony_ci			bytenr = btrfs_node_blockptr(buf, i);
247062306a36Sopenharmony_ci			num_bytes = fs_info->nodesize;
247162306a36Sopenharmony_ci			btrfs_init_generic_ref(&generic_ref, action, bytenr,
247262306a36Sopenharmony_ci					       num_bytes, parent);
247362306a36Sopenharmony_ci			btrfs_init_tree_ref(&generic_ref, level - 1, ref_root,
247462306a36Sopenharmony_ci					    root->root_key.objectid, for_reloc);
247562306a36Sopenharmony_ci			if (inc)
247662306a36Sopenharmony_ci				ret = btrfs_inc_extent_ref(trans, &generic_ref);
247762306a36Sopenharmony_ci			else
247862306a36Sopenharmony_ci				ret = btrfs_free_extent(trans, &generic_ref);
247962306a36Sopenharmony_ci			if (ret)
248062306a36Sopenharmony_ci				goto fail;
248162306a36Sopenharmony_ci		}
248262306a36Sopenharmony_ci	}
248362306a36Sopenharmony_ci	return 0;
248462306a36Sopenharmony_cifail:
248562306a36Sopenharmony_ci	return ret;
248662306a36Sopenharmony_ci}
248762306a36Sopenharmony_ci
/*
 * Add one reference for everything @buf points to; see __btrfs_mod_ref()
 * for the meaning of @full_backref.
 */
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	const int inc = 1;

	return __btrfs_mod_ref(trans, root, buf, full_backref, inc);
}
249362306a36Sopenharmony_ci
/*
 * Drop one reference for everything @buf points to; see __btrfs_mod_ref()
 * for the meaning of @full_backref.
 */
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf, int full_backref)
{
	const int inc = 0;

	return __btrfs_mod_ref(trans, root, buf, full_backref, inc);
}
249962306a36Sopenharmony_ci
250062306a36Sopenharmony_cistatic u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
250162306a36Sopenharmony_ci{
250262306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
250362306a36Sopenharmony_ci	u64 flags;
250462306a36Sopenharmony_ci	u64 ret;
250562306a36Sopenharmony_ci
250662306a36Sopenharmony_ci	if (data)
250762306a36Sopenharmony_ci		flags = BTRFS_BLOCK_GROUP_DATA;
250862306a36Sopenharmony_ci	else if (root == fs_info->chunk_root)
250962306a36Sopenharmony_ci		flags = BTRFS_BLOCK_GROUP_SYSTEM;
251062306a36Sopenharmony_ci	else
251162306a36Sopenharmony_ci		flags = BTRFS_BLOCK_GROUP_METADATA;
251262306a36Sopenharmony_ci
251362306a36Sopenharmony_ci	ret = btrfs_get_alloc_profile(fs_info, flags);
251462306a36Sopenharmony_ci	return ret;
251562306a36Sopenharmony_ci}
251662306a36Sopenharmony_ci
251762306a36Sopenharmony_cistatic u64 first_logical_byte(struct btrfs_fs_info *fs_info)
251862306a36Sopenharmony_ci{
251962306a36Sopenharmony_ci	struct rb_node *leftmost;
252062306a36Sopenharmony_ci	u64 bytenr = 0;
252162306a36Sopenharmony_ci
252262306a36Sopenharmony_ci	read_lock(&fs_info->block_group_cache_lock);
252362306a36Sopenharmony_ci	/* Get the block group with the lowest logical start address. */
252462306a36Sopenharmony_ci	leftmost = rb_first_cached(&fs_info->block_group_cache_tree);
252562306a36Sopenharmony_ci	if (leftmost) {
252662306a36Sopenharmony_ci		struct btrfs_block_group *bg;
252762306a36Sopenharmony_ci
252862306a36Sopenharmony_ci		bg = rb_entry(leftmost, struct btrfs_block_group, cache_node);
252962306a36Sopenharmony_ci		bytenr = bg->start;
253062306a36Sopenharmony_ci	}
253162306a36Sopenharmony_ci	read_unlock(&fs_info->block_group_cache_lock);
253262306a36Sopenharmony_ci
253362306a36Sopenharmony_ci	return bytenr;
253462306a36Sopenharmony_ci}
253562306a36Sopenharmony_ci
253662306a36Sopenharmony_cistatic int pin_down_extent(struct btrfs_trans_handle *trans,
253762306a36Sopenharmony_ci			   struct btrfs_block_group *cache,
253862306a36Sopenharmony_ci			   u64 bytenr, u64 num_bytes, int reserved)
253962306a36Sopenharmony_ci{
254062306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = cache->fs_info;
254162306a36Sopenharmony_ci
254262306a36Sopenharmony_ci	spin_lock(&cache->space_info->lock);
254362306a36Sopenharmony_ci	spin_lock(&cache->lock);
254462306a36Sopenharmony_ci	cache->pinned += num_bytes;
254562306a36Sopenharmony_ci	btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
254662306a36Sopenharmony_ci					     num_bytes);
254762306a36Sopenharmony_ci	if (reserved) {
254862306a36Sopenharmony_ci		cache->reserved -= num_bytes;
254962306a36Sopenharmony_ci		cache->space_info->bytes_reserved -= num_bytes;
255062306a36Sopenharmony_ci	}
255162306a36Sopenharmony_ci	spin_unlock(&cache->lock);
255262306a36Sopenharmony_ci	spin_unlock(&cache->space_info->lock);
255362306a36Sopenharmony_ci
255462306a36Sopenharmony_ci	set_extent_bit(&trans->transaction->pinned_extents, bytenr,
255562306a36Sopenharmony_ci		       bytenr + num_bytes - 1, EXTENT_DIRTY, NULL);
255662306a36Sopenharmony_ci	return 0;
255762306a36Sopenharmony_ci}
255862306a36Sopenharmony_ci
255962306a36Sopenharmony_ciint btrfs_pin_extent(struct btrfs_trans_handle *trans,
256062306a36Sopenharmony_ci		     u64 bytenr, u64 num_bytes, int reserved)
256162306a36Sopenharmony_ci{
256262306a36Sopenharmony_ci	struct btrfs_block_group *cache;
256362306a36Sopenharmony_ci
256462306a36Sopenharmony_ci	cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
256562306a36Sopenharmony_ci	BUG_ON(!cache); /* Logic error */
256662306a36Sopenharmony_ci
256762306a36Sopenharmony_ci	pin_down_extent(trans, cache, bytenr, num_bytes, reserved);
256862306a36Sopenharmony_ci
256962306a36Sopenharmony_ci	btrfs_put_block_group(cache);
257062306a36Sopenharmony_ci	return 0;
257162306a36Sopenharmony_ci}
257262306a36Sopenharmony_ci
257362306a36Sopenharmony_ci/*
257462306a36Sopenharmony_ci * this function must be called within transaction
257562306a36Sopenharmony_ci */
257662306a36Sopenharmony_ciint btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
257762306a36Sopenharmony_ci				    u64 bytenr, u64 num_bytes)
257862306a36Sopenharmony_ci{
257962306a36Sopenharmony_ci	struct btrfs_block_group *cache;
258062306a36Sopenharmony_ci	int ret;
258162306a36Sopenharmony_ci
258262306a36Sopenharmony_ci	cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
258362306a36Sopenharmony_ci	if (!cache)
258462306a36Sopenharmony_ci		return -EINVAL;
258562306a36Sopenharmony_ci
258662306a36Sopenharmony_ci	/*
258762306a36Sopenharmony_ci	 * Fully cache the free space first so that our pin removes the free space
258862306a36Sopenharmony_ci	 * from the cache.
258962306a36Sopenharmony_ci	 */
259062306a36Sopenharmony_ci	ret = btrfs_cache_block_group(cache, true);
259162306a36Sopenharmony_ci	if (ret)
259262306a36Sopenharmony_ci		goto out;
259362306a36Sopenharmony_ci
259462306a36Sopenharmony_ci	pin_down_extent(trans, cache, bytenr, num_bytes, 0);
259562306a36Sopenharmony_ci
259662306a36Sopenharmony_ci	/* remove us from the free space cache (if we're there at all) */
259762306a36Sopenharmony_ci	ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
259862306a36Sopenharmony_ciout:
259962306a36Sopenharmony_ci	btrfs_put_block_group(cache);
260062306a36Sopenharmony_ci	return ret;
260162306a36Sopenharmony_ci}
260262306a36Sopenharmony_ci
260362306a36Sopenharmony_cistatic int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
260462306a36Sopenharmony_ci				   u64 start, u64 num_bytes)
260562306a36Sopenharmony_ci{
260662306a36Sopenharmony_ci	int ret;
260762306a36Sopenharmony_ci	struct btrfs_block_group *block_group;
260862306a36Sopenharmony_ci
260962306a36Sopenharmony_ci	block_group = btrfs_lookup_block_group(fs_info, start);
261062306a36Sopenharmony_ci	if (!block_group)
261162306a36Sopenharmony_ci		return -EINVAL;
261262306a36Sopenharmony_ci
261362306a36Sopenharmony_ci	ret = btrfs_cache_block_group(block_group, true);
261462306a36Sopenharmony_ci	if (ret)
261562306a36Sopenharmony_ci		goto out;
261662306a36Sopenharmony_ci
261762306a36Sopenharmony_ci	ret = btrfs_remove_free_space(block_group, start, num_bytes);
261862306a36Sopenharmony_ciout:
261962306a36Sopenharmony_ci	btrfs_put_block_group(block_group);
262062306a36Sopenharmony_ci	return ret;
262162306a36Sopenharmony_ci}
262262306a36Sopenharmony_ci
262362306a36Sopenharmony_ciint btrfs_exclude_logged_extents(struct extent_buffer *eb)
262462306a36Sopenharmony_ci{
262562306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = eb->fs_info;
262662306a36Sopenharmony_ci	struct btrfs_file_extent_item *item;
262762306a36Sopenharmony_ci	struct btrfs_key key;
262862306a36Sopenharmony_ci	int found_type;
262962306a36Sopenharmony_ci	int i;
263062306a36Sopenharmony_ci	int ret = 0;
263162306a36Sopenharmony_ci
263262306a36Sopenharmony_ci	if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS))
263362306a36Sopenharmony_ci		return 0;
263462306a36Sopenharmony_ci
263562306a36Sopenharmony_ci	for (i = 0; i < btrfs_header_nritems(eb); i++) {
263662306a36Sopenharmony_ci		btrfs_item_key_to_cpu(eb, &key, i);
263762306a36Sopenharmony_ci		if (key.type != BTRFS_EXTENT_DATA_KEY)
263862306a36Sopenharmony_ci			continue;
263962306a36Sopenharmony_ci		item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
264062306a36Sopenharmony_ci		found_type = btrfs_file_extent_type(eb, item);
264162306a36Sopenharmony_ci		if (found_type == BTRFS_FILE_EXTENT_INLINE)
264262306a36Sopenharmony_ci			continue;
264362306a36Sopenharmony_ci		if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
264462306a36Sopenharmony_ci			continue;
264562306a36Sopenharmony_ci		key.objectid = btrfs_file_extent_disk_bytenr(eb, item);
264662306a36Sopenharmony_ci		key.offset = btrfs_file_extent_disk_num_bytes(eb, item);
264762306a36Sopenharmony_ci		ret = __exclude_logged_extent(fs_info, key.objectid, key.offset);
264862306a36Sopenharmony_ci		if (ret)
264962306a36Sopenharmony_ci			break;
265062306a36Sopenharmony_ci	}
265162306a36Sopenharmony_ci
265262306a36Sopenharmony_ci	return ret;
265362306a36Sopenharmony_ci}
265462306a36Sopenharmony_ci
265562306a36Sopenharmony_cistatic void
265662306a36Sopenharmony_cibtrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
265762306a36Sopenharmony_ci{
265862306a36Sopenharmony_ci	atomic_inc(&bg->reservations);
265962306a36Sopenharmony_ci}
266062306a36Sopenharmony_ci
266162306a36Sopenharmony_ci/*
266262306a36Sopenharmony_ci * Returns the free cluster for the given space info and sets empty_cluster to
266362306a36Sopenharmony_ci * what it should be based on the mount options.
266462306a36Sopenharmony_ci */
266562306a36Sopenharmony_cistatic struct btrfs_free_cluster *
266662306a36Sopenharmony_cifetch_cluster_info(struct btrfs_fs_info *fs_info,
266762306a36Sopenharmony_ci		   struct btrfs_space_info *space_info, u64 *empty_cluster)
266862306a36Sopenharmony_ci{
266962306a36Sopenharmony_ci	struct btrfs_free_cluster *ret = NULL;
267062306a36Sopenharmony_ci
267162306a36Sopenharmony_ci	*empty_cluster = 0;
267262306a36Sopenharmony_ci	if (btrfs_mixed_space_info(space_info))
267362306a36Sopenharmony_ci		return ret;
267462306a36Sopenharmony_ci
267562306a36Sopenharmony_ci	if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
267662306a36Sopenharmony_ci		ret = &fs_info->meta_alloc_cluster;
267762306a36Sopenharmony_ci		if (btrfs_test_opt(fs_info, SSD))
267862306a36Sopenharmony_ci			*empty_cluster = SZ_2M;
267962306a36Sopenharmony_ci		else
268062306a36Sopenharmony_ci			*empty_cluster = SZ_64K;
268162306a36Sopenharmony_ci	} else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) &&
268262306a36Sopenharmony_ci		   btrfs_test_opt(fs_info, SSD_SPREAD)) {
268362306a36Sopenharmony_ci		*empty_cluster = SZ_2M;
268462306a36Sopenharmony_ci		ret = &fs_info->data_alloc_cluster;
268562306a36Sopenharmony_ci	}
268662306a36Sopenharmony_ci
268762306a36Sopenharmony_ci	return ret;
268862306a36Sopenharmony_ci}
268962306a36Sopenharmony_ci
269062306a36Sopenharmony_cistatic int unpin_extent_range(struct btrfs_fs_info *fs_info,
269162306a36Sopenharmony_ci			      u64 start, u64 end,
269262306a36Sopenharmony_ci			      const bool return_free_space)
269362306a36Sopenharmony_ci{
269462306a36Sopenharmony_ci	struct btrfs_block_group *cache = NULL;
269562306a36Sopenharmony_ci	struct btrfs_space_info *space_info;
269662306a36Sopenharmony_ci	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
269762306a36Sopenharmony_ci	struct btrfs_free_cluster *cluster = NULL;
269862306a36Sopenharmony_ci	u64 len;
269962306a36Sopenharmony_ci	u64 total_unpinned = 0;
270062306a36Sopenharmony_ci	u64 empty_cluster = 0;
270162306a36Sopenharmony_ci	bool readonly;
270262306a36Sopenharmony_ci
270362306a36Sopenharmony_ci	while (start <= end) {
270462306a36Sopenharmony_ci		readonly = false;
270562306a36Sopenharmony_ci		if (!cache ||
270662306a36Sopenharmony_ci		    start >= cache->start + cache->length) {
270762306a36Sopenharmony_ci			if (cache)
270862306a36Sopenharmony_ci				btrfs_put_block_group(cache);
270962306a36Sopenharmony_ci			total_unpinned = 0;
271062306a36Sopenharmony_ci			cache = btrfs_lookup_block_group(fs_info, start);
271162306a36Sopenharmony_ci			BUG_ON(!cache); /* Logic error */
271262306a36Sopenharmony_ci
271362306a36Sopenharmony_ci			cluster = fetch_cluster_info(fs_info,
271462306a36Sopenharmony_ci						     cache->space_info,
271562306a36Sopenharmony_ci						     &empty_cluster);
271662306a36Sopenharmony_ci			empty_cluster <<= 1;
271762306a36Sopenharmony_ci		}
271862306a36Sopenharmony_ci
271962306a36Sopenharmony_ci		len = cache->start + cache->length - start;
272062306a36Sopenharmony_ci		len = min(len, end + 1 - start);
272162306a36Sopenharmony_ci
272262306a36Sopenharmony_ci		if (return_free_space)
272362306a36Sopenharmony_ci			btrfs_add_free_space(cache, start, len);
272462306a36Sopenharmony_ci
272562306a36Sopenharmony_ci		start += len;
272662306a36Sopenharmony_ci		total_unpinned += len;
272762306a36Sopenharmony_ci		space_info = cache->space_info;
272862306a36Sopenharmony_ci
272962306a36Sopenharmony_ci		/*
273062306a36Sopenharmony_ci		 * If this space cluster has been marked as fragmented and we've
273162306a36Sopenharmony_ci		 * unpinned enough in this block group to potentially allow a
273262306a36Sopenharmony_ci		 * cluster to be created inside of it go ahead and clear the
273362306a36Sopenharmony_ci		 * fragmented check.
273462306a36Sopenharmony_ci		 */
273562306a36Sopenharmony_ci		if (cluster && cluster->fragmented &&
273662306a36Sopenharmony_ci		    total_unpinned > empty_cluster) {
273762306a36Sopenharmony_ci			spin_lock(&cluster->lock);
273862306a36Sopenharmony_ci			cluster->fragmented = 0;
273962306a36Sopenharmony_ci			spin_unlock(&cluster->lock);
274062306a36Sopenharmony_ci		}
274162306a36Sopenharmony_ci
274262306a36Sopenharmony_ci		spin_lock(&space_info->lock);
274362306a36Sopenharmony_ci		spin_lock(&cache->lock);
274462306a36Sopenharmony_ci		cache->pinned -= len;
274562306a36Sopenharmony_ci		btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
274662306a36Sopenharmony_ci		space_info->max_extent_size = 0;
274762306a36Sopenharmony_ci		if (cache->ro) {
274862306a36Sopenharmony_ci			space_info->bytes_readonly += len;
274962306a36Sopenharmony_ci			readonly = true;
275062306a36Sopenharmony_ci		} else if (btrfs_is_zoned(fs_info)) {
275162306a36Sopenharmony_ci			/* Need reset before reusing in a zoned block group */
275262306a36Sopenharmony_ci			space_info->bytes_zone_unusable += len;
275362306a36Sopenharmony_ci			readonly = true;
275462306a36Sopenharmony_ci		}
275562306a36Sopenharmony_ci		spin_unlock(&cache->lock);
275662306a36Sopenharmony_ci		if (!readonly && return_free_space &&
275762306a36Sopenharmony_ci		    global_rsv->space_info == space_info) {
275862306a36Sopenharmony_ci			spin_lock(&global_rsv->lock);
275962306a36Sopenharmony_ci			if (!global_rsv->full) {
276062306a36Sopenharmony_ci				u64 to_add = min(len, global_rsv->size -
276162306a36Sopenharmony_ci						      global_rsv->reserved);
276262306a36Sopenharmony_ci
276362306a36Sopenharmony_ci				global_rsv->reserved += to_add;
276462306a36Sopenharmony_ci				btrfs_space_info_update_bytes_may_use(fs_info,
276562306a36Sopenharmony_ci						space_info, to_add);
276662306a36Sopenharmony_ci				if (global_rsv->reserved >= global_rsv->size)
276762306a36Sopenharmony_ci					global_rsv->full = 1;
276862306a36Sopenharmony_ci				len -= to_add;
276962306a36Sopenharmony_ci			}
277062306a36Sopenharmony_ci			spin_unlock(&global_rsv->lock);
277162306a36Sopenharmony_ci		}
277262306a36Sopenharmony_ci		/* Add to any tickets we may have */
277362306a36Sopenharmony_ci		if (!readonly && return_free_space && len)
277462306a36Sopenharmony_ci			btrfs_try_granting_tickets(fs_info, space_info);
277562306a36Sopenharmony_ci		spin_unlock(&space_info->lock);
277662306a36Sopenharmony_ci	}
277762306a36Sopenharmony_ci
277862306a36Sopenharmony_ci	if (cache)
277962306a36Sopenharmony_ci		btrfs_put_block_group(cache);
278062306a36Sopenharmony_ci	return 0;
278162306a36Sopenharmony_ci}
278262306a36Sopenharmony_ci
278362306a36Sopenharmony_ciint btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
278462306a36Sopenharmony_ci{
278562306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
278662306a36Sopenharmony_ci	struct btrfs_block_group *block_group, *tmp;
278762306a36Sopenharmony_ci	struct list_head *deleted_bgs;
278862306a36Sopenharmony_ci	struct extent_io_tree *unpin;
278962306a36Sopenharmony_ci	u64 start;
279062306a36Sopenharmony_ci	u64 end;
279162306a36Sopenharmony_ci	int ret;
279262306a36Sopenharmony_ci
279362306a36Sopenharmony_ci	unpin = &trans->transaction->pinned_extents;
279462306a36Sopenharmony_ci
279562306a36Sopenharmony_ci	while (!TRANS_ABORTED(trans)) {
279662306a36Sopenharmony_ci		struct extent_state *cached_state = NULL;
279762306a36Sopenharmony_ci
279862306a36Sopenharmony_ci		mutex_lock(&fs_info->unused_bg_unpin_mutex);
279962306a36Sopenharmony_ci		if (!find_first_extent_bit(unpin, 0, &start, &end,
280062306a36Sopenharmony_ci					   EXTENT_DIRTY, &cached_state)) {
280162306a36Sopenharmony_ci			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
280262306a36Sopenharmony_ci			break;
280362306a36Sopenharmony_ci		}
280462306a36Sopenharmony_ci
280562306a36Sopenharmony_ci		if (btrfs_test_opt(fs_info, DISCARD_SYNC))
280662306a36Sopenharmony_ci			ret = btrfs_discard_extent(fs_info, start,
280762306a36Sopenharmony_ci						   end + 1 - start, NULL);
280862306a36Sopenharmony_ci
280962306a36Sopenharmony_ci		clear_extent_dirty(unpin, start, end, &cached_state);
281062306a36Sopenharmony_ci		unpin_extent_range(fs_info, start, end, true);
281162306a36Sopenharmony_ci		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
281262306a36Sopenharmony_ci		free_extent_state(cached_state);
281362306a36Sopenharmony_ci		cond_resched();
281462306a36Sopenharmony_ci	}
281562306a36Sopenharmony_ci
281662306a36Sopenharmony_ci	if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
281762306a36Sopenharmony_ci		btrfs_discard_calc_delay(&fs_info->discard_ctl);
281862306a36Sopenharmony_ci		btrfs_discard_schedule_work(&fs_info->discard_ctl, true);
281962306a36Sopenharmony_ci	}
282062306a36Sopenharmony_ci
282162306a36Sopenharmony_ci	/*
282262306a36Sopenharmony_ci	 * Transaction is finished.  We don't need the lock anymore.  We
282362306a36Sopenharmony_ci	 * do need to clean up the block groups in case of a transaction
282462306a36Sopenharmony_ci	 * abort.
282562306a36Sopenharmony_ci	 */
282662306a36Sopenharmony_ci	deleted_bgs = &trans->transaction->deleted_bgs;
282762306a36Sopenharmony_ci	list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
282862306a36Sopenharmony_ci		u64 trimmed = 0;
282962306a36Sopenharmony_ci
283062306a36Sopenharmony_ci		ret = -EROFS;
283162306a36Sopenharmony_ci		if (!TRANS_ABORTED(trans))
283262306a36Sopenharmony_ci			ret = btrfs_discard_extent(fs_info,
283362306a36Sopenharmony_ci						   block_group->start,
283462306a36Sopenharmony_ci						   block_group->length,
283562306a36Sopenharmony_ci						   &trimmed);
283662306a36Sopenharmony_ci
283762306a36Sopenharmony_ci		list_del_init(&block_group->bg_list);
283862306a36Sopenharmony_ci		btrfs_unfreeze_block_group(block_group);
283962306a36Sopenharmony_ci		btrfs_put_block_group(block_group);
284062306a36Sopenharmony_ci
284162306a36Sopenharmony_ci		if (ret) {
284262306a36Sopenharmony_ci			const char *errstr = btrfs_decode_error(ret);
284362306a36Sopenharmony_ci			btrfs_warn(fs_info,
284462306a36Sopenharmony_ci			   "discard failed while removing blockgroup: errno=%d %s",
284562306a36Sopenharmony_ci				   ret, errstr);
284662306a36Sopenharmony_ci		}
284762306a36Sopenharmony_ci	}
284862306a36Sopenharmony_ci
284962306a36Sopenharmony_ci	return 0;
285062306a36Sopenharmony_ci}
285162306a36Sopenharmony_ci
285262306a36Sopenharmony_cistatic int do_free_extent_accounting(struct btrfs_trans_handle *trans,
285362306a36Sopenharmony_ci				     u64 bytenr, u64 num_bytes, bool is_data)
285462306a36Sopenharmony_ci{
285562306a36Sopenharmony_ci	int ret;
285662306a36Sopenharmony_ci
285762306a36Sopenharmony_ci	if (is_data) {
285862306a36Sopenharmony_ci		struct btrfs_root *csum_root;
285962306a36Sopenharmony_ci
286062306a36Sopenharmony_ci		csum_root = btrfs_csum_root(trans->fs_info, bytenr);
286162306a36Sopenharmony_ci		ret = btrfs_del_csums(trans, csum_root, bytenr, num_bytes);
286262306a36Sopenharmony_ci		if (ret) {
286362306a36Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
286462306a36Sopenharmony_ci			return ret;
286562306a36Sopenharmony_ci		}
286662306a36Sopenharmony_ci	}
286762306a36Sopenharmony_ci
286862306a36Sopenharmony_ci	ret = add_to_free_space_tree(trans, bytenr, num_bytes);
286962306a36Sopenharmony_ci	if (ret) {
287062306a36Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
287162306a36Sopenharmony_ci		return ret;
287262306a36Sopenharmony_ci	}
287362306a36Sopenharmony_ci
287462306a36Sopenharmony_ci	ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
287562306a36Sopenharmony_ci	if (ret)
287662306a36Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
287762306a36Sopenharmony_ci
287862306a36Sopenharmony_ci	return ret;
287962306a36Sopenharmony_ci}
288062306a36Sopenharmony_ci
/*
 * Abort the transaction with -EUCLEAN, print the leaf at level 0 of @path and
 * log a critical message built from @fmt and the trailing arguments.
 *
 * @trans and @path are parenthesized in the expansion so that any pointer
 * expression (for example &local) can be passed, not only a plain identifier.
 * Note that @trans is evaluated twice.
 */
#define abort_and_dump(trans, path, fmt, args...)	\
({							\
	btrfs_abort_transaction((trans), -EUCLEAN);	\
	btrfs_print_leaf((path)->nodes[0]);		\
	btrfs_crit((trans)->fs_info, fmt, ##args);	\
})
288762306a36Sopenharmony_ci
288862306a36Sopenharmony_ci/*
288962306a36Sopenharmony_ci * Drop one or more refs of @node.
289062306a36Sopenharmony_ci *
289162306a36Sopenharmony_ci * 1. Locate the extent refs.
289262306a36Sopenharmony_ci *    It's either inline in EXTENT/METADATA_ITEM or in keyed SHARED_* item.
289362306a36Sopenharmony_ci *    Locate it, then reduce the refs number or remove the ref line completely.
289462306a36Sopenharmony_ci *
289562306a36Sopenharmony_ci * 2. Update the refs count in EXTENT/METADATA_ITEM
289662306a36Sopenharmony_ci *
289762306a36Sopenharmony_ci * Inline backref case:
289862306a36Sopenharmony_ci *
289962306a36Sopenharmony_ci * in extent tree we have:
290062306a36Sopenharmony_ci *
290162306a36Sopenharmony_ci * 	item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82
290262306a36Sopenharmony_ci *		refs 2 gen 6 flags DATA
290362306a36Sopenharmony_ci *		extent data backref root FS_TREE objectid 258 offset 0 count 1
290462306a36Sopenharmony_ci *		extent data backref root FS_TREE objectid 257 offset 0 count 1
290562306a36Sopenharmony_ci *
290662306a36Sopenharmony_ci * This function gets called with:
290762306a36Sopenharmony_ci *
290862306a36Sopenharmony_ci *    node->bytenr = 13631488
290962306a36Sopenharmony_ci *    node->num_bytes = 1048576
291062306a36Sopenharmony_ci *    root_objectid = FS_TREE
291162306a36Sopenharmony_ci *    owner_objectid = 257
291262306a36Sopenharmony_ci *    owner_offset = 0
291362306a36Sopenharmony_ci *    refs_to_drop = 1
291462306a36Sopenharmony_ci *
 * Then we should get something like:
291662306a36Sopenharmony_ci *
291762306a36Sopenharmony_ci * 	item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82
291862306a36Sopenharmony_ci *		refs 1 gen 6 flags DATA
291962306a36Sopenharmony_ci *		extent data backref root FS_TREE objectid 258 offset 0 count 1
292062306a36Sopenharmony_ci *
292162306a36Sopenharmony_ci * Keyed backref case:
292262306a36Sopenharmony_ci *
292362306a36Sopenharmony_ci * in extent tree we have:
292462306a36Sopenharmony_ci *
292562306a36Sopenharmony_ci *	item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24
292662306a36Sopenharmony_ci *		refs 754 gen 6 flags DATA
292762306a36Sopenharmony_ci *	[...]
292862306a36Sopenharmony_ci *	item 2 key (13631488 EXTENT_DATA_REF <HASH>) itemoff 3915 itemsize 28
292962306a36Sopenharmony_ci *		extent data backref root FS_TREE objectid 866 offset 0 count 1
293062306a36Sopenharmony_ci *
 * This function gets called with:
293262306a36Sopenharmony_ci *
293362306a36Sopenharmony_ci *    node->bytenr = 13631488
293462306a36Sopenharmony_ci *    node->num_bytes = 1048576
293562306a36Sopenharmony_ci *    root_objectid = FS_TREE
293662306a36Sopenharmony_ci *    owner_objectid = 866
293762306a36Sopenharmony_ci *    owner_offset = 0
293862306a36Sopenharmony_ci *    refs_to_drop = 1
293962306a36Sopenharmony_ci *
 * Then we should get something like:
294162306a36Sopenharmony_ci *
294262306a36Sopenharmony_ci *	item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24
294362306a36Sopenharmony_ci *		refs 753 gen 6 flags DATA
294462306a36Sopenharmony_ci *
294562306a36Sopenharmony_ci * And that (13631488 EXTENT_DATA_REF <HASH>) gets removed.
294662306a36Sopenharmony_ci */
294762306a36Sopenharmony_cistatic int __btrfs_free_extent(struct btrfs_trans_handle *trans,
294862306a36Sopenharmony_ci			       struct btrfs_delayed_ref_node *node, u64 parent,
294962306a36Sopenharmony_ci			       u64 root_objectid, u64 owner_objectid,
295062306a36Sopenharmony_ci			       u64 owner_offset, int refs_to_drop,
295162306a36Sopenharmony_ci			       struct btrfs_delayed_extent_op *extent_op)
295262306a36Sopenharmony_ci{
295362306a36Sopenharmony_ci	struct btrfs_fs_info *info = trans->fs_info;
295462306a36Sopenharmony_ci	struct btrfs_key key;
295562306a36Sopenharmony_ci	struct btrfs_path *path;
295662306a36Sopenharmony_ci	struct btrfs_root *extent_root;
295762306a36Sopenharmony_ci	struct extent_buffer *leaf;
295862306a36Sopenharmony_ci	struct btrfs_extent_item *ei;
295962306a36Sopenharmony_ci	struct btrfs_extent_inline_ref *iref;
296062306a36Sopenharmony_ci	int ret;
296162306a36Sopenharmony_ci	int is_data;
296262306a36Sopenharmony_ci	int extent_slot = 0;
296362306a36Sopenharmony_ci	int found_extent = 0;
296462306a36Sopenharmony_ci	int num_to_del = 1;
296562306a36Sopenharmony_ci	u32 item_size;
296662306a36Sopenharmony_ci	u64 refs;
296762306a36Sopenharmony_ci	u64 bytenr = node->bytenr;
296862306a36Sopenharmony_ci	u64 num_bytes = node->num_bytes;
296962306a36Sopenharmony_ci	bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
297062306a36Sopenharmony_ci
297162306a36Sopenharmony_ci	extent_root = btrfs_extent_root(info, bytenr);
297262306a36Sopenharmony_ci	ASSERT(extent_root);
297362306a36Sopenharmony_ci
297462306a36Sopenharmony_ci	path = btrfs_alloc_path();
297562306a36Sopenharmony_ci	if (!path)
297662306a36Sopenharmony_ci		return -ENOMEM;
297762306a36Sopenharmony_ci
297862306a36Sopenharmony_ci	is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
297962306a36Sopenharmony_ci
298062306a36Sopenharmony_ci	if (!is_data && refs_to_drop != 1) {
298162306a36Sopenharmony_ci		btrfs_crit(info,
298262306a36Sopenharmony_ci"invalid refs_to_drop, dropping more than 1 refs for tree block %llu refs_to_drop %u",
298362306a36Sopenharmony_ci			   node->bytenr, refs_to_drop);
298462306a36Sopenharmony_ci		ret = -EINVAL;
298562306a36Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
298662306a36Sopenharmony_ci		goto out;
298762306a36Sopenharmony_ci	}
298862306a36Sopenharmony_ci
298962306a36Sopenharmony_ci	if (is_data)
299062306a36Sopenharmony_ci		skinny_metadata = false;
299162306a36Sopenharmony_ci
299262306a36Sopenharmony_ci	ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes,
299362306a36Sopenharmony_ci				    parent, root_objectid, owner_objectid,
299462306a36Sopenharmony_ci				    owner_offset);
299562306a36Sopenharmony_ci	if (ret == 0) {
299662306a36Sopenharmony_ci		/*
299762306a36Sopenharmony_ci		 * Either the inline backref or the SHARED_DATA_REF/
299862306a36Sopenharmony_ci		 * SHARED_BLOCK_REF is found
299962306a36Sopenharmony_ci		 *
300062306a36Sopenharmony_ci		 * Here is a quick path to locate EXTENT/METADATA_ITEM.
300162306a36Sopenharmony_ci		 * It's possible the EXTENT/METADATA_ITEM is near current slot.
300262306a36Sopenharmony_ci		 */
300362306a36Sopenharmony_ci		extent_slot = path->slots[0];
300462306a36Sopenharmony_ci		while (extent_slot >= 0) {
300562306a36Sopenharmony_ci			btrfs_item_key_to_cpu(path->nodes[0], &key,
300662306a36Sopenharmony_ci					      extent_slot);
300762306a36Sopenharmony_ci			if (key.objectid != bytenr)
300862306a36Sopenharmony_ci				break;
300962306a36Sopenharmony_ci			if (key.type == BTRFS_EXTENT_ITEM_KEY &&
301062306a36Sopenharmony_ci			    key.offset == num_bytes) {
301162306a36Sopenharmony_ci				found_extent = 1;
301262306a36Sopenharmony_ci				break;
301362306a36Sopenharmony_ci			}
301462306a36Sopenharmony_ci			if (key.type == BTRFS_METADATA_ITEM_KEY &&
301562306a36Sopenharmony_ci			    key.offset == owner_objectid) {
301662306a36Sopenharmony_ci				found_extent = 1;
301762306a36Sopenharmony_ci				break;
301862306a36Sopenharmony_ci			}
301962306a36Sopenharmony_ci
302062306a36Sopenharmony_ci			/* Quick path didn't find the EXTEMT/METADATA_ITEM */
302162306a36Sopenharmony_ci			if (path->slots[0] - extent_slot > 5)
302262306a36Sopenharmony_ci				break;
302362306a36Sopenharmony_ci			extent_slot--;
302462306a36Sopenharmony_ci		}
302562306a36Sopenharmony_ci
302662306a36Sopenharmony_ci		if (!found_extent) {
302762306a36Sopenharmony_ci			if (iref) {
302862306a36Sopenharmony_ci				abort_and_dump(trans, path,
302962306a36Sopenharmony_ci"invalid iref slot %u, no EXTENT/METADATA_ITEM found but has inline extent ref",
303062306a36Sopenharmony_ci					   path->slots[0]);
303162306a36Sopenharmony_ci				ret = -EUCLEAN;
303262306a36Sopenharmony_ci				goto out;
303362306a36Sopenharmony_ci			}
303462306a36Sopenharmony_ci			/* Must be SHARED_* item, remove the backref first */
303562306a36Sopenharmony_ci			ret = remove_extent_backref(trans, extent_root, path,
303662306a36Sopenharmony_ci						    NULL, refs_to_drop, is_data);
303762306a36Sopenharmony_ci			if (ret) {
303862306a36Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
303962306a36Sopenharmony_ci				goto out;
304062306a36Sopenharmony_ci			}
304162306a36Sopenharmony_ci			btrfs_release_path(path);
304262306a36Sopenharmony_ci
304362306a36Sopenharmony_ci			/* Slow path to locate EXTENT/METADATA_ITEM */
304462306a36Sopenharmony_ci			key.objectid = bytenr;
304562306a36Sopenharmony_ci			key.type = BTRFS_EXTENT_ITEM_KEY;
304662306a36Sopenharmony_ci			key.offset = num_bytes;
304762306a36Sopenharmony_ci
304862306a36Sopenharmony_ci			if (!is_data && skinny_metadata) {
304962306a36Sopenharmony_ci				key.type = BTRFS_METADATA_ITEM_KEY;
305062306a36Sopenharmony_ci				key.offset = owner_objectid;
305162306a36Sopenharmony_ci			}
305262306a36Sopenharmony_ci
305362306a36Sopenharmony_ci			ret = btrfs_search_slot(trans, extent_root,
305462306a36Sopenharmony_ci						&key, path, -1, 1);
305562306a36Sopenharmony_ci			if (ret > 0 && skinny_metadata && path->slots[0]) {
305662306a36Sopenharmony_ci				/*
305762306a36Sopenharmony_ci				 * Couldn't find our skinny metadata item,
305862306a36Sopenharmony_ci				 * see if we have ye olde extent item.
305962306a36Sopenharmony_ci				 */
306062306a36Sopenharmony_ci				path->slots[0]--;
306162306a36Sopenharmony_ci				btrfs_item_key_to_cpu(path->nodes[0], &key,
306262306a36Sopenharmony_ci						      path->slots[0]);
306362306a36Sopenharmony_ci				if (key.objectid == bytenr &&
306462306a36Sopenharmony_ci				    key.type == BTRFS_EXTENT_ITEM_KEY &&
306562306a36Sopenharmony_ci				    key.offset == num_bytes)
306662306a36Sopenharmony_ci					ret = 0;
306762306a36Sopenharmony_ci			}
306862306a36Sopenharmony_ci
306962306a36Sopenharmony_ci			if (ret > 0 && skinny_metadata) {
307062306a36Sopenharmony_ci				skinny_metadata = false;
307162306a36Sopenharmony_ci				key.objectid = bytenr;
307262306a36Sopenharmony_ci				key.type = BTRFS_EXTENT_ITEM_KEY;
307362306a36Sopenharmony_ci				key.offset = num_bytes;
307462306a36Sopenharmony_ci				btrfs_release_path(path);
307562306a36Sopenharmony_ci				ret = btrfs_search_slot(trans, extent_root,
307662306a36Sopenharmony_ci							&key, path, -1, 1);
307762306a36Sopenharmony_ci			}
307862306a36Sopenharmony_ci
307962306a36Sopenharmony_ci			if (ret) {
308062306a36Sopenharmony_ci				if (ret > 0)
308162306a36Sopenharmony_ci					btrfs_print_leaf(path->nodes[0]);
308262306a36Sopenharmony_ci				btrfs_err(info,
308362306a36Sopenharmony_ci			"umm, got %d back from search, was looking for %llu, slot %d",
308462306a36Sopenharmony_ci					  ret, bytenr, path->slots[0]);
308562306a36Sopenharmony_ci			}
308662306a36Sopenharmony_ci			if (ret < 0) {
308762306a36Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
308862306a36Sopenharmony_ci				goto out;
308962306a36Sopenharmony_ci			}
309062306a36Sopenharmony_ci			extent_slot = path->slots[0];
309162306a36Sopenharmony_ci		}
309262306a36Sopenharmony_ci	} else if (WARN_ON(ret == -ENOENT)) {
309362306a36Sopenharmony_ci		abort_and_dump(trans, path,
309462306a36Sopenharmony_ci"unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu slot %d",
309562306a36Sopenharmony_ci			       bytenr, parent, root_objectid, owner_objectid,
309662306a36Sopenharmony_ci			       owner_offset, path->slots[0]);
309762306a36Sopenharmony_ci		goto out;
309862306a36Sopenharmony_ci	} else {
309962306a36Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
310062306a36Sopenharmony_ci		goto out;
310162306a36Sopenharmony_ci	}
310262306a36Sopenharmony_ci
310362306a36Sopenharmony_ci	leaf = path->nodes[0];
310462306a36Sopenharmony_ci	item_size = btrfs_item_size(leaf, extent_slot);
310562306a36Sopenharmony_ci	if (unlikely(item_size < sizeof(*ei))) {
310662306a36Sopenharmony_ci		ret = -EUCLEAN;
310762306a36Sopenharmony_ci		btrfs_err(trans->fs_info,
310862306a36Sopenharmony_ci			  "unexpected extent item size, has %u expect >= %zu",
310962306a36Sopenharmony_ci			  item_size, sizeof(*ei));
311062306a36Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
311162306a36Sopenharmony_ci		goto out;
311262306a36Sopenharmony_ci	}
311362306a36Sopenharmony_ci	ei = btrfs_item_ptr(leaf, extent_slot,
311462306a36Sopenharmony_ci			    struct btrfs_extent_item);
311562306a36Sopenharmony_ci	if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
311662306a36Sopenharmony_ci	    key.type == BTRFS_EXTENT_ITEM_KEY) {
311762306a36Sopenharmony_ci		struct btrfs_tree_block_info *bi;
311862306a36Sopenharmony_ci
311962306a36Sopenharmony_ci		if (item_size < sizeof(*ei) + sizeof(*bi)) {
312062306a36Sopenharmony_ci			abort_and_dump(trans, path,
312162306a36Sopenharmony_ci"invalid extent item size for key (%llu, %u, %llu) slot %u owner %llu, has %u expect >= %zu",
312262306a36Sopenharmony_ci				       key.objectid, key.type, key.offset,
312362306a36Sopenharmony_ci				       path->slots[0], owner_objectid, item_size,
312462306a36Sopenharmony_ci				       sizeof(*ei) + sizeof(*bi));
312562306a36Sopenharmony_ci			ret = -EUCLEAN;
312662306a36Sopenharmony_ci			goto out;
312762306a36Sopenharmony_ci		}
312862306a36Sopenharmony_ci		bi = (struct btrfs_tree_block_info *)(ei + 1);
312962306a36Sopenharmony_ci		WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
313062306a36Sopenharmony_ci	}
313162306a36Sopenharmony_ci
313262306a36Sopenharmony_ci	refs = btrfs_extent_refs(leaf, ei);
313362306a36Sopenharmony_ci	if (refs < refs_to_drop) {
313462306a36Sopenharmony_ci		abort_and_dump(trans, path,
313562306a36Sopenharmony_ci		"trying to drop %d refs but we only have %llu for bytenr %llu slot %u",
313662306a36Sopenharmony_ci			       refs_to_drop, refs, bytenr, path->slots[0]);
313762306a36Sopenharmony_ci		ret = -EUCLEAN;
313862306a36Sopenharmony_ci		goto out;
313962306a36Sopenharmony_ci	}
314062306a36Sopenharmony_ci	refs -= refs_to_drop;
314162306a36Sopenharmony_ci
314262306a36Sopenharmony_ci	if (refs > 0) {
314362306a36Sopenharmony_ci		if (extent_op)
314462306a36Sopenharmony_ci			__run_delayed_extent_op(extent_op, leaf, ei);
314562306a36Sopenharmony_ci		/*
314662306a36Sopenharmony_ci		 * In the case of inline back ref, reference count will
314762306a36Sopenharmony_ci		 * be updated by remove_extent_backref
314862306a36Sopenharmony_ci		 */
314962306a36Sopenharmony_ci		if (iref) {
315062306a36Sopenharmony_ci			if (!found_extent) {
315162306a36Sopenharmony_ci				abort_and_dump(trans, path,
315262306a36Sopenharmony_ci"invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found, slot %u",
315362306a36Sopenharmony_ci					       path->slots[0]);
315462306a36Sopenharmony_ci				ret = -EUCLEAN;
315562306a36Sopenharmony_ci				goto out;
315662306a36Sopenharmony_ci			}
315762306a36Sopenharmony_ci		} else {
315862306a36Sopenharmony_ci			btrfs_set_extent_refs(leaf, ei, refs);
315962306a36Sopenharmony_ci			btrfs_mark_buffer_dirty(trans, leaf);
316062306a36Sopenharmony_ci		}
316162306a36Sopenharmony_ci		if (found_extent) {
316262306a36Sopenharmony_ci			ret = remove_extent_backref(trans, extent_root, path,
316362306a36Sopenharmony_ci						    iref, refs_to_drop, is_data);
316462306a36Sopenharmony_ci			if (ret) {
316562306a36Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
316662306a36Sopenharmony_ci				goto out;
316762306a36Sopenharmony_ci			}
316862306a36Sopenharmony_ci		}
316962306a36Sopenharmony_ci	} else {
317062306a36Sopenharmony_ci		/* In this branch refs == 1 */
317162306a36Sopenharmony_ci		if (found_extent) {
317262306a36Sopenharmony_ci			if (is_data && refs_to_drop !=
317362306a36Sopenharmony_ci			    extent_data_ref_count(path, iref)) {
317462306a36Sopenharmony_ci				abort_and_dump(trans, path,
317562306a36Sopenharmony_ci		"invalid refs_to_drop, current refs %u refs_to_drop %u slot %u",
317662306a36Sopenharmony_ci					       extent_data_ref_count(path, iref),
317762306a36Sopenharmony_ci					       refs_to_drop, path->slots[0]);
317862306a36Sopenharmony_ci				ret = -EUCLEAN;
317962306a36Sopenharmony_ci				goto out;
318062306a36Sopenharmony_ci			}
318162306a36Sopenharmony_ci			if (iref) {
318262306a36Sopenharmony_ci				if (path->slots[0] != extent_slot) {
318362306a36Sopenharmony_ci					abort_and_dump(trans, path,
318462306a36Sopenharmony_ci"invalid iref, extent item key (%llu %u %llu) slot %u doesn't have wanted iref",
318562306a36Sopenharmony_ci						       key.objectid, key.type,
318662306a36Sopenharmony_ci						       key.offset, path->slots[0]);
318762306a36Sopenharmony_ci					ret = -EUCLEAN;
318862306a36Sopenharmony_ci					goto out;
318962306a36Sopenharmony_ci				}
319062306a36Sopenharmony_ci			} else {
319162306a36Sopenharmony_ci				/*
319262306a36Sopenharmony_ci				 * No inline ref, we must be at SHARED_* item,
319362306a36Sopenharmony_ci				 * And it's single ref, it must be:
319462306a36Sopenharmony_ci				 * |	extent_slot	  ||extent_slot + 1|
319562306a36Sopenharmony_ci				 * [ EXTENT/METADATA_ITEM ][ SHARED_* ITEM ]
319662306a36Sopenharmony_ci				 */
319762306a36Sopenharmony_ci				if (path->slots[0] != extent_slot + 1) {
319862306a36Sopenharmony_ci					abort_and_dump(trans, path,
319962306a36Sopenharmony_ci	"invalid SHARED_* item slot %u, previous item is not EXTENT/METADATA_ITEM",
320062306a36Sopenharmony_ci						       path->slots[0]);
320162306a36Sopenharmony_ci					ret = -EUCLEAN;
320262306a36Sopenharmony_ci					goto out;
320362306a36Sopenharmony_ci				}
320462306a36Sopenharmony_ci				path->slots[0] = extent_slot;
320562306a36Sopenharmony_ci				num_to_del = 2;
320662306a36Sopenharmony_ci			}
320762306a36Sopenharmony_ci		}
320862306a36Sopenharmony_ci
320962306a36Sopenharmony_ci		ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
321062306a36Sopenharmony_ci				      num_to_del);
321162306a36Sopenharmony_ci		if (ret) {
321262306a36Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
321362306a36Sopenharmony_ci			goto out;
321462306a36Sopenharmony_ci		}
321562306a36Sopenharmony_ci		btrfs_release_path(path);
321662306a36Sopenharmony_ci
321762306a36Sopenharmony_ci		ret = do_free_extent_accounting(trans, bytenr, num_bytes, is_data);
321862306a36Sopenharmony_ci	}
321962306a36Sopenharmony_ci	btrfs_release_path(path);
322062306a36Sopenharmony_ci
322162306a36Sopenharmony_ciout:
322262306a36Sopenharmony_ci	btrfs_free_path(path);
322362306a36Sopenharmony_ci	return ret;
322462306a36Sopenharmony_ci}
322562306a36Sopenharmony_ci
322662306a36Sopenharmony_ci/*
322762306a36Sopenharmony_ci * when we free an block, it is possible (and likely) that we free the last
322862306a36Sopenharmony_ci * delayed ref for that extent as well.  This searches the delayed ref tree for
322962306a36Sopenharmony_ci * a given extent, and if there are no other delayed refs to be processed, it
323062306a36Sopenharmony_ci * removes it from the tree.
323162306a36Sopenharmony_ci */
323262306a36Sopenharmony_cistatic noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
323362306a36Sopenharmony_ci				      u64 bytenr)
323462306a36Sopenharmony_ci{
323562306a36Sopenharmony_ci	struct btrfs_delayed_ref_head *head;
323662306a36Sopenharmony_ci	struct btrfs_delayed_ref_root *delayed_refs;
323762306a36Sopenharmony_ci	int ret = 0;
323862306a36Sopenharmony_ci
323962306a36Sopenharmony_ci	delayed_refs = &trans->transaction->delayed_refs;
324062306a36Sopenharmony_ci	spin_lock(&delayed_refs->lock);
324162306a36Sopenharmony_ci	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
324262306a36Sopenharmony_ci	if (!head)
324362306a36Sopenharmony_ci		goto out_delayed_unlock;
324462306a36Sopenharmony_ci
324562306a36Sopenharmony_ci	spin_lock(&head->lock);
324662306a36Sopenharmony_ci	if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root))
324762306a36Sopenharmony_ci		goto out;
324862306a36Sopenharmony_ci
324962306a36Sopenharmony_ci	if (cleanup_extent_op(head) != NULL)
325062306a36Sopenharmony_ci		goto out;
325162306a36Sopenharmony_ci
325262306a36Sopenharmony_ci	/*
325362306a36Sopenharmony_ci	 * waiting for the lock here would deadlock.  If someone else has it
325462306a36Sopenharmony_ci	 * locked they are already in the process of dropping it anyway
325562306a36Sopenharmony_ci	 */
325662306a36Sopenharmony_ci	if (!mutex_trylock(&head->mutex))
325762306a36Sopenharmony_ci		goto out;
325862306a36Sopenharmony_ci
325962306a36Sopenharmony_ci	btrfs_delete_ref_head(delayed_refs, head);
326062306a36Sopenharmony_ci	head->processing = false;
326162306a36Sopenharmony_ci
326262306a36Sopenharmony_ci	spin_unlock(&head->lock);
326362306a36Sopenharmony_ci	spin_unlock(&delayed_refs->lock);
326462306a36Sopenharmony_ci
326562306a36Sopenharmony_ci	BUG_ON(head->extent_op);
326662306a36Sopenharmony_ci	if (head->must_insert_reserved)
326762306a36Sopenharmony_ci		ret = 1;
326862306a36Sopenharmony_ci
326962306a36Sopenharmony_ci	btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
327062306a36Sopenharmony_ci	mutex_unlock(&head->mutex);
327162306a36Sopenharmony_ci	btrfs_put_delayed_ref_head(head);
327262306a36Sopenharmony_ci	return ret;
327362306a36Sopenharmony_ciout:
327462306a36Sopenharmony_ci	spin_unlock(&head->lock);
327562306a36Sopenharmony_ci
327662306a36Sopenharmony_ciout_delayed_unlock:
327762306a36Sopenharmony_ci	spin_unlock(&delayed_refs->lock);
327862306a36Sopenharmony_ci	return 0;
327962306a36Sopenharmony_ci}
328062306a36Sopenharmony_ci
328162306a36Sopenharmony_civoid btrfs_free_tree_block(struct btrfs_trans_handle *trans,
328262306a36Sopenharmony_ci			   u64 root_id,
328362306a36Sopenharmony_ci			   struct extent_buffer *buf,
328462306a36Sopenharmony_ci			   u64 parent, int last_ref)
328562306a36Sopenharmony_ci{
328662306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
328762306a36Sopenharmony_ci	struct btrfs_ref generic_ref = { 0 };
328862306a36Sopenharmony_ci	int ret;
328962306a36Sopenharmony_ci
329062306a36Sopenharmony_ci	btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
329162306a36Sopenharmony_ci			       buf->start, buf->len, parent);
329262306a36Sopenharmony_ci	btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf),
329362306a36Sopenharmony_ci			    root_id, 0, false);
329462306a36Sopenharmony_ci
329562306a36Sopenharmony_ci	if (root_id != BTRFS_TREE_LOG_OBJECTID) {
329662306a36Sopenharmony_ci		btrfs_ref_tree_mod(fs_info, &generic_ref);
329762306a36Sopenharmony_ci		ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL);
329862306a36Sopenharmony_ci		BUG_ON(ret); /* -ENOMEM */
329962306a36Sopenharmony_ci	}
330062306a36Sopenharmony_ci
330162306a36Sopenharmony_ci	if (last_ref && btrfs_header_generation(buf) == trans->transid) {
330262306a36Sopenharmony_ci		struct btrfs_block_group *cache;
330362306a36Sopenharmony_ci		bool must_pin = false;
330462306a36Sopenharmony_ci
330562306a36Sopenharmony_ci		if (root_id != BTRFS_TREE_LOG_OBJECTID) {
330662306a36Sopenharmony_ci			ret = check_ref_cleanup(trans, buf->start);
330762306a36Sopenharmony_ci			if (!ret) {
330862306a36Sopenharmony_ci				btrfs_redirty_list_add(trans->transaction, buf);
330962306a36Sopenharmony_ci				goto out;
331062306a36Sopenharmony_ci			}
331162306a36Sopenharmony_ci		}
331262306a36Sopenharmony_ci
331362306a36Sopenharmony_ci		cache = btrfs_lookup_block_group(fs_info, buf->start);
331462306a36Sopenharmony_ci
331562306a36Sopenharmony_ci		if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
331662306a36Sopenharmony_ci			pin_down_extent(trans, cache, buf->start, buf->len, 1);
331762306a36Sopenharmony_ci			btrfs_put_block_group(cache);
331862306a36Sopenharmony_ci			goto out;
331962306a36Sopenharmony_ci		}
332062306a36Sopenharmony_ci
332162306a36Sopenharmony_ci		/*
332262306a36Sopenharmony_ci		 * If there are tree mod log users we may have recorded mod log
332362306a36Sopenharmony_ci		 * operations for this node.  If we re-allocate this node we
332462306a36Sopenharmony_ci		 * could replay operations on this node that happened when it
332562306a36Sopenharmony_ci		 * existed in a completely different root.  For example if it
332662306a36Sopenharmony_ci		 * was part of root A, then was reallocated to root B, and we
332762306a36Sopenharmony_ci		 * are doing a btrfs_old_search_slot(root b), we could replay
332862306a36Sopenharmony_ci		 * operations that happened when the block was part of root A,
332962306a36Sopenharmony_ci		 * giving us an inconsistent view of the btree.
333062306a36Sopenharmony_ci		 *
333162306a36Sopenharmony_ci		 * We are safe from races here because at this point no other
333262306a36Sopenharmony_ci		 * node or root points to this extent buffer, so if after this
333362306a36Sopenharmony_ci		 * check a new tree mod log user joins we will not have an
333462306a36Sopenharmony_ci		 * existing log of operations on this node that we have to
333562306a36Sopenharmony_ci		 * contend with.
333662306a36Sopenharmony_ci		 */
333762306a36Sopenharmony_ci		if (test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
333862306a36Sopenharmony_ci			must_pin = true;
333962306a36Sopenharmony_ci
334062306a36Sopenharmony_ci		if (must_pin || btrfs_is_zoned(fs_info)) {
334162306a36Sopenharmony_ci			btrfs_redirty_list_add(trans->transaction, buf);
334262306a36Sopenharmony_ci			pin_down_extent(trans, cache, buf->start, buf->len, 1);
334362306a36Sopenharmony_ci			btrfs_put_block_group(cache);
334462306a36Sopenharmony_ci			goto out;
334562306a36Sopenharmony_ci		}
334662306a36Sopenharmony_ci
334762306a36Sopenharmony_ci		WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
334862306a36Sopenharmony_ci
334962306a36Sopenharmony_ci		btrfs_add_free_space(cache, buf->start, buf->len);
335062306a36Sopenharmony_ci		btrfs_free_reserved_bytes(cache, buf->len, 0);
335162306a36Sopenharmony_ci		btrfs_put_block_group(cache);
335262306a36Sopenharmony_ci		trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
335362306a36Sopenharmony_ci	}
335462306a36Sopenharmony_ciout:
335562306a36Sopenharmony_ci	if (last_ref) {
335662306a36Sopenharmony_ci		/*
335762306a36Sopenharmony_ci		 * Deleting the buffer, clear the corrupt flag since it doesn't
335862306a36Sopenharmony_ci		 * matter anymore.
335962306a36Sopenharmony_ci		 */
336062306a36Sopenharmony_ci		clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
336162306a36Sopenharmony_ci	}
336262306a36Sopenharmony_ci}
336362306a36Sopenharmony_ci
336462306a36Sopenharmony_ci/* Can return -ENOMEM */
336562306a36Sopenharmony_ciint btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
336662306a36Sopenharmony_ci{
336762306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
336862306a36Sopenharmony_ci	int ret;
336962306a36Sopenharmony_ci
337062306a36Sopenharmony_ci	if (btrfs_is_testing(fs_info))
337162306a36Sopenharmony_ci		return 0;
337262306a36Sopenharmony_ci
337362306a36Sopenharmony_ci	/*
337462306a36Sopenharmony_ci	 * tree log blocks never actually go into the extent allocation
337562306a36Sopenharmony_ci	 * tree, just update pinning info and exit early.
337662306a36Sopenharmony_ci	 */
337762306a36Sopenharmony_ci	if ((ref->type == BTRFS_REF_METADATA &&
337862306a36Sopenharmony_ci	     ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
337962306a36Sopenharmony_ci	    (ref->type == BTRFS_REF_DATA &&
338062306a36Sopenharmony_ci	     ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)) {
338162306a36Sopenharmony_ci		/* unlocks the pinned mutex */
338262306a36Sopenharmony_ci		btrfs_pin_extent(trans, ref->bytenr, ref->len, 1);
338362306a36Sopenharmony_ci		ret = 0;
338462306a36Sopenharmony_ci	} else if (ref->type == BTRFS_REF_METADATA) {
338562306a36Sopenharmony_ci		ret = btrfs_add_delayed_tree_ref(trans, ref, NULL);
338662306a36Sopenharmony_ci	} else {
338762306a36Sopenharmony_ci		ret = btrfs_add_delayed_data_ref(trans, ref, 0);
338862306a36Sopenharmony_ci	}
338962306a36Sopenharmony_ci
339062306a36Sopenharmony_ci	if (!((ref->type == BTRFS_REF_METADATA &&
339162306a36Sopenharmony_ci	       ref->tree_ref.owning_root == BTRFS_TREE_LOG_OBJECTID) ||
339262306a36Sopenharmony_ci	      (ref->type == BTRFS_REF_DATA &&
339362306a36Sopenharmony_ci	       ref->data_ref.owning_root == BTRFS_TREE_LOG_OBJECTID)))
339462306a36Sopenharmony_ci		btrfs_ref_tree_mod(fs_info, ref);
339562306a36Sopenharmony_ci
339662306a36Sopenharmony_ci	return ret;
339762306a36Sopenharmony_ci}
339862306a36Sopenharmony_ci
/* Allocation loop stages, listed in ascending enumerator order. */
enum btrfs_loop_type {
	/*
	 * Kick off caching of block groups, but neither wait for progress
	 * nor for the caching to complete.
	 */
	LOOP_CACHING_NOWAIT,

	/*
	 * For a block group that is not cached yet, wait until its free space
	 * is at least the amount being waited for.
	 */
	LOOP_CACHING_WAIT,

	/*
	 * Also allow allocating from block groups that have no size class
	 * assigned yet.
	 */
	LOOP_UNSET_SIZE_CLASS,

	/* Allocate a new chunk, then retry the allocation. */
	LOOP_ALLOC_CHUNK,

	/* Disregard the size class restrictions for this allocation. */
	LOOP_WRONG_SIZE_CLASS,

	/*
	 * Disregard the empty size and only try to allocate the number of
	 * bytes this allocation actually needs.
	 */
	LOOP_NO_EMPTY_SIZE,
};
343462306a36Sopenharmony_ci
343562306a36Sopenharmony_cistatic inline void
343662306a36Sopenharmony_cibtrfs_lock_block_group(struct btrfs_block_group *cache,
343762306a36Sopenharmony_ci		       int delalloc)
343862306a36Sopenharmony_ci{
343962306a36Sopenharmony_ci	if (delalloc)
344062306a36Sopenharmony_ci		down_read(&cache->data_rwsem);
344162306a36Sopenharmony_ci}
344262306a36Sopenharmony_ci
/*
 * Take a reference on @cache and, when @delalloc is set, also take its
 * data_rwsem for reading.
 */
static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
		       int delalloc)
{
	btrfs_get_block_group(cache);
	btrfs_lock_block_group(cache, delalloc);
}
345062306a36Sopenharmony_ci
345162306a36Sopenharmony_cistatic struct btrfs_block_group *btrfs_lock_cluster(
345262306a36Sopenharmony_ci		   struct btrfs_block_group *block_group,
345362306a36Sopenharmony_ci		   struct btrfs_free_cluster *cluster,
345462306a36Sopenharmony_ci		   int delalloc)
345562306a36Sopenharmony_ci	__acquires(&cluster->refill_lock)
345662306a36Sopenharmony_ci{
345762306a36Sopenharmony_ci	struct btrfs_block_group *used_bg = NULL;
345862306a36Sopenharmony_ci
345962306a36Sopenharmony_ci	spin_lock(&cluster->refill_lock);
346062306a36Sopenharmony_ci	while (1) {
346162306a36Sopenharmony_ci		used_bg = cluster->block_group;
346262306a36Sopenharmony_ci		if (!used_bg)
346362306a36Sopenharmony_ci			return NULL;
346462306a36Sopenharmony_ci
346562306a36Sopenharmony_ci		if (used_bg == block_group)
346662306a36Sopenharmony_ci			return used_bg;
346762306a36Sopenharmony_ci
346862306a36Sopenharmony_ci		btrfs_get_block_group(used_bg);
346962306a36Sopenharmony_ci
347062306a36Sopenharmony_ci		if (!delalloc)
347162306a36Sopenharmony_ci			return used_bg;
347262306a36Sopenharmony_ci
347362306a36Sopenharmony_ci		if (down_read_trylock(&used_bg->data_rwsem))
347462306a36Sopenharmony_ci			return used_bg;
347562306a36Sopenharmony_ci
347662306a36Sopenharmony_ci		spin_unlock(&cluster->refill_lock);
347762306a36Sopenharmony_ci
347862306a36Sopenharmony_ci		/* We should only have one-level nested. */
347962306a36Sopenharmony_ci		down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
348062306a36Sopenharmony_ci
348162306a36Sopenharmony_ci		spin_lock(&cluster->refill_lock);
348262306a36Sopenharmony_ci		if (used_bg == cluster->block_group)
348362306a36Sopenharmony_ci			return used_bg;
348462306a36Sopenharmony_ci
348562306a36Sopenharmony_ci		up_read(&used_bg->data_rwsem);
348662306a36Sopenharmony_ci		btrfs_put_block_group(used_bg);
348762306a36Sopenharmony_ci	}
348862306a36Sopenharmony_ci}
348962306a36Sopenharmony_ci
349062306a36Sopenharmony_cistatic inline void
349162306a36Sopenharmony_cibtrfs_release_block_group(struct btrfs_block_group *cache,
349262306a36Sopenharmony_ci			 int delalloc)
349362306a36Sopenharmony_ci{
349462306a36Sopenharmony_ci	if (delalloc)
349562306a36Sopenharmony_ci		up_read(&cache->data_rwsem);
349662306a36Sopenharmony_ci	btrfs_put_block_group(cache);
349762306a36Sopenharmony_ci}
349862306a36Sopenharmony_ci
349962306a36Sopenharmony_ci/*
350062306a36Sopenharmony_ci * Helper function for find_free_extent().
350162306a36Sopenharmony_ci *
350262306a36Sopenharmony_ci * Return -ENOENT to inform caller that we need fallback to unclustered mode.
350362306a36Sopenharmony_ci * Return >0 to inform caller that we find nothing
350462306a36Sopenharmony_ci * Return 0 means we have found a location and set ffe_ctl->found_offset.
350562306a36Sopenharmony_ci */
350662306a36Sopenharmony_cistatic int find_free_extent_clustered(struct btrfs_block_group *bg,
350762306a36Sopenharmony_ci				      struct find_free_extent_ctl *ffe_ctl,
350862306a36Sopenharmony_ci				      struct btrfs_block_group **cluster_bg_ret)
350962306a36Sopenharmony_ci{
351062306a36Sopenharmony_ci	struct btrfs_block_group *cluster_bg;
351162306a36Sopenharmony_ci	struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
351262306a36Sopenharmony_ci	u64 aligned_cluster;
351362306a36Sopenharmony_ci	u64 offset;
351462306a36Sopenharmony_ci	int ret;
351562306a36Sopenharmony_ci
351662306a36Sopenharmony_ci	cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc);
351762306a36Sopenharmony_ci	if (!cluster_bg)
351862306a36Sopenharmony_ci		goto refill_cluster;
351962306a36Sopenharmony_ci	if (cluster_bg != bg && (cluster_bg->ro ||
352062306a36Sopenharmony_ci	    !block_group_bits(cluster_bg, ffe_ctl->flags)))
352162306a36Sopenharmony_ci		goto release_cluster;
352262306a36Sopenharmony_ci
352362306a36Sopenharmony_ci	offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
352462306a36Sopenharmony_ci			ffe_ctl->num_bytes, cluster_bg->start,
352562306a36Sopenharmony_ci			&ffe_ctl->max_extent_size);
352662306a36Sopenharmony_ci	if (offset) {
352762306a36Sopenharmony_ci		/* We have a block, we're done */
352862306a36Sopenharmony_ci		spin_unlock(&last_ptr->refill_lock);
352962306a36Sopenharmony_ci		trace_btrfs_reserve_extent_cluster(cluster_bg, ffe_ctl);
353062306a36Sopenharmony_ci		*cluster_bg_ret = cluster_bg;
353162306a36Sopenharmony_ci		ffe_ctl->found_offset = offset;
353262306a36Sopenharmony_ci		return 0;
353362306a36Sopenharmony_ci	}
353462306a36Sopenharmony_ci	WARN_ON(last_ptr->block_group != cluster_bg);
353562306a36Sopenharmony_ci
353662306a36Sopenharmony_cirelease_cluster:
353762306a36Sopenharmony_ci	/*
353862306a36Sopenharmony_ci	 * If we are on LOOP_NO_EMPTY_SIZE, we can't set up a new clusters, so
353962306a36Sopenharmony_ci	 * lets just skip it and let the allocator find whatever block it can
354062306a36Sopenharmony_ci	 * find. If we reach this point, we will have tried the cluster
354162306a36Sopenharmony_ci	 * allocator plenty of times and not have found anything, so we are
354262306a36Sopenharmony_ci	 * likely way too fragmented for the clustering stuff to find anything.
354362306a36Sopenharmony_ci	 *
354462306a36Sopenharmony_ci	 * However, if the cluster is taken from the current block group,
354562306a36Sopenharmony_ci	 * release the cluster first, so that we stand a better chance of
354662306a36Sopenharmony_ci	 * succeeding in the unclustered allocation.
354762306a36Sopenharmony_ci	 */
354862306a36Sopenharmony_ci	if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) {
354962306a36Sopenharmony_ci		spin_unlock(&last_ptr->refill_lock);
355062306a36Sopenharmony_ci		btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
355162306a36Sopenharmony_ci		return -ENOENT;
355262306a36Sopenharmony_ci	}
355362306a36Sopenharmony_ci
355462306a36Sopenharmony_ci	/* This cluster didn't work out, free it and start over */
355562306a36Sopenharmony_ci	btrfs_return_cluster_to_free_space(NULL, last_ptr);
355662306a36Sopenharmony_ci
355762306a36Sopenharmony_ci	if (cluster_bg != bg)
355862306a36Sopenharmony_ci		btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc);
355962306a36Sopenharmony_ci
356062306a36Sopenharmony_cirefill_cluster:
356162306a36Sopenharmony_ci	if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) {
356262306a36Sopenharmony_ci		spin_unlock(&last_ptr->refill_lock);
356362306a36Sopenharmony_ci		return -ENOENT;
356462306a36Sopenharmony_ci	}
356562306a36Sopenharmony_ci
356662306a36Sopenharmony_ci	aligned_cluster = max_t(u64,
356762306a36Sopenharmony_ci			ffe_ctl->empty_cluster + ffe_ctl->empty_size,
356862306a36Sopenharmony_ci			bg->full_stripe_len);
356962306a36Sopenharmony_ci	ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start,
357062306a36Sopenharmony_ci			ffe_ctl->num_bytes, aligned_cluster);
357162306a36Sopenharmony_ci	if (ret == 0) {
357262306a36Sopenharmony_ci		/* Now pull our allocation out of this cluster */
357362306a36Sopenharmony_ci		offset = btrfs_alloc_from_cluster(bg, last_ptr,
357462306a36Sopenharmony_ci				ffe_ctl->num_bytes, ffe_ctl->search_start,
357562306a36Sopenharmony_ci				&ffe_ctl->max_extent_size);
357662306a36Sopenharmony_ci		if (offset) {
357762306a36Sopenharmony_ci			/* We found one, proceed */
357862306a36Sopenharmony_ci			spin_unlock(&last_ptr->refill_lock);
357962306a36Sopenharmony_ci			ffe_ctl->found_offset = offset;
358062306a36Sopenharmony_ci			trace_btrfs_reserve_extent_cluster(bg, ffe_ctl);
358162306a36Sopenharmony_ci			return 0;
358262306a36Sopenharmony_ci		}
358362306a36Sopenharmony_ci	}
358462306a36Sopenharmony_ci	/*
358562306a36Sopenharmony_ci	 * At this point we either didn't find a cluster or we weren't able to
358662306a36Sopenharmony_ci	 * allocate a block from our cluster.  Free the cluster we've been
358762306a36Sopenharmony_ci	 * trying to use, and go to the next block group.
358862306a36Sopenharmony_ci	 */
358962306a36Sopenharmony_ci	btrfs_return_cluster_to_free_space(NULL, last_ptr);
359062306a36Sopenharmony_ci	spin_unlock(&last_ptr->refill_lock);
359162306a36Sopenharmony_ci	return 1;
359262306a36Sopenharmony_ci}
359362306a36Sopenharmony_ci
359462306a36Sopenharmony_ci/*
359562306a36Sopenharmony_ci * Return >0 to inform caller that we find nothing
359662306a36Sopenharmony_ci * Return 0 when we found an free extent and set ffe_ctrl->found_offset
359762306a36Sopenharmony_ci */
359862306a36Sopenharmony_cistatic int find_free_extent_unclustered(struct btrfs_block_group *bg,
359962306a36Sopenharmony_ci					struct find_free_extent_ctl *ffe_ctl)
360062306a36Sopenharmony_ci{
360162306a36Sopenharmony_ci	struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
360262306a36Sopenharmony_ci	u64 offset;
360362306a36Sopenharmony_ci
360462306a36Sopenharmony_ci	/*
360562306a36Sopenharmony_ci	 * We are doing an unclustered allocation, set the fragmented flag so
360662306a36Sopenharmony_ci	 * we don't bother trying to setup a cluster again until we get more
360762306a36Sopenharmony_ci	 * space.
360862306a36Sopenharmony_ci	 */
360962306a36Sopenharmony_ci	if (unlikely(last_ptr)) {
361062306a36Sopenharmony_ci		spin_lock(&last_ptr->lock);
361162306a36Sopenharmony_ci		last_ptr->fragmented = 1;
361262306a36Sopenharmony_ci		spin_unlock(&last_ptr->lock);
361362306a36Sopenharmony_ci	}
361462306a36Sopenharmony_ci	if (ffe_ctl->cached) {
361562306a36Sopenharmony_ci		struct btrfs_free_space_ctl *free_space_ctl;
361662306a36Sopenharmony_ci
361762306a36Sopenharmony_ci		free_space_ctl = bg->free_space_ctl;
361862306a36Sopenharmony_ci		spin_lock(&free_space_ctl->tree_lock);
361962306a36Sopenharmony_ci		if (free_space_ctl->free_space <
362062306a36Sopenharmony_ci		    ffe_ctl->num_bytes + ffe_ctl->empty_cluster +
362162306a36Sopenharmony_ci		    ffe_ctl->empty_size) {
362262306a36Sopenharmony_ci			ffe_ctl->total_free_space = max_t(u64,
362362306a36Sopenharmony_ci					ffe_ctl->total_free_space,
362462306a36Sopenharmony_ci					free_space_ctl->free_space);
362562306a36Sopenharmony_ci			spin_unlock(&free_space_ctl->tree_lock);
362662306a36Sopenharmony_ci			return 1;
362762306a36Sopenharmony_ci		}
362862306a36Sopenharmony_ci		spin_unlock(&free_space_ctl->tree_lock);
362962306a36Sopenharmony_ci	}
363062306a36Sopenharmony_ci
363162306a36Sopenharmony_ci	offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
363262306a36Sopenharmony_ci			ffe_ctl->num_bytes, ffe_ctl->empty_size,
363362306a36Sopenharmony_ci			&ffe_ctl->max_extent_size);
363462306a36Sopenharmony_ci	if (!offset)
363562306a36Sopenharmony_ci		return 1;
363662306a36Sopenharmony_ci	ffe_ctl->found_offset = offset;
363762306a36Sopenharmony_ci	return 0;
363862306a36Sopenharmony_ci}
363962306a36Sopenharmony_ci
364062306a36Sopenharmony_cistatic int do_allocation_clustered(struct btrfs_block_group *block_group,
364162306a36Sopenharmony_ci				   struct find_free_extent_ctl *ffe_ctl,
364262306a36Sopenharmony_ci				   struct btrfs_block_group **bg_ret)
364362306a36Sopenharmony_ci{
364462306a36Sopenharmony_ci	int ret;
364562306a36Sopenharmony_ci
364662306a36Sopenharmony_ci	/* We want to try and use the cluster allocator, so lets look there */
364762306a36Sopenharmony_ci	if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
364862306a36Sopenharmony_ci		ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
364962306a36Sopenharmony_ci		if (ret >= 0)
365062306a36Sopenharmony_ci			return ret;
365162306a36Sopenharmony_ci		/* ret == -ENOENT case falls through */
365262306a36Sopenharmony_ci	}
365362306a36Sopenharmony_ci
365462306a36Sopenharmony_ci	return find_free_extent_unclustered(block_group, ffe_ctl);
365562306a36Sopenharmony_ci}
365662306a36Sopenharmony_ci
/*
 * Tree-log block group locking
 * ============================
 *
 * fs_info::treelog_bg_lock protects fs_info::treelog_bg, which holds the
 * starting address of the block group that is reserved only for tree-log
 * metadata.
 *
 * Lock nesting
 * ============
 *
 * space_info::lock
 *   block_group::lock
 *     fs_info::treelog_bg_lock
 *       fs_info::relocation_bg_lock
 */
367262306a36Sopenharmony_ci
367362306a36Sopenharmony_ci/*
367462306a36Sopenharmony_ci * Simple allocator for sequential-only block group. It only allows sequential
367562306a36Sopenharmony_ci * allocation. No need to play with trees. This function also reserves the
367662306a36Sopenharmony_ci * bytes as in btrfs_add_reserved_bytes.
367762306a36Sopenharmony_ci */
367862306a36Sopenharmony_cistatic int do_allocation_zoned(struct btrfs_block_group *block_group,
367962306a36Sopenharmony_ci			       struct find_free_extent_ctl *ffe_ctl,
368062306a36Sopenharmony_ci			       struct btrfs_block_group **bg_ret)
368162306a36Sopenharmony_ci{
368262306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = block_group->fs_info;
368362306a36Sopenharmony_ci	struct btrfs_space_info *space_info = block_group->space_info;
368462306a36Sopenharmony_ci	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
368562306a36Sopenharmony_ci	u64 start = block_group->start;
368662306a36Sopenharmony_ci	u64 num_bytes = ffe_ctl->num_bytes;
368762306a36Sopenharmony_ci	u64 avail;
368862306a36Sopenharmony_ci	u64 bytenr = block_group->start;
368962306a36Sopenharmony_ci	u64 log_bytenr;
369062306a36Sopenharmony_ci	u64 data_reloc_bytenr;
369162306a36Sopenharmony_ci	int ret = 0;
369262306a36Sopenharmony_ci	bool skip = false;
369362306a36Sopenharmony_ci
369462306a36Sopenharmony_ci	ASSERT(btrfs_is_zoned(block_group->fs_info));
369562306a36Sopenharmony_ci
369662306a36Sopenharmony_ci	/*
369762306a36Sopenharmony_ci	 * Do not allow non-tree-log blocks in the dedicated tree-log block
369862306a36Sopenharmony_ci	 * group, and vice versa.
369962306a36Sopenharmony_ci	 */
370062306a36Sopenharmony_ci	spin_lock(&fs_info->treelog_bg_lock);
370162306a36Sopenharmony_ci	log_bytenr = fs_info->treelog_bg;
370262306a36Sopenharmony_ci	if (log_bytenr && ((ffe_ctl->for_treelog && bytenr != log_bytenr) ||
370362306a36Sopenharmony_ci			   (!ffe_ctl->for_treelog && bytenr == log_bytenr)))
370462306a36Sopenharmony_ci		skip = true;
370562306a36Sopenharmony_ci	spin_unlock(&fs_info->treelog_bg_lock);
370662306a36Sopenharmony_ci	if (skip)
370762306a36Sopenharmony_ci		return 1;
370862306a36Sopenharmony_ci
370962306a36Sopenharmony_ci	/*
371062306a36Sopenharmony_ci	 * Do not allow non-relocation blocks in the dedicated relocation block
371162306a36Sopenharmony_ci	 * group, and vice versa.
371262306a36Sopenharmony_ci	 */
371362306a36Sopenharmony_ci	spin_lock(&fs_info->relocation_bg_lock);
371462306a36Sopenharmony_ci	data_reloc_bytenr = fs_info->data_reloc_bg;
371562306a36Sopenharmony_ci	if (data_reloc_bytenr &&
371662306a36Sopenharmony_ci	    ((ffe_ctl->for_data_reloc && bytenr != data_reloc_bytenr) ||
371762306a36Sopenharmony_ci	     (!ffe_ctl->for_data_reloc && bytenr == data_reloc_bytenr)))
371862306a36Sopenharmony_ci		skip = true;
371962306a36Sopenharmony_ci	spin_unlock(&fs_info->relocation_bg_lock);
372062306a36Sopenharmony_ci	if (skip)
372162306a36Sopenharmony_ci		return 1;
372262306a36Sopenharmony_ci
372362306a36Sopenharmony_ci	/* Check RO and no space case before trying to activate it */
372462306a36Sopenharmony_ci	spin_lock(&block_group->lock);
372562306a36Sopenharmony_ci	if (block_group->ro || btrfs_zoned_bg_is_full(block_group)) {
372662306a36Sopenharmony_ci		ret = 1;
372762306a36Sopenharmony_ci		/*
372862306a36Sopenharmony_ci		 * May need to clear fs_info->{treelog,data_reloc}_bg.
372962306a36Sopenharmony_ci		 * Return the error after taking the locks.
373062306a36Sopenharmony_ci		 */
373162306a36Sopenharmony_ci	}
373262306a36Sopenharmony_ci	spin_unlock(&block_group->lock);
373362306a36Sopenharmony_ci
373462306a36Sopenharmony_ci	/* Metadata block group is activated at write time. */
373562306a36Sopenharmony_ci	if (!ret && (block_group->flags & BTRFS_BLOCK_GROUP_DATA) &&
373662306a36Sopenharmony_ci	    !btrfs_zone_activate(block_group)) {
373762306a36Sopenharmony_ci		ret = 1;
373862306a36Sopenharmony_ci		/*
373962306a36Sopenharmony_ci		 * May need to clear fs_info->{treelog,data_reloc}_bg.
374062306a36Sopenharmony_ci		 * Return the error after taking the locks.
374162306a36Sopenharmony_ci		 */
374262306a36Sopenharmony_ci	}
374362306a36Sopenharmony_ci
374462306a36Sopenharmony_ci	spin_lock(&space_info->lock);
374562306a36Sopenharmony_ci	spin_lock(&block_group->lock);
374662306a36Sopenharmony_ci	spin_lock(&fs_info->treelog_bg_lock);
374762306a36Sopenharmony_ci	spin_lock(&fs_info->relocation_bg_lock);
374862306a36Sopenharmony_ci
374962306a36Sopenharmony_ci	if (ret)
375062306a36Sopenharmony_ci		goto out;
375162306a36Sopenharmony_ci
375262306a36Sopenharmony_ci	ASSERT(!ffe_ctl->for_treelog ||
375362306a36Sopenharmony_ci	       block_group->start == fs_info->treelog_bg ||
375462306a36Sopenharmony_ci	       fs_info->treelog_bg == 0);
375562306a36Sopenharmony_ci	ASSERT(!ffe_ctl->for_data_reloc ||
375662306a36Sopenharmony_ci	       block_group->start == fs_info->data_reloc_bg ||
375762306a36Sopenharmony_ci	       fs_info->data_reloc_bg == 0);
375862306a36Sopenharmony_ci
375962306a36Sopenharmony_ci	if (block_group->ro ||
376062306a36Sopenharmony_ci	    (!ffe_ctl->for_data_reloc &&
376162306a36Sopenharmony_ci	     test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags))) {
376262306a36Sopenharmony_ci		ret = 1;
376362306a36Sopenharmony_ci		goto out;
376462306a36Sopenharmony_ci	}
376562306a36Sopenharmony_ci
376662306a36Sopenharmony_ci	/*
376762306a36Sopenharmony_ci	 * Do not allow currently using block group to be tree-log dedicated
376862306a36Sopenharmony_ci	 * block group.
376962306a36Sopenharmony_ci	 */
377062306a36Sopenharmony_ci	if (ffe_ctl->for_treelog && !fs_info->treelog_bg &&
377162306a36Sopenharmony_ci	    (block_group->used || block_group->reserved)) {
377262306a36Sopenharmony_ci		ret = 1;
377362306a36Sopenharmony_ci		goto out;
377462306a36Sopenharmony_ci	}
377562306a36Sopenharmony_ci
377662306a36Sopenharmony_ci	/*
377762306a36Sopenharmony_ci	 * Do not allow currently used block group to be the data relocation
377862306a36Sopenharmony_ci	 * dedicated block group.
377962306a36Sopenharmony_ci	 */
378062306a36Sopenharmony_ci	if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg &&
378162306a36Sopenharmony_ci	    (block_group->used || block_group->reserved)) {
378262306a36Sopenharmony_ci		ret = 1;
378362306a36Sopenharmony_ci		goto out;
378462306a36Sopenharmony_ci	}
378562306a36Sopenharmony_ci
378662306a36Sopenharmony_ci	WARN_ON_ONCE(block_group->alloc_offset > block_group->zone_capacity);
378762306a36Sopenharmony_ci	avail = block_group->zone_capacity - block_group->alloc_offset;
378862306a36Sopenharmony_ci	if (avail < num_bytes) {
378962306a36Sopenharmony_ci		if (ffe_ctl->max_extent_size < avail) {
379062306a36Sopenharmony_ci			/*
379162306a36Sopenharmony_ci			 * With sequential allocator, free space is always
379262306a36Sopenharmony_ci			 * contiguous
379362306a36Sopenharmony_ci			 */
379462306a36Sopenharmony_ci			ffe_ctl->max_extent_size = avail;
379562306a36Sopenharmony_ci			ffe_ctl->total_free_space = avail;
379662306a36Sopenharmony_ci		}
379762306a36Sopenharmony_ci		ret = 1;
379862306a36Sopenharmony_ci		goto out;
379962306a36Sopenharmony_ci	}
380062306a36Sopenharmony_ci
380162306a36Sopenharmony_ci	if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
380262306a36Sopenharmony_ci		fs_info->treelog_bg = block_group->start;
380362306a36Sopenharmony_ci
380462306a36Sopenharmony_ci	if (ffe_ctl->for_data_reloc) {
380562306a36Sopenharmony_ci		if (!fs_info->data_reloc_bg)
380662306a36Sopenharmony_ci			fs_info->data_reloc_bg = block_group->start;
380762306a36Sopenharmony_ci		/*
380862306a36Sopenharmony_ci		 * Do not allow allocations from this block group, unless it is
380962306a36Sopenharmony_ci		 * for data relocation. Compared to increasing the ->ro, setting
381062306a36Sopenharmony_ci		 * the ->zoned_data_reloc_ongoing flag still allows nocow
381162306a36Sopenharmony_ci		 * writers to come in. See btrfs_inc_nocow_writers().
381262306a36Sopenharmony_ci		 *
381362306a36Sopenharmony_ci		 * We need to disable an allocation to avoid an allocation of
381462306a36Sopenharmony_ci		 * regular (non-relocation data) extent. With mix of relocation
381562306a36Sopenharmony_ci		 * extents and regular extents, we can dispatch WRITE commands
381662306a36Sopenharmony_ci		 * (for relocation extents) and ZONE APPEND commands (for
381762306a36Sopenharmony_ci		 * regular extents) at the same time to the same zone, which
381862306a36Sopenharmony_ci		 * easily break the write pointer.
381962306a36Sopenharmony_ci		 *
382062306a36Sopenharmony_ci		 * Also, this flag avoids this block group to be zone finished.
382162306a36Sopenharmony_ci		 */
382262306a36Sopenharmony_ci		set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
382362306a36Sopenharmony_ci	}
382462306a36Sopenharmony_ci
382562306a36Sopenharmony_ci	ffe_ctl->found_offset = start + block_group->alloc_offset;
382662306a36Sopenharmony_ci	block_group->alloc_offset += num_bytes;
382762306a36Sopenharmony_ci	spin_lock(&ctl->tree_lock);
382862306a36Sopenharmony_ci	ctl->free_space -= num_bytes;
382962306a36Sopenharmony_ci	spin_unlock(&ctl->tree_lock);
383062306a36Sopenharmony_ci
383162306a36Sopenharmony_ci	/*
383262306a36Sopenharmony_ci	 * We do not check if found_offset is aligned to stripesize. The
383362306a36Sopenharmony_ci	 * address is anyway rewritten when using zone append writing.
383462306a36Sopenharmony_ci	 */
383562306a36Sopenharmony_ci
383662306a36Sopenharmony_ci	ffe_ctl->search_start = ffe_ctl->found_offset;
383762306a36Sopenharmony_ci
383862306a36Sopenharmony_ciout:
383962306a36Sopenharmony_ci	if (ret && ffe_ctl->for_treelog)
384062306a36Sopenharmony_ci		fs_info->treelog_bg = 0;
384162306a36Sopenharmony_ci	if (ret && ffe_ctl->for_data_reloc)
384262306a36Sopenharmony_ci		fs_info->data_reloc_bg = 0;
384362306a36Sopenharmony_ci	spin_unlock(&fs_info->relocation_bg_lock);
384462306a36Sopenharmony_ci	spin_unlock(&fs_info->treelog_bg_lock);
384562306a36Sopenharmony_ci	spin_unlock(&block_group->lock);
384662306a36Sopenharmony_ci	spin_unlock(&space_info->lock);
384762306a36Sopenharmony_ci	return ret;
384862306a36Sopenharmony_ci}
384962306a36Sopenharmony_ci
385062306a36Sopenharmony_cistatic int do_allocation(struct btrfs_block_group *block_group,
385162306a36Sopenharmony_ci			 struct find_free_extent_ctl *ffe_ctl,
385262306a36Sopenharmony_ci			 struct btrfs_block_group **bg_ret)
385362306a36Sopenharmony_ci{
385462306a36Sopenharmony_ci	switch (ffe_ctl->policy) {
385562306a36Sopenharmony_ci	case BTRFS_EXTENT_ALLOC_CLUSTERED:
385662306a36Sopenharmony_ci		return do_allocation_clustered(block_group, ffe_ctl, bg_ret);
385762306a36Sopenharmony_ci	case BTRFS_EXTENT_ALLOC_ZONED:
385862306a36Sopenharmony_ci		return do_allocation_zoned(block_group, ffe_ctl, bg_ret);
385962306a36Sopenharmony_ci	default:
386062306a36Sopenharmony_ci		BUG();
386162306a36Sopenharmony_ci	}
386262306a36Sopenharmony_ci}
386362306a36Sopenharmony_ci
386462306a36Sopenharmony_cistatic void release_block_group(struct btrfs_block_group *block_group,
386562306a36Sopenharmony_ci				struct find_free_extent_ctl *ffe_ctl,
386662306a36Sopenharmony_ci				int delalloc)
386762306a36Sopenharmony_ci{
386862306a36Sopenharmony_ci	switch (ffe_ctl->policy) {
386962306a36Sopenharmony_ci	case BTRFS_EXTENT_ALLOC_CLUSTERED:
387062306a36Sopenharmony_ci		ffe_ctl->retry_uncached = false;
387162306a36Sopenharmony_ci		break;
387262306a36Sopenharmony_ci	case BTRFS_EXTENT_ALLOC_ZONED:
387362306a36Sopenharmony_ci		/* Nothing to do */
387462306a36Sopenharmony_ci		break;
387562306a36Sopenharmony_ci	default:
387662306a36Sopenharmony_ci		BUG();
387762306a36Sopenharmony_ci	}
387862306a36Sopenharmony_ci
387962306a36Sopenharmony_ci	BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
388062306a36Sopenharmony_ci	       ffe_ctl->index);
388162306a36Sopenharmony_ci	btrfs_release_block_group(block_group, delalloc);
388262306a36Sopenharmony_ci}
388362306a36Sopenharmony_ci
388462306a36Sopenharmony_cistatic void found_extent_clustered(struct find_free_extent_ctl *ffe_ctl,
388562306a36Sopenharmony_ci				   struct btrfs_key *ins)
388662306a36Sopenharmony_ci{
388762306a36Sopenharmony_ci	struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
388862306a36Sopenharmony_ci
388962306a36Sopenharmony_ci	if (!ffe_ctl->use_cluster && last_ptr) {
389062306a36Sopenharmony_ci		spin_lock(&last_ptr->lock);
389162306a36Sopenharmony_ci		last_ptr->window_start = ins->objectid;
389262306a36Sopenharmony_ci		spin_unlock(&last_ptr->lock);
389362306a36Sopenharmony_ci	}
389462306a36Sopenharmony_ci}
389562306a36Sopenharmony_ci
389662306a36Sopenharmony_cistatic void found_extent(struct find_free_extent_ctl *ffe_ctl,
389762306a36Sopenharmony_ci			 struct btrfs_key *ins)
389862306a36Sopenharmony_ci{
389962306a36Sopenharmony_ci	switch (ffe_ctl->policy) {
390062306a36Sopenharmony_ci	case BTRFS_EXTENT_ALLOC_CLUSTERED:
390162306a36Sopenharmony_ci		found_extent_clustered(ffe_ctl, ins);
390262306a36Sopenharmony_ci		break;
390362306a36Sopenharmony_ci	case BTRFS_EXTENT_ALLOC_ZONED:
390462306a36Sopenharmony_ci		/* Nothing to do */
390562306a36Sopenharmony_ci		break;
390662306a36Sopenharmony_ci	default:
390762306a36Sopenharmony_ci		BUG();
390862306a36Sopenharmony_ci	}
390962306a36Sopenharmony_ci}
391062306a36Sopenharmony_ci
391162306a36Sopenharmony_cistatic int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
391262306a36Sopenharmony_ci				    struct find_free_extent_ctl *ffe_ctl)
391362306a36Sopenharmony_ci{
391462306a36Sopenharmony_ci	/* Block group's activeness is not a requirement for METADATA block groups. */
391562306a36Sopenharmony_ci	if (!(ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA))
391662306a36Sopenharmony_ci		return 0;
391762306a36Sopenharmony_ci
391862306a36Sopenharmony_ci	/* If we can activate new zone, just allocate a chunk and use it */
391962306a36Sopenharmony_ci	if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
392062306a36Sopenharmony_ci		return 0;
392162306a36Sopenharmony_ci
392262306a36Sopenharmony_ci	/*
392362306a36Sopenharmony_ci	 * We already reached the max active zones. Try to finish one block
392462306a36Sopenharmony_ci	 * group to make a room for a new block group. This is only possible
392562306a36Sopenharmony_ci	 * for a data block group because btrfs_zone_finish() may need to wait
392662306a36Sopenharmony_ci	 * for a running transaction which can cause a deadlock for metadata
392762306a36Sopenharmony_ci	 * allocation.
392862306a36Sopenharmony_ci	 */
392962306a36Sopenharmony_ci	if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
393062306a36Sopenharmony_ci		int ret = btrfs_zone_finish_one_bg(fs_info);
393162306a36Sopenharmony_ci
393262306a36Sopenharmony_ci		if (ret == 1)
393362306a36Sopenharmony_ci			return 0;
393462306a36Sopenharmony_ci		else if (ret < 0)
393562306a36Sopenharmony_ci			return ret;
393662306a36Sopenharmony_ci	}
393762306a36Sopenharmony_ci
393862306a36Sopenharmony_ci	/*
393962306a36Sopenharmony_ci	 * If we have enough free space left in an already active block group
394062306a36Sopenharmony_ci	 * and we can't activate any other zone now, do not allow allocating a
394162306a36Sopenharmony_ci	 * new chunk and let find_free_extent() retry with a smaller size.
394262306a36Sopenharmony_ci	 */
394362306a36Sopenharmony_ci	if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
394462306a36Sopenharmony_ci		return -ENOSPC;
394562306a36Sopenharmony_ci
394662306a36Sopenharmony_ci	/*
394762306a36Sopenharmony_ci	 * Even min_alloc_size is not left in any block groups. Since we cannot
394862306a36Sopenharmony_ci	 * activate a new block group, allocating it may not help. Let's tell a
394962306a36Sopenharmony_ci	 * caller to try again and hope it progress something by writing some
395062306a36Sopenharmony_ci	 * parts of the region. That is only possible for data block groups,
395162306a36Sopenharmony_ci	 * where a part of the region can be written.
395262306a36Sopenharmony_ci	 */
395362306a36Sopenharmony_ci	if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA)
395462306a36Sopenharmony_ci		return -EAGAIN;
395562306a36Sopenharmony_ci
395662306a36Sopenharmony_ci	/*
395762306a36Sopenharmony_ci	 * We cannot activate a new block group and no enough space left in any
395862306a36Sopenharmony_ci	 * block groups. So, allocating a new block group may not help. But,
395962306a36Sopenharmony_ci	 * there is nothing to do anyway, so let's go with it.
396062306a36Sopenharmony_ci	 */
396162306a36Sopenharmony_ci	return 0;
396262306a36Sopenharmony_ci}
396362306a36Sopenharmony_ci
396462306a36Sopenharmony_cistatic int can_allocate_chunk(struct btrfs_fs_info *fs_info,
396562306a36Sopenharmony_ci			      struct find_free_extent_ctl *ffe_ctl)
396662306a36Sopenharmony_ci{
396762306a36Sopenharmony_ci	switch (ffe_ctl->policy) {
396862306a36Sopenharmony_ci	case BTRFS_EXTENT_ALLOC_CLUSTERED:
396962306a36Sopenharmony_ci		return 0;
397062306a36Sopenharmony_ci	case BTRFS_EXTENT_ALLOC_ZONED:
397162306a36Sopenharmony_ci		return can_allocate_chunk_zoned(fs_info, ffe_ctl);
397262306a36Sopenharmony_ci	default:
397362306a36Sopenharmony_ci		BUG();
397462306a36Sopenharmony_ci	}
397562306a36Sopenharmony_ci}
397662306a36Sopenharmony_ci
397762306a36Sopenharmony_ci/*
397862306a36Sopenharmony_ci * Return >0 means caller needs to re-search for free extent
397962306a36Sopenharmony_ci * Return 0 means we have the needed free extent.
398062306a36Sopenharmony_ci * Return <0 means we failed to locate any free extent.
398162306a36Sopenharmony_ci */
398262306a36Sopenharmony_cistatic int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
398362306a36Sopenharmony_ci					struct btrfs_key *ins,
398462306a36Sopenharmony_ci					struct find_free_extent_ctl *ffe_ctl,
398562306a36Sopenharmony_ci					bool full_search)
398662306a36Sopenharmony_ci{
398762306a36Sopenharmony_ci	struct btrfs_root *root = fs_info->chunk_root;
398862306a36Sopenharmony_ci	int ret;
398962306a36Sopenharmony_ci
399062306a36Sopenharmony_ci	if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) &&
399162306a36Sopenharmony_ci	    ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg)
399262306a36Sopenharmony_ci		ffe_ctl->orig_have_caching_bg = true;
399362306a36Sopenharmony_ci
399462306a36Sopenharmony_ci	if (ins->objectid) {
399562306a36Sopenharmony_ci		found_extent(ffe_ctl, ins);
399662306a36Sopenharmony_ci		return 0;
399762306a36Sopenharmony_ci	}
399862306a36Sopenharmony_ci
399962306a36Sopenharmony_ci	if (ffe_ctl->loop >= LOOP_CACHING_WAIT && ffe_ctl->have_caching_bg)
400062306a36Sopenharmony_ci		return 1;
400162306a36Sopenharmony_ci
400262306a36Sopenharmony_ci	ffe_ctl->index++;
400362306a36Sopenharmony_ci	if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
400462306a36Sopenharmony_ci		return 1;
400562306a36Sopenharmony_ci
400662306a36Sopenharmony_ci	/* See the comments for btrfs_loop_type for an explanation of the phases. */
400762306a36Sopenharmony_ci	if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
400862306a36Sopenharmony_ci		ffe_ctl->index = 0;
400962306a36Sopenharmony_ci		/*
401062306a36Sopenharmony_ci		 * We want to skip the LOOP_CACHING_WAIT step if we don't have
401162306a36Sopenharmony_ci		 * any uncached bgs and we've already done a full search
401262306a36Sopenharmony_ci		 * through.
401362306a36Sopenharmony_ci		 */
401462306a36Sopenharmony_ci		if (ffe_ctl->loop == LOOP_CACHING_NOWAIT &&
401562306a36Sopenharmony_ci		    (!ffe_ctl->orig_have_caching_bg && full_search))
401662306a36Sopenharmony_ci			ffe_ctl->loop++;
401762306a36Sopenharmony_ci		ffe_ctl->loop++;
401862306a36Sopenharmony_ci
401962306a36Sopenharmony_ci		if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) {
402062306a36Sopenharmony_ci			struct btrfs_trans_handle *trans;
402162306a36Sopenharmony_ci			int exist = 0;
402262306a36Sopenharmony_ci
402362306a36Sopenharmony_ci			/* Check if allocation policy allows to create a new chunk */
402462306a36Sopenharmony_ci			ret = can_allocate_chunk(fs_info, ffe_ctl);
402562306a36Sopenharmony_ci			if (ret)
402662306a36Sopenharmony_ci				return ret;
402762306a36Sopenharmony_ci
402862306a36Sopenharmony_ci			trans = current->journal_info;
402962306a36Sopenharmony_ci			if (trans)
403062306a36Sopenharmony_ci				exist = 1;
403162306a36Sopenharmony_ci			else
403262306a36Sopenharmony_ci				trans = btrfs_join_transaction(root);
403362306a36Sopenharmony_ci
403462306a36Sopenharmony_ci			if (IS_ERR(trans)) {
403562306a36Sopenharmony_ci				ret = PTR_ERR(trans);
403662306a36Sopenharmony_ci				return ret;
403762306a36Sopenharmony_ci			}
403862306a36Sopenharmony_ci
403962306a36Sopenharmony_ci			ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
404062306a36Sopenharmony_ci						CHUNK_ALLOC_FORCE_FOR_EXTENT);
404162306a36Sopenharmony_ci
404262306a36Sopenharmony_ci			/* Do not bail out on ENOSPC since we can do more. */
404362306a36Sopenharmony_ci			if (ret == -ENOSPC) {
404462306a36Sopenharmony_ci				ret = 0;
404562306a36Sopenharmony_ci				ffe_ctl->loop++;
404662306a36Sopenharmony_ci			}
404762306a36Sopenharmony_ci			else if (ret < 0)
404862306a36Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
404962306a36Sopenharmony_ci			else
405062306a36Sopenharmony_ci				ret = 0;
405162306a36Sopenharmony_ci			if (!exist)
405262306a36Sopenharmony_ci				btrfs_end_transaction(trans);
405362306a36Sopenharmony_ci			if (ret)
405462306a36Sopenharmony_ci				return ret;
405562306a36Sopenharmony_ci		}
405662306a36Sopenharmony_ci
405762306a36Sopenharmony_ci		if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
405862306a36Sopenharmony_ci			if (ffe_ctl->policy != BTRFS_EXTENT_ALLOC_CLUSTERED)
405962306a36Sopenharmony_ci				return -ENOSPC;
406062306a36Sopenharmony_ci
406162306a36Sopenharmony_ci			/*
406262306a36Sopenharmony_ci			 * Don't loop again if we already have no empty_size and
406362306a36Sopenharmony_ci			 * no empty_cluster.
406462306a36Sopenharmony_ci			 */
406562306a36Sopenharmony_ci			if (ffe_ctl->empty_size == 0 &&
406662306a36Sopenharmony_ci			    ffe_ctl->empty_cluster == 0)
406762306a36Sopenharmony_ci				return -ENOSPC;
406862306a36Sopenharmony_ci			ffe_ctl->empty_size = 0;
406962306a36Sopenharmony_ci			ffe_ctl->empty_cluster = 0;
407062306a36Sopenharmony_ci		}
407162306a36Sopenharmony_ci		return 1;
407262306a36Sopenharmony_ci	}
407362306a36Sopenharmony_ci	return -ENOSPC;
407462306a36Sopenharmony_ci}
407562306a36Sopenharmony_ci
407662306a36Sopenharmony_cistatic bool find_free_extent_check_size_class(struct find_free_extent_ctl *ffe_ctl,
407762306a36Sopenharmony_ci					      struct btrfs_block_group *bg)
407862306a36Sopenharmony_ci{
407962306a36Sopenharmony_ci	if (ffe_ctl->policy == BTRFS_EXTENT_ALLOC_ZONED)
408062306a36Sopenharmony_ci		return true;
408162306a36Sopenharmony_ci	if (!btrfs_block_group_should_use_size_class(bg))
408262306a36Sopenharmony_ci		return true;
408362306a36Sopenharmony_ci	if (ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS)
408462306a36Sopenharmony_ci		return true;
408562306a36Sopenharmony_ci	if (ffe_ctl->loop >= LOOP_UNSET_SIZE_CLASS &&
408662306a36Sopenharmony_ci	    bg->size_class == BTRFS_BG_SZ_NONE)
408762306a36Sopenharmony_ci		return true;
408862306a36Sopenharmony_ci	return ffe_ctl->size_class == bg->size_class;
408962306a36Sopenharmony_ci}
409062306a36Sopenharmony_ci
409162306a36Sopenharmony_cistatic int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
409262306a36Sopenharmony_ci					struct find_free_extent_ctl *ffe_ctl,
409362306a36Sopenharmony_ci					struct btrfs_space_info *space_info,
409462306a36Sopenharmony_ci					struct btrfs_key *ins)
409562306a36Sopenharmony_ci{
409662306a36Sopenharmony_ci	/*
409762306a36Sopenharmony_ci	 * If our free space is heavily fragmented we may not be able to make
409862306a36Sopenharmony_ci	 * big contiguous allocations, so instead of doing the expensive search
409962306a36Sopenharmony_ci	 * for free space, simply return ENOSPC with our max_extent_size so we
410062306a36Sopenharmony_ci	 * can go ahead and search for a more manageable chunk.
410162306a36Sopenharmony_ci	 *
410262306a36Sopenharmony_ci	 * If our max_extent_size is large enough for our allocation simply
410362306a36Sopenharmony_ci	 * disable clustering since we will likely not be able to find enough
410462306a36Sopenharmony_ci	 * space to create a cluster and induce latency trying.
410562306a36Sopenharmony_ci	 */
410662306a36Sopenharmony_ci	if (space_info->max_extent_size) {
410762306a36Sopenharmony_ci		spin_lock(&space_info->lock);
410862306a36Sopenharmony_ci		if (space_info->max_extent_size &&
410962306a36Sopenharmony_ci		    ffe_ctl->num_bytes > space_info->max_extent_size) {
411062306a36Sopenharmony_ci			ins->offset = space_info->max_extent_size;
411162306a36Sopenharmony_ci			spin_unlock(&space_info->lock);
411262306a36Sopenharmony_ci			return -ENOSPC;
411362306a36Sopenharmony_ci		} else if (space_info->max_extent_size) {
411462306a36Sopenharmony_ci			ffe_ctl->use_cluster = false;
411562306a36Sopenharmony_ci		}
411662306a36Sopenharmony_ci		spin_unlock(&space_info->lock);
411762306a36Sopenharmony_ci	}
411862306a36Sopenharmony_ci
411962306a36Sopenharmony_ci	ffe_ctl->last_ptr = fetch_cluster_info(fs_info, space_info,
412062306a36Sopenharmony_ci					       &ffe_ctl->empty_cluster);
412162306a36Sopenharmony_ci	if (ffe_ctl->last_ptr) {
412262306a36Sopenharmony_ci		struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
412362306a36Sopenharmony_ci
412462306a36Sopenharmony_ci		spin_lock(&last_ptr->lock);
412562306a36Sopenharmony_ci		if (last_ptr->block_group)
412662306a36Sopenharmony_ci			ffe_ctl->hint_byte = last_ptr->window_start;
412762306a36Sopenharmony_ci		if (last_ptr->fragmented) {
412862306a36Sopenharmony_ci			/*
412962306a36Sopenharmony_ci			 * We still set window_start so we can keep track of the
413062306a36Sopenharmony_ci			 * last place we found an allocation to try and save
413162306a36Sopenharmony_ci			 * some time.
413262306a36Sopenharmony_ci			 */
413362306a36Sopenharmony_ci			ffe_ctl->hint_byte = last_ptr->window_start;
413462306a36Sopenharmony_ci			ffe_ctl->use_cluster = false;
413562306a36Sopenharmony_ci		}
413662306a36Sopenharmony_ci		spin_unlock(&last_ptr->lock);
413762306a36Sopenharmony_ci	}
413862306a36Sopenharmony_ci
413962306a36Sopenharmony_ci	return 0;
414062306a36Sopenharmony_ci}
414162306a36Sopenharmony_ci
414262306a36Sopenharmony_cistatic int prepare_allocation_zoned(struct btrfs_fs_info *fs_info,
414362306a36Sopenharmony_ci				    struct find_free_extent_ctl *ffe_ctl)
414462306a36Sopenharmony_ci{
414562306a36Sopenharmony_ci	if (ffe_ctl->for_treelog) {
414662306a36Sopenharmony_ci		spin_lock(&fs_info->treelog_bg_lock);
414762306a36Sopenharmony_ci		if (fs_info->treelog_bg)
414862306a36Sopenharmony_ci			ffe_ctl->hint_byte = fs_info->treelog_bg;
414962306a36Sopenharmony_ci		spin_unlock(&fs_info->treelog_bg_lock);
415062306a36Sopenharmony_ci	} else if (ffe_ctl->for_data_reloc) {
415162306a36Sopenharmony_ci		spin_lock(&fs_info->relocation_bg_lock);
415262306a36Sopenharmony_ci		if (fs_info->data_reloc_bg)
415362306a36Sopenharmony_ci			ffe_ctl->hint_byte = fs_info->data_reloc_bg;
415462306a36Sopenharmony_ci		spin_unlock(&fs_info->relocation_bg_lock);
415562306a36Sopenharmony_ci	} else if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
415662306a36Sopenharmony_ci		struct btrfs_block_group *block_group;
415762306a36Sopenharmony_ci
415862306a36Sopenharmony_ci		spin_lock(&fs_info->zone_active_bgs_lock);
415962306a36Sopenharmony_ci		list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
416062306a36Sopenharmony_ci			/*
416162306a36Sopenharmony_ci			 * No lock is OK here because avail is monotinically
416262306a36Sopenharmony_ci			 * decreasing, and this is just a hint.
416362306a36Sopenharmony_ci			 */
416462306a36Sopenharmony_ci			u64 avail = block_group->zone_capacity - block_group->alloc_offset;
416562306a36Sopenharmony_ci
416662306a36Sopenharmony_ci			if (block_group_bits(block_group, ffe_ctl->flags) &&
416762306a36Sopenharmony_ci			    avail >= ffe_ctl->num_bytes) {
416862306a36Sopenharmony_ci				ffe_ctl->hint_byte = block_group->start;
416962306a36Sopenharmony_ci				break;
417062306a36Sopenharmony_ci			}
417162306a36Sopenharmony_ci		}
417262306a36Sopenharmony_ci		spin_unlock(&fs_info->zone_active_bgs_lock);
417362306a36Sopenharmony_ci	}
417462306a36Sopenharmony_ci
417562306a36Sopenharmony_ci	return 0;
417662306a36Sopenharmony_ci}
417762306a36Sopenharmony_ci
417862306a36Sopenharmony_cistatic int prepare_allocation(struct btrfs_fs_info *fs_info,
417962306a36Sopenharmony_ci			      struct find_free_extent_ctl *ffe_ctl,
418062306a36Sopenharmony_ci			      struct btrfs_space_info *space_info,
418162306a36Sopenharmony_ci			      struct btrfs_key *ins)
418262306a36Sopenharmony_ci{
418362306a36Sopenharmony_ci	switch (ffe_ctl->policy) {
418462306a36Sopenharmony_ci	case BTRFS_EXTENT_ALLOC_CLUSTERED:
418562306a36Sopenharmony_ci		return prepare_allocation_clustered(fs_info, ffe_ctl,
418662306a36Sopenharmony_ci						    space_info, ins);
418762306a36Sopenharmony_ci	case BTRFS_EXTENT_ALLOC_ZONED:
418862306a36Sopenharmony_ci		return prepare_allocation_zoned(fs_info, ffe_ctl);
418962306a36Sopenharmony_ci	default:
419062306a36Sopenharmony_ci		BUG();
419162306a36Sopenharmony_ci	}
419262306a36Sopenharmony_ci}
419362306a36Sopenharmony_ci
419462306a36Sopenharmony_ci/*
419562306a36Sopenharmony_ci * walks the btree of allocated extents and find a hole of a given size.
419662306a36Sopenharmony_ci * The key ins is changed to record the hole:
419762306a36Sopenharmony_ci * ins->objectid == start position
419862306a36Sopenharmony_ci * ins->type == BTRFS_EXTENT_ITEM_KEY
419962306a36Sopenharmony_ci * ins->offset == the size of the hole.
420062306a36Sopenharmony_ci * Any available blocks before search_start are skipped.
420162306a36Sopenharmony_ci *
420262306a36Sopenharmony_ci * If there is no suitable free space, we will record the max size of
420362306a36Sopenharmony_ci * the free space extent currently.
420462306a36Sopenharmony_ci *
420562306a36Sopenharmony_ci * The overall logic and call chain:
420662306a36Sopenharmony_ci *
420762306a36Sopenharmony_ci * find_free_extent()
420862306a36Sopenharmony_ci * |- Iterate through all block groups
420962306a36Sopenharmony_ci * |  |- Get a valid block group
421062306a36Sopenharmony_ci * |  |- Try to do clustered allocation in that block group
421162306a36Sopenharmony_ci * |  |- Try to do unclustered allocation in that block group
421262306a36Sopenharmony_ci * |  |- Check if the result is valid
421362306a36Sopenharmony_ci * |  |  |- If valid, then exit
421462306a36Sopenharmony_ci * |  |- Jump to next block group
421562306a36Sopenharmony_ci * |
421662306a36Sopenharmony_ci * |- Push harder to find free extents
421762306a36Sopenharmony_ci *    |- If not found, re-iterate all block groups
 *
 * @root is used for tracing and to reach fs_info.  @ffe_ctl carries the
 * request (num_bytes, flags, hint_byte, delalloc, ...) and doubles as
 * scratch state: most of its search fields are reset on entry.
 *
 * Returns -ENOSPC right away when there is no space_info for
 * ffe_ctl->flags, or the negative error from prepare_allocation().
 * Otherwise the result comes from find_free_extent_update_loop(); an
 * -ENOSPC from it is replaced by the first block group caching error
 * recorded during the search, if there was one.
421862306a36Sopenharmony_ci */
421962306a36Sopenharmony_cistatic noinline int find_free_extent(struct btrfs_root *root,
422062306a36Sopenharmony_ci				     struct btrfs_key *ins,
422162306a36Sopenharmony_ci				     struct find_free_extent_ctl *ffe_ctl)
422262306a36Sopenharmony_ci{
422362306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
422462306a36Sopenharmony_ci	int ret = 0;
422562306a36Sopenharmony_ci	int cache_block_group_error = 0;
422662306a36Sopenharmony_ci	struct btrfs_block_group *block_group = NULL;
422762306a36Sopenharmony_ci	struct btrfs_space_info *space_info;
422862306a36Sopenharmony_ci	bool full_search = false;
422962306a36Sopenharmony_ci
423062306a36Sopenharmony_ci	WARN_ON(ffe_ctl->num_bytes < fs_info->sectorsize);
423162306a36Sopenharmony_ci
	/* Start every call from a clean search state. */
423262306a36Sopenharmony_ci	ffe_ctl->search_start = 0;
423362306a36Sopenharmony_ci	/* For clustered allocation */
423462306a36Sopenharmony_ci	ffe_ctl->empty_cluster = 0;
423562306a36Sopenharmony_ci	ffe_ctl->last_ptr = NULL;
423662306a36Sopenharmony_ci	ffe_ctl->use_cluster = true;
423762306a36Sopenharmony_ci	ffe_ctl->have_caching_bg = false;
423862306a36Sopenharmony_ci	ffe_ctl->orig_have_caching_bg = false;
423962306a36Sopenharmony_ci	ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
424062306a36Sopenharmony_ci	ffe_ctl->loop = 0;
424162306a36Sopenharmony_ci	ffe_ctl->retry_uncached = false;
424262306a36Sopenharmony_ci	ffe_ctl->cached = 0;
424362306a36Sopenharmony_ci	ffe_ctl->max_extent_size = 0;
424462306a36Sopenharmony_ci	ffe_ctl->total_free_space = 0;
424562306a36Sopenharmony_ci	ffe_ctl->found_offset = 0;
424662306a36Sopenharmony_ci	ffe_ctl->policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
424762306a36Sopenharmony_ci	ffe_ctl->size_class = btrfs_calc_block_group_size_class(ffe_ctl->num_bytes);
424862306a36Sopenharmony_ci
424962306a36Sopenharmony_ci	if (btrfs_is_zoned(fs_info))
425062306a36Sopenharmony_ci		ffe_ctl->policy = BTRFS_EXTENT_ALLOC_ZONED;
425162306a36Sopenharmony_ci
425262306a36Sopenharmony_ci	ins->type = BTRFS_EXTENT_ITEM_KEY;
425362306a36Sopenharmony_ci	ins->objectid = 0;
425462306a36Sopenharmony_ci	ins->offset = 0;
425562306a36Sopenharmony_ci
425662306a36Sopenharmony_ci	trace_find_free_extent(root, ffe_ctl);
425762306a36Sopenharmony_ci
425862306a36Sopenharmony_ci	space_info = btrfs_find_space_info(fs_info, ffe_ctl->flags);
425962306a36Sopenharmony_ci	if (!space_info) {
426062306a36Sopenharmony_ci		btrfs_err(fs_info, "No space info for %llu", ffe_ctl->flags);
426162306a36Sopenharmony_ci		return -ENOSPC;
426262306a36Sopenharmony_ci	}
426362306a36Sopenharmony_ci
426462306a36Sopenharmony_ci	ret = prepare_allocation(fs_info, ffe_ctl, space_info, ins);
426562306a36Sopenharmony_ci	if (ret < 0)
426662306a36Sopenharmony_ci		return ret;
426762306a36Sopenharmony_ci
	/*
	 * Raise the start to at least first_logical_byte() and hint_byte.
	 * The hinted block group is only tried when the hint ended up being
	 * the start position.
	 */
426862306a36Sopenharmony_ci	ffe_ctl->search_start = max(ffe_ctl->search_start,
426962306a36Sopenharmony_ci				    first_logical_byte(fs_info));
427062306a36Sopenharmony_ci	ffe_ctl->search_start = max(ffe_ctl->search_start, ffe_ctl->hint_byte);
427162306a36Sopenharmony_ci	if (ffe_ctl->search_start == ffe_ctl->hint_byte) {
427262306a36Sopenharmony_ci		block_group = btrfs_lookup_block_group(fs_info,
427362306a36Sopenharmony_ci						       ffe_ctl->search_start);
427462306a36Sopenharmony_ci		/*
427562306a36Sopenharmony_ci		 * we don't want to use the block group if it doesn't match our
427662306a36Sopenharmony_ci		 * allocation bits, or if it's not cached.
427762306a36Sopenharmony_ci		 *
427862306a36Sopenharmony_ci		 * However if we are re-searching with an ideal block group
427962306a36Sopenharmony_ci		 * picked out then we don't care that the block group is cached.
428062306a36Sopenharmony_ci		 */
428162306a36Sopenharmony_ci		if (block_group && block_group_bits(block_group, ffe_ctl->flags) &&
428262306a36Sopenharmony_ci		    block_group->cached != BTRFS_CACHE_NO) {
428362306a36Sopenharmony_ci			down_read(&space_info->groups_sem);
428462306a36Sopenharmony_ci			if (list_empty(&block_group->list) ||
428562306a36Sopenharmony_ci			    block_group->ro) {
428662306a36Sopenharmony_ci				/*
428762306a36Sopenharmony_ci				 * someone is removing this block group,
428862306a36Sopenharmony_ci				 * we can't jump into the have_block_group
428962306a36Sopenharmony_ci				 * target because our list pointers are not
429062306a36Sopenharmony_ci				 * valid
429162306a36Sopenharmony_ci				 */
429262306a36Sopenharmony_ci				btrfs_put_block_group(block_group);
429362306a36Sopenharmony_ci				up_read(&space_info->groups_sem);
429462306a36Sopenharmony_ci			} else {
429562306a36Sopenharmony_ci				ffe_ctl->index = btrfs_bg_flags_to_raid_index(
429662306a36Sopenharmony_ci							block_group->flags);
429762306a36Sopenharmony_ci				btrfs_lock_block_group(block_group,
429862306a36Sopenharmony_ci						       ffe_ctl->delalloc);
429962306a36Sopenharmony_ci				ffe_ctl->hinted = true;
				/*
				 * groups_sem is still held for read across
				 * this jump; it is dropped by the up_read()
				 * that follows the block group loop.
				 */
430062306a36Sopenharmony_ci				goto have_block_group;
430162306a36Sopenharmony_ci			}
430262306a36Sopenharmony_ci		} else if (block_group) {
430362306a36Sopenharmony_ci			btrfs_put_block_group(block_group);
430462306a36Sopenharmony_ci		}
430562306a36Sopenharmony_ci	}
430662306a36Sopenharmony_cisearch:
	/* One pass over the block groups of the current raid index. */
430762306a36Sopenharmony_ci	trace_find_free_extent_search_loop(root, ffe_ctl);
430862306a36Sopenharmony_ci	ffe_ctl->have_caching_bg = false;
430962306a36Sopenharmony_ci	if (ffe_ctl->index == btrfs_bg_flags_to_raid_index(ffe_ctl->flags) ||
431062306a36Sopenharmony_ci	    ffe_ctl->index == 0)
431162306a36Sopenharmony_ci		full_search = true;
431262306a36Sopenharmony_ci	down_read(&space_info->groups_sem);
431362306a36Sopenharmony_ci	list_for_each_entry(block_group,
431462306a36Sopenharmony_ci			    &space_info->block_groups[ffe_ctl->index], list) {
431562306a36Sopenharmony_ci		struct btrfs_block_group *bg_ret;
431662306a36Sopenharmony_ci
431762306a36Sopenharmony_ci		ffe_ctl->hinted = false;
431862306a36Sopenharmony_ci		/* If the block group is read-only, we can skip it entirely. */
431962306a36Sopenharmony_ci		if (unlikely(block_group->ro)) {
432062306a36Sopenharmony_ci			if (ffe_ctl->for_treelog)
432162306a36Sopenharmony_ci				btrfs_clear_treelog_bg(block_group);
432262306a36Sopenharmony_ci			if (ffe_ctl->for_data_reloc)
432362306a36Sopenharmony_ci				btrfs_clear_data_reloc_bg(block_group);
432462306a36Sopenharmony_ci			continue;
432562306a36Sopenharmony_ci		}
432662306a36Sopenharmony_ci
432762306a36Sopenharmony_ci		btrfs_grab_block_group(block_group, ffe_ctl->delalloc);
432862306a36Sopenharmony_ci		ffe_ctl->search_start = block_group->start;
432962306a36Sopenharmony_ci
433062306a36Sopenharmony_ci		/*
433162306a36Sopenharmony_ci		 * this can happen if we end up cycling through all the
433262306a36Sopenharmony_ci		 * raid types, but we want to make sure we only allocate
433362306a36Sopenharmony_ci		 * for the proper type.
433462306a36Sopenharmony_ci		 */
433562306a36Sopenharmony_ci		if (!block_group_bits(block_group, ffe_ctl->flags)) {
433662306a36Sopenharmony_ci			u64 extra = BTRFS_BLOCK_GROUP_DUP |
433762306a36Sopenharmony_ci				BTRFS_BLOCK_GROUP_RAID1_MASK |
433862306a36Sopenharmony_ci				BTRFS_BLOCK_GROUP_RAID56_MASK |
433962306a36Sopenharmony_ci				BTRFS_BLOCK_GROUP_RAID10;
434062306a36Sopenharmony_ci
434162306a36Sopenharmony_ci			/*
434262306a36Sopenharmony_ci			 * if they asked for extra copies and this block group
434362306a36Sopenharmony_ci			 * doesn't provide them, bail.  This does allow us to
434462306a36Sopenharmony_ci			 * fill raid0 from raid1.
434562306a36Sopenharmony_ci			 */
434662306a36Sopenharmony_ci			if ((ffe_ctl->flags & extra) && !(block_group->flags & extra))
434762306a36Sopenharmony_ci				goto loop;
434862306a36Sopenharmony_ci
434962306a36Sopenharmony_ci			/*
435062306a36Sopenharmony_ci			 * This block group has different flags than we want.
435162306a36Sopenharmony_ci			 * It's possible that we have MIXED_GROUP flag but no
435262306a36Sopenharmony_ci			 * block group is mixed.  Just skip such block group.
435362306a36Sopenharmony_ci			 */
435462306a36Sopenharmony_ci			btrfs_release_block_group(block_group, ffe_ctl->delalloc);
435562306a36Sopenharmony_ci			continue;
435662306a36Sopenharmony_ci		}
435762306a36Sopenharmony_ci
435862306a36Sopenharmony_cihave_block_group:
		/*
		 * Reached by falling through from above, from the hinted
		 * block group path before the loop, and from the retry at
		 * the "loop" label below.
		 */
435962306a36Sopenharmony_ci		trace_find_free_extent_have_block_group(root, ffe_ctl, block_group);
436062306a36Sopenharmony_ci		ffe_ctl->cached = btrfs_block_group_done(block_group);
436162306a36Sopenharmony_ci		if (unlikely(!ffe_ctl->cached)) {
436262306a36Sopenharmony_ci			ffe_ctl->have_caching_bg = true;
436362306a36Sopenharmony_ci			ret = btrfs_cache_block_group(block_group, false);
436462306a36Sopenharmony_ci
436562306a36Sopenharmony_ci			/*
436662306a36Sopenharmony_ci			 * If we get ENOMEM here or something else we want to
436762306a36Sopenharmony_ci			 * try other block groups, because it may not be fatal.
436862306a36Sopenharmony_ci			 * However if we can't find anything else we need to
436962306a36Sopenharmony_ci			 * save our return here so that we return the actual
437062306a36Sopenharmony_ci			 * error that caused problems, not ENOSPC.
437162306a36Sopenharmony_ci			 */
437262306a36Sopenharmony_ci			if (ret < 0) {
437362306a36Sopenharmony_ci				if (!cache_block_group_error)
437462306a36Sopenharmony_ci					cache_block_group_error = ret;
437562306a36Sopenharmony_ci				ret = 0;
437662306a36Sopenharmony_ci				goto loop;
437762306a36Sopenharmony_ci			}
437862306a36Sopenharmony_ci			ret = 0;
437962306a36Sopenharmony_ci		}
438062306a36Sopenharmony_ci
438162306a36Sopenharmony_ci		if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) {
438262306a36Sopenharmony_ci			if (!cache_block_group_error)
438362306a36Sopenharmony_ci				cache_block_group_error = -EIO;
438462306a36Sopenharmony_ci			goto loop;
438562306a36Sopenharmony_ci		}
438662306a36Sopenharmony_ci
438762306a36Sopenharmony_ci		if (!find_free_extent_check_size_class(ffe_ctl, block_group))
438862306a36Sopenharmony_ci			goto loop;
438962306a36Sopenharmony_ci
439062306a36Sopenharmony_ci		bg_ret = NULL;
439162306a36Sopenharmony_ci		ret = do_allocation(block_group, ffe_ctl, &bg_ret);
		/* A positive return sends us on to the next block group. */
439262306a36Sopenharmony_ci		if (ret > 0)
439362306a36Sopenharmony_ci			goto loop;
439462306a36Sopenharmony_ci
		/*
		 * do_allocation() may hand back a different block group via
		 * bg_ret; switch to it and release the one we started with.
		 */
439562306a36Sopenharmony_ci		if (bg_ret && bg_ret != block_group) {
439662306a36Sopenharmony_ci			btrfs_release_block_group(block_group, ffe_ctl->delalloc);
439762306a36Sopenharmony_ci			block_group = bg_ret;
439862306a36Sopenharmony_ci		}
439962306a36Sopenharmony_ci
440062306a36Sopenharmony_ci		/* Checks */
440162306a36Sopenharmony_ci		ffe_ctl->search_start = round_up(ffe_ctl->found_offset,
440262306a36Sopenharmony_ci						 fs_info->stripesize);
440362306a36Sopenharmony_ci
440462306a36Sopenharmony_ci		/* move on to the next group */
440562306a36Sopenharmony_ci		if (ffe_ctl->search_start + ffe_ctl->num_bytes >
440662306a36Sopenharmony_ci		    block_group->start + block_group->length) {
440762306a36Sopenharmony_ci			btrfs_add_free_space_unused(block_group,
440862306a36Sopenharmony_ci					    ffe_ctl->found_offset,
440962306a36Sopenharmony_ci					    ffe_ctl->num_bytes);
441062306a36Sopenharmony_ci			goto loop;
441162306a36Sopenharmony_ci		}
441262306a36Sopenharmony_ci
		/* Give back the bytes skipped by rounding up to stripesize. */
441362306a36Sopenharmony_ci		if (ffe_ctl->found_offset < ffe_ctl->search_start)
441462306a36Sopenharmony_ci			btrfs_add_free_space_unused(block_group,
441562306a36Sopenharmony_ci					ffe_ctl->found_offset,
441662306a36Sopenharmony_ci					ffe_ctl->search_start - ffe_ctl->found_offset);
441762306a36Sopenharmony_ci
441862306a36Sopenharmony_ci		ret = btrfs_add_reserved_bytes(block_group, ffe_ctl->ram_bytes,
441962306a36Sopenharmony_ci					       ffe_ctl->num_bytes,
442062306a36Sopenharmony_ci					       ffe_ctl->delalloc,
442162306a36Sopenharmony_ci					       ffe_ctl->loop >= LOOP_WRONG_SIZE_CLASS);
442262306a36Sopenharmony_ci		if (ret == -EAGAIN) {
442362306a36Sopenharmony_ci			btrfs_add_free_space_unused(block_group,
442462306a36Sopenharmony_ci					ffe_ctl->found_offset,
442562306a36Sopenharmony_ci					ffe_ctl->num_bytes);
442662306a36Sopenharmony_ci			goto loop;
442762306a36Sopenharmony_ci		}
442862306a36Sopenharmony_ci		btrfs_inc_block_group_reservations(block_group);
442962306a36Sopenharmony_ci
443062306a36Sopenharmony_ci		/* we are all good, lets return */
443162306a36Sopenharmony_ci		ins->objectid = ffe_ctl->search_start;
443262306a36Sopenharmony_ci		ins->offset = ffe_ctl->num_bytes;
443362306a36Sopenharmony_ci
443462306a36Sopenharmony_ci		trace_btrfs_reserve_extent(block_group, ffe_ctl);
443562306a36Sopenharmony_ci		btrfs_release_block_group(block_group, ffe_ctl->delalloc);
443662306a36Sopenharmony_ci		break;
443762306a36Sopenharmony_ciloop:
		/*
		 * Before giving up on a block group that is not fully cached,
		 * wait for caching progress and try it again.  The retry is
		 * guarded by retry_uncached and only happens once
		 * ffe_ctl->loop is past LOOP_CACHING_NOWAIT.
		 */
443862306a36Sopenharmony_ci		if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
443962306a36Sopenharmony_ci		    !ffe_ctl->retry_uncached) {
444062306a36Sopenharmony_ci			ffe_ctl->retry_uncached = true;
444162306a36Sopenharmony_ci			btrfs_wait_block_group_cache_progress(block_group,
444262306a36Sopenharmony_ci						ffe_ctl->num_bytes +
444362306a36Sopenharmony_ci						ffe_ctl->empty_cluster +
444462306a36Sopenharmony_ci						ffe_ctl->empty_size);
444562306a36Sopenharmony_ci			goto have_block_group;
444662306a36Sopenharmony_ci		}
444762306a36Sopenharmony_ci		release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
444862306a36Sopenharmony_ci		cond_resched();
444962306a36Sopenharmony_ci	}
445062306a36Sopenharmony_ci	up_read(&space_info->groups_sem);
445162306a36Sopenharmony_ci
	/* A positive return here asks for another pass over the block groups. */
445262306a36Sopenharmony_ci	ret = find_free_extent_update_loop(fs_info, ins, ffe_ctl, full_search);
445362306a36Sopenharmony_ci	if (ret > 0)
445462306a36Sopenharmony_ci		goto search;
445562306a36Sopenharmony_ci
445662306a36Sopenharmony_ci	if (ret == -ENOSPC && !cache_block_group_error) {
445762306a36Sopenharmony_ci		/*
445862306a36Sopenharmony_ci		 * Use ffe_ctl->total_free_space as fallback if we can't find
445962306a36Sopenharmony_ci		 * any contiguous hole.
446062306a36Sopenharmony_ci		 */
446162306a36Sopenharmony_ci		if (!ffe_ctl->max_extent_size)
446262306a36Sopenharmony_ci			ffe_ctl->max_extent_size = ffe_ctl->total_free_space;
		/*
		 * Record the size in space_info, where
		 * prepare_allocation_clustered() reads it to fail oversized
		 * requests early, and report it to the caller via ins->offset.
		 */
446362306a36Sopenharmony_ci		spin_lock(&space_info->lock);
446462306a36Sopenharmony_ci		space_info->max_extent_size = ffe_ctl->max_extent_size;
446562306a36Sopenharmony_ci		spin_unlock(&space_info->lock);
446662306a36Sopenharmony_ci		ins->offset = ffe_ctl->max_extent_size;
446762306a36Sopenharmony_ci	} else if (ret == -ENOSPC) {
		/* Report the real caching failure instead of ENOSPC. */
446862306a36Sopenharmony_ci		ret = cache_block_group_error;
446962306a36Sopenharmony_ci	}
447062306a36Sopenharmony_ci	return ret;
447162306a36Sopenharmony_ci}
447262306a36Sopenharmony_ci
447362306a36Sopenharmony_ci/*
447462306a36Sopenharmony_ci * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a
447562306a36Sopenharmony_ci *			  hole that is at least as big as @num_bytes.
447662306a36Sopenharmony_ci *
447762306a36Sopenharmony_ci * @root           -	The root that will contain this extent
447862306a36Sopenharmony_ci *
447962306a36Sopenharmony_ci * @ram_bytes      -	The amount of space in ram that @num_bytes take. This
448062306a36Sopenharmony_ci *			is used for accounting purposes. This value differs
448162306a36Sopenharmony_ci *			from @num_bytes only in the case of compressed extents.
448262306a36Sopenharmony_ci *
448362306a36Sopenharmony_ci * @num_bytes      -	Number of bytes to allocate on-disk.
448462306a36Sopenharmony_ci *
448562306a36Sopenharmony_ci * @min_alloc_size -	Indicates the minimum amount of space that the
448662306a36Sopenharmony_ci *			allocator should try to satisfy. In some cases
448762306a36Sopenharmony_ci *			@num_bytes may be larger than what is required and if
448862306a36Sopenharmony_ci *			the filesystem is fragmented then allocation fails.
448962306a36Sopenharmony_ci *			However, the presence of @min_alloc_size gives a
449062306a36Sopenharmony_ci *			chance to try and satisfy the smaller allocation.
449162306a36Sopenharmony_ci *
449262306a36Sopenharmony_ci * @empty_size     -	A hint that you plan on doing more COW. This is the
449362306a36Sopenharmony_ci *			size in bytes the allocator should try to find free
449462306a36Sopenharmony_ci *			next to the block it returns.  This is just a hint and
449562306a36Sopenharmony_ci *			may be ignored by the allocator.
449662306a36Sopenharmony_ci *
449762306a36Sopenharmony_ci * @hint_byte      -	Hint to the allocator to start searching above the byte
449862306a36Sopenharmony_ci *			address passed. It might be ignored.
449962306a36Sopenharmony_ci *
450062306a36Sopenharmony_ci * @ins            -	This key is modified to record the found hole. It will
450162306a36Sopenharmony_ci *			have the following values:
450262306a36Sopenharmony_ci *			ins->objectid == start position
450362306a36Sopenharmony_ci *			ins->flags = BTRFS_EXTENT_ITEM_KEY
450462306a36Sopenharmony_ci *			ins->offset == the size of the hole.
450562306a36Sopenharmony_ci *
450662306a36Sopenharmony_ci * @is_data        -	Boolean flag indicating whether an extent is
450762306a36Sopenharmony_ci *			allocated for data (true) or metadata (false)
450862306a36Sopenharmony_ci *
450962306a36Sopenharmony_ci * @delalloc       -	Boolean flag indicating whether this allocation is for
451062306a36Sopenharmony_ci *			delalloc or not. If 'true' data_rwsem of block groups
451162306a36Sopenharmony_ci *			is going to be acquired.
451262306a36Sopenharmony_ci *
451362306a36Sopenharmony_ci *
451462306a36Sopenharmony_ci * Returns 0 when an allocation succeeded or < 0 when an error occurred. In
451562306a36Sopenharmony_ci * case -ENOSPC is returned then @ins->offset will contain the size of the
451662306a36Sopenharmony_ci * largest available hole the allocator managed to find.
451762306a36Sopenharmony_ci */
451862306a36Sopenharmony_ciint btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
451962306a36Sopenharmony_ci			 u64 num_bytes, u64 min_alloc_size,
452062306a36Sopenharmony_ci			 u64 empty_size, u64 hint_byte,
452162306a36Sopenharmony_ci			 struct btrfs_key *ins, int is_data, int delalloc)
452262306a36Sopenharmony_ci{
452362306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
452462306a36Sopenharmony_ci	struct find_free_extent_ctl ffe_ctl = {};
452562306a36Sopenharmony_ci	bool final_tried = num_bytes == min_alloc_size;
452662306a36Sopenharmony_ci	u64 flags;
452762306a36Sopenharmony_ci	int ret;
452862306a36Sopenharmony_ci	bool for_treelog = (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
452962306a36Sopenharmony_ci	bool for_data_reloc = (btrfs_is_data_reloc_root(root) && is_data);
453062306a36Sopenharmony_ci
453162306a36Sopenharmony_ci	flags = get_alloc_profile_by_root(root, is_data);
453262306a36Sopenharmony_ciagain:
453362306a36Sopenharmony_ci	WARN_ON(num_bytes < fs_info->sectorsize);
453462306a36Sopenharmony_ci
453562306a36Sopenharmony_ci	ffe_ctl.ram_bytes = ram_bytes;
453662306a36Sopenharmony_ci	ffe_ctl.num_bytes = num_bytes;
453762306a36Sopenharmony_ci	ffe_ctl.min_alloc_size = min_alloc_size;
453862306a36Sopenharmony_ci	ffe_ctl.empty_size = empty_size;
453962306a36Sopenharmony_ci	ffe_ctl.flags = flags;
454062306a36Sopenharmony_ci	ffe_ctl.delalloc = delalloc;
454162306a36Sopenharmony_ci	ffe_ctl.hint_byte = hint_byte;
454262306a36Sopenharmony_ci	ffe_ctl.for_treelog = for_treelog;
454362306a36Sopenharmony_ci	ffe_ctl.for_data_reloc = for_data_reloc;
454462306a36Sopenharmony_ci
454562306a36Sopenharmony_ci	ret = find_free_extent(root, ins, &ffe_ctl);
454662306a36Sopenharmony_ci	if (!ret && !is_data) {
454762306a36Sopenharmony_ci		btrfs_dec_block_group_reservations(fs_info, ins->objectid);
454862306a36Sopenharmony_ci	} else if (ret == -ENOSPC) {
454962306a36Sopenharmony_ci		if (!final_tried && ins->offset) {
455062306a36Sopenharmony_ci			num_bytes = min(num_bytes >> 1, ins->offset);
455162306a36Sopenharmony_ci			num_bytes = round_down(num_bytes,
455262306a36Sopenharmony_ci					       fs_info->sectorsize);
455362306a36Sopenharmony_ci			num_bytes = max(num_bytes, min_alloc_size);
455462306a36Sopenharmony_ci			ram_bytes = num_bytes;
455562306a36Sopenharmony_ci			if (num_bytes == min_alloc_size)
455662306a36Sopenharmony_ci				final_tried = true;
455762306a36Sopenharmony_ci			goto again;
455862306a36Sopenharmony_ci		} else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
455962306a36Sopenharmony_ci			struct btrfs_space_info *sinfo;
456062306a36Sopenharmony_ci
456162306a36Sopenharmony_ci			sinfo = btrfs_find_space_info(fs_info, flags);
456262306a36Sopenharmony_ci			btrfs_err(fs_info,
456362306a36Sopenharmony_ci	"allocation failed flags %llu, wanted %llu tree-log %d, relocation: %d",
456462306a36Sopenharmony_ci				  flags, num_bytes, for_treelog, for_data_reloc);
456562306a36Sopenharmony_ci			if (sinfo)
456662306a36Sopenharmony_ci				btrfs_dump_space_info(fs_info, sinfo,
456762306a36Sopenharmony_ci						      num_bytes, 1);
456862306a36Sopenharmony_ci		}
456962306a36Sopenharmony_ci	}
457062306a36Sopenharmony_ci
457162306a36Sopenharmony_ci	return ret;
457262306a36Sopenharmony_ci}
457362306a36Sopenharmony_ci
457462306a36Sopenharmony_ciint btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
457562306a36Sopenharmony_ci			       u64 start, u64 len, int delalloc)
457662306a36Sopenharmony_ci{
457762306a36Sopenharmony_ci	struct btrfs_block_group *cache;
457862306a36Sopenharmony_ci
457962306a36Sopenharmony_ci	cache = btrfs_lookup_block_group(fs_info, start);
458062306a36Sopenharmony_ci	if (!cache) {
458162306a36Sopenharmony_ci		btrfs_err(fs_info, "Unable to find block group for %llu",
458262306a36Sopenharmony_ci			  start);
458362306a36Sopenharmony_ci		return -ENOSPC;
458462306a36Sopenharmony_ci	}
458562306a36Sopenharmony_ci
458662306a36Sopenharmony_ci	btrfs_add_free_space(cache, start, len);
458762306a36Sopenharmony_ci	btrfs_free_reserved_bytes(cache, len, delalloc);
458862306a36Sopenharmony_ci	trace_btrfs_reserved_extent_free(fs_info, start, len);
458962306a36Sopenharmony_ci
459062306a36Sopenharmony_ci	btrfs_put_block_group(cache);
459162306a36Sopenharmony_ci	return 0;
459262306a36Sopenharmony_ci}
459362306a36Sopenharmony_ci
459462306a36Sopenharmony_ciint btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
459562306a36Sopenharmony_ci			      u64 len)
459662306a36Sopenharmony_ci{
459762306a36Sopenharmony_ci	struct btrfs_block_group *cache;
459862306a36Sopenharmony_ci	int ret = 0;
459962306a36Sopenharmony_ci
460062306a36Sopenharmony_ci	cache = btrfs_lookup_block_group(trans->fs_info, start);
460162306a36Sopenharmony_ci	if (!cache) {
460262306a36Sopenharmony_ci		btrfs_err(trans->fs_info, "unable to find block group for %llu",
460362306a36Sopenharmony_ci			  start);
460462306a36Sopenharmony_ci		return -ENOSPC;
460562306a36Sopenharmony_ci	}
460662306a36Sopenharmony_ci
460762306a36Sopenharmony_ci	ret = pin_down_extent(trans, cache, start, len, 1);
460862306a36Sopenharmony_ci	btrfs_put_block_group(cache);
460962306a36Sopenharmony_ci	return ret;
461062306a36Sopenharmony_ci}
461162306a36Sopenharmony_ci
461262306a36Sopenharmony_cistatic int alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 bytenr,
461362306a36Sopenharmony_ci				 u64 num_bytes)
461462306a36Sopenharmony_ci{
461562306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
461662306a36Sopenharmony_ci	int ret;
461762306a36Sopenharmony_ci
461862306a36Sopenharmony_ci	ret = remove_from_free_space_tree(trans, bytenr, num_bytes);
461962306a36Sopenharmony_ci	if (ret)
462062306a36Sopenharmony_ci		return ret;
462162306a36Sopenharmony_ci
462262306a36Sopenharmony_ci	ret = btrfs_update_block_group(trans, bytenr, num_bytes, true);
462362306a36Sopenharmony_ci	if (ret) {
462462306a36Sopenharmony_ci		ASSERT(!ret);
462562306a36Sopenharmony_ci		btrfs_err(fs_info, "update block group failed for %llu %llu",
462662306a36Sopenharmony_ci			  bytenr, num_bytes);
462762306a36Sopenharmony_ci		return ret;
462862306a36Sopenharmony_ci	}
462962306a36Sopenharmony_ci
463062306a36Sopenharmony_ci	trace_btrfs_reserved_extent_alloc(fs_info, bytenr, num_bytes);
463162306a36Sopenharmony_ci	return 0;
463262306a36Sopenharmony_ci}
463362306a36Sopenharmony_ci
463462306a36Sopenharmony_cistatic int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
463562306a36Sopenharmony_ci				      u64 parent, u64 root_objectid,
463662306a36Sopenharmony_ci				      u64 flags, u64 owner, u64 offset,
463762306a36Sopenharmony_ci				      struct btrfs_key *ins, int ref_mod)
463862306a36Sopenharmony_ci{
463962306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
464062306a36Sopenharmony_ci	struct btrfs_root *extent_root;
464162306a36Sopenharmony_ci	int ret;
464262306a36Sopenharmony_ci	struct btrfs_extent_item *extent_item;
464362306a36Sopenharmony_ci	struct btrfs_extent_inline_ref *iref;
464462306a36Sopenharmony_ci	struct btrfs_path *path;
464562306a36Sopenharmony_ci	struct extent_buffer *leaf;
464662306a36Sopenharmony_ci	int type;
464762306a36Sopenharmony_ci	u32 size;
464862306a36Sopenharmony_ci
464962306a36Sopenharmony_ci	if (parent > 0)
465062306a36Sopenharmony_ci		type = BTRFS_SHARED_DATA_REF_KEY;
465162306a36Sopenharmony_ci	else
465262306a36Sopenharmony_ci		type = BTRFS_EXTENT_DATA_REF_KEY;
465362306a36Sopenharmony_ci
465462306a36Sopenharmony_ci	size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
465562306a36Sopenharmony_ci
465662306a36Sopenharmony_ci	path = btrfs_alloc_path();
465762306a36Sopenharmony_ci	if (!path)
465862306a36Sopenharmony_ci		return -ENOMEM;
465962306a36Sopenharmony_ci
466062306a36Sopenharmony_ci	extent_root = btrfs_extent_root(fs_info, ins->objectid);
466162306a36Sopenharmony_ci	ret = btrfs_insert_empty_item(trans, extent_root, path, ins, size);
466262306a36Sopenharmony_ci	if (ret) {
466362306a36Sopenharmony_ci		btrfs_free_path(path);
466462306a36Sopenharmony_ci		return ret;
466562306a36Sopenharmony_ci	}
466662306a36Sopenharmony_ci
466762306a36Sopenharmony_ci	leaf = path->nodes[0];
466862306a36Sopenharmony_ci	extent_item = btrfs_item_ptr(leaf, path->slots[0],
466962306a36Sopenharmony_ci				     struct btrfs_extent_item);
467062306a36Sopenharmony_ci	btrfs_set_extent_refs(leaf, extent_item, ref_mod);
467162306a36Sopenharmony_ci	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
467262306a36Sopenharmony_ci	btrfs_set_extent_flags(leaf, extent_item,
467362306a36Sopenharmony_ci			       flags | BTRFS_EXTENT_FLAG_DATA);
467462306a36Sopenharmony_ci
467562306a36Sopenharmony_ci	iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
467662306a36Sopenharmony_ci	btrfs_set_extent_inline_ref_type(leaf, iref, type);
467762306a36Sopenharmony_ci	if (parent > 0) {
467862306a36Sopenharmony_ci		struct btrfs_shared_data_ref *ref;
467962306a36Sopenharmony_ci		ref = (struct btrfs_shared_data_ref *)(iref + 1);
468062306a36Sopenharmony_ci		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
468162306a36Sopenharmony_ci		btrfs_set_shared_data_ref_count(leaf, ref, ref_mod);
468262306a36Sopenharmony_ci	} else {
468362306a36Sopenharmony_ci		struct btrfs_extent_data_ref *ref;
468462306a36Sopenharmony_ci		ref = (struct btrfs_extent_data_ref *)(&iref->offset);
468562306a36Sopenharmony_ci		btrfs_set_extent_data_ref_root(leaf, ref, root_objectid);
468662306a36Sopenharmony_ci		btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
468762306a36Sopenharmony_ci		btrfs_set_extent_data_ref_offset(leaf, ref, offset);
468862306a36Sopenharmony_ci		btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
468962306a36Sopenharmony_ci	}
469062306a36Sopenharmony_ci
469162306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, path->nodes[0]);
469262306a36Sopenharmony_ci	btrfs_free_path(path);
469362306a36Sopenharmony_ci
469462306a36Sopenharmony_ci	return alloc_reserved_extent(trans, ins->objectid, ins->offset);
469562306a36Sopenharmony_ci}
469662306a36Sopenharmony_ci
469762306a36Sopenharmony_cistatic int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
469862306a36Sopenharmony_ci				     struct btrfs_delayed_ref_node *node,
469962306a36Sopenharmony_ci				     struct btrfs_delayed_extent_op *extent_op)
470062306a36Sopenharmony_ci{
470162306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
470262306a36Sopenharmony_ci	struct btrfs_root *extent_root;
470362306a36Sopenharmony_ci	int ret;
470462306a36Sopenharmony_ci	struct btrfs_extent_item *extent_item;
470562306a36Sopenharmony_ci	struct btrfs_key extent_key;
470662306a36Sopenharmony_ci	struct btrfs_tree_block_info *block_info;
470762306a36Sopenharmony_ci	struct btrfs_extent_inline_ref *iref;
470862306a36Sopenharmony_ci	struct btrfs_path *path;
470962306a36Sopenharmony_ci	struct extent_buffer *leaf;
471062306a36Sopenharmony_ci	struct btrfs_delayed_tree_ref *ref;
471162306a36Sopenharmony_ci	u32 size = sizeof(*extent_item) + sizeof(*iref);
471262306a36Sopenharmony_ci	u64 flags = extent_op->flags_to_set;
471362306a36Sopenharmony_ci	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
471462306a36Sopenharmony_ci
471562306a36Sopenharmony_ci	ref = btrfs_delayed_node_to_tree_ref(node);
471662306a36Sopenharmony_ci
471762306a36Sopenharmony_ci	extent_key.objectid = node->bytenr;
471862306a36Sopenharmony_ci	if (skinny_metadata) {
471962306a36Sopenharmony_ci		extent_key.offset = ref->level;
472062306a36Sopenharmony_ci		extent_key.type = BTRFS_METADATA_ITEM_KEY;
472162306a36Sopenharmony_ci	} else {
472262306a36Sopenharmony_ci		extent_key.offset = node->num_bytes;
472362306a36Sopenharmony_ci		extent_key.type = BTRFS_EXTENT_ITEM_KEY;
472462306a36Sopenharmony_ci		size += sizeof(*block_info);
472562306a36Sopenharmony_ci	}
472662306a36Sopenharmony_ci
472762306a36Sopenharmony_ci	path = btrfs_alloc_path();
472862306a36Sopenharmony_ci	if (!path)
472962306a36Sopenharmony_ci		return -ENOMEM;
473062306a36Sopenharmony_ci
473162306a36Sopenharmony_ci	extent_root = btrfs_extent_root(fs_info, extent_key.objectid);
473262306a36Sopenharmony_ci	ret = btrfs_insert_empty_item(trans, extent_root, path, &extent_key,
473362306a36Sopenharmony_ci				      size);
473462306a36Sopenharmony_ci	if (ret) {
473562306a36Sopenharmony_ci		btrfs_free_path(path);
473662306a36Sopenharmony_ci		return ret;
473762306a36Sopenharmony_ci	}
473862306a36Sopenharmony_ci
473962306a36Sopenharmony_ci	leaf = path->nodes[0];
474062306a36Sopenharmony_ci	extent_item = btrfs_item_ptr(leaf, path->slots[0],
474162306a36Sopenharmony_ci				     struct btrfs_extent_item);
474262306a36Sopenharmony_ci	btrfs_set_extent_refs(leaf, extent_item, 1);
474362306a36Sopenharmony_ci	btrfs_set_extent_generation(leaf, extent_item, trans->transid);
474462306a36Sopenharmony_ci	btrfs_set_extent_flags(leaf, extent_item,
474562306a36Sopenharmony_ci			       flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
474662306a36Sopenharmony_ci
474762306a36Sopenharmony_ci	if (skinny_metadata) {
474862306a36Sopenharmony_ci		iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
474962306a36Sopenharmony_ci	} else {
475062306a36Sopenharmony_ci		block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
475162306a36Sopenharmony_ci		btrfs_set_tree_block_key(leaf, block_info, &extent_op->key);
475262306a36Sopenharmony_ci		btrfs_set_tree_block_level(leaf, block_info, ref->level);
475362306a36Sopenharmony_ci		iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
475462306a36Sopenharmony_ci	}
475562306a36Sopenharmony_ci
475662306a36Sopenharmony_ci	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
475762306a36Sopenharmony_ci		btrfs_set_extent_inline_ref_type(leaf, iref,
475862306a36Sopenharmony_ci						 BTRFS_SHARED_BLOCK_REF_KEY);
475962306a36Sopenharmony_ci		btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent);
476062306a36Sopenharmony_ci	} else {
476162306a36Sopenharmony_ci		btrfs_set_extent_inline_ref_type(leaf, iref,
476262306a36Sopenharmony_ci						 BTRFS_TREE_BLOCK_REF_KEY);
476362306a36Sopenharmony_ci		btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root);
476462306a36Sopenharmony_ci	}
476562306a36Sopenharmony_ci
476662306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, leaf);
476762306a36Sopenharmony_ci	btrfs_free_path(path);
476862306a36Sopenharmony_ci
476962306a36Sopenharmony_ci	return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);
477062306a36Sopenharmony_ci}
477162306a36Sopenharmony_ci
477262306a36Sopenharmony_ciint btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
477362306a36Sopenharmony_ci				     struct btrfs_root *root, u64 owner,
477462306a36Sopenharmony_ci				     u64 offset, u64 ram_bytes,
477562306a36Sopenharmony_ci				     struct btrfs_key *ins)
477662306a36Sopenharmony_ci{
477762306a36Sopenharmony_ci	struct btrfs_ref generic_ref = { 0 };
477862306a36Sopenharmony_ci
477962306a36Sopenharmony_ci	BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
478062306a36Sopenharmony_ci
478162306a36Sopenharmony_ci	btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
478262306a36Sopenharmony_ci			       ins->objectid, ins->offset, 0);
478362306a36Sopenharmony_ci	btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner,
478462306a36Sopenharmony_ci			    offset, 0, false);
478562306a36Sopenharmony_ci	btrfs_ref_tree_mod(root->fs_info, &generic_ref);
478662306a36Sopenharmony_ci
478762306a36Sopenharmony_ci	return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes);
478862306a36Sopenharmony_ci}
478962306a36Sopenharmony_ci
479062306a36Sopenharmony_ci/*
479162306a36Sopenharmony_ci * this is used by the tree logging recovery code.  It records that
479262306a36Sopenharmony_ci * an extent has been allocated and makes sure to clear the free
479362306a36Sopenharmony_ci * space cache bits as well
479462306a36Sopenharmony_ci */
479562306a36Sopenharmony_ciint btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
479662306a36Sopenharmony_ci				   u64 root_objectid, u64 owner, u64 offset,
479762306a36Sopenharmony_ci				   struct btrfs_key *ins)
479862306a36Sopenharmony_ci{
479962306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
480062306a36Sopenharmony_ci	int ret;
480162306a36Sopenharmony_ci	struct btrfs_block_group *block_group;
480262306a36Sopenharmony_ci	struct btrfs_space_info *space_info;
480362306a36Sopenharmony_ci
480462306a36Sopenharmony_ci	/*
480562306a36Sopenharmony_ci	 * Mixed block groups will exclude before processing the log so we only
480662306a36Sopenharmony_ci	 * need to do the exclude dance if this fs isn't mixed.
480762306a36Sopenharmony_ci	 */
480862306a36Sopenharmony_ci	if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
480962306a36Sopenharmony_ci		ret = __exclude_logged_extent(fs_info, ins->objectid,
481062306a36Sopenharmony_ci					      ins->offset);
481162306a36Sopenharmony_ci		if (ret)
481262306a36Sopenharmony_ci			return ret;
481362306a36Sopenharmony_ci	}
481462306a36Sopenharmony_ci
481562306a36Sopenharmony_ci	block_group = btrfs_lookup_block_group(fs_info, ins->objectid);
481662306a36Sopenharmony_ci	if (!block_group)
481762306a36Sopenharmony_ci		return -EINVAL;
481862306a36Sopenharmony_ci
481962306a36Sopenharmony_ci	space_info = block_group->space_info;
482062306a36Sopenharmony_ci	spin_lock(&space_info->lock);
482162306a36Sopenharmony_ci	spin_lock(&block_group->lock);
482262306a36Sopenharmony_ci	space_info->bytes_reserved += ins->offset;
482362306a36Sopenharmony_ci	block_group->reserved += ins->offset;
482462306a36Sopenharmony_ci	spin_unlock(&block_group->lock);
482562306a36Sopenharmony_ci	spin_unlock(&space_info->lock);
482662306a36Sopenharmony_ci
482762306a36Sopenharmony_ci	ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
482862306a36Sopenharmony_ci					 offset, ins, 1);
482962306a36Sopenharmony_ci	if (ret)
483062306a36Sopenharmony_ci		btrfs_pin_extent(trans, ins->objectid, ins->offset, 1);
483162306a36Sopenharmony_ci	btrfs_put_block_group(block_group);
483262306a36Sopenharmony_ci	return ret;
483362306a36Sopenharmony_ci}
483462306a36Sopenharmony_ci
483562306a36Sopenharmony_cistatic struct extent_buffer *
483662306a36Sopenharmony_cibtrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
483762306a36Sopenharmony_ci		      u64 bytenr, int level, u64 owner,
483862306a36Sopenharmony_ci		      enum btrfs_lock_nesting nest)
483962306a36Sopenharmony_ci{
484062306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
484162306a36Sopenharmony_ci	struct extent_buffer *buf;
484262306a36Sopenharmony_ci	u64 lockdep_owner = owner;
484362306a36Sopenharmony_ci
484462306a36Sopenharmony_ci	buf = btrfs_find_create_tree_block(fs_info, bytenr, owner, level);
484562306a36Sopenharmony_ci	if (IS_ERR(buf))
484662306a36Sopenharmony_ci		return buf;
484762306a36Sopenharmony_ci
484862306a36Sopenharmony_ci	/*
484962306a36Sopenharmony_ci	 * Extra safety check in case the extent tree is corrupted and extent
485062306a36Sopenharmony_ci	 * allocator chooses to use a tree block which is already used and
485162306a36Sopenharmony_ci	 * locked.
485262306a36Sopenharmony_ci	 */
485362306a36Sopenharmony_ci	if (buf->lock_owner == current->pid) {
485462306a36Sopenharmony_ci		btrfs_err_rl(fs_info,
485562306a36Sopenharmony_ci"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
485662306a36Sopenharmony_ci			buf->start, btrfs_header_owner(buf), current->pid);
485762306a36Sopenharmony_ci		free_extent_buffer(buf);
485862306a36Sopenharmony_ci		return ERR_PTR(-EUCLEAN);
485962306a36Sopenharmony_ci	}
486062306a36Sopenharmony_ci
486162306a36Sopenharmony_ci	/*
486262306a36Sopenharmony_ci	 * The reloc trees are just snapshots, so we need them to appear to be
486362306a36Sopenharmony_ci	 * just like any other fs tree WRT lockdep.
486462306a36Sopenharmony_ci	 *
486562306a36Sopenharmony_ci	 * The exception however is in replace_path() in relocation, where we
486662306a36Sopenharmony_ci	 * hold the lock on the original fs root and then search for the reloc
486762306a36Sopenharmony_ci	 * root.  At that point we need to make sure any reloc root buffers are
486862306a36Sopenharmony_ci	 * set to the BTRFS_TREE_RELOC_OBJECTID lockdep class in order to make
486962306a36Sopenharmony_ci	 * lockdep happy.
487062306a36Sopenharmony_ci	 */
487162306a36Sopenharmony_ci	if (lockdep_owner == BTRFS_TREE_RELOC_OBJECTID &&
487262306a36Sopenharmony_ci	    !test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
487362306a36Sopenharmony_ci		lockdep_owner = BTRFS_FS_TREE_OBJECTID;
487462306a36Sopenharmony_ci
487562306a36Sopenharmony_ci	/* btrfs_clear_buffer_dirty() accesses generation field. */
487662306a36Sopenharmony_ci	btrfs_set_header_generation(buf, trans->transid);
487762306a36Sopenharmony_ci
487862306a36Sopenharmony_ci	/*
487962306a36Sopenharmony_ci	 * This needs to stay, because we could allocate a freed block from an
488062306a36Sopenharmony_ci	 * old tree into a new tree, so we need to make sure this new block is
488162306a36Sopenharmony_ci	 * set to the appropriate level and owner.
488262306a36Sopenharmony_ci	 */
488362306a36Sopenharmony_ci	btrfs_set_buffer_lockdep_class(lockdep_owner, buf, level);
488462306a36Sopenharmony_ci
488562306a36Sopenharmony_ci	__btrfs_tree_lock(buf, nest);
488662306a36Sopenharmony_ci	btrfs_clear_buffer_dirty(trans, buf);
488762306a36Sopenharmony_ci	clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
488862306a36Sopenharmony_ci	clear_bit(EXTENT_BUFFER_NO_CHECK, &buf->bflags);
488962306a36Sopenharmony_ci
489062306a36Sopenharmony_ci	set_extent_buffer_uptodate(buf);
489162306a36Sopenharmony_ci
489262306a36Sopenharmony_ci	memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header));
489362306a36Sopenharmony_ci	btrfs_set_header_level(buf, level);
489462306a36Sopenharmony_ci	btrfs_set_header_bytenr(buf, buf->start);
489562306a36Sopenharmony_ci	btrfs_set_header_generation(buf, trans->transid);
489662306a36Sopenharmony_ci	btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV);
489762306a36Sopenharmony_ci	btrfs_set_header_owner(buf, owner);
489862306a36Sopenharmony_ci	write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid);
489962306a36Sopenharmony_ci	write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid);
490062306a36Sopenharmony_ci	if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
490162306a36Sopenharmony_ci		buf->log_index = root->log_transid % 2;
490262306a36Sopenharmony_ci		/*
490362306a36Sopenharmony_ci		 * we allow two log transactions at a time, use different
490462306a36Sopenharmony_ci		 * EXTENT bit to differentiate dirty pages.
490562306a36Sopenharmony_ci		 */
490662306a36Sopenharmony_ci		if (buf->log_index == 0)
490762306a36Sopenharmony_ci			set_extent_bit(&root->dirty_log_pages, buf->start,
490862306a36Sopenharmony_ci				       buf->start + buf->len - 1,
490962306a36Sopenharmony_ci				       EXTENT_DIRTY, NULL);
491062306a36Sopenharmony_ci		else
491162306a36Sopenharmony_ci			set_extent_bit(&root->dirty_log_pages, buf->start,
491262306a36Sopenharmony_ci				       buf->start + buf->len - 1,
491362306a36Sopenharmony_ci				       EXTENT_NEW, NULL);
491462306a36Sopenharmony_ci	} else {
491562306a36Sopenharmony_ci		buf->log_index = -1;
491662306a36Sopenharmony_ci		set_extent_bit(&trans->transaction->dirty_pages, buf->start,
491762306a36Sopenharmony_ci			       buf->start + buf->len - 1, EXTENT_DIRTY, NULL);
491862306a36Sopenharmony_ci	}
491962306a36Sopenharmony_ci	/* this returns a buffer locked for blocking */
492062306a36Sopenharmony_ci	return buf;
492162306a36Sopenharmony_ci}
492262306a36Sopenharmony_ci
492362306a36Sopenharmony_ci/*
492462306a36Sopenharmony_ci * finds a free extent and does all the dirty work required for allocation
492562306a36Sopenharmony_ci * returns the tree buffer or an ERR_PTR on error.
492662306a36Sopenharmony_ci */
492762306a36Sopenharmony_cistruct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
492862306a36Sopenharmony_ci					     struct btrfs_root *root,
492962306a36Sopenharmony_ci					     u64 parent, u64 root_objectid,
493062306a36Sopenharmony_ci					     const struct btrfs_disk_key *key,
493162306a36Sopenharmony_ci					     int level, u64 hint,
493262306a36Sopenharmony_ci					     u64 empty_size,
493362306a36Sopenharmony_ci					     enum btrfs_lock_nesting nest)
493462306a36Sopenharmony_ci{
493562306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
493662306a36Sopenharmony_ci	struct btrfs_key ins;
493762306a36Sopenharmony_ci	struct btrfs_block_rsv *block_rsv;
493862306a36Sopenharmony_ci	struct extent_buffer *buf;
493962306a36Sopenharmony_ci	struct btrfs_delayed_extent_op *extent_op;
494062306a36Sopenharmony_ci	struct btrfs_ref generic_ref = { 0 };
494162306a36Sopenharmony_ci	u64 flags = 0;
494262306a36Sopenharmony_ci	int ret;
494362306a36Sopenharmony_ci	u32 blocksize = fs_info->nodesize;
494462306a36Sopenharmony_ci	bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
494562306a36Sopenharmony_ci
494662306a36Sopenharmony_ci#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
494762306a36Sopenharmony_ci	if (btrfs_is_testing(fs_info)) {
494862306a36Sopenharmony_ci		buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
494962306a36Sopenharmony_ci					    level, root_objectid, nest);
495062306a36Sopenharmony_ci		if (!IS_ERR(buf))
495162306a36Sopenharmony_ci			root->alloc_bytenr += blocksize;
495262306a36Sopenharmony_ci		return buf;
495362306a36Sopenharmony_ci	}
495462306a36Sopenharmony_ci#endif
495562306a36Sopenharmony_ci
495662306a36Sopenharmony_ci	block_rsv = btrfs_use_block_rsv(trans, root, blocksize);
495762306a36Sopenharmony_ci	if (IS_ERR(block_rsv))
495862306a36Sopenharmony_ci		return ERR_CAST(block_rsv);
495962306a36Sopenharmony_ci
496062306a36Sopenharmony_ci	ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
496162306a36Sopenharmony_ci				   empty_size, hint, &ins, 0, 0);
496262306a36Sopenharmony_ci	if (ret)
496362306a36Sopenharmony_ci		goto out_unuse;
496462306a36Sopenharmony_ci
496562306a36Sopenharmony_ci	buf = btrfs_init_new_buffer(trans, root, ins.objectid, level,
496662306a36Sopenharmony_ci				    root_objectid, nest);
496762306a36Sopenharmony_ci	if (IS_ERR(buf)) {
496862306a36Sopenharmony_ci		ret = PTR_ERR(buf);
496962306a36Sopenharmony_ci		goto out_free_reserved;
497062306a36Sopenharmony_ci	}
497162306a36Sopenharmony_ci
497262306a36Sopenharmony_ci	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
497362306a36Sopenharmony_ci		if (parent == 0)
497462306a36Sopenharmony_ci			parent = ins.objectid;
497562306a36Sopenharmony_ci		flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
497662306a36Sopenharmony_ci	} else
497762306a36Sopenharmony_ci		BUG_ON(parent > 0);
497862306a36Sopenharmony_ci
497962306a36Sopenharmony_ci	if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
498062306a36Sopenharmony_ci		extent_op = btrfs_alloc_delayed_extent_op();
498162306a36Sopenharmony_ci		if (!extent_op) {
498262306a36Sopenharmony_ci			ret = -ENOMEM;
498362306a36Sopenharmony_ci			goto out_free_buf;
498462306a36Sopenharmony_ci		}
498562306a36Sopenharmony_ci		if (key)
498662306a36Sopenharmony_ci			memcpy(&extent_op->key, key, sizeof(extent_op->key));
498762306a36Sopenharmony_ci		else
498862306a36Sopenharmony_ci			memset(&extent_op->key, 0, sizeof(extent_op->key));
498962306a36Sopenharmony_ci		extent_op->flags_to_set = flags;
499062306a36Sopenharmony_ci		extent_op->update_key = skinny_metadata ? false : true;
499162306a36Sopenharmony_ci		extent_op->update_flags = true;
499262306a36Sopenharmony_ci		extent_op->level = level;
499362306a36Sopenharmony_ci
499462306a36Sopenharmony_ci		btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT,
499562306a36Sopenharmony_ci				       ins.objectid, ins.offset, parent);
499662306a36Sopenharmony_ci		btrfs_init_tree_ref(&generic_ref, level, root_objectid,
499762306a36Sopenharmony_ci				    root->root_key.objectid, false);
499862306a36Sopenharmony_ci		btrfs_ref_tree_mod(fs_info, &generic_ref);
499962306a36Sopenharmony_ci		ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op);
500062306a36Sopenharmony_ci		if (ret)
500162306a36Sopenharmony_ci			goto out_free_delayed;
500262306a36Sopenharmony_ci	}
500362306a36Sopenharmony_ci	return buf;
500462306a36Sopenharmony_ci
500562306a36Sopenharmony_ciout_free_delayed:
500662306a36Sopenharmony_ci	btrfs_free_delayed_extent_op(extent_op);
500762306a36Sopenharmony_ciout_free_buf:
500862306a36Sopenharmony_ci	btrfs_tree_unlock(buf);
500962306a36Sopenharmony_ci	free_extent_buffer(buf);
501062306a36Sopenharmony_ciout_free_reserved:
501162306a36Sopenharmony_ci	btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0);
501262306a36Sopenharmony_ciout_unuse:
501362306a36Sopenharmony_ci	btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize);
501462306a36Sopenharmony_ci	return ERR_PTR(ret);
501562306a36Sopenharmony_ci}
501662306a36Sopenharmony_ci
501762306a36Sopenharmony_cistruct walk_control {
501862306a36Sopenharmony_ci	u64 refs[BTRFS_MAX_LEVEL];
501962306a36Sopenharmony_ci	u64 flags[BTRFS_MAX_LEVEL];
502062306a36Sopenharmony_ci	struct btrfs_key update_progress;
502162306a36Sopenharmony_ci	struct btrfs_key drop_progress;
502262306a36Sopenharmony_ci	int drop_level;
502362306a36Sopenharmony_ci	int stage;
502462306a36Sopenharmony_ci	int level;
502562306a36Sopenharmony_ci	int shared_level;
502662306a36Sopenharmony_ci	int update_ref;
502762306a36Sopenharmony_ci	int keep_locks;
502862306a36Sopenharmony_ci	int reada_slot;
502962306a36Sopenharmony_ci	int reada_count;
503062306a36Sopenharmony_ci	int restarted;
503162306a36Sopenharmony_ci};
503262306a36Sopenharmony_ci
503362306a36Sopenharmony_ci#define DROP_REFERENCE	1
503462306a36Sopenharmony_ci#define UPDATE_BACKREF	2
503562306a36Sopenharmony_ci
503662306a36Sopenharmony_cistatic noinline void reada_walk_down(struct btrfs_trans_handle *trans,
503762306a36Sopenharmony_ci				     struct btrfs_root *root,
503862306a36Sopenharmony_ci				     struct walk_control *wc,
503962306a36Sopenharmony_ci				     struct btrfs_path *path)
504062306a36Sopenharmony_ci{
504162306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
504262306a36Sopenharmony_ci	u64 bytenr;
504362306a36Sopenharmony_ci	u64 generation;
504462306a36Sopenharmony_ci	u64 refs;
504562306a36Sopenharmony_ci	u64 flags;
504662306a36Sopenharmony_ci	u32 nritems;
504762306a36Sopenharmony_ci	struct btrfs_key key;
504862306a36Sopenharmony_ci	struct extent_buffer *eb;
504962306a36Sopenharmony_ci	int ret;
505062306a36Sopenharmony_ci	int slot;
505162306a36Sopenharmony_ci	int nread = 0;
505262306a36Sopenharmony_ci
505362306a36Sopenharmony_ci	if (path->slots[wc->level] < wc->reada_slot) {
505462306a36Sopenharmony_ci		wc->reada_count = wc->reada_count * 2 / 3;
505562306a36Sopenharmony_ci		wc->reada_count = max(wc->reada_count, 2);
505662306a36Sopenharmony_ci	} else {
505762306a36Sopenharmony_ci		wc->reada_count = wc->reada_count * 3 / 2;
505862306a36Sopenharmony_ci		wc->reada_count = min_t(int, wc->reada_count,
505962306a36Sopenharmony_ci					BTRFS_NODEPTRS_PER_BLOCK(fs_info));
506062306a36Sopenharmony_ci	}
506162306a36Sopenharmony_ci
506262306a36Sopenharmony_ci	eb = path->nodes[wc->level];
506362306a36Sopenharmony_ci	nritems = btrfs_header_nritems(eb);
506462306a36Sopenharmony_ci
506562306a36Sopenharmony_ci	for (slot = path->slots[wc->level]; slot < nritems; slot++) {
506662306a36Sopenharmony_ci		if (nread >= wc->reada_count)
506762306a36Sopenharmony_ci			break;
506862306a36Sopenharmony_ci
506962306a36Sopenharmony_ci		cond_resched();
507062306a36Sopenharmony_ci		bytenr = btrfs_node_blockptr(eb, slot);
507162306a36Sopenharmony_ci		generation = btrfs_node_ptr_generation(eb, slot);
507262306a36Sopenharmony_ci
507362306a36Sopenharmony_ci		if (slot == path->slots[wc->level])
507462306a36Sopenharmony_ci			goto reada;
507562306a36Sopenharmony_ci
507662306a36Sopenharmony_ci		if (wc->stage == UPDATE_BACKREF &&
507762306a36Sopenharmony_ci		    generation <= root->root_key.offset)
507862306a36Sopenharmony_ci			continue;
507962306a36Sopenharmony_ci
508062306a36Sopenharmony_ci		/* We don't lock the tree block, it's OK to be racy here */
508162306a36Sopenharmony_ci		ret = btrfs_lookup_extent_info(trans, fs_info, bytenr,
508262306a36Sopenharmony_ci					       wc->level - 1, 1, &refs,
508362306a36Sopenharmony_ci					       &flags);
508462306a36Sopenharmony_ci		/* We don't care about errors in readahead. */
508562306a36Sopenharmony_ci		if (ret < 0)
508662306a36Sopenharmony_ci			continue;
508762306a36Sopenharmony_ci		BUG_ON(refs == 0);
508862306a36Sopenharmony_ci
508962306a36Sopenharmony_ci		if (wc->stage == DROP_REFERENCE) {
509062306a36Sopenharmony_ci			if (refs == 1)
509162306a36Sopenharmony_ci				goto reada;
509262306a36Sopenharmony_ci
509362306a36Sopenharmony_ci			if (wc->level == 1 &&
509462306a36Sopenharmony_ci			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
509562306a36Sopenharmony_ci				continue;
509662306a36Sopenharmony_ci			if (!wc->update_ref ||
509762306a36Sopenharmony_ci			    generation <= root->root_key.offset)
509862306a36Sopenharmony_ci				continue;
509962306a36Sopenharmony_ci			btrfs_node_key_to_cpu(eb, &key, slot);
510062306a36Sopenharmony_ci			ret = btrfs_comp_cpu_keys(&key,
510162306a36Sopenharmony_ci						  &wc->update_progress);
510262306a36Sopenharmony_ci			if (ret < 0)
510362306a36Sopenharmony_ci				continue;
510462306a36Sopenharmony_ci		} else {
510562306a36Sopenharmony_ci			if (wc->level == 1 &&
510662306a36Sopenharmony_ci			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
510762306a36Sopenharmony_ci				continue;
510862306a36Sopenharmony_ci		}
510962306a36Sopenharmony_cireada:
511062306a36Sopenharmony_ci		btrfs_readahead_node_child(eb, slot);
511162306a36Sopenharmony_ci		nread++;
511262306a36Sopenharmony_ci	}
511362306a36Sopenharmony_ci	wc->reada_slot = slot;
511462306a36Sopenharmony_ci}
511562306a36Sopenharmony_ci
511662306a36Sopenharmony_ci/*
511762306a36Sopenharmony_ci * helper to process tree block while walking down the tree.
511862306a36Sopenharmony_ci *
511962306a36Sopenharmony_ci * when wc->stage == UPDATE_BACKREF, this function updates
512062306a36Sopenharmony_ci * back refs for pointers in the block.
512162306a36Sopenharmony_ci *
512262306a36Sopenharmony_ci * NOTE: return value 1 means we should stop walking down.
512362306a36Sopenharmony_ci */
512462306a36Sopenharmony_cistatic noinline int walk_down_proc(struct btrfs_trans_handle *trans,
512562306a36Sopenharmony_ci				   struct btrfs_root *root,
512662306a36Sopenharmony_ci				   struct btrfs_path *path,
512762306a36Sopenharmony_ci				   struct walk_control *wc, int lookup_info)
512862306a36Sopenharmony_ci{
512962306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
513062306a36Sopenharmony_ci	int level = wc->level;
513162306a36Sopenharmony_ci	struct extent_buffer *eb = path->nodes[level];
513262306a36Sopenharmony_ci	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
513362306a36Sopenharmony_ci	int ret;
513462306a36Sopenharmony_ci
513562306a36Sopenharmony_ci	if (wc->stage == UPDATE_BACKREF &&
513662306a36Sopenharmony_ci	    btrfs_header_owner(eb) != root->root_key.objectid)
513762306a36Sopenharmony_ci		return 1;
513862306a36Sopenharmony_ci
513962306a36Sopenharmony_ci	/*
514062306a36Sopenharmony_ci	 * when reference count of tree block is 1, it won't increase
514162306a36Sopenharmony_ci	 * again. once full backref flag is set, we never clear it.
514262306a36Sopenharmony_ci	 */
514362306a36Sopenharmony_ci	if (lookup_info &&
514462306a36Sopenharmony_ci	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
514562306a36Sopenharmony_ci	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
514662306a36Sopenharmony_ci		BUG_ON(!path->locks[level]);
514762306a36Sopenharmony_ci		ret = btrfs_lookup_extent_info(trans, fs_info,
514862306a36Sopenharmony_ci					       eb->start, level, 1,
514962306a36Sopenharmony_ci					       &wc->refs[level],
515062306a36Sopenharmony_ci					       &wc->flags[level]);
515162306a36Sopenharmony_ci		BUG_ON(ret == -ENOMEM);
515262306a36Sopenharmony_ci		if (ret)
515362306a36Sopenharmony_ci			return ret;
515462306a36Sopenharmony_ci		BUG_ON(wc->refs[level] == 0);
515562306a36Sopenharmony_ci	}
515662306a36Sopenharmony_ci
515762306a36Sopenharmony_ci	if (wc->stage == DROP_REFERENCE) {
515862306a36Sopenharmony_ci		if (wc->refs[level] > 1)
515962306a36Sopenharmony_ci			return 1;
516062306a36Sopenharmony_ci
516162306a36Sopenharmony_ci		if (path->locks[level] && !wc->keep_locks) {
516262306a36Sopenharmony_ci			btrfs_tree_unlock_rw(eb, path->locks[level]);
516362306a36Sopenharmony_ci			path->locks[level] = 0;
516462306a36Sopenharmony_ci		}
516562306a36Sopenharmony_ci		return 0;
516662306a36Sopenharmony_ci	}
516762306a36Sopenharmony_ci
516862306a36Sopenharmony_ci	/* wc->stage == UPDATE_BACKREF */
516962306a36Sopenharmony_ci	if (!(wc->flags[level] & flag)) {
517062306a36Sopenharmony_ci		BUG_ON(!path->locks[level]);
517162306a36Sopenharmony_ci		ret = btrfs_inc_ref(trans, root, eb, 1);
517262306a36Sopenharmony_ci		BUG_ON(ret); /* -ENOMEM */
517362306a36Sopenharmony_ci		ret = btrfs_dec_ref(trans, root, eb, 0);
517462306a36Sopenharmony_ci		BUG_ON(ret); /* -ENOMEM */
517562306a36Sopenharmony_ci		ret = btrfs_set_disk_extent_flags(trans, eb, flag);
517662306a36Sopenharmony_ci		BUG_ON(ret); /* -ENOMEM */
517762306a36Sopenharmony_ci		wc->flags[level] |= flag;
517862306a36Sopenharmony_ci	}
517962306a36Sopenharmony_ci
518062306a36Sopenharmony_ci	/*
518162306a36Sopenharmony_ci	 * the block is shared by multiple trees, so it's not good to
518262306a36Sopenharmony_ci	 * keep the tree lock
518362306a36Sopenharmony_ci	 */
518462306a36Sopenharmony_ci	if (path->locks[level] && level > 0) {
518562306a36Sopenharmony_ci		btrfs_tree_unlock_rw(eb, path->locks[level]);
518662306a36Sopenharmony_ci		path->locks[level] = 0;
518762306a36Sopenharmony_ci	}
518862306a36Sopenharmony_ci	return 0;
518962306a36Sopenharmony_ci}
519062306a36Sopenharmony_ci
519162306a36Sopenharmony_ci/*
519262306a36Sopenharmony_ci * This is used to verify a ref exists for this root to deal with a bug where we
519362306a36Sopenharmony_ci * would have a drop_progress key that hadn't been updated properly.
519462306a36Sopenharmony_ci */
519562306a36Sopenharmony_cistatic int check_ref_exists(struct btrfs_trans_handle *trans,
519662306a36Sopenharmony_ci			    struct btrfs_root *root, u64 bytenr, u64 parent,
519762306a36Sopenharmony_ci			    int level)
519862306a36Sopenharmony_ci{
519962306a36Sopenharmony_ci	struct btrfs_path *path;
520062306a36Sopenharmony_ci	struct btrfs_extent_inline_ref *iref;
520162306a36Sopenharmony_ci	int ret;
520262306a36Sopenharmony_ci
520362306a36Sopenharmony_ci	path = btrfs_alloc_path();
520462306a36Sopenharmony_ci	if (!path)
520562306a36Sopenharmony_ci		return -ENOMEM;
520662306a36Sopenharmony_ci
520762306a36Sopenharmony_ci	ret = lookup_extent_backref(trans, path, &iref, bytenr,
520862306a36Sopenharmony_ci				    root->fs_info->nodesize, parent,
520962306a36Sopenharmony_ci				    root->root_key.objectid, level, 0);
521062306a36Sopenharmony_ci	btrfs_free_path(path);
521162306a36Sopenharmony_ci	if (ret == -ENOENT)
521262306a36Sopenharmony_ci		return 0;
521362306a36Sopenharmony_ci	if (ret < 0)
521462306a36Sopenharmony_ci		return ret;
521562306a36Sopenharmony_ci	return 1;
521662306a36Sopenharmony_ci}
521762306a36Sopenharmony_ci
/*
 * Helper to process the tree block pointer at path->slots[wc->level] of
 * path->nodes[wc->level], i.e. the child one level below wc->level.
 *
 * When wc->stage == DROP_REFERENCE, this function checks the
 * reference count of the block pointed to.  If the block
 * is shared and we need to update back refs for the subtree
 * rooted at the block, this function changes wc->stage to
 * UPDATE_BACKREF.  If the block is shared and there is no
 * need to update back refs, this function drops the reference
 * to the block.
 *
 * @lookup_info is an in/out hint shared with the caller's loop: it is set
 * to 0 once the child's refs/flags have been stored in wc->refs[]/wc->flags[]
 * and the cached buffer is still usable, and set to 1 on every path where
 * the child is skipped or had to be re-read from disk.
 *
 * Return: 0 when we descended one level (path->nodes[level - 1] holds the
 * write-locked child and wc->level was decremented), 1 when the child was
 * skipped and the caller should stop walking down through this pointer,
 * or a negative errno on failure.
 */
static noinline int do_walk_down(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc, int *lookup_info)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 bytenr;
	u64 generation;
	u64 parent;
	struct btrfs_tree_parent_check check = { 0 };
	struct btrfs_key key;
	struct btrfs_ref ref = { 0 };
	struct extent_buffer *next;
	int level = wc->level;
	int reada = 0;
	int ret = 0;
	bool need_account = false;

	generation = btrfs_node_ptr_generation(path->nodes[level],
					       path->slots[level]);
	/*
	 * if the lower level block was created before the snapshot
	 * was created, we know there is no need to update back refs
	 * for the subtree
	 */
	if (wc->stage == UPDATE_BACKREF &&
	    generation <= root->root_key.offset) {
		*lookup_info = 1;
		return 1;
	}

	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);

	/* Expected properties of the child, used if it must be read below. */
	check.level = level - 1;
	check.transid = generation;
	check.owner_root = root->root_key.objectid;
	check.has_first_key = true;
	btrfs_node_key_to_cpu(path->nodes[level], &check.first_key,
			      path->slots[level]);

	/*
	 * Prefer an already existing buffer.  If we had to create one,
	 * remember that in 'reada' so readahead can be issued later.
	 */
	next = find_extent_buffer(fs_info, bytenr);
	if (!next) {
		next = btrfs_find_create_tree_block(fs_info, bytenr,
				root->root_key.objectid, level - 1);
		if (IS_ERR(next))
			return PTR_ERR(next);
		reada = 1;
	}
	btrfs_tree_lock(next);

	ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1,
				       &wc->refs[level - 1],
				       &wc->flags[level - 1]);
	if (ret < 0)
		goto out_unlock;

	if (unlikely(wc->refs[level - 1] == 0)) {
		btrfs_err(fs_info, "Missing references.");
		ret = -EIO;
		goto out_unlock;
	}
	*lookup_info = 0;

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level - 1] > 1) {
			/* Shared child: qgroup tracing may be needed if we skip it. */
			need_account = true;
			if (level == 1 &&
			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				goto skip;

			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				goto skip;

			/*
			 * 'ret' only carries the key comparison result here; on
			 * the skip path it is reassigned before it is returned.
			 */
			btrfs_node_key_to_cpu(path->nodes[level], &key,
					      path->slots[level]);
			ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
			if (ret < 0)
				goto skip;

			wc->stage = UPDATE_BACKREF;
			wc->shared_level = level - 1;
		}
	} else {
		if (level == 1 &&
		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
			goto skip;
	}

	/*
	 * The buffer we hold is stale for this generation: drop it and fall
	 * through to re-read the block below.
	 */
	if (!btrfs_buffer_uptodate(next, generation, 0)) {
		btrfs_tree_unlock(next);
		free_extent_buffer(next);
		next = NULL;
		*lookup_info = 1;
	}

	if (!next) {
		if (reada && level == 1)
			reada_walk_down(trans, root, wc, path);
		next = read_tree_block(fs_info, bytenr, &check);
		if (IS_ERR(next)) {
			return PTR_ERR(next);
		} else if (!extent_buffer_uptodate(next)) {
			free_extent_buffer(next);
			return -EIO;
		}
		btrfs_tree_lock(next);
	}

	/* From here on 'level' is the level of the child we descend into. */
	level--;
	ASSERT(level == btrfs_header_level(next));
	if (level != btrfs_header_level(next)) {
		btrfs_err(root->fs_info, "mismatched level");
		ret = -EIO;
		goto out_unlock;
	}
	/* Hand the locked, referenced child over to the path. */
	path->nodes[level] = next;
	path->slots[level] = 0;
	path->locks[level] = BTRFS_WRITE_LOCK;
	wc->level = level;
	if (wc->level == 1)
		wc->reada_slot = 0;
	return 0;
skip:
	/*
	 * Every jump to this label happens before 'level' is decremented, so
	 * 'level' is still the parent's level and 'next' is still locked.
	 */
	wc->refs[level - 1] = 0;
	wc->flags[level - 1] = 0;
	if (wc->stage == DROP_REFERENCE) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
			parent = path->nodes[level]->start;
		} else {
			ASSERT(root->root_key.objectid ==
			       btrfs_header_owner(path->nodes[level]));
			if (root->root_key.objectid !=
			    btrfs_header_owner(path->nodes[level])) {
				btrfs_err(root->fs_info,
						"mismatched block owner");
				ret = -EIO;
				goto out_unlock;
			}
			parent = 0;
		}

		/*
		 * If we had a drop_progress we need to verify the refs are set
		 * as expected.  If we find our ref then we know that from here
		 * on out everything should be correct, and we can clear the
		 * ->restarted flag.
		 */
		if (wc->restarted) {
			ret = check_ref_exists(trans, root, bytenr, parent,
					       level - 1);
			if (ret < 0)
				goto out_unlock;
			if (ret == 0)
				goto no_delete;
			ret = 0;
			wc->restarted = 0;
		}

		/*
		 * Reloc tree doesn't contribute to qgroup numbers, and we have
		 * already accounted them at merge time (replace_path),
		 * thus we could skip expensive subtree trace here.
		 *
		 * A tracing failure is only logged: 'ret' is overwritten by
		 * the btrfs_free_extent() call below.
		 */
		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
		    need_account) {
			ret = btrfs_qgroup_trace_subtree(trans, next,
							 generation, level - 1);
			if (ret) {
				btrfs_err_rl(fs_info,
					     "Error %d accounting shared subtree. Quota is out of sync, rescan required.",
					     ret);
			}
		}

		/*
		 * We need to update the next key in our walk control so we can
		 * update the drop_progress key accordingly.  We don't care if
		 * find_next_key doesn't find a key because that means we're at
		 * the end and are going to clean up now.
		 */
		wc->drop_level = level;
		find_next_key(path, level, &wc->drop_progress);

		/* Queue the drop of our reference on the skipped child. */
		btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
				       fs_info->nodesize, parent);
		btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid,
				    0, false);
		ret = btrfs_free_extent(trans, &ref);
		if (ret)
			goto out_unlock;
	}
no_delete:
	*lookup_info = 1;
	ret = 1;

out_unlock:
	/* 'next' is locked and referenced on every path that reaches here. */
	btrfs_tree_unlock(next);
	free_extent_buffer(next);

	return ret;
}
543362306a36Sopenharmony_ci
543462306a36Sopenharmony_ci/*
543562306a36Sopenharmony_ci * helper to process tree block while walking up the tree.
543662306a36Sopenharmony_ci *
543762306a36Sopenharmony_ci * when wc->stage == DROP_REFERENCE, this function drops
543862306a36Sopenharmony_ci * reference count on the block.
543962306a36Sopenharmony_ci *
544062306a36Sopenharmony_ci * when wc->stage == UPDATE_BACKREF, this function changes
544162306a36Sopenharmony_ci * wc->stage back to DROP_REFERENCE if we changed wc->stage
544262306a36Sopenharmony_ci * to UPDATE_BACKREF previously while processing the block.
544362306a36Sopenharmony_ci *
544462306a36Sopenharmony_ci * NOTE: return value 1 means we should stop walking up.
544562306a36Sopenharmony_ci */
544662306a36Sopenharmony_cistatic noinline int walk_up_proc(struct btrfs_trans_handle *trans,
544762306a36Sopenharmony_ci				 struct btrfs_root *root,
544862306a36Sopenharmony_ci				 struct btrfs_path *path,
544962306a36Sopenharmony_ci				 struct walk_control *wc)
545062306a36Sopenharmony_ci{
545162306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
545262306a36Sopenharmony_ci	int ret;
545362306a36Sopenharmony_ci	int level = wc->level;
545462306a36Sopenharmony_ci	struct extent_buffer *eb = path->nodes[level];
545562306a36Sopenharmony_ci	u64 parent = 0;
545662306a36Sopenharmony_ci
545762306a36Sopenharmony_ci	if (wc->stage == UPDATE_BACKREF) {
545862306a36Sopenharmony_ci		BUG_ON(wc->shared_level < level);
545962306a36Sopenharmony_ci		if (level < wc->shared_level)
546062306a36Sopenharmony_ci			goto out;
546162306a36Sopenharmony_ci
546262306a36Sopenharmony_ci		ret = find_next_key(path, level + 1, &wc->update_progress);
546362306a36Sopenharmony_ci		if (ret > 0)
546462306a36Sopenharmony_ci			wc->update_ref = 0;
546562306a36Sopenharmony_ci
546662306a36Sopenharmony_ci		wc->stage = DROP_REFERENCE;
546762306a36Sopenharmony_ci		wc->shared_level = -1;
546862306a36Sopenharmony_ci		path->slots[level] = 0;
546962306a36Sopenharmony_ci
547062306a36Sopenharmony_ci		/*
547162306a36Sopenharmony_ci		 * check reference count again if the block isn't locked.
547262306a36Sopenharmony_ci		 * we should start walking down the tree again if reference
547362306a36Sopenharmony_ci		 * count is one.
547462306a36Sopenharmony_ci		 */
547562306a36Sopenharmony_ci		if (!path->locks[level]) {
547662306a36Sopenharmony_ci			BUG_ON(level == 0);
547762306a36Sopenharmony_ci			btrfs_tree_lock(eb);
547862306a36Sopenharmony_ci			path->locks[level] = BTRFS_WRITE_LOCK;
547962306a36Sopenharmony_ci
548062306a36Sopenharmony_ci			ret = btrfs_lookup_extent_info(trans, fs_info,
548162306a36Sopenharmony_ci						       eb->start, level, 1,
548262306a36Sopenharmony_ci						       &wc->refs[level],
548362306a36Sopenharmony_ci						       &wc->flags[level]);
548462306a36Sopenharmony_ci			if (ret < 0) {
548562306a36Sopenharmony_ci				btrfs_tree_unlock_rw(eb, path->locks[level]);
548662306a36Sopenharmony_ci				path->locks[level] = 0;
548762306a36Sopenharmony_ci				return ret;
548862306a36Sopenharmony_ci			}
548962306a36Sopenharmony_ci			BUG_ON(wc->refs[level] == 0);
549062306a36Sopenharmony_ci			if (wc->refs[level] == 1) {
549162306a36Sopenharmony_ci				btrfs_tree_unlock_rw(eb, path->locks[level]);
549262306a36Sopenharmony_ci				path->locks[level] = 0;
549362306a36Sopenharmony_ci				return 1;
549462306a36Sopenharmony_ci			}
549562306a36Sopenharmony_ci		}
549662306a36Sopenharmony_ci	}
549762306a36Sopenharmony_ci
549862306a36Sopenharmony_ci	/* wc->stage == DROP_REFERENCE */
549962306a36Sopenharmony_ci	BUG_ON(wc->refs[level] > 1 && !path->locks[level]);
550062306a36Sopenharmony_ci
550162306a36Sopenharmony_ci	if (wc->refs[level] == 1) {
550262306a36Sopenharmony_ci		if (level == 0) {
550362306a36Sopenharmony_ci			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
550462306a36Sopenharmony_ci				ret = btrfs_dec_ref(trans, root, eb, 1);
550562306a36Sopenharmony_ci			else
550662306a36Sopenharmony_ci				ret = btrfs_dec_ref(trans, root, eb, 0);
550762306a36Sopenharmony_ci			BUG_ON(ret); /* -ENOMEM */
550862306a36Sopenharmony_ci			if (is_fstree(root->root_key.objectid)) {
550962306a36Sopenharmony_ci				ret = btrfs_qgroup_trace_leaf_items(trans, eb);
551062306a36Sopenharmony_ci				if (ret) {
551162306a36Sopenharmony_ci					btrfs_err_rl(fs_info,
551262306a36Sopenharmony_ci	"error %d accounting leaf items, quota is out of sync, rescan required",
551362306a36Sopenharmony_ci					     ret);
551462306a36Sopenharmony_ci				}
551562306a36Sopenharmony_ci			}
551662306a36Sopenharmony_ci		}
551762306a36Sopenharmony_ci		/* Make block locked assertion in btrfs_clear_buffer_dirty happy. */
551862306a36Sopenharmony_ci		if (!path->locks[level]) {
551962306a36Sopenharmony_ci			btrfs_tree_lock(eb);
552062306a36Sopenharmony_ci			path->locks[level] = BTRFS_WRITE_LOCK;
552162306a36Sopenharmony_ci		}
552262306a36Sopenharmony_ci		btrfs_clear_buffer_dirty(trans, eb);
552362306a36Sopenharmony_ci	}
552462306a36Sopenharmony_ci
552562306a36Sopenharmony_ci	if (eb == root->node) {
552662306a36Sopenharmony_ci		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
552762306a36Sopenharmony_ci			parent = eb->start;
552862306a36Sopenharmony_ci		else if (root->root_key.objectid != btrfs_header_owner(eb))
552962306a36Sopenharmony_ci			goto owner_mismatch;
553062306a36Sopenharmony_ci	} else {
553162306a36Sopenharmony_ci		if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
553262306a36Sopenharmony_ci			parent = path->nodes[level + 1]->start;
553362306a36Sopenharmony_ci		else if (root->root_key.objectid !=
553462306a36Sopenharmony_ci			 btrfs_header_owner(path->nodes[level + 1]))
553562306a36Sopenharmony_ci			goto owner_mismatch;
553662306a36Sopenharmony_ci	}
553762306a36Sopenharmony_ci
553862306a36Sopenharmony_ci	btrfs_free_tree_block(trans, btrfs_root_id(root), eb, parent,
553962306a36Sopenharmony_ci			      wc->refs[level] == 1);
554062306a36Sopenharmony_ciout:
554162306a36Sopenharmony_ci	wc->refs[level] = 0;
554262306a36Sopenharmony_ci	wc->flags[level] = 0;
554362306a36Sopenharmony_ci	return 0;
554462306a36Sopenharmony_ci
554562306a36Sopenharmony_ciowner_mismatch:
554662306a36Sopenharmony_ci	btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu",
554762306a36Sopenharmony_ci		     btrfs_header_owner(eb), root->root_key.objectid);
554862306a36Sopenharmony_ci	return -EUCLEAN;
554962306a36Sopenharmony_ci}
555062306a36Sopenharmony_ci
555162306a36Sopenharmony_cistatic noinline int walk_down_tree(struct btrfs_trans_handle *trans,
555262306a36Sopenharmony_ci				   struct btrfs_root *root,
555362306a36Sopenharmony_ci				   struct btrfs_path *path,
555462306a36Sopenharmony_ci				   struct walk_control *wc)
555562306a36Sopenharmony_ci{
555662306a36Sopenharmony_ci	int level = wc->level;
555762306a36Sopenharmony_ci	int lookup_info = 1;
555862306a36Sopenharmony_ci	int ret = 0;
555962306a36Sopenharmony_ci
556062306a36Sopenharmony_ci	while (level >= 0) {
556162306a36Sopenharmony_ci		ret = walk_down_proc(trans, root, path, wc, lookup_info);
556262306a36Sopenharmony_ci		if (ret)
556362306a36Sopenharmony_ci			break;
556462306a36Sopenharmony_ci
556562306a36Sopenharmony_ci		if (level == 0)
556662306a36Sopenharmony_ci			break;
556762306a36Sopenharmony_ci
556862306a36Sopenharmony_ci		if (path->slots[level] >=
556962306a36Sopenharmony_ci		    btrfs_header_nritems(path->nodes[level]))
557062306a36Sopenharmony_ci			break;
557162306a36Sopenharmony_ci
557262306a36Sopenharmony_ci		ret = do_walk_down(trans, root, path, wc, &lookup_info);
557362306a36Sopenharmony_ci		if (ret > 0) {
557462306a36Sopenharmony_ci			path->slots[level]++;
557562306a36Sopenharmony_ci			continue;
557662306a36Sopenharmony_ci		} else if (ret < 0)
557762306a36Sopenharmony_ci			break;
557862306a36Sopenharmony_ci		level = wc->level;
557962306a36Sopenharmony_ci	}
558062306a36Sopenharmony_ci	return (ret == 1) ? 0 : ret;
558162306a36Sopenharmony_ci}
558262306a36Sopenharmony_ci
558362306a36Sopenharmony_cistatic noinline int walk_up_tree(struct btrfs_trans_handle *trans,
558462306a36Sopenharmony_ci				 struct btrfs_root *root,
558562306a36Sopenharmony_ci				 struct btrfs_path *path,
558662306a36Sopenharmony_ci				 struct walk_control *wc, int max_level)
558762306a36Sopenharmony_ci{
558862306a36Sopenharmony_ci	int level = wc->level;
558962306a36Sopenharmony_ci	int ret;
559062306a36Sopenharmony_ci
559162306a36Sopenharmony_ci	path->slots[level] = btrfs_header_nritems(path->nodes[level]);
559262306a36Sopenharmony_ci	while (level < max_level && path->nodes[level]) {
559362306a36Sopenharmony_ci		wc->level = level;
559462306a36Sopenharmony_ci		if (path->slots[level] + 1 <
559562306a36Sopenharmony_ci		    btrfs_header_nritems(path->nodes[level])) {
559662306a36Sopenharmony_ci			path->slots[level]++;
559762306a36Sopenharmony_ci			return 0;
559862306a36Sopenharmony_ci		} else {
559962306a36Sopenharmony_ci			ret = walk_up_proc(trans, root, path, wc);
560062306a36Sopenharmony_ci			if (ret > 0)
560162306a36Sopenharmony_ci				return 0;
560262306a36Sopenharmony_ci			if (ret < 0)
560362306a36Sopenharmony_ci				return ret;
560462306a36Sopenharmony_ci
560562306a36Sopenharmony_ci			if (path->locks[level]) {
560662306a36Sopenharmony_ci				btrfs_tree_unlock_rw(path->nodes[level],
560762306a36Sopenharmony_ci						     path->locks[level]);
560862306a36Sopenharmony_ci				path->locks[level] = 0;
560962306a36Sopenharmony_ci			}
561062306a36Sopenharmony_ci			free_extent_buffer(path->nodes[level]);
561162306a36Sopenharmony_ci			path->nodes[level] = NULL;
561262306a36Sopenharmony_ci			level++;
561362306a36Sopenharmony_ci		}
561462306a36Sopenharmony_ci	}
561562306a36Sopenharmony_ci	return 1;
561662306a36Sopenharmony_ci}
561762306a36Sopenharmony_ci
/*
 * Drop a subvolume tree.
 *
 * This function traverses the tree, freeing any blocks that are only
 * referenced by the tree.
 *
 * When a shared tree block is found, this function decreases its
 * reference count by one.  If update_ref is true, this function
 * also makes sure backrefs for the shared block and all lower level
 * blocks are properly updated.
 *
 * If called with for_reloc == 0, may exit early with -EAGAIN.
 */
563162306a36Sopenharmony_ciint btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
563262306a36Sopenharmony_ci{
563362306a36Sopenharmony_ci	const bool is_reloc_root = (root->root_key.objectid ==
563462306a36Sopenharmony_ci				    BTRFS_TREE_RELOC_OBJECTID);
563562306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
563662306a36Sopenharmony_ci	struct btrfs_path *path;
563762306a36Sopenharmony_ci	struct btrfs_trans_handle *trans;
563862306a36Sopenharmony_ci	struct btrfs_root *tree_root = fs_info->tree_root;
563962306a36Sopenharmony_ci	struct btrfs_root_item *root_item = &root->root_item;
564062306a36Sopenharmony_ci	struct walk_control *wc;
564162306a36Sopenharmony_ci	struct btrfs_key key;
564262306a36Sopenharmony_ci	int err = 0;
564362306a36Sopenharmony_ci	int ret;
564462306a36Sopenharmony_ci	int level;
564562306a36Sopenharmony_ci	bool root_dropped = false;
564662306a36Sopenharmony_ci	bool unfinished_drop = false;
564762306a36Sopenharmony_ci
564862306a36Sopenharmony_ci	btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid);
564962306a36Sopenharmony_ci
565062306a36Sopenharmony_ci	path = btrfs_alloc_path();
565162306a36Sopenharmony_ci	if (!path) {
565262306a36Sopenharmony_ci		err = -ENOMEM;
565362306a36Sopenharmony_ci		goto out;
565462306a36Sopenharmony_ci	}
565562306a36Sopenharmony_ci
565662306a36Sopenharmony_ci	wc = kzalloc(sizeof(*wc), GFP_NOFS);
565762306a36Sopenharmony_ci	if (!wc) {
565862306a36Sopenharmony_ci		btrfs_free_path(path);
565962306a36Sopenharmony_ci		err = -ENOMEM;
566062306a36Sopenharmony_ci		goto out;
566162306a36Sopenharmony_ci	}
566262306a36Sopenharmony_ci
566362306a36Sopenharmony_ci	/*
566462306a36Sopenharmony_ci	 * Use join to avoid potential EINTR from transaction start. See
566562306a36Sopenharmony_ci	 * wait_reserve_ticket and the whole reservation callchain.
566662306a36Sopenharmony_ci	 */
566762306a36Sopenharmony_ci	if (for_reloc)
566862306a36Sopenharmony_ci		trans = btrfs_join_transaction(tree_root);
566962306a36Sopenharmony_ci	else
567062306a36Sopenharmony_ci		trans = btrfs_start_transaction(tree_root, 0);
567162306a36Sopenharmony_ci	if (IS_ERR(trans)) {
567262306a36Sopenharmony_ci		err = PTR_ERR(trans);
567362306a36Sopenharmony_ci		goto out_free;
567462306a36Sopenharmony_ci	}
567562306a36Sopenharmony_ci
567662306a36Sopenharmony_ci	err = btrfs_run_delayed_items(trans);
567762306a36Sopenharmony_ci	if (err)
567862306a36Sopenharmony_ci		goto out_end_trans;
567962306a36Sopenharmony_ci
568062306a36Sopenharmony_ci	/*
568162306a36Sopenharmony_ci	 * This will help us catch people modifying the fs tree while we're
568262306a36Sopenharmony_ci	 * dropping it.  It is unsafe to mess with the fs tree while it's being
568362306a36Sopenharmony_ci	 * dropped as we unlock the root node and parent nodes as we walk down
568462306a36Sopenharmony_ci	 * the tree, assuming nothing will change.  If something does change
568562306a36Sopenharmony_ci	 * then we'll have stale information and drop references to blocks we've
568662306a36Sopenharmony_ci	 * already dropped.
568762306a36Sopenharmony_ci	 */
568862306a36Sopenharmony_ci	set_bit(BTRFS_ROOT_DELETING, &root->state);
568962306a36Sopenharmony_ci	unfinished_drop = test_bit(BTRFS_ROOT_UNFINISHED_DROP, &root->state);
569062306a36Sopenharmony_ci
569162306a36Sopenharmony_ci	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
569262306a36Sopenharmony_ci		level = btrfs_header_level(root->node);
569362306a36Sopenharmony_ci		path->nodes[level] = btrfs_lock_root_node(root);
569462306a36Sopenharmony_ci		path->slots[level] = 0;
569562306a36Sopenharmony_ci		path->locks[level] = BTRFS_WRITE_LOCK;
569662306a36Sopenharmony_ci		memset(&wc->update_progress, 0,
569762306a36Sopenharmony_ci		       sizeof(wc->update_progress));
569862306a36Sopenharmony_ci	} else {
569962306a36Sopenharmony_ci		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
570062306a36Sopenharmony_ci		memcpy(&wc->update_progress, &key,
570162306a36Sopenharmony_ci		       sizeof(wc->update_progress));
570262306a36Sopenharmony_ci
570362306a36Sopenharmony_ci		level = btrfs_root_drop_level(root_item);
570462306a36Sopenharmony_ci		BUG_ON(level == 0);
570562306a36Sopenharmony_ci		path->lowest_level = level;
570662306a36Sopenharmony_ci		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
570762306a36Sopenharmony_ci		path->lowest_level = 0;
570862306a36Sopenharmony_ci		if (ret < 0) {
570962306a36Sopenharmony_ci			err = ret;
571062306a36Sopenharmony_ci			goto out_end_trans;
571162306a36Sopenharmony_ci		}
571262306a36Sopenharmony_ci		WARN_ON(ret > 0);
571362306a36Sopenharmony_ci
571462306a36Sopenharmony_ci		/*
571562306a36Sopenharmony_ci		 * unlock our path, this is safe because only this
571662306a36Sopenharmony_ci		 * function is allowed to delete this snapshot
571762306a36Sopenharmony_ci		 */
571862306a36Sopenharmony_ci		btrfs_unlock_up_safe(path, 0);
571962306a36Sopenharmony_ci
572062306a36Sopenharmony_ci		level = btrfs_header_level(root->node);
572162306a36Sopenharmony_ci		while (1) {
572262306a36Sopenharmony_ci			btrfs_tree_lock(path->nodes[level]);
572362306a36Sopenharmony_ci			path->locks[level] = BTRFS_WRITE_LOCK;
572462306a36Sopenharmony_ci
572562306a36Sopenharmony_ci			ret = btrfs_lookup_extent_info(trans, fs_info,
572662306a36Sopenharmony_ci						path->nodes[level]->start,
572762306a36Sopenharmony_ci						level, 1, &wc->refs[level],
572862306a36Sopenharmony_ci						&wc->flags[level]);
572962306a36Sopenharmony_ci			if (ret < 0) {
573062306a36Sopenharmony_ci				err = ret;
573162306a36Sopenharmony_ci				goto out_end_trans;
573262306a36Sopenharmony_ci			}
573362306a36Sopenharmony_ci			BUG_ON(wc->refs[level] == 0);
573462306a36Sopenharmony_ci
573562306a36Sopenharmony_ci			if (level == btrfs_root_drop_level(root_item))
573662306a36Sopenharmony_ci				break;
573762306a36Sopenharmony_ci
573862306a36Sopenharmony_ci			btrfs_tree_unlock(path->nodes[level]);
573962306a36Sopenharmony_ci			path->locks[level] = 0;
574062306a36Sopenharmony_ci			WARN_ON(wc->refs[level] != 1);
574162306a36Sopenharmony_ci			level--;
574262306a36Sopenharmony_ci		}
574362306a36Sopenharmony_ci	}
574462306a36Sopenharmony_ci
574562306a36Sopenharmony_ci	wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
574662306a36Sopenharmony_ci	wc->level = level;
574762306a36Sopenharmony_ci	wc->shared_level = -1;
574862306a36Sopenharmony_ci	wc->stage = DROP_REFERENCE;
574962306a36Sopenharmony_ci	wc->update_ref = update_ref;
575062306a36Sopenharmony_ci	wc->keep_locks = 0;
575162306a36Sopenharmony_ci	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
575262306a36Sopenharmony_ci
575362306a36Sopenharmony_ci	while (1) {
575462306a36Sopenharmony_ci
575562306a36Sopenharmony_ci		ret = walk_down_tree(trans, root, path, wc);
575662306a36Sopenharmony_ci		if (ret < 0) {
575762306a36Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
575862306a36Sopenharmony_ci			err = ret;
575962306a36Sopenharmony_ci			break;
576062306a36Sopenharmony_ci		}
576162306a36Sopenharmony_ci
576262306a36Sopenharmony_ci		ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL);
576362306a36Sopenharmony_ci		if (ret < 0) {
576462306a36Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
576562306a36Sopenharmony_ci			err = ret;
576662306a36Sopenharmony_ci			break;
576762306a36Sopenharmony_ci		}
576862306a36Sopenharmony_ci
576962306a36Sopenharmony_ci		if (ret > 0) {
577062306a36Sopenharmony_ci			BUG_ON(wc->stage != DROP_REFERENCE);
577162306a36Sopenharmony_ci			break;
577262306a36Sopenharmony_ci		}
577362306a36Sopenharmony_ci
577462306a36Sopenharmony_ci		if (wc->stage == DROP_REFERENCE) {
577562306a36Sopenharmony_ci			wc->drop_level = wc->level;
577662306a36Sopenharmony_ci			btrfs_node_key_to_cpu(path->nodes[wc->drop_level],
577762306a36Sopenharmony_ci					      &wc->drop_progress,
577862306a36Sopenharmony_ci					      path->slots[wc->drop_level]);
577962306a36Sopenharmony_ci		}
578062306a36Sopenharmony_ci		btrfs_cpu_key_to_disk(&root_item->drop_progress,
578162306a36Sopenharmony_ci				      &wc->drop_progress);
578262306a36Sopenharmony_ci		btrfs_set_root_drop_level(root_item, wc->drop_level);
578362306a36Sopenharmony_ci
578462306a36Sopenharmony_ci		BUG_ON(wc->level == 0);
578562306a36Sopenharmony_ci		if (btrfs_should_end_transaction(trans) ||
578662306a36Sopenharmony_ci		    (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) {
578762306a36Sopenharmony_ci			ret = btrfs_update_root(trans, tree_root,
578862306a36Sopenharmony_ci						&root->root_key,
578962306a36Sopenharmony_ci						root_item);
579062306a36Sopenharmony_ci			if (ret) {
579162306a36Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
579262306a36Sopenharmony_ci				err = ret;
579362306a36Sopenharmony_ci				goto out_end_trans;
579462306a36Sopenharmony_ci			}
579562306a36Sopenharmony_ci
579662306a36Sopenharmony_ci			if (!is_reloc_root)
579762306a36Sopenharmony_ci				btrfs_set_last_root_drop_gen(fs_info, trans->transid);
579862306a36Sopenharmony_ci
579962306a36Sopenharmony_ci			btrfs_end_transaction_throttle(trans);
580062306a36Sopenharmony_ci			if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
580162306a36Sopenharmony_ci				btrfs_debug(fs_info,
580262306a36Sopenharmony_ci					    "drop snapshot early exit");
580362306a36Sopenharmony_ci				err = -EAGAIN;
580462306a36Sopenharmony_ci				goto out_free;
580562306a36Sopenharmony_ci			}
580662306a36Sopenharmony_ci
580762306a36Sopenharmony_ci		       /*
580862306a36Sopenharmony_ci			* Use join to avoid potential EINTR from transaction
580962306a36Sopenharmony_ci			* start. See wait_reserve_ticket and the whole
581062306a36Sopenharmony_ci			* reservation callchain.
581162306a36Sopenharmony_ci			*/
581262306a36Sopenharmony_ci			if (for_reloc)
581362306a36Sopenharmony_ci				trans = btrfs_join_transaction(tree_root);
581462306a36Sopenharmony_ci			else
581562306a36Sopenharmony_ci				trans = btrfs_start_transaction(tree_root, 0);
581662306a36Sopenharmony_ci			if (IS_ERR(trans)) {
581762306a36Sopenharmony_ci				err = PTR_ERR(trans);
581862306a36Sopenharmony_ci				goto out_free;
581962306a36Sopenharmony_ci			}
582062306a36Sopenharmony_ci		}
582162306a36Sopenharmony_ci	}
582262306a36Sopenharmony_ci	btrfs_release_path(path);
582362306a36Sopenharmony_ci	if (err)
582462306a36Sopenharmony_ci		goto out_end_trans;
582562306a36Sopenharmony_ci
582662306a36Sopenharmony_ci	ret = btrfs_del_root(trans, &root->root_key);
582762306a36Sopenharmony_ci	if (ret) {
582862306a36Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
582962306a36Sopenharmony_ci		err = ret;
583062306a36Sopenharmony_ci		goto out_end_trans;
583162306a36Sopenharmony_ci	}
583262306a36Sopenharmony_ci
583362306a36Sopenharmony_ci	if (!is_reloc_root) {
583462306a36Sopenharmony_ci		ret = btrfs_find_root(tree_root, &root->root_key, path,
583562306a36Sopenharmony_ci				      NULL, NULL);
583662306a36Sopenharmony_ci		if (ret < 0) {
583762306a36Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
583862306a36Sopenharmony_ci			err = ret;
583962306a36Sopenharmony_ci			goto out_end_trans;
584062306a36Sopenharmony_ci		} else if (ret > 0) {
584162306a36Sopenharmony_ci			/* if we fail to delete the orphan item this time
584262306a36Sopenharmony_ci			 * around, it'll get picked up the next time.
584362306a36Sopenharmony_ci			 *
584462306a36Sopenharmony_ci			 * The most common failure here is just -ENOENT.
584562306a36Sopenharmony_ci			 */
584662306a36Sopenharmony_ci			btrfs_del_orphan_item(trans, tree_root,
584762306a36Sopenharmony_ci					      root->root_key.objectid);
584862306a36Sopenharmony_ci		}
584962306a36Sopenharmony_ci	}
585062306a36Sopenharmony_ci
585162306a36Sopenharmony_ci	/*
585262306a36Sopenharmony_ci	 * This subvolume is going to be completely dropped, and won't be
585362306a36Sopenharmony_ci	 * recorded as dirty roots, thus pertrans meta rsv will not be freed at
585462306a36Sopenharmony_ci	 * commit transaction time.  So free it here manually.
585562306a36Sopenharmony_ci	 */
585662306a36Sopenharmony_ci	btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
585762306a36Sopenharmony_ci	btrfs_qgroup_free_meta_all_pertrans(root);
585862306a36Sopenharmony_ci
585962306a36Sopenharmony_ci	if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
586062306a36Sopenharmony_ci		btrfs_add_dropped_root(trans, root);
586162306a36Sopenharmony_ci	else
586262306a36Sopenharmony_ci		btrfs_put_root(root);
586362306a36Sopenharmony_ci	root_dropped = true;
586462306a36Sopenharmony_ciout_end_trans:
586562306a36Sopenharmony_ci	if (!is_reloc_root)
586662306a36Sopenharmony_ci		btrfs_set_last_root_drop_gen(fs_info, trans->transid);
586762306a36Sopenharmony_ci
586862306a36Sopenharmony_ci	btrfs_end_transaction_throttle(trans);
586962306a36Sopenharmony_ciout_free:
587062306a36Sopenharmony_ci	kfree(wc);
587162306a36Sopenharmony_ci	btrfs_free_path(path);
587262306a36Sopenharmony_ciout:
587362306a36Sopenharmony_ci	/*
587462306a36Sopenharmony_ci	 * We were an unfinished drop root, check to see if there are any
587562306a36Sopenharmony_ci	 * pending, and if not clear and wake up any waiters.
587662306a36Sopenharmony_ci	 */
587762306a36Sopenharmony_ci	if (!err && unfinished_drop)
587862306a36Sopenharmony_ci		btrfs_maybe_wake_unfinished_drop(fs_info);
587962306a36Sopenharmony_ci
588062306a36Sopenharmony_ci	/*
588162306a36Sopenharmony_ci	 * So if we need to stop dropping the snapshot for whatever reason we
588262306a36Sopenharmony_ci	 * need to make sure to add it back to the dead root list so that we
588362306a36Sopenharmony_ci	 * keep trying to do the work later.  This also cleans up roots if we
588462306a36Sopenharmony_ci	 * don't have it in the radix (like when we recover after a power fail
588562306a36Sopenharmony_ci	 * or unmount) so we don't leak memory.
588662306a36Sopenharmony_ci	 */
588762306a36Sopenharmony_ci	if (!for_reloc && !root_dropped)
588862306a36Sopenharmony_ci		btrfs_add_dead_root(root);
588962306a36Sopenharmony_ci	return err;
589062306a36Sopenharmony_ci}
589162306a36Sopenharmony_ci
589262306a36Sopenharmony_ci/*
589362306a36Sopenharmony_ci * drop subtree rooted at tree block 'node'.
589462306a36Sopenharmony_ci *
589562306a36Sopenharmony_ci * NOTE: this function will unlock and release tree block 'node'
589662306a36Sopenharmony_ci * only used by relocation code
589762306a36Sopenharmony_ci */
589862306a36Sopenharmony_ciint btrfs_drop_subtree(struct btrfs_trans_handle *trans,
589962306a36Sopenharmony_ci			struct btrfs_root *root,
590062306a36Sopenharmony_ci			struct extent_buffer *node,
590162306a36Sopenharmony_ci			struct extent_buffer *parent)
590262306a36Sopenharmony_ci{
590362306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
590462306a36Sopenharmony_ci	struct btrfs_path *path;
590562306a36Sopenharmony_ci	struct walk_control *wc;
590662306a36Sopenharmony_ci	int level;
590762306a36Sopenharmony_ci	int parent_level;
590862306a36Sopenharmony_ci	int ret = 0;
590962306a36Sopenharmony_ci	int wret;
591062306a36Sopenharmony_ci
591162306a36Sopenharmony_ci	BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
591262306a36Sopenharmony_ci
591362306a36Sopenharmony_ci	path = btrfs_alloc_path();
591462306a36Sopenharmony_ci	if (!path)
591562306a36Sopenharmony_ci		return -ENOMEM;
591662306a36Sopenharmony_ci
591762306a36Sopenharmony_ci	wc = kzalloc(sizeof(*wc), GFP_NOFS);
591862306a36Sopenharmony_ci	if (!wc) {
591962306a36Sopenharmony_ci		btrfs_free_path(path);
592062306a36Sopenharmony_ci		return -ENOMEM;
592162306a36Sopenharmony_ci	}
592262306a36Sopenharmony_ci
592362306a36Sopenharmony_ci	btrfs_assert_tree_write_locked(parent);
592462306a36Sopenharmony_ci	parent_level = btrfs_header_level(parent);
592562306a36Sopenharmony_ci	atomic_inc(&parent->refs);
592662306a36Sopenharmony_ci	path->nodes[parent_level] = parent;
592762306a36Sopenharmony_ci	path->slots[parent_level] = btrfs_header_nritems(parent);
592862306a36Sopenharmony_ci
592962306a36Sopenharmony_ci	btrfs_assert_tree_write_locked(node);
593062306a36Sopenharmony_ci	level = btrfs_header_level(node);
593162306a36Sopenharmony_ci	path->nodes[level] = node;
593262306a36Sopenharmony_ci	path->slots[level] = 0;
593362306a36Sopenharmony_ci	path->locks[level] = BTRFS_WRITE_LOCK;
593462306a36Sopenharmony_ci
593562306a36Sopenharmony_ci	wc->refs[parent_level] = 1;
593662306a36Sopenharmony_ci	wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
593762306a36Sopenharmony_ci	wc->level = level;
593862306a36Sopenharmony_ci	wc->shared_level = -1;
593962306a36Sopenharmony_ci	wc->stage = DROP_REFERENCE;
594062306a36Sopenharmony_ci	wc->update_ref = 0;
594162306a36Sopenharmony_ci	wc->keep_locks = 1;
594262306a36Sopenharmony_ci	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info);
594362306a36Sopenharmony_ci
594462306a36Sopenharmony_ci	while (1) {
594562306a36Sopenharmony_ci		wret = walk_down_tree(trans, root, path, wc);
594662306a36Sopenharmony_ci		if (wret < 0) {
594762306a36Sopenharmony_ci			ret = wret;
594862306a36Sopenharmony_ci			break;
594962306a36Sopenharmony_ci		}
595062306a36Sopenharmony_ci
595162306a36Sopenharmony_ci		wret = walk_up_tree(trans, root, path, wc, parent_level);
595262306a36Sopenharmony_ci		if (wret < 0)
595362306a36Sopenharmony_ci			ret = wret;
595462306a36Sopenharmony_ci		if (wret != 0)
595562306a36Sopenharmony_ci			break;
595662306a36Sopenharmony_ci	}
595762306a36Sopenharmony_ci
595862306a36Sopenharmony_ci	kfree(wc);
595962306a36Sopenharmony_ci	btrfs_free_path(path);
596062306a36Sopenharmony_ci	return ret;
596162306a36Sopenharmony_ci}
596262306a36Sopenharmony_ci
596362306a36Sopenharmony_ciint btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
596462306a36Sopenharmony_ci				   u64 start, u64 end)
596562306a36Sopenharmony_ci{
596662306a36Sopenharmony_ci	return unpin_extent_range(fs_info, start, end, false);
596762306a36Sopenharmony_ci}
596862306a36Sopenharmony_ci
596962306a36Sopenharmony_ci/*
597062306a36Sopenharmony_ci * It used to be that old block groups would be left around forever.
597162306a36Sopenharmony_ci * Iterating over them would be enough to trim unused space.  Since we
597262306a36Sopenharmony_ci * now automatically remove them, we also need to iterate over unallocated
597362306a36Sopenharmony_ci * space.
597462306a36Sopenharmony_ci *
597562306a36Sopenharmony_ci * We don't want a transaction for this since the discard may take a
597662306a36Sopenharmony_ci * substantial amount of time.  We don't require that a transaction be
597762306a36Sopenharmony_ci * running, but we do need to take a running transaction into account
597862306a36Sopenharmony_ci * to ensure that we're not discarding chunks that were released or
597962306a36Sopenharmony_ci * allocated in the current transaction.
598062306a36Sopenharmony_ci *
598162306a36Sopenharmony_ci * Holding the chunks lock will prevent other threads from allocating
598262306a36Sopenharmony_ci * or releasing chunks, but it won't prevent a running transaction
598362306a36Sopenharmony_ci * from committing and releasing the memory that the pending chunks
598462306a36Sopenharmony_ci * list head uses.  For that, we need to take a reference to the
598562306a36Sopenharmony_ci * transaction and hold the commit root sem.  We only need to hold
598662306a36Sopenharmony_ci * it while performing the free space search since we have already
598762306a36Sopenharmony_ci * held back allocations.
598862306a36Sopenharmony_ci */
598962306a36Sopenharmony_cistatic int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
599062306a36Sopenharmony_ci{
599162306a36Sopenharmony_ci	u64 start = BTRFS_DEVICE_RANGE_RESERVED, len = 0, end = 0;
599262306a36Sopenharmony_ci	int ret;
599362306a36Sopenharmony_ci
599462306a36Sopenharmony_ci	*trimmed = 0;
599562306a36Sopenharmony_ci
599662306a36Sopenharmony_ci	/* Discard not supported = nothing to do. */
599762306a36Sopenharmony_ci	if (!bdev_max_discard_sectors(device->bdev))
599862306a36Sopenharmony_ci		return 0;
599962306a36Sopenharmony_ci
600062306a36Sopenharmony_ci	/* Not writable = nothing to do. */
600162306a36Sopenharmony_ci	if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
600262306a36Sopenharmony_ci		return 0;
600362306a36Sopenharmony_ci
600462306a36Sopenharmony_ci	/* No free space = nothing to do. */
600562306a36Sopenharmony_ci	if (device->total_bytes <= device->bytes_used)
600662306a36Sopenharmony_ci		return 0;
600762306a36Sopenharmony_ci
600862306a36Sopenharmony_ci	ret = 0;
600962306a36Sopenharmony_ci
601062306a36Sopenharmony_ci	while (1) {
601162306a36Sopenharmony_ci		struct btrfs_fs_info *fs_info = device->fs_info;
601262306a36Sopenharmony_ci		u64 bytes;
601362306a36Sopenharmony_ci
601462306a36Sopenharmony_ci		ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
601562306a36Sopenharmony_ci		if (ret)
601662306a36Sopenharmony_ci			break;
601762306a36Sopenharmony_ci
601862306a36Sopenharmony_ci		find_first_clear_extent_bit(&device->alloc_state, start,
601962306a36Sopenharmony_ci					    &start, &end,
602062306a36Sopenharmony_ci					    CHUNK_TRIMMED | CHUNK_ALLOCATED);
602162306a36Sopenharmony_ci
602262306a36Sopenharmony_ci		/* Check if there are any CHUNK_* bits left */
602362306a36Sopenharmony_ci		if (start > device->total_bytes) {
602462306a36Sopenharmony_ci			WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
602562306a36Sopenharmony_ci			btrfs_warn_in_rcu(fs_info,
602662306a36Sopenharmony_ci"ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu",
602762306a36Sopenharmony_ci					  start, end - start + 1,
602862306a36Sopenharmony_ci					  btrfs_dev_name(device),
602962306a36Sopenharmony_ci					  device->total_bytes);
603062306a36Sopenharmony_ci			mutex_unlock(&fs_info->chunk_mutex);
603162306a36Sopenharmony_ci			ret = 0;
603262306a36Sopenharmony_ci			break;
603362306a36Sopenharmony_ci		}
603462306a36Sopenharmony_ci
603562306a36Sopenharmony_ci		/* Ensure we skip the reserved space on each device. */
603662306a36Sopenharmony_ci		start = max_t(u64, start, BTRFS_DEVICE_RANGE_RESERVED);
603762306a36Sopenharmony_ci
603862306a36Sopenharmony_ci		/*
603962306a36Sopenharmony_ci		 * If find_first_clear_extent_bit find a range that spans the
604062306a36Sopenharmony_ci		 * end of the device it will set end to -1, in this case it's up
604162306a36Sopenharmony_ci		 * to the caller to trim the value to the size of the device.
604262306a36Sopenharmony_ci		 */
604362306a36Sopenharmony_ci		end = min(end, device->total_bytes - 1);
604462306a36Sopenharmony_ci
604562306a36Sopenharmony_ci		len = end - start + 1;
604662306a36Sopenharmony_ci
604762306a36Sopenharmony_ci		/* We didn't find any extents */
604862306a36Sopenharmony_ci		if (!len) {
604962306a36Sopenharmony_ci			mutex_unlock(&fs_info->chunk_mutex);
605062306a36Sopenharmony_ci			ret = 0;
605162306a36Sopenharmony_ci			break;
605262306a36Sopenharmony_ci		}
605362306a36Sopenharmony_ci
605462306a36Sopenharmony_ci		ret = btrfs_issue_discard(device->bdev, start, len,
605562306a36Sopenharmony_ci					  &bytes);
605662306a36Sopenharmony_ci		if (!ret)
605762306a36Sopenharmony_ci			set_extent_bit(&device->alloc_state, start,
605862306a36Sopenharmony_ci				       start + bytes - 1, CHUNK_TRIMMED, NULL);
605962306a36Sopenharmony_ci		mutex_unlock(&fs_info->chunk_mutex);
606062306a36Sopenharmony_ci
606162306a36Sopenharmony_ci		if (ret)
606262306a36Sopenharmony_ci			break;
606362306a36Sopenharmony_ci
606462306a36Sopenharmony_ci		start += len;
606562306a36Sopenharmony_ci		*trimmed += bytes;
606662306a36Sopenharmony_ci
606762306a36Sopenharmony_ci		if (fatal_signal_pending(current)) {
606862306a36Sopenharmony_ci			ret = -ERESTARTSYS;
606962306a36Sopenharmony_ci			break;
607062306a36Sopenharmony_ci		}
607162306a36Sopenharmony_ci
607262306a36Sopenharmony_ci		cond_resched();
607362306a36Sopenharmony_ci	}
607462306a36Sopenharmony_ci
607562306a36Sopenharmony_ci	return ret;
607662306a36Sopenharmony_ci}
607762306a36Sopenharmony_ci
607862306a36Sopenharmony_ci/*
607962306a36Sopenharmony_ci * Trim the whole filesystem by:
608062306a36Sopenharmony_ci * 1) trimming the free space in each block group
608162306a36Sopenharmony_ci * 2) trimming the unallocated space on each device
608262306a36Sopenharmony_ci *
608362306a36Sopenharmony_ci * This will also continue trimming even if a block group or device encounters
608462306a36Sopenharmony_ci * an error.  The return value will be the last error, or 0 if nothing bad
608562306a36Sopenharmony_ci * happens.
608662306a36Sopenharmony_ci */
608762306a36Sopenharmony_ciint btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
608862306a36Sopenharmony_ci{
608962306a36Sopenharmony_ci	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
609062306a36Sopenharmony_ci	struct btrfs_block_group *cache = NULL;
609162306a36Sopenharmony_ci	struct btrfs_device *device;
609262306a36Sopenharmony_ci	u64 group_trimmed;
609362306a36Sopenharmony_ci	u64 range_end = U64_MAX;
609462306a36Sopenharmony_ci	u64 start;
609562306a36Sopenharmony_ci	u64 end;
609662306a36Sopenharmony_ci	u64 trimmed = 0;
609762306a36Sopenharmony_ci	u64 bg_failed = 0;
609862306a36Sopenharmony_ci	u64 dev_failed = 0;
609962306a36Sopenharmony_ci	int bg_ret = 0;
610062306a36Sopenharmony_ci	int dev_ret = 0;
610162306a36Sopenharmony_ci	int ret = 0;
610262306a36Sopenharmony_ci
610362306a36Sopenharmony_ci	if (range->start == U64_MAX)
610462306a36Sopenharmony_ci		return -EINVAL;
610562306a36Sopenharmony_ci
610662306a36Sopenharmony_ci	/*
610762306a36Sopenharmony_ci	 * Check range overflow if range->len is set.
610862306a36Sopenharmony_ci	 * The default range->len is U64_MAX.
610962306a36Sopenharmony_ci	 */
611062306a36Sopenharmony_ci	if (range->len != U64_MAX &&
611162306a36Sopenharmony_ci	    check_add_overflow(range->start, range->len, &range_end))
611262306a36Sopenharmony_ci		return -EINVAL;
611362306a36Sopenharmony_ci
611462306a36Sopenharmony_ci	cache = btrfs_lookup_first_block_group(fs_info, range->start);
611562306a36Sopenharmony_ci	for (; cache; cache = btrfs_next_block_group(cache)) {
611662306a36Sopenharmony_ci		if (cache->start >= range_end) {
611762306a36Sopenharmony_ci			btrfs_put_block_group(cache);
611862306a36Sopenharmony_ci			break;
611962306a36Sopenharmony_ci		}
612062306a36Sopenharmony_ci
612162306a36Sopenharmony_ci		start = max(range->start, cache->start);
612262306a36Sopenharmony_ci		end = min(range_end, cache->start + cache->length);
612362306a36Sopenharmony_ci
612462306a36Sopenharmony_ci		if (end - start >= range->minlen) {
612562306a36Sopenharmony_ci			if (!btrfs_block_group_done(cache)) {
612662306a36Sopenharmony_ci				ret = btrfs_cache_block_group(cache, true);
612762306a36Sopenharmony_ci				if (ret) {
612862306a36Sopenharmony_ci					bg_failed++;
612962306a36Sopenharmony_ci					bg_ret = ret;
613062306a36Sopenharmony_ci					continue;
613162306a36Sopenharmony_ci				}
613262306a36Sopenharmony_ci			}
613362306a36Sopenharmony_ci			ret = btrfs_trim_block_group(cache,
613462306a36Sopenharmony_ci						     &group_trimmed,
613562306a36Sopenharmony_ci						     start,
613662306a36Sopenharmony_ci						     end,
613762306a36Sopenharmony_ci						     range->minlen);
613862306a36Sopenharmony_ci
613962306a36Sopenharmony_ci			trimmed += group_trimmed;
614062306a36Sopenharmony_ci			if (ret) {
614162306a36Sopenharmony_ci				bg_failed++;
614262306a36Sopenharmony_ci				bg_ret = ret;
614362306a36Sopenharmony_ci				continue;
614462306a36Sopenharmony_ci			}
614562306a36Sopenharmony_ci		}
614662306a36Sopenharmony_ci	}
614762306a36Sopenharmony_ci
614862306a36Sopenharmony_ci	if (bg_failed)
614962306a36Sopenharmony_ci		btrfs_warn(fs_info,
615062306a36Sopenharmony_ci			"failed to trim %llu block group(s), last error %d",
615162306a36Sopenharmony_ci			bg_failed, bg_ret);
615262306a36Sopenharmony_ci
615362306a36Sopenharmony_ci	mutex_lock(&fs_devices->device_list_mutex);
615462306a36Sopenharmony_ci	list_for_each_entry(device, &fs_devices->devices, dev_list) {
615562306a36Sopenharmony_ci		if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
615662306a36Sopenharmony_ci			continue;
615762306a36Sopenharmony_ci
615862306a36Sopenharmony_ci		ret = btrfs_trim_free_extents(device, &group_trimmed);
615962306a36Sopenharmony_ci		if (ret) {
616062306a36Sopenharmony_ci			dev_failed++;
616162306a36Sopenharmony_ci			dev_ret = ret;
616262306a36Sopenharmony_ci			break;
616362306a36Sopenharmony_ci		}
616462306a36Sopenharmony_ci
616562306a36Sopenharmony_ci		trimmed += group_trimmed;
616662306a36Sopenharmony_ci	}
616762306a36Sopenharmony_ci	mutex_unlock(&fs_devices->device_list_mutex);
616862306a36Sopenharmony_ci
616962306a36Sopenharmony_ci	if (dev_failed)
617062306a36Sopenharmony_ci		btrfs_warn(fs_info,
617162306a36Sopenharmony_ci			"failed to trim %llu device(s), last error %d",
617262306a36Sopenharmony_ci			dev_failed, dev_ret);
617362306a36Sopenharmony_ci	range->len = trimmed;
617462306a36Sopenharmony_ci	if (bg_ret)
617562306a36Sopenharmony_ci		return bg_ret;
617662306a36Sopenharmony_ci	return dev_ret;
617762306a36Sopenharmony_ci}
6178