162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * refcounttree.c
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2009 Oracle.  All rights reserved.
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/sort.h>
962306a36Sopenharmony_ci#include <cluster/masklog.h>
1062306a36Sopenharmony_ci#include "ocfs2.h"
1162306a36Sopenharmony_ci#include "inode.h"
1262306a36Sopenharmony_ci#include "alloc.h"
1362306a36Sopenharmony_ci#include "suballoc.h"
1462306a36Sopenharmony_ci#include "journal.h"
1562306a36Sopenharmony_ci#include "uptodate.h"
1662306a36Sopenharmony_ci#include "super.h"
1762306a36Sopenharmony_ci#include "buffer_head_io.h"
1862306a36Sopenharmony_ci#include "blockcheck.h"
1962306a36Sopenharmony_ci#include "refcounttree.h"
2062306a36Sopenharmony_ci#include "sysfile.h"
2162306a36Sopenharmony_ci#include "dlmglue.h"
2262306a36Sopenharmony_ci#include "extent_map.h"
2362306a36Sopenharmony_ci#include "aops.h"
2462306a36Sopenharmony_ci#include "xattr.h"
2562306a36Sopenharmony_ci#include "namei.h"
2662306a36Sopenharmony_ci#include "ocfs2_trace.h"
2762306a36Sopenharmony_ci#include "file.h"
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_ci#include <linux/bio.h>
3062306a36Sopenharmony_ci#include <linux/blkdev.h>
3162306a36Sopenharmony_ci#include <linux/slab.h>
3262306a36Sopenharmony_ci#include <linux/writeback.h>
3362306a36Sopenharmony_ci#include <linux/pagevec.h>
3462306a36Sopenharmony_ci#include <linux/swap.h>
3562306a36Sopenharmony_ci#include <linux/security.h>
3662306a36Sopenharmony_ci#include <linux/fsnotify.h>
3762306a36Sopenharmony_ci#include <linux/quotaops.h>
3862306a36Sopenharmony_ci#include <linux/namei.h>
3962306a36Sopenharmony_ci#include <linux/mount.h>
4062306a36Sopenharmony_ci#include <linux/posix_acl.h>
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_cistruct ocfs2_cow_context {
4362306a36Sopenharmony_ci	struct inode *inode;
4462306a36Sopenharmony_ci	u32 cow_start;
4562306a36Sopenharmony_ci	u32 cow_len;
4662306a36Sopenharmony_ci	struct ocfs2_extent_tree data_et;
4762306a36Sopenharmony_ci	struct ocfs2_refcount_tree *ref_tree;
4862306a36Sopenharmony_ci	struct buffer_head *ref_root_bh;
4962306a36Sopenharmony_ci	struct ocfs2_alloc_context *meta_ac;
5062306a36Sopenharmony_ci	struct ocfs2_alloc_context *data_ac;
5162306a36Sopenharmony_ci	struct ocfs2_cached_dealloc_ctxt dealloc;
5262306a36Sopenharmony_ci	void *cow_object;
5362306a36Sopenharmony_ci	struct ocfs2_post_refcount *post_refcount;
5462306a36Sopenharmony_ci	int extra_credits;
5562306a36Sopenharmony_ci	int (*get_clusters)(struct ocfs2_cow_context *context,
5662306a36Sopenharmony_ci			    u32 v_cluster, u32 *p_cluster,
5762306a36Sopenharmony_ci			    u32 *num_clusters,
5862306a36Sopenharmony_ci			    unsigned int *extent_flags);
5962306a36Sopenharmony_ci	int (*cow_duplicate_clusters)(handle_t *handle,
6062306a36Sopenharmony_ci				      struct inode *inode,
6162306a36Sopenharmony_ci				      u32 cpos, u32 old_cluster,
6262306a36Sopenharmony_ci				      u32 new_cluster, u32 new_len);
6362306a36Sopenharmony_ci};
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_cistatic inline struct ocfs2_refcount_tree *
6662306a36Sopenharmony_cicache_info_to_refcount(struct ocfs2_caching_info *ci)
6762306a36Sopenharmony_ci{
6862306a36Sopenharmony_ci	return container_of(ci, struct ocfs2_refcount_tree, rf_ci);
6962306a36Sopenharmony_ci}
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_cistatic int ocfs2_validate_refcount_block(struct super_block *sb,
7262306a36Sopenharmony_ci					 struct buffer_head *bh)
7362306a36Sopenharmony_ci{
7462306a36Sopenharmony_ci	int rc;
7562306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb =
7662306a36Sopenharmony_ci		(struct ocfs2_refcount_block *)bh->b_data;
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci	trace_ocfs2_validate_refcount_block((unsigned long long)bh->b_blocknr);
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci	BUG_ON(!buffer_uptodate(bh));
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci	/*
8362306a36Sopenharmony_ci	 * If the ecc fails, we return the error but otherwise
8462306a36Sopenharmony_ci	 * leave the filesystem running.  We know any error is
8562306a36Sopenharmony_ci	 * local to this block.
8662306a36Sopenharmony_ci	 */
8762306a36Sopenharmony_ci	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &rb->rf_check);
8862306a36Sopenharmony_ci	if (rc) {
8962306a36Sopenharmony_ci		mlog(ML_ERROR, "Checksum failed for refcount block %llu\n",
9062306a36Sopenharmony_ci		     (unsigned long long)bh->b_blocknr);
9162306a36Sopenharmony_ci		return rc;
9262306a36Sopenharmony_ci	}
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci	if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) {
9662306a36Sopenharmony_ci		rc = ocfs2_error(sb,
9762306a36Sopenharmony_ci				 "Refcount block #%llu has bad signature %.*s\n",
9862306a36Sopenharmony_ci				 (unsigned long long)bh->b_blocknr, 7,
9962306a36Sopenharmony_ci				 rb->rf_signature);
10062306a36Sopenharmony_ci		goto out;
10162306a36Sopenharmony_ci	}
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci	if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) {
10462306a36Sopenharmony_ci		rc = ocfs2_error(sb,
10562306a36Sopenharmony_ci				 "Refcount block #%llu has an invalid rf_blkno of %llu\n",
10662306a36Sopenharmony_ci				 (unsigned long long)bh->b_blocknr,
10762306a36Sopenharmony_ci				 (unsigned long long)le64_to_cpu(rb->rf_blkno));
10862306a36Sopenharmony_ci		goto out;
10962306a36Sopenharmony_ci	}
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci	if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) {
11262306a36Sopenharmony_ci		rc = ocfs2_error(sb,
11362306a36Sopenharmony_ci				 "Refcount block #%llu has an invalid rf_fs_generation of #%u\n",
11462306a36Sopenharmony_ci				 (unsigned long long)bh->b_blocknr,
11562306a36Sopenharmony_ci				 le32_to_cpu(rb->rf_fs_generation));
11662306a36Sopenharmony_ci		goto out;
11762306a36Sopenharmony_ci	}
11862306a36Sopenharmony_ciout:
11962306a36Sopenharmony_ci	return rc;
12062306a36Sopenharmony_ci}
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_cistatic int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci,
12362306a36Sopenharmony_ci				     u64 rb_blkno,
12462306a36Sopenharmony_ci				     struct buffer_head **bh)
12562306a36Sopenharmony_ci{
12662306a36Sopenharmony_ci	int rc;
12762306a36Sopenharmony_ci	struct buffer_head *tmp = *bh;
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci	rc = ocfs2_read_block(ci, rb_blkno, &tmp,
13062306a36Sopenharmony_ci			      ocfs2_validate_refcount_block);
13162306a36Sopenharmony_ci
13262306a36Sopenharmony_ci	/* If ocfs2_read_block() got us a new bh, pass it up. */
13362306a36Sopenharmony_ci	if (!rc && !*bh)
13462306a36Sopenharmony_ci		*bh = tmp;
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci	return rc;
13762306a36Sopenharmony_ci}
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_cistatic u64 ocfs2_refcount_cache_owner(struct ocfs2_caching_info *ci)
14062306a36Sopenharmony_ci{
14162306a36Sopenharmony_ci	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci	return rf->rf_blkno;
14462306a36Sopenharmony_ci}
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_cistatic struct super_block *
14762306a36Sopenharmony_ciocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci)
14862306a36Sopenharmony_ci{
14962306a36Sopenharmony_ci	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	return rf->rf_sb;
15262306a36Sopenharmony_ci}
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_cistatic void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci)
15562306a36Sopenharmony_ci__acquires(&rf->rf_lock)
15662306a36Sopenharmony_ci{
15762306a36Sopenharmony_ci	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci	spin_lock(&rf->rf_lock);
16062306a36Sopenharmony_ci}
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_cistatic void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci)
16362306a36Sopenharmony_ci__releases(&rf->rf_lock)
16462306a36Sopenharmony_ci{
16562306a36Sopenharmony_ci	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_ci	spin_unlock(&rf->rf_lock);
16862306a36Sopenharmony_ci}
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_cistatic void ocfs2_refcount_cache_io_lock(struct ocfs2_caching_info *ci)
17162306a36Sopenharmony_ci{
17262306a36Sopenharmony_ci	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_ci	mutex_lock(&rf->rf_io_mutex);
17562306a36Sopenharmony_ci}
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_cistatic void ocfs2_refcount_cache_io_unlock(struct ocfs2_caching_info *ci)
17862306a36Sopenharmony_ci{
17962306a36Sopenharmony_ci	struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	mutex_unlock(&rf->rf_io_mutex);
18262306a36Sopenharmony_ci}
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_cistatic const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = {
18562306a36Sopenharmony_ci	.co_owner		= ocfs2_refcount_cache_owner,
18662306a36Sopenharmony_ci	.co_get_super		= ocfs2_refcount_cache_get_super,
18762306a36Sopenharmony_ci	.co_cache_lock		= ocfs2_refcount_cache_lock,
18862306a36Sopenharmony_ci	.co_cache_unlock	= ocfs2_refcount_cache_unlock,
18962306a36Sopenharmony_ci	.co_io_lock		= ocfs2_refcount_cache_io_lock,
19062306a36Sopenharmony_ci	.co_io_unlock		= ocfs2_refcount_cache_io_unlock,
19162306a36Sopenharmony_ci};
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_cistatic struct ocfs2_refcount_tree *
19462306a36Sopenharmony_ciocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno)
19562306a36Sopenharmony_ci{
19662306a36Sopenharmony_ci	struct rb_node *n = osb->osb_rf_lock_tree.rb_node;
19762306a36Sopenharmony_ci	struct ocfs2_refcount_tree *tree = NULL;
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci	while (n) {
20062306a36Sopenharmony_ci		tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node);
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci		if (blkno < tree->rf_blkno)
20362306a36Sopenharmony_ci			n = n->rb_left;
20462306a36Sopenharmony_ci		else if (blkno > tree->rf_blkno)
20562306a36Sopenharmony_ci			n = n->rb_right;
20662306a36Sopenharmony_ci		else
20762306a36Sopenharmony_ci			return tree;
20862306a36Sopenharmony_ci	}
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_ci	return NULL;
21162306a36Sopenharmony_ci}
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci/* osb_lock is already locked. */
21462306a36Sopenharmony_cistatic void ocfs2_insert_refcount_tree(struct ocfs2_super *osb,
21562306a36Sopenharmony_ci				       struct ocfs2_refcount_tree *new)
21662306a36Sopenharmony_ci{
21762306a36Sopenharmony_ci	u64 rf_blkno = new->rf_blkno;
21862306a36Sopenharmony_ci	struct rb_node *parent = NULL;
21962306a36Sopenharmony_ci	struct rb_node **p = &osb->osb_rf_lock_tree.rb_node;
22062306a36Sopenharmony_ci	struct ocfs2_refcount_tree *tmp;
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci	while (*p) {
22362306a36Sopenharmony_ci		parent = *p;
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci		tmp = rb_entry(parent, struct ocfs2_refcount_tree,
22662306a36Sopenharmony_ci			       rf_node);
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci		if (rf_blkno < tmp->rf_blkno)
22962306a36Sopenharmony_ci			p = &(*p)->rb_left;
23062306a36Sopenharmony_ci		else if (rf_blkno > tmp->rf_blkno)
23162306a36Sopenharmony_ci			p = &(*p)->rb_right;
23262306a36Sopenharmony_ci		else {
23362306a36Sopenharmony_ci			/* This should never happen! */
23462306a36Sopenharmony_ci			mlog(ML_ERROR, "Duplicate refcount block %llu found!\n",
23562306a36Sopenharmony_ci			     (unsigned long long)rf_blkno);
23662306a36Sopenharmony_ci			BUG();
23762306a36Sopenharmony_ci		}
23862306a36Sopenharmony_ci	}
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ci	rb_link_node(&new->rf_node, parent, p);
24162306a36Sopenharmony_ci	rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree);
24262306a36Sopenharmony_ci}
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_cistatic void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree)
24562306a36Sopenharmony_ci{
24662306a36Sopenharmony_ci	ocfs2_metadata_cache_exit(&tree->rf_ci);
24762306a36Sopenharmony_ci	ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres);
24862306a36Sopenharmony_ci	ocfs2_lock_res_free(&tree->rf_lockres);
24962306a36Sopenharmony_ci	kfree(tree);
25062306a36Sopenharmony_ci}
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_cistatic inline void
25362306a36Sopenharmony_ciocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb,
25462306a36Sopenharmony_ci					struct ocfs2_refcount_tree *tree)
25562306a36Sopenharmony_ci{
25662306a36Sopenharmony_ci	rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree);
25762306a36Sopenharmony_ci	if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree)
25862306a36Sopenharmony_ci		osb->osb_ref_tree_lru = NULL;
25962306a36Sopenharmony_ci}
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_cistatic void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
26262306a36Sopenharmony_ci					struct ocfs2_refcount_tree *tree)
26362306a36Sopenharmony_ci{
26462306a36Sopenharmony_ci	spin_lock(&osb->osb_lock);
26562306a36Sopenharmony_ci	ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
26662306a36Sopenharmony_ci	spin_unlock(&osb->osb_lock);
26762306a36Sopenharmony_ci}
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_cistatic void ocfs2_kref_remove_refcount_tree(struct kref *kref)
27062306a36Sopenharmony_ci{
27162306a36Sopenharmony_ci	struct ocfs2_refcount_tree *tree =
27262306a36Sopenharmony_ci		container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci	ocfs2_free_refcount_tree(tree);
27562306a36Sopenharmony_ci}
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_cistatic inline void
27862306a36Sopenharmony_ciocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree)
27962306a36Sopenharmony_ci{
28062306a36Sopenharmony_ci	kref_get(&tree->rf_getcnt);
28162306a36Sopenharmony_ci}
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_cistatic inline void
28462306a36Sopenharmony_ciocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree)
28562306a36Sopenharmony_ci{
28662306a36Sopenharmony_ci	kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree);
28762306a36Sopenharmony_ci}
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_cistatic inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new,
29062306a36Sopenharmony_ci					       struct super_block *sb)
29162306a36Sopenharmony_ci{
29262306a36Sopenharmony_ci	ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops);
29362306a36Sopenharmony_ci	mutex_init(&new->rf_io_mutex);
29462306a36Sopenharmony_ci	new->rf_sb = sb;
29562306a36Sopenharmony_ci	spin_lock_init(&new->rf_lock);
29662306a36Sopenharmony_ci}
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_cistatic inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb,
29962306a36Sopenharmony_ci					struct ocfs2_refcount_tree *new,
30062306a36Sopenharmony_ci					u64 rf_blkno, u32 generation)
30162306a36Sopenharmony_ci{
30262306a36Sopenharmony_ci	init_rwsem(&new->rf_sem);
30362306a36Sopenharmony_ci	ocfs2_refcount_lock_res_init(&new->rf_lockres, osb,
30462306a36Sopenharmony_ci				     rf_blkno, generation);
30562306a36Sopenharmony_ci}
30662306a36Sopenharmony_ci
30762306a36Sopenharmony_cistatic struct ocfs2_refcount_tree*
30862306a36Sopenharmony_ciocfs2_allocate_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno)
30962306a36Sopenharmony_ci{
31062306a36Sopenharmony_ci	struct ocfs2_refcount_tree *new;
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_ci	new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
31362306a36Sopenharmony_ci	if (!new)
31462306a36Sopenharmony_ci		return NULL;
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	new->rf_blkno = rf_blkno;
31762306a36Sopenharmony_ci	kref_init(&new->rf_getcnt);
31862306a36Sopenharmony_ci	ocfs2_init_refcount_tree_ci(new, osb->sb);
31962306a36Sopenharmony_ci
32062306a36Sopenharmony_ci	return new;
32162306a36Sopenharmony_ci}
32262306a36Sopenharmony_ci
32362306a36Sopenharmony_cistatic int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
32462306a36Sopenharmony_ci				   struct ocfs2_refcount_tree **ret_tree)
32562306a36Sopenharmony_ci{
32662306a36Sopenharmony_ci	int ret = 0;
32762306a36Sopenharmony_ci	struct ocfs2_refcount_tree *tree, *new = NULL;
32862306a36Sopenharmony_ci	struct buffer_head *ref_root_bh = NULL;
32962306a36Sopenharmony_ci	struct ocfs2_refcount_block *ref_rb;
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_ci	spin_lock(&osb->osb_lock);
33262306a36Sopenharmony_ci	if (osb->osb_ref_tree_lru &&
33362306a36Sopenharmony_ci	    osb->osb_ref_tree_lru->rf_blkno == rf_blkno)
33462306a36Sopenharmony_ci		tree = osb->osb_ref_tree_lru;
33562306a36Sopenharmony_ci	else
33662306a36Sopenharmony_ci		tree = ocfs2_find_refcount_tree(osb, rf_blkno);
33762306a36Sopenharmony_ci	if (tree)
33862306a36Sopenharmony_ci		goto out;
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci	spin_unlock(&osb->osb_lock);
34162306a36Sopenharmony_ci
34262306a36Sopenharmony_ci	new = ocfs2_allocate_refcount_tree(osb, rf_blkno);
34362306a36Sopenharmony_ci	if (!new) {
34462306a36Sopenharmony_ci		ret = -ENOMEM;
34562306a36Sopenharmony_ci		mlog_errno(ret);
34662306a36Sopenharmony_ci		return ret;
34762306a36Sopenharmony_ci	}
34862306a36Sopenharmony_ci	/*
34962306a36Sopenharmony_ci	 * We need the generation to create the refcount tree lock and since
35062306a36Sopenharmony_ci	 * it isn't changed during the tree modification, we are safe here to
35162306a36Sopenharmony_ci	 * read without protection.
35262306a36Sopenharmony_ci	 * We also have to purge the cache after we create the lock since the
35362306a36Sopenharmony_ci	 * refcount block may have the stale data. It can only be trusted when
35462306a36Sopenharmony_ci	 * we hold the refcount lock.
35562306a36Sopenharmony_ci	 */
35662306a36Sopenharmony_ci	ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh);
35762306a36Sopenharmony_ci	if (ret) {
35862306a36Sopenharmony_ci		mlog_errno(ret);
35962306a36Sopenharmony_ci		ocfs2_metadata_cache_exit(&new->rf_ci);
36062306a36Sopenharmony_ci		kfree(new);
36162306a36Sopenharmony_ci		return ret;
36262306a36Sopenharmony_ci	}
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci	ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
36562306a36Sopenharmony_ci	new->rf_generation = le32_to_cpu(ref_rb->rf_generation);
36662306a36Sopenharmony_ci	ocfs2_init_refcount_tree_lock(osb, new, rf_blkno,
36762306a36Sopenharmony_ci				      new->rf_generation);
36862306a36Sopenharmony_ci	ocfs2_metadata_cache_purge(&new->rf_ci);
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci	spin_lock(&osb->osb_lock);
37162306a36Sopenharmony_ci	tree = ocfs2_find_refcount_tree(osb, rf_blkno);
37262306a36Sopenharmony_ci	if (tree)
37362306a36Sopenharmony_ci		goto out;
37462306a36Sopenharmony_ci
37562306a36Sopenharmony_ci	ocfs2_insert_refcount_tree(osb, new);
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_ci	tree = new;
37862306a36Sopenharmony_ci	new = NULL;
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_ciout:
38162306a36Sopenharmony_ci	*ret_tree = tree;
38262306a36Sopenharmony_ci
38362306a36Sopenharmony_ci	osb->osb_ref_tree_lru = tree;
38462306a36Sopenharmony_ci
38562306a36Sopenharmony_ci	spin_unlock(&osb->osb_lock);
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_ci	if (new)
38862306a36Sopenharmony_ci		ocfs2_free_refcount_tree(new);
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_ci	brelse(ref_root_bh);
39162306a36Sopenharmony_ci	return ret;
39262306a36Sopenharmony_ci}
39362306a36Sopenharmony_ci
39462306a36Sopenharmony_cistatic int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno)
39562306a36Sopenharmony_ci{
39662306a36Sopenharmony_ci	int ret;
39762306a36Sopenharmony_ci	struct buffer_head *di_bh = NULL;
39862306a36Sopenharmony_ci	struct ocfs2_dinode *di;
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_ci	ret = ocfs2_read_inode_block(inode, &di_bh);
40162306a36Sopenharmony_ci	if (ret) {
40262306a36Sopenharmony_ci		mlog_errno(ret);
40362306a36Sopenharmony_ci		goto out;
40462306a36Sopenharmony_ci	}
40562306a36Sopenharmony_ci
40662306a36Sopenharmony_ci	BUG_ON(!ocfs2_is_refcount_inode(inode));
40762306a36Sopenharmony_ci
40862306a36Sopenharmony_ci	di = (struct ocfs2_dinode *)di_bh->b_data;
40962306a36Sopenharmony_ci	*ref_blkno = le64_to_cpu(di->i_refcount_loc);
41062306a36Sopenharmony_ci	brelse(di_bh);
41162306a36Sopenharmony_ciout:
41262306a36Sopenharmony_ci	return ret;
41362306a36Sopenharmony_ci}
41462306a36Sopenharmony_ci
41562306a36Sopenharmony_cistatic int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
41662306a36Sopenharmony_ci				      struct ocfs2_refcount_tree *tree, int rw)
41762306a36Sopenharmony_ci{
41862306a36Sopenharmony_ci	int ret;
41962306a36Sopenharmony_ci
42062306a36Sopenharmony_ci	ret = ocfs2_refcount_lock(tree, rw);
42162306a36Sopenharmony_ci	if (ret) {
42262306a36Sopenharmony_ci		mlog_errno(ret);
42362306a36Sopenharmony_ci		goto out;
42462306a36Sopenharmony_ci	}
42562306a36Sopenharmony_ci
42662306a36Sopenharmony_ci	if (rw)
42762306a36Sopenharmony_ci		down_write(&tree->rf_sem);
42862306a36Sopenharmony_ci	else
42962306a36Sopenharmony_ci		down_read(&tree->rf_sem);
43062306a36Sopenharmony_ci
43162306a36Sopenharmony_ciout:
43262306a36Sopenharmony_ci	return ret;
43362306a36Sopenharmony_ci}
43462306a36Sopenharmony_ci
43562306a36Sopenharmony_ci/*
43662306a36Sopenharmony_ci * Lock the refcount tree pointed by ref_blkno and return the tree.
43762306a36Sopenharmony_ci * In most case, we lock the tree and read the refcount block.
43862306a36Sopenharmony_ci * So read it here if the caller really needs it.
43962306a36Sopenharmony_ci *
44062306a36Sopenharmony_ci * If the tree has been re-created by other node, it will free the
44162306a36Sopenharmony_ci * old one and re-create it.
44262306a36Sopenharmony_ci */
44362306a36Sopenharmony_ciint ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
44462306a36Sopenharmony_ci			     u64 ref_blkno, int rw,
44562306a36Sopenharmony_ci			     struct ocfs2_refcount_tree **ret_tree,
44662306a36Sopenharmony_ci			     struct buffer_head **ref_bh)
44762306a36Sopenharmony_ci{
44862306a36Sopenharmony_ci	int ret, delete_tree = 0;
44962306a36Sopenharmony_ci	struct ocfs2_refcount_tree *tree = NULL;
45062306a36Sopenharmony_ci	struct buffer_head *ref_root_bh = NULL;
45162306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb;
45262306a36Sopenharmony_ci
45362306a36Sopenharmony_ciagain:
45462306a36Sopenharmony_ci	ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree);
45562306a36Sopenharmony_ci	if (ret) {
45662306a36Sopenharmony_ci		mlog_errno(ret);
45762306a36Sopenharmony_ci		return ret;
45862306a36Sopenharmony_ci	}
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_ci	ocfs2_refcount_tree_get(tree);
46162306a36Sopenharmony_ci
46262306a36Sopenharmony_ci	ret = __ocfs2_lock_refcount_tree(osb, tree, rw);
46362306a36Sopenharmony_ci	if (ret) {
46462306a36Sopenharmony_ci		mlog_errno(ret);
46562306a36Sopenharmony_ci		ocfs2_refcount_tree_put(tree);
46662306a36Sopenharmony_ci		goto out;
46762306a36Sopenharmony_ci	}
46862306a36Sopenharmony_ci
46962306a36Sopenharmony_ci	ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
47062306a36Sopenharmony_ci					&ref_root_bh);
47162306a36Sopenharmony_ci	if (ret) {
47262306a36Sopenharmony_ci		mlog_errno(ret);
47362306a36Sopenharmony_ci		ocfs2_unlock_refcount_tree(osb, tree, rw);
47462306a36Sopenharmony_ci		goto out;
47562306a36Sopenharmony_ci	}
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
47862306a36Sopenharmony_ci	/*
47962306a36Sopenharmony_ci	 * If the refcount block has been freed and re-created, we may need
48062306a36Sopenharmony_ci	 * to recreate the refcount tree also.
48162306a36Sopenharmony_ci	 *
48262306a36Sopenharmony_ci	 * Here we just remove the tree from the rb-tree, and the last
48362306a36Sopenharmony_ci	 * kref holder will unlock and delete this refcount_tree.
48462306a36Sopenharmony_ci	 * Then we goto "again" and ocfs2_get_refcount_tree will create
48562306a36Sopenharmony_ci	 * the new refcount tree for us.
48662306a36Sopenharmony_ci	 */
48762306a36Sopenharmony_ci	if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) {
48862306a36Sopenharmony_ci		if (!tree->rf_removed) {
48962306a36Sopenharmony_ci			ocfs2_erase_refcount_tree_from_list(osb, tree);
49062306a36Sopenharmony_ci			tree->rf_removed = 1;
49162306a36Sopenharmony_ci			delete_tree = 1;
49262306a36Sopenharmony_ci		}
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_ci		ocfs2_unlock_refcount_tree(osb, tree, rw);
49562306a36Sopenharmony_ci		/*
49662306a36Sopenharmony_ci		 * We get an extra reference when we create the refcount
49762306a36Sopenharmony_ci		 * tree, so another put will destroy it.
49862306a36Sopenharmony_ci		 */
49962306a36Sopenharmony_ci		if (delete_tree)
50062306a36Sopenharmony_ci			ocfs2_refcount_tree_put(tree);
50162306a36Sopenharmony_ci		brelse(ref_root_bh);
50262306a36Sopenharmony_ci		ref_root_bh = NULL;
50362306a36Sopenharmony_ci		goto again;
50462306a36Sopenharmony_ci	}
50562306a36Sopenharmony_ci
50662306a36Sopenharmony_ci	*ret_tree = tree;
50762306a36Sopenharmony_ci	if (ref_bh) {
50862306a36Sopenharmony_ci		*ref_bh = ref_root_bh;
50962306a36Sopenharmony_ci		ref_root_bh = NULL;
51062306a36Sopenharmony_ci	}
51162306a36Sopenharmony_ciout:
51262306a36Sopenharmony_ci	brelse(ref_root_bh);
51362306a36Sopenharmony_ci	return ret;
51462306a36Sopenharmony_ci}
51562306a36Sopenharmony_ci
51662306a36Sopenharmony_civoid ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
51762306a36Sopenharmony_ci				struct ocfs2_refcount_tree *tree, int rw)
51862306a36Sopenharmony_ci{
51962306a36Sopenharmony_ci	if (rw)
52062306a36Sopenharmony_ci		up_write(&tree->rf_sem);
52162306a36Sopenharmony_ci	else
52262306a36Sopenharmony_ci		up_read(&tree->rf_sem);
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_ci	ocfs2_refcount_unlock(tree, rw);
52562306a36Sopenharmony_ci	ocfs2_refcount_tree_put(tree);
52662306a36Sopenharmony_ci}
52762306a36Sopenharmony_ci
52862306a36Sopenharmony_civoid ocfs2_purge_refcount_trees(struct ocfs2_super *osb)
52962306a36Sopenharmony_ci{
53062306a36Sopenharmony_ci	struct rb_node *node;
53162306a36Sopenharmony_ci	struct ocfs2_refcount_tree *tree;
53262306a36Sopenharmony_ci	struct rb_root *root = &osb->osb_rf_lock_tree;
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_ci	while ((node = rb_last(root)) != NULL) {
53562306a36Sopenharmony_ci		tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node);
53662306a36Sopenharmony_ci
53762306a36Sopenharmony_ci		trace_ocfs2_purge_refcount_trees(
53862306a36Sopenharmony_ci				(unsigned long long) tree->rf_blkno);
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci		rb_erase(&tree->rf_node, root);
54162306a36Sopenharmony_ci		ocfs2_free_refcount_tree(tree);
54262306a36Sopenharmony_ci	}
54362306a36Sopenharmony_ci}
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ci/*
54662306a36Sopenharmony_ci * Create a refcount tree for an inode.
54762306a36Sopenharmony_ci * We take for granted that the inode is already locked.
54862306a36Sopenharmony_ci */
54962306a36Sopenharmony_cistatic int ocfs2_create_refcount_tree(struct inode *inode,
55062306a36Sopenharmony_ci				      struct buffer_head *di_bh)
55162306a36Sopenharmony_ci{
55262306a36Sopenharmony_ci	int ret;
55362306a36Sopenharmony_ci	handle_t *handle = NULL;
55462306a36Sopenharmony_ci	struct ocfs2_alloc_context *meta_ac = NULL;
55562306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
55662306a36Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
55762306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
55862306a36Sopenharmony_ci	struct buffer_head *new_bh = NULL;
55962306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb;
56062306a36Sopenharmony_ci	struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL;
56162306a36Sopenharmony_ci	u16 suballoc_bit_start;
56262306a36Sopenharmony_ci	u32 num_got;
56362306a36Sopenharmony_ci	u64 suballoc_loc, first_blkno;
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci	BUG_ON(ocfs2_is_refcount_inode(inode));
56662306a36Sopenharmony_ci
56762306a36Sopenharmony_ci	trace_ocfs2_create_refcount_tree(
56862306a36Sopenharmony_ci		(unsigned long long)oi->ip_blkno);
56962306a36Sopenharmony_ci
57062306a36Sopenharmony_ci	ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
57162306a36Sopenharmony_ci	if (ret) {
57262306a36Sopenharmony_ci		mlog_errno(ret);
57362306a36Sopenharmony_ci		goto out;
57462306a36Sopenharmony_ci	}
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_CREATE_CREDITS);
57762306a36Sopenharmony_ci	if (IS_ERR(handle)) {
57862306a36Sopenharmony_ci		ret = PTR_ERR(handle);
57962306a36Sopenharmony_ci		mlog_errno(ret);
58062306a36Sopenharmony_ci		goto out;
58162306a36Sopenharmony_ci	}
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_ci	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
58462306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
58562306a36Sopenharmony_ci	if (ret) {
58662306a36Sopenharmony_ci		mlog_errno(ret);
58762306a36Sopenharmony_ci		goto out_commit;
58862306a36Sopenharmony_ci	}
58962306a36Sopenharmony_ci
59062306a36Sopenharmony_ci	ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
59162306a36Sopenharmony_ci				   &suballoc_bit_start, &num_got,
59262306a36Sopenharmony_ci				   &first_blkno);
59362306a36Sopenharmony_ci	if (ret) {
59462306a36Sopenharmony_ci		mlog_errno(ret);
59562306a36Sopenharmony_ci		goto out_commit;
59662306a36Sopenharmony_ci	}
59762306a36Sopenharmony_ci
59862306a36Sopenharmony_ci	new_tree = ocfs2_allocate_refcount_tree(osb, first_blkno);
59962306a36Sopenharmony_ci	if (!new_tree) {
60062306a36Sopenharmony_ci		ret = -ENOMEM;
60162306a36Sopenharmony_ci		mlog_errno(ret);
60262306a36Sopenharmony_ci		goto out_commit;
60362306a36Sopenharmony_ci	}
60462306a36Sopenharmony_ci
60562306a36Sopenharmony_ci	new_bh = sb_getblk(inode->i_sb, first_blkno);
60662306a36Sopenharmony_ci	if (!new_bh) {
60762306a36Sopenharmony_ci		ret = -ENOMEM;
60862306a36Sopenharmony_ci		mlog_errno(ret);
60962306a36Sopenharmony_ci		goto out_commit;
61062306a36Sopenharmony_ci	}
61162306a36Sopenharmony_ci	ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh);
61262306a36Sopenharmony_ci
61362306a36Sopenharmony_ci	ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh,
61462306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_CREATE);
61562306a36Sopenharmony_ci	if (ret) {
61662306a36Sopenharmony_ci		mlog_errno(ret);
61762306a36Sopenharmony_ci		goto out_commit;
61862306a36Sopenharmony_ci	}
61962306a36Sopenharmony_ci
62062306a36Sopenharmony_ci	/* Initialize ocfs2_refcount_block. */
62162306a36Sopenharmony_ci	rb = (struct ocfs2_refcount_block *)new_bh->b_data;
62262306a36Sopenharmony_ci	memset(rb, 0, inode->i_sb->s_blocksize);
62362306a36Sopenharmony_ci	strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
62462306a36Sopenharmony_ci	rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
62562306a36Sopenharmony_ci	rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
62662306a36Sopenharmony_ci	rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
62762306a36Sopenharmony_ci	rb->rf_fs_generation = cpu_to_le32(osb->fs_generation);
62862306a36Sopenharmony_ci	rb->rf_blkno = cpu_to_le64(first_blkno);
62962306a36Sopenharmony_ci	rb->rf_count = cpu_to_le32(1);
63062306a36Sopenharmony_ci	rb->rf_records.rl_count =
63162306a36Sopenharmony_ci			cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb));
63262306a36Sopenharmony_ci	spin_lock(&osb->osb_lock);
63362306a36Sopenharmony_ci	rb->rf_generation = osb->s_next_generation++;
63462306a36Sopenharmony_ci	spin_unlock(&osb->osb_lock);
63562306a36Sopenharmony_ci
63662306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, new_bh);
63762306a36Sopenharmony_ci
63862306a36Sopenharmony_ci	spin_lock(&oi->ip_lock);
63962306a36Sopenharmony_ci	oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
64062306a36Sopenharmony_ci	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
64162306a36Sopenharmony_ci	di->i_refcount_loc = cpu_to_le64(first_blkno);
64262306a36Sopenharmony_ci	spin_unlock(&oi->ip_lock);
64362306a36Sopenharmony_ci
64462306a36Sopenharmony_ci	trace_ocfs2_create_refcount_tree_blkno((unsigned long long)first_blkno);
64562306a36Sopenharmony_ci
64662306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, di_bh);
64762306a36Sopenharmony_ci
64862306a36Sopenharmony_ci	/*
64962306a36Sopenharmony_ci	 * We have to init the tree lock here since it will use
65062306a36Sopenharmony_ci	 * the generation number to create it.
65162306a36Sopenharmony_ci	 */
65262306a36Sopenharmony_ci	new_tree->rf_generation = le32_to_cpu(rb->rf_generation);
65362306a36Sopenharmony_ci	ocfs2_init_refcount_tree_lock(osb, new_tree, first_blkno,
65462306a36Sopenharmony_ci				      new_tree->rf_generation);
65562306a36Sopenharmony_ci
65662306a36Sopenharmony_ci	spin_lock(&osb->osb_lock);
65762306a36Sopenharmony_ci	tree = ocfs2_find_refcount_tree(osb, first_blkno);
65862306a36Sopenharmony_ci
65962306a36Sopenharmony_ci	/*
66062306a36Sopenharmony_ci	 * We've just created a new refcount tree in this block.  If
66162306a36Sopenharmony_ci	 * we found a refcount tree on the ocfs2_super, it must be
66262306a36Sopenharmony_ci	 * one we just deleted.  We free the old tree before
66362306a36Sopenharmony_ci	 * inserting the new tree.
66462306a36Sopenharmony_ci	 */
66562306a36Sopenharmony_ci	BUG_ON(tree && tree->rf_generation == new_tree->rf_generation);
66662306a36Sopenharmony_ci	if (tree)
66762306a36Sopenharmony_ci		ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
66862306a36Sopenharmony_ci	ocfs2_insert_refcount_tree(osb, new_tree);
66962306a36Sopenharmony_ci	spin_unlock(&osb->osb_lock);
67062306a36Sopenharmony_ci	new_tree = NULL;
67162306a36Sopenharmony_ci	if (tree)
67262306a36Sopenharmony_ci		ocfs2_refcount_tree_put(tree);
67362306a36Sopenharmony_ci
67462306a36Sopenharmony_ciout_commit:
67562306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
67662306a36Sopenharmony_ci
67762306a36Sopenharmony_ciout:
67862306a36Sopenharmony_ci	if (new_tree) {
67962306a36Sopenharmony_ci		ocfs2_metadata_cache_exit(&new_tree->rf_ci);
68062306a36Sopenharmony_ci		kfree(new_tree);
68162306a36Sopenharmony_ci	}
68262306a36Sopenharmony_ci
68362306a36Sopenharmony_ci	brelse(new_bh);
68462306a36Sopenharmony_ci	if (meta_ac)
68562306a36Sopenharmony_ci		ocfs2_free_alloc_context(meta_ac);
68662306a36Sopenharmony_ci
68762306a36Sopenharmony_ci	return ret;
68862306a36Sopenharmony_ci}
68962306a36Sopenharmony_ci
69062306a36Sopenharmony_cistatic int ocfs2_set_refcount_tree(struct inode *inode,
69162306a36Sopenharmony_ci				   struct buffer_head *di_bh,
69262306a36Sopenharmony_ci				   u64 refcount_loc)
69362306a36Sopenharmony_ci{
69462306a36Sopenharmony_ci	int ret;
69562306a36Sopenharmony_ci	handle_t *handle = NULL;
69662306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
69762306a36Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
69862306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
69962306a36Sopenharmony_ci	struct buffer_head *ref_root_bh = NULL;
70062306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb;
70162306a36Sopenharmony_ci	struct ocfs2_refcount_tree *ref_tree;
70262306a36Sopenharmony_ci
70362306a36Sopenharmony_ci	BUG_ON(ocfs2_is_refcount_inode(inode));
70462306a36Sopenharmony_ci
70562306a36Sopenharmony_ci	ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
70662306a36Sopenharmony_ci				       &ref_tree, &ref_root_bh);
70762306a36Sopenharmony_ci	if (ret) {
70862306a36Sopenharmony_ci		mlog_errno(ret);
70962306a36Sopenharmony_ci		return ret;
71062306a36Sopenharmony_ci	}
71162306a36Sopenharmony_ci
71262306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_SET_CREDITS);
71362306a36Sopenharmony_ci	if (IS_ERR(handle)) {
71462306a36Sopenharmony_ci		ret = PTR_ERR(handle);
71562306a36Sopenharmony_ci		mlog_errno(ret);
71662306a36Sopenharmony_ci		goto out;
71762306a36Sopenharmony_ci	}
71862306a36Sopenharmony_ci
71962306a36Sopenharmony_ci	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
72062306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
72162306a36Sopenharmony_ci	if (ret) {
72262306a36Sopenharmony_ci		mlog_errno(ret);
72362306a36Sopenharmony_ci		goto out_commit;
72462306a36Sopenharmony_ci	}
72562306a36Sopenharmony_ci
72662306a36Sopenharmony_ci	ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, ref_root_bh,
72762306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
72862306a36Sopenharmony_ci	if (ret) {
72962306a36Sopenharmony_ci		mlog_errno(ret);
73062306a36Sopenharmony_ci		goto out_commit;
73162306a36Sopenharmony_ci	}
73262306a36Sopenharmony_ci
73362306a36Sopenharmony_ci	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
73462306a36Sopenharmony_ci	le32_add_cpu(&rb->rf_count, 1);
73562306a36Sopenharmony_ci
73662306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, ref_root_bh);
73762306a36Sopenharmony_ci
73862306a36Sopenharmony_ci	spin_lock(&oi->ip_lock);
73962306a36Sopenharmony_ci	oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
74062306a36Sopenharmony_ci	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
74162306a36Sopenharmony_ci	di->i_refcount_loc = cpu_to_le64(refcount_loc);
74262306a36Sopenharmony_ci	spin_unlock(&oi->ip_lock);
74362306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, di_bh);
74462306a36Sopenharmony_ci
74562306a36Sopenharmony_ciout_commit:
74662306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
74762306a36Sopenharmony_ciout:
74862306a36Sopenharmony_ci	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
74962306a36Sopenharmony_ci	brelse(ref_root_bh);
75062306a36Sopenharmony_ci
75162306a36Sopenharmony_ci	return ret;
75262306a36Sopenharmony_ci}
75362306a36Sopenharmony_ci
75462306a36Sopenharmony_ciint ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
75562306a36Sopenharmony_ci{
75662306a36Sopenharmony_ci	int ret, delete_tree = 0;
75762306a36Sopenharmony_ci	handle_t *handle = NULL;
75862306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
75962306a36Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
76062306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
76162306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb;
76262306a36Sopenharmony_ci	struct inode *alloc_inode = NULL;
76362306a36Sopenharmony_ci	struct buffer_head *alloc_bh = NULL;
76462306a36Sopenharmony_ci	struct buffer_head *blk_bh = NULL;
76562306a36Sopenharmony_ci	struct ocfs2_refcount_tree *ref_tree;
76662306a36Sopenharmony_ci	int credits = OCFS2_REFCOUNT_TREE_REMOVE_CREDITS;
76762306a36Sopenharmony_ci	u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc);
76862306a36Sopenharmony_ci	u16 bit = 0;
76962306a36Sopenharmony_ci
77062306a36Sopenharmony_ci	if (!ocfs2_is_refcount_inode(inode))
77162306a36Sopenharmony_ci		return 0;
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_ci	BUG_ON(!ref_blkno);
77462306a36Sopenharmony_ci	ret = ocfs2_lock_refcount_tree(osb, ref_blkno, 1, &ref_tree, &blk_bh);
77562306a36Sopenharmony_ci	if (ret) {
77662306a36Sopenharmony_ci		mlog_errno(ret);
77762306a36Sopenharmony_ci		return ret;
77862306a36Sopenharmony_ci	}
77962306a36Sopenharmony_ci
78062306a36Sopenharmony_ci	rb = (struct ocfs2_refcount_block *)blk_bh->b_data;
78162306a36Sopenharmony_ci
78262306a36Sopenharmony_ci	/*
78362306a36Sopenharmony_ci	 * If we are the last user, we need to free the block.
78462306a36Sopenharmony_ci	 * So lock the allocator ahead.
78562306a36Sopenharmony_ci	 */
78662306a36Sopenharmony_ci	if (le32_to_cpu(rb->rf_count) == 1) {
78762306a36Sopenharmony_ci		blk = le64_to_cpu(rb->rf_blkno);
78862306a36Sopenharmony_ci		bit = le16_to_cpu(rb->rf_suballoc_bit);
78962306a36Sopenharmony_ci		if (rb->rf_suballoc_loc)
79062306a36Sopenharmony_ci			bg_blkno = le64_to_cpu(rb->rf_suballoc_loc);
79162306a36Sopenharmony_ci		else
79262306a36Sopenharmony_ci			bg_blkno = ocfs2_which_suballoc_group(blk, bit);
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci		alloc_inode = ocfs2_get_system_file_inode(osb,
79562306a36Sopenharmony_ci					EXTENT_ALLOC_SYSTEM_INODE,
79662306a36Sopenharmony_ci					le16_to_cpu(rb->rf_suballoc_slot));
79762306a36Sopenharmony_ci		if (!alloc_inode) {
79862306a36Sopenharmony_ci			ret = -ENOMEM;
79962306a36Sopenharmony_ci			mlog_errno(ret);
80062306a36Sopenharmony_ci			goto out;
80162306a36Sopenharmony_ci		}
80262306a36Sopenharmony_ci		inode_lock(alloc_inode);
80362306a36Sopenharmony_ci
80462306a36Sopenharmony_ci		ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1);
80562306a36Sopenharmony_ci		if (ret) {
80662306a36Sopenharmony_ci			mlog_errno(ret);
80762306a36Sopenharmony_ci			goto out_mutex;
80862306a36Sopenharmony_ci		}
80962306a36Sopenharmony_ci
81062306a36Sopenharmony_ci		credits += OCFS2_SUBALLOC_FREE;
81162306a36Sopenharmony_ci	}
81262306a36Sopenharmony_ci
81362306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, credits);
81462306a36Sopenharmony_ci	if (IS_ERR(handle)) {
81562306a36Sopenharmony_ci		ret = PTR_ERR(handle);
81662306a36Sopenharmony_ci		mlog_errno(ret);
81762306a36Sopenharmony_ci		goto out_unlock;
81862306a36Sopenharmony_ci	}
81962306a36Sopenharmony_ci
82062306a36Sopenharmony_ci	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
82162306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
82262306a36Sopenharmony_ci	if (ret) {
82362306a36Sopenharmony_ci		mlog_errno(ret);
82462306a36Sopenharmony_ci		goto out_commit;
82562306a36Sopenharmony_ci	}
82662306a36Sopenharmony_ci
82762306a36Sopenharmony_ci	ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, blk_bh,
82862306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
82962306a36Sopenharmony_ci	if (ret) {
83062306a36Sopenharmony_ci		mlog_errno(ret);
83162306a36Sopenharmony_ci		goto out_commit;
83262306a36Sopenharmony_ci	}
83362306a36Sopenharmony_ci
83462306a36Sopenharmony_ci	spin_lock(&oi->ip_lock);
83562306a36Sopenharmony_ci	oi->ip_dyn_features &= ~OCFS2_HAS_REFCOUNT_FL;
83662306a36Sopenharmony_ci	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
83762306a36Sopenharmony_ci	di->i_refcount_loc = 0;
83862306a36Sopenharmony_ci	spin_unlock(&oi->ip_lock);
83962306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, di_bh);
84062306a36Sopenharmony_ci
84162306a36Sopenharmony_ci	le32_add_cpu(&rb->rf_count , -1);
84262306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, blk_bh);
84362306a36Sopenharmony_ci
84462306a36Sopenharmony_ci	if (!rb->rf_count) {
84562306a36Sopenharmony_ci		delete_tree = 1;
84662306a36Sopenharmony_ci		ocfs2_erase_refcount_tree_from_list(osb, ref_tree);
84762306a36Sopenharmony_ci		ret = ocfs2_free_suballoc_bits(handle, alloc_inode,
84862306a36Sopenharmony_ci					       alloc_bh, bit, bg_blkno, 1);
84962306a36Sopenharmony_ci		if (ret)
85062306a36Sopenharmony_ci			mlog_errno(ret);
85162306a36Sopenharmony_ci	}
85262306a36Sopenharmony_ci
85362306a36Sopenharmony_ciout_commit:
85462306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
85562306a36Sopenharmony_ciout_unlock:
85662306a36Sopenharmony_ci	if (alloc_inode) {
85762306a36Sopenharmony_ci		ocfs2_inode_unlock(alloc_inode, 1);
85862306a36Sopenharmony_ci		brelse(alloc_bh);
85962306a36Sopenharmony_ci	}
86062306a36Sopenharmony_ciout_mutex:
86162306a36Sopenharmony_ci	if (alloc_inode) {
86262306a36Sopenharmony_ci		inode_unlock(alloc_inode);
86362306a36Sopenharmony_ci		iput(alloc_inode);
86462306a36Sopenharmony_ci	}
86562306a36Sopenharmony_ciout:
86662306a36Sopenharmony_ci	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
86762306a36Sopenharmony_ci	if (delete_tree)
86862306a36Sopenharmony_ci		ocfs2_refcount_tree_put(ref_tree);
86962306a36Sopenharmony_ci	brelse(blk_bh);
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci	return ret;
87262306a36Sopenharmony_ci}
87362306a36Sopenharmony_ci
87462306a36Sopenharmony_cistatic void ocfs2_find_refcount_rec_in_rl(struct ocfs2_caching_info *ci,
87562306a36Sopenharmony_ci					  struct buffer_head *ref_leaf_bh,
87662306a36Sopenharmony_ci					  u64 cpos, unsigned int len,
87762306a36Sopenharmony_ci					  struct ocfs2_refcount_rec *ret_rec,
87862306a36Sopenharmony_ci					  int *index)
87962306a36Sopenharmony_ci{
88062306a36Sopenharmony_ci	int i = 0;
88162306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb =
88262306a36Sopenharmony_ci		(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
88362306a36Sopenharmony_ci	struct ocfs2_refcount_rec *rec = NULL;
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci	for (; i < le16_to_cpu(rb->rf_records.rl_used); i++) {
88662306a36Sopenharmony_ci		rec = &rb->rf_records.rl_recs[i];
88762306a36Sopenharmony_ci
88862306a36Sopenharmony_ci		if (le64_to_cpu(rec->r_cpos) +
88962306a36Sopenharmony_ci		    le32_to_cpu(rec->r_clusters) <= cpos)
89062306a36Sopenharmony_ci			continue;
89162306a36Sopenharmony_ci		else if (le64_to_cpu(rec->r_cpos) > cpos)
89262306a36Sopenharmony_ci			break;
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_ci		/* ok, cpos fail in this rec. Just return. */
89562306a36Sopenharmony_ci		if (ret_rec)
89662306a36Sopenharmony_ci			*ret_rec = *rec;
89762306a36Sopenharmony_ci		goto out;
89862306a36Sopenharmony_ci	}
89962306a36Sopenharmony_ci
90062306a36Sopenharmony_ci	if (ret_rec) {
90162306a36Sopenharmony_ci		/* We meet with a hole here, so fake the rec. */
90262306a36Sopenharmony_ci		ret_rec->r_cpos = cpu_to_le64(cpos);
90362306a36Sopenharmony_ci		ret_rec->r_refcount = 0;
90462306a36Sopenharmony_ci		if (i < le16_to_cpu(rb->rf_records.rl_used) &&
90562306a36Sopenharmony_ci		    le64_to_cpu(rec->r_cpos) < cpos + len)
90662306a36Sopenharmony_ci			ret_rec->r_clusters =
90762306a36Sopenharmony_ci				cpu_to_le32(le64_to_cpu(rec->r_cpos) - cpos);
90862306a36Sopenharmony_ci		else
90962306a36Sopenharmony_ci			ret_rec->r_clusters = cpu_to_le32(len);
91062306a36Sopenharmony_ci	}
91162306a36Sopenharmony_ci
91262306a36Sopenharmony_ciout:
91362306a36Sopenharmony_ci	*index = i;
91462306a36Sopenharmony_ci}
91562306a36Sopenharmony_ci
91662306a36Sopenharmony_ci/*
91762306a36Sopenharmony_ci * Try to remove refcount tree. The mechanism is:
91862306a36Sopenharmony_ci * 1) Check whether i_clusters == 0, if no, exit.
91962306a36Sopenharmony_ci * 2) check whether we have i_xattr_loc in dinode. if yes, exit.
92062306a36Sopenharmony_ci * 3) Check whether we have inline xattr stored outside, if yes, exit.
92162306a36Sopenharmony_ci * 4) Remove the tree.
92262306a36Sopenharmony_ci */
92362306a36Sopenharmony_ciint ocfs2_try_remove_refcount_tree(struct inode *inode,
92462306a36Sopenharmony_ci				   struct buffer_head *di_bh)
92562306a36Sopenharmony_ci{
92662306a36Sopenharmony_ci	int ret;
92762306a36Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
92862306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
92962306a36Sopenharmony_ci
93062306a36Sopenharmony_ci	down_write(&oi->ip_xattr_sem);
93162306a36Sopenharmony_ci	down_write(&oi->ip_alloc_sem);
93262306a36Sopenharmony_ci
93362306a36Sopenharmony_ci	if (oi->ip_clusters)
93462306a36Sopenharmony_ci		goto out;
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_ci	if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) && di->i_xattr_loc)
93762306a36Sopenharmony_ci		goto out;
93862306a36Sopenharmony_ci
93962306a36Sopenharmony_ci	if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL &&
94062306a36Sopenharmony_ci	    ocfs2_has_inline_xattr_value_outside(inode, di))
94162306a36Sopenharmony_ci		goto out;
94262306a36Sopenharmony_ci
94362306a36Sopenharmony_ci	ret = ocfs2_remove_refcount_tree(inode, di_bh);
94462306a36Sopenharmony_ci	if (ret)
94562306a36Sopenharmony_ci		mlog_errno(ret);
94662306a36Sopenharmony_ciout:
94762306a36Sopenharmony_ci	up_write(&oi->ip_alloc_sem);
94862306a36Sopenharmony_ci	up_write(&oi->ip_xattr_sem);
94962306a36Sopenharmony_ci	return 0;
95062306a36Sopenharmony_ci}
95162306a36Sopenharmony_ci
95262306a36Sopenharmony_ci/*
95362306a36Sopenharmony_ci * Find the end range for a leaf refcount block indicated by
95462306a36Sopenharmony_ci * el->l_recs[index].e_blkno.
95562306a36Sopenharmony_ci */
95662306a36Sopenharmony_cistatic int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci,
95762306a36Sopenharmony_ci				       struct buffer_head *ref_root_bh,
95862306a36Sopenharmony_ci				       struct ocfs2_extent_block *eb,
95962306a36Sopenharmony_ci				       struct ocfs2_extent_list *el,
96062306a36Sopenharmony_ci				       int index,  u32 *cpos_end)
96162306a36Sopenharmony_ci{
96262306a36Sopenharmony_ci	int ret, i, subtree_root;
96362306a36Sopenharmony_ci	u32 cpos;
96462306a36Sopenharmony_ci	u64 blkno;
96562306a36Sopenharmony_ci	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
96662306a36Sopenharmony_ci	struct ocfs2_path *left_path = NULL, *right_path = NULL;
96762306a36Sopenharmony_ci	struct ocfs2_extent_tree et;
96862306a36Sopenharmony_ci	struct ocfs2_extent_list *tmp_el;
96962306a36Sopenharmony_ci
97062306a36Sopenharmony_ci	if (index < le16_to_cpu(el->l_next_free_rec) - 1) {
97162306a36Sopenharmony_ci		/*
97262306a36Sopenharmony_ci		 * We have a extent rec after index, so just use the e_cpos
97362306a36Sopenharmony_ci		 * of the next extent rec.
97462306a36Sopenharmony_ci		 */
97562306a36Sopenharmony_ci		*cpos_end = le32_to_cpu(el->l_recs[index+1].e_cpos);
97662306a36Sopenharmony_ci		return 0;
97762306a36Sopenharmony_ci	}
97862306a36Sopenharmony_ci
97962306a36Sopenharmony_ci	if (!eb || !eb->h_next_leaf_blk) {
98062306a36Sopenharmony_ci		/*
98162306a36Sopenharmony_ci		 * We are the last extent rec, so any high cpos should
98262306a36Sopenharmony_ci		 * be stored in this leaf refcount block.
98362306a36Sopenharmony_ci		 */
98462306a36Sopenharmony_ci		*cpos_end = UINT_MAX;
98562306a36Sopenharmony_ci		return 0;
98662306a36Sopenharmony_ci	}
98762306a36Sopenharmony_ci
98862306a36Sopenharmony_ci	/*
98962306a36Sopenharmony_ci	 * If the extent block isn't the last one, we have to find
99062306a36Sopenharmony_ci	 * the subtree root between this extent block and the next
99162306a36Sopenharmony_ci	 * leaf extent block and get the corresponding e_cpos from
99262306a36Sopenharmony_ci	 * the subroot. Otherwise we may corrupt the b-tree.
99362306a36Sopenharmony_ci	 */
99462306a36Sopenharmony_ci	ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
99562306a36Sopenharmony_ci
99662306a36Sopenharmony_ci	left_path = ocfs2_new_path_from_et(&et);
99762306a36Sopenharmony_ci	if (!left_path) {
99862306a36Sopenharmony_ci		ret = -ENOMEM;
99962306a36Sopenharmony_ci		mlog_errno(ret);
100062306a36Sopenharmony_ci		goto out;
100162306a36Sopenharmony_ci	}
100262306a36Sopenharmony_ci
100362306a36Sopenharmony_ci	cpos = le32_to_cpu(eb->h_list.l_recs[index].e_cpos);
100462306a36Sopenharmony_ci	ret = ocfs2_find_path(ci, left_path, cpos);
100562306a36Sopenharmony_ci	if (ret) {
100662306a36Sopenharmony_ci		mlog_errno(ret);
100762306a36Sopenharmony_ci		goto out;
100862306a36Sopenharmony_ci	}
100962306a36Sopenharmony_ci
101062306a36Sopenharmony_ci	right_path = ocfs2_new_path_from_path(left_path);
101162306a36Sopenharmony_ci	if (!right_path) {
101262306a36Sopenharmony_ci		ret = -ENOMEM;
101362306a36Sopenharmony_ci		mlog_errno(ret);
101462306a36Sopenharmony_ci		goto out;
101562306a36Sopenharmony_ci	}
101662306a36Sopenharmony_ci
101762306a36Sopenharmony_ci	ret = ocfs2_find_cpos_for_right_leaf(sb, left_path, &cpos);
101862306a36Sopenharmony_ci	if (ret) {
101962306a36Sopenharmony_ci		mlog_errno(ret);
102062306a36Sopenharmony_ci		goto out;
102162306a36Sopenharmony_ci	}
102262306a36Sopenharmony_ci
102362306a36Sopenharmony_ci	ret = ocfs2_find_path(ci, right_path, cpos);
102462306a36Sopenharmony_ci	if (ret) {
102562306a36Sopenharmony_ci		mlog_errno(ret);
102662306a36Sopenharmony_ci		goto out;
102762306a36Sopenharmony_ci	}
102862306a36Sopenharmony_ci
102962306a36Sopenharmony_ci	subtree_root = ocfs2_find_subtree_root(&et, left_path,
103062306a36Sopenharmony_ci					       right_path);
103162306a36Sopenharmony_ci
103262306a36Sopenharmony_ci	tmp_el = left_path->p_node[subtree_root].el;
103362306a36Sopenharmony_ci	blkno = left_path->p_node[subtree_root+1].bh->b_blocknr;
103462306a36Sopenharmony_ci	for (i = 0; i < le16_to_cpu(tmp_el->l_next_free_rec); i++) {
103562306a36Sopenharmony_ci		if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) {
103662306a36Sopenharmony_ci			*cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos);
103762306a36Sopenharmony_ci			break;
103862306a36Sopenharmony_ci		}
103962306a36Sopenharmony_ci	}
104062306a36Sopenharmony_ci
104162306a36Sopenharmony_ci	BUG_ON(i == le16_to_cpu(tmp_el->l_next_free_rec));
104262306a36Sopenharmony_ci
104362306a36Sopenharmony_ciout:
104462306a36Sopenharmony_ci	ocfs2_free_path(left_path);
104562306a36Sopenharmony_ci	ocfs2_free_path(right_path);
104662306a36Sopenharmony_ci	return ret;
104762306a36Sopenharmony_ci}
104862306a36Sopenharmony_ci
104962306a36Sopenharmony_ci/*
105062306a36Sopenharmony_ci * Given a cpos and len, try to find the refcount record which contains cpos.
105162306a36Sopenharmony_ci * 1. If cpos can be found in one refcount record, return the record.
105262306a36Sopenharmony_ci * 2. If cpos can't be found, return a fake record which start from cpos
105362306a36Sopenharmony_ci *    and end at a small value between cpos+len and start of the next record.
105462306a36Sopenharmony_ci *    This fake record has r_refcount = 0.
105562306a36Sopenharmony_ci */
105662306a36Sopenharmony_cistatic int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
105762306a36Sopenharmony_ci				  struct buffer_head *ref_root_bh,
105862306a36Sopenharmony_ci				  u64 cpos, unsigned int len,
105962306a36Sopenharmony_ci				  struct ocfs2_refcount_rec *ret_rec,
106062306a36Sopenharmony_ci				  int *index,
106162306a36Sopenharmony_ci				  struct buffer_head **ret_bh)
106262306a36Sopenharmony_ci{
106362306a36Sopenharmony_ci	int ret = 0, i, found;
106462306a36Sopenharmony_ci	u32 low_cpos, cpos_end;
106562306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
106662306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec = NULL;
106762306a36Sopenharmony_ci	struct ocfs2_extent_block *eb = NULL;
106862306a36Sopenharmony_ci	struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL;
106962306a36Sopenharmony_ci	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
107062306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb =
107162306a36Sopenharmony_ci			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
107262306a36Sopenharmony_ci
107362306a36Sopenharmony_ci	if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)) {
107462306a36Sopenharmony_ci		ocfs2_find_refcount_rec_in_rl(ci, ref_root_bh, cpos, len,
107562306a36Sopenharmony_ci					      ret_rec, index);
107662306a36Sopenharmony_ci		*ret_bh = ref_root_bh;
107762306a36Sopenharmony_ci		get_bh(ref_root_bh);
107862306a36Sopenharmony_ci		return 0;
107962306a36Sopenharmony_ci	}
108062306a36Sopenharmony_ci
108162306a36Sopenharmony_ci	el = &rb->rf_list;
108262306a36Sopenharmony_ci	low_cpos = cpos & OCFS2_32BIT_POS_MASK;
108362306a36Sopenharmony_ci
108462306a36Sopenharmony_ci	if (el->l_tree_depth) {
108562306a36Sopenharmony_ci		ret = ocfs2_find_leaf(ci, el, low_cpos, &eb_bh);
108662306a36Sopenharmony_ci		if (ret) {
108762306a36Sopenharmony_ci			mlog_errno(ret);
108862306a36Sopenharmony_ci			goto out;
108962306a36Sopenharmony_ci		}
109062306a36Sopenharmony_ci
109162306a36Sopenharmony_ci		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
109262306a36Sopenharmony_ci		el = &eb->h_list;
109362306a36Sopenharmony_ci
109462306a36Sopenharmony_ci		if (el->l_tree_depth) {
109562306a36Sopenharmony_ci			ret = ocfs2_error(sb,
109662306a36Sopenharmony_ci					  "refcount tree %llu has non zero tree depth in leaf btree tree block %llu\n",
109762306a36Sopenharmony_ci					  (unsigned long long)ocfs2_metadata_cache_owner(ci),
109862306a36Sopenharmony_ci					  (unsigned long long)eb_bh->b_blocknr);
109962306a36Sopenharmony_ci			goto out;
110062306a36Sopenharmony_ci		}
110162306a36Sopenharmony_ci	}
110262306a36Sopenharmony_ci
110362306a36Sopenharmony_ci	found = 0;
110462306a36Sopenharmony_ci	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
110562306a36Sopenharmony_ci		rec = &el->l_recs[i];
110662306a36Sopenharmony_ci
110762306a36Sopenharmony_ci		if (le32_to_cpu(rec->e_cpos) <= low_cpos) {
110862306a36Sopenharmony_ci			found = 1;
110962306a36Sopenharmony_ci			break;
111062306a36Sopenharmony_ci		}
111162306a36Sopenharmony_ci	}
111262306a36Sopenharmony_ci
111362306a36Sopenharmony_ci	if (found) {
111462306a36Sopenharmony_ci		ret = ocfs2_get_refcount_cpos_end(ci, ref_root_bh,
111562306a36Sopenharmony_ci						  eb, el, i, &cpos_end);
111662306a36Sopenharmony_ci		if (ret) {
111762306a36Sopenharmony_ci			mlog_errno(ret);
111862306a36Sopenharmony_ci			goto out;
111962306a36Sopenharmony_ci		}
112062306a36Sopenharmony_ci
112162306a36Sopenharmony_ci		if (cpos_end < low_cpos + len)
112262306a36Sopenharmony_ci			len = cpos_end - low_cpos;
112362306a36Sopenharmony_ci	}
112462306a36Sopenharmony_ci
112562306a36Sopenharmony_ci	ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno),
112662306a36Sopenharmony_ci					&ref_leaf_bh);
112762306a36Sopenharmony_ci	if (ret) {
112862306a36Sopenharmony_ci		mlog_errno(ret);
112962306a36Sopenharmony_ci		goto out;
113062306a36Sopenharmony_ci	}
113162306a36Sopenharmony_ci
113262306a36Sopenharmony_ci	ocfs2_find_refcount_rec_in_rl(ci, ref_leaf_bh, cpos, len,
113362306a36Sopenharmony_ci				      ret_rec, index);
113462306a36Sopenharmony_ci	*ret_bh = ref_leaf_bh;
113562306a36Sopenharmony_ciout:
113662306a36Sopenharmony_ci	brelse(eb_bh);
113762306a36Sopenharmony_ci	return ret;
113862306a36Sopenharmony_ci}
113962306a36Sopenharmony_ci
/* How a refcount record can be merged with its neighbours in the list. */
enum ocfs2_ref_rec_contig {
	REF_CONTIG_NONE		= 0,	/* no mergeable neighbour */
	REF_CONTIG_LEFT		= 1,	/* mergeable with the previous record */
	REF_CONTIG_RIGHT	= 2,	/* mergeable with the next record */
	REF_CONTIG_LEFTRIGHT	= 3,	/* mergeable with both neighbours */
};
114662306a36Sopenharmony_ci
114762306a36Sopenharmony_cistatic enum ocfs2_ref_rec_contig
114862306a36Sopenharmony_ci	ocfs2_refcount_rec_adjacent(struct ocfs2_refcount_block *rb,
114962306a36Sopenharmony_ci				    int index)
115062306a36Sopenharmony_ci{
115162306a36Sopenharmony_ci	if ((rb->rf_records.rl_recs[index].r_refcount ==
115262306a36Sopenharmony_ci	    rb->rf_records.rl_recs[index + 1].r_refcount) &&
115362306a36Sopenharmony_ci	    (le64_to_cpu(rb->rf_records.rl_recs[index].r_cpos) +
115462306a36Sopenharmony_ci	    le32_to_cpu(rb->rf_records.rl_recs[index].r_clusters) ==
115562306a36Sopenharmony_ci	    le64_to_cpu(rb->rf_records.rl_recs[index + 1].r_cpos)))
115662306a36Sopenharmony_ci		return REF_CONTIG_RIGHT;
115762306a36Sopenharmony_ci
115862306a36Sopenharmony_ci	return REF_CONTIG_NONE;
115962306a36Sopenharmony_ci}
116062306a36Sopenharmony_ci
116162306a36Sopenharmony_cistatic enum ocfs2_ref_rec_contig
116262306a36Sopenharmony_ci	ocfs2_refcount_rec_contig(struct ocfs2_refcount_block *rb,
116362306a36Sopenharmony_ci				  int index)
116462306a36Sopenharmony_ci{
116562306a36Sopenharmony_ci	enum ocfs2_ref_rec_contig ret = REF_CONTIG_NONE;
116662306a36Sopenharmony_ci
116762306a36Sopenharmony_ci	if (index < le16_to_cpu(rb->rf_records.rl_used) - 1)
116862306a36Sopenharmony_ci		ret = ocfs2_refcount_rec_adjacent(rb, index);
116962306a36Sopenharmony_ci
117062306a36Sopenharmony_ci	if (index > 0) {
117162306a36Sopenharmony_ci		enum ocfs2_ref_rec_contig tmp;
117262306a36Sopenharmony_ci
117362306a36Sopenharmony_ci		tmp = ocfs2_refcount_rec_adjacent(rb, index - 1);
117462306a36Sopenharmony_ci
117562306a36Sopenharmony_ci		if (tmp == REF_CONTIG_RIGHT) {
117662306a36Sopenharmony_ci			if (ret == REF_CONTIG_RIGHT)
117762306a36Sopenharmony_ci				ret = REF_CONTIG_LEFTRIGHT;
117862306a36Sopenharmony_ci			else
117962306a36Sopenharmony_ci				ret = REF_CONTIG_LEFT;
118062306a36Sopenharmony_ci		}
118162306a36Sopenharmony_ci	}
118262306a36Sopenharmony_ci
118362306a36Sopenharmony_ci	return ret;
118462306a36Sopenharmony_ci}
118562306a36Sopenharmony_ci
118662306a36Sopenharmony_cistatic void ocfs2_rotate_refcount_rec_left(struct ocfs2_refcount_block *rb,
118762306a36Sopenharmony_ci					   int index)
118862306a36Sopenharmony_ci{
118962306a36Sopenharmony_ci	BUG_ON(rb->rf_records.rl_recs[index].r_refcount !=
119062306a36Sopenharmony_ci	       rb->rf_records.rl_recs[index+1].r_refcount);
119162306a36Sopenharmony_ci
119262306a36Sopenharmony_ci	le32_add_cpu(&rb->rf_records.rl_recs[index].r_clusters,
119362306a36Sopenharmony_ci		     le32_to_cpu(rb->rf_records.rl_recs[index+1].r_clusters));
119462306a36Sopenharmony_ci
119562306a36Sopenharmony_ci	if (index < le16_to_cpu(rb->rf_records.rl_used) - 2)
119662306a36Sopenharmony_ci		memmove(&rb->rf_records.rl_recs[index + 1],
119762306a36Sopenharmony_ci			&rb->rf_records.rl_recs[index + 2],
119862306a36Sopenharmony_ci			sizeof(struct ocfs2_refcount_rec) *
119962306a36Sopenharmony_ci			(le16_to_cpu(rb->rf_records.rl_used) - index - 2));
120062306a36Sopenharmony_ci
120162306a36Sopenharmony_ci	memset(&rb->rf_records.rl_recs[le16_to_cpu(rb->rf_records.rl_used) - 1],
120262306a36Sopenharmony_ci	       0, sizeof(struct ocfs2_refcount_rec));
120362306a36Sopenharmony_ci	le16_add_cpu(&rb->rf_records.rl_used, -1);
120462306a36Sopenharmony_ci}
120562306a36Sopenharmony_ci
120662306a36Sopenharmony_ci/*
120762306a36Sopenharmony_ci * Merge the refcount rec if we are contiguous with the adjacent recs.
120862306a36Sopenharmony_ci */
120962306a36Sopenharmony_cistatic void ocfs2_refcount_rec_merge(struct ocfs2_refcount_block *rb,
121062306a36Sopenharmony_ci				     int index)
121162306a36Sopenharmony_ci{
121262306a36Sopenharmony_ci	enum ocfs2_ref_rec_contig contig =
121362306a36Sopenharmony_ci				ocfs2_refcount_rec_contig(rb, index);
121462306a36Sopenharmony_ci
121562306a36Sopenharmony_ci	if (contig == REF_CONTIG_NONE)
121662306a36Sopenharmony_ci		return;
121762306a36Sopenharmony_ci
121862306a36Sopenharmony_ci	if (contig == REF_CONTIG_LEFT || contig == REF_CONTIG_LEFTRIGHT) {
121962306a36Sopenharmony_ci		BUG_ON(index == 0);
122062306a36Sopenharmony_ci		index--;
122162306a36Sopenharmony_ci	}
122262306a36Sopenharmony_ci
122362306a36Sopenharmony_ci	ocfs2_rotate_refcount_rec_left(rb, index);
122462306a36Sopenharmony_ci
122562306a36Sopenharmony_ci	if (contig == REF_CONTIG_LEFTRIGHT)
122662306a36Sopenharmony_ci		ocfs2_rotate_refcount_rec_left(rb, index);
122762306a36Sopenharmony_ci}
122862306a36Sopenharmony_ci
122962306a36Sopenharmony_ci/*
123062306a36Sopenharmony_ci * Change the refcount indexed by "index" in ref_bh.
123162306a36Sopenharmony_ci * If refcount reaches 0, remove it.
123262306a36Sopenharmony_ci */
123362306a36Sopenharmony_cistatic int ocfs2_change_refcount_rec(handle_t *handle,
123462306a36Sopenharmony_ci				     struct ocfs2_caching_info *ci,
123562306a36Sopenharmony_ci				     struct buffer_head *ref_leaf_bh,
123662306a36Sopenharmony_ci				     int index, int merge, int change)
123762306a36Sopenharmony_ci{
123862306a36Sopenharmony_ci	int ret;
123962306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb =
124062306a36Sopenharmony_ci			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
124162306a36Sopenharmony_ci	struct ocfs2_refcount_list *rl = &rb->rf_records;
124262306a36Sopenharmony_ci	struct ocfs2_refcount_rec *rec = &rl->rl_recs[index];
124362306a36Sopenharmony_ci
124462306a36Sopenharmony_ci	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
124562306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
124662306a36Sopenharmony_ci	if (ret) {
124762306a36Sopenharmony_ci		mlog_errno(ret);
124862306a36Sopenharmony_ci		goto out;
124962306a36Sopenharmony_ci	}
125062306a36Sopenharmony_ci
125162306a36Sopenharmony_ci	trace_ocfs2_change_refcount_rec(
125262306a36Sopenharmony_ci		(unsigned long long)ocfs2_metadata_cache_owner(ci),
125362306a36Sopenharmony_ci		index, le32_to_cpu(rec->r_refcount), change);
125462306a36Sopenharmony_ci	le32_add_cpu(&rec->r_refcount, change);
125562306a36Sopenharmony_ci
125662306a36Sopenharmony_ci	if (!rec->r_refcount) {
125762306a36Sopenharmony_ci		if (index != le16_to_cpu(rl->rl_used) - 1) {
125862306a36Sopenharmony_ci			memmove(rec, rec + 1,
125962306a36Sopenharmony_ci				(le16_to_cpu(rl->rl_used) - index - 1) *
126062306a36Sopenharmony_ci				sizeof(struct ocfs2_refcount_rec));
126162306a36Sopenharmony_ci			memset(&rl->rl_recs[le16_to_cpu(rl->rl_used) - 1],
126262306a36Sopenharmony_ci			       0, sizeof(struct ocfs2_refcount_rec));
126362306a36Sopenharmony_ci		}
126462306a36Sopenharmony_ci
126562306a36Sopenharmony_ci		le16_add_cpu(&rl->rl_used, -1);
126662306a36Sopenharmony_ci	} else if (merge)
126762306a36Sopenharmony_ci		ocfs2_refcount_rec_merge(rb, index);
126862306a36Sopenharmony_ci
126962306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, ref_leaf_bh);
127062306a36Sopenharmony_ciout:
127162306a36Sopenharmony_ci	return ret;
127262306a36Sopenharmony_ci}
127362306a36Sopenharmony_ci
127462306a36Sopenharmony_cistatic int ocfs2_expand_inline_ref_root(handle_t *handle,
127562306a36Sopenharmony_ci					struct ocfs2_caching_info *ci,
127662306a36Sopenharmony_ci					struct buffer_head *ref_root_bh,
127762306a36Sopenharmony_ci					struct buffer_head **ref_leaf_bh,
127862306a36Sopenharmony_ci					struct ocfs2_alloc_context *meta_ac)
127962306a36Sopenharmony_ci{
128062306a36Sopenharmony_ci	int ret;
128162306a36Sopenharmony_ci	u16 suballoc_bit_start;
128262306a36Sopenharmony_ci	u32 num_got;
128362306a36Sopenharmony_ci	u64 suballoc_loc, blkno;
128462306a36Sopenharmony_ci	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
128562306a36Sopenharmony_ci	struct buffer_head *new_bh = NULL;
128662306a36Sopenharmony_ci	struct ocfs2_refcount_block *new_rb;
128762306a36Sopenharmony_ci	struct ocfs2_refcount_block *root_rb =
128862306a36Sopenharmony_ci			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
128962306a36Sopenharmony_ci
129062306a36Sopenharmony_ci	ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
129162306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
129262306a36Sopenharmony_ci	if (ret) {
129362306a36Sopenharmony_ci		mlog_errno(ret);
129462306a36Sopenharmony_ci		goto out;
129562306a36Sopenharmony_ci	}
129662306a36Sopenharmony_ci
129762306a36Sopenharmony_ci	ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
129862306a36Sopenharmony_ci				   &suballoc_bit_start, &num_got,
129962306a36Sopenharmony_ci				   &blkno);
130062306a36Sopenharmony_ci	if (ret) {
130162306a36Sopenharmony_ci		mlog_errno(ret);
130262306a36Sopenharmony_ci		goto out;
130362306a36Sopenharmony_ci	}
130462306a36Sopenharmony_ci
130562306a36Sopenharmony_ci	new_bh = sb_getblk(sb, blkno);
130662306a36Sopenharmony_ci	if (new_bh == NULL) {
130762306a36Sopenharmony_ci		ret = -ENOMEM;
130862306a36Sopenharmony_ci		mlog_errno(ret);
130962306a36Sopenharmony_ci		goto out;
131062306a36Sopenharmony_ci	}
131162306a36Sopenharmony_ci	ocfs2_set_new_buffer_uptodate(ci, new_bh);
131262306a36Sopenharmony_ci
131362306a36Sopenharmony_ci	ret = ocfs2_journal_access_rb(handle, ci, new_bh,
131462306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_CREATE);
131562306a36Sopenharmony_ci	if (ret) {
131662306a36Sopenharmony_ci		mlog_errno(ret);
131762306a36Sopenharmony_ci		goto out;
131862306a36Sopenharmony_ci	}
131962306a36Sopenharmony_ci
132062306a36Sopenharmony_ci	/*
132162306a36Sopenharmony_ci	 * Initialize ocfs2_refcount_block.
132262306a36Sopenharmony_ci	 * It should contain the same information as the old root.
132362306a36Sopenharmony_ci	 * so just memcpy it and change the corresponding field.
132462306a36Sopenharmony_ci	 */
132562306a36Sopenharmony_ci	memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize);
132662306a36Sopenharmony_ci
132762306a36Sopenharmony_ci	new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
132862306a36Sopenharmony_ci	new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
132962306a36Sopenharmony_ci	new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
133062306a36Sopenharmony_ci	new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
133162306a36Sopenharmony_ci	new_rb->rf_blkno = cpu_to_le64(blkno);
133262306a36Sopenharmony_ci	new_rb->rf_cpos = cpu_to_le32(0);
133362306a36Sopenharmony_ci	new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr);
133462306a36Sopenharmony_ci	new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL);
133562306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, new_bh);
133662306a36Sopenharmony_ci
133762306a36Sopenharmony_ci	/* Now change the root. */
133862306a36Sopenharmony_ci	memset(&root_rb->rf_list, 0, sb->s_blocksize -
133962306a36Sopenharmony_ci	       offsetof(struct ocfs2_refcount_block, rf_list));
134062306a36Sopenharmony_ci	root_rb->rf_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_rb(sb));
134162306a36Sopenharmony_ci	root_rb->rf_clusters = cpu_to_le32(1);
134262306a36Sopenharmony_ci	root_rb->rf_list.l_next_free_rec = cpu_to_le16(1);
134362306a36Sopenharmony_ci	root_rb->rf_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
134462306a36Sopenharmony_ci	root_rb->rf_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
134562306a36Sopenharmony_ci	root_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_TREE_FL);
134662306a36Sopenharmony_ci
134762306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, ref_root_bh);
134862306a36Sopenharmony_ci
134962306a36Sopenharmony_ci	trace_ocfs2_expand_inline_ref_root((unsigned long long)blkno,
135062306a36Sopenharmony_ci		le16_to_cpu(new_rb->rf_records.rl_used));
135162306a36Sopenharmony_ci
135262306a36Sopenharmony_ci	*ref_leaf_bh = new_bh;
135362306a36Sopenharmony_ci	new_bh = NULL;
135462306a36Sopenharmony_ciout:
135562306a36Sopenharmony_ci	brelse(new_bh);
135662306a36Sopenharmony_ci	return ret;
135762306a36Sopenharmony_ci}
135862306a36Sopenharmony_ci
135962306a36Sopenharmony_cistatic int ocfs2_refcount_rec_no_intersect(struct ocfs2_refcount_rec *prev,
136062306a36Sopenharmony_ci					   struct ocfs2_refcount_rec *next)
136162306a36Sopenharmony_ci{
136262306a36Sopenharmony_ci	if (ocfs2_get_ref_rec_low_cpos(prev) + le32_to_cpu(prev->r_clusters) <=
136362306a36Sopenharmony_ci		ocfs2_get_ref_rec_low_cpos(next))
136462306a36Sopenharmony_ci		return 1;
136562306a36Sopenharmony_ci
136662306a36Sopenharmony_ci	return 0;
136762306a36Sopenharmony_ci}
136862306a36Sopenharmony_ci
136962306a36Sopenharmony_cistatic int cmp_refcount_rec_by_low_cpos(const void *a, const void *b)
137062306a36Sopenharmony_ci{
137162306a36Sopenharmony_ci	const struct ocfs2_refcount_rec *l = a, *r = b;
137262306a36Sopenharmony_ci	u32 l_cpos = ocfs2_get_ref_rec_low_cpos(l);
137362306a36Sopenharmony_ci	u32 r_cpos = ocfs2_get_ref_rec_low_cpos(r);
137462306a36Sopenharmony_ci
137562306a36Sopenharmony_ci	if (l_cpos > r_cpos)
137662306a36Sopenharmony_ci		return 1;
137762306a36Sopenharmony_ci	if (l_cpos < r_cpos)
137862306a36Sopenharmony_ci		return -1;
137962306a36Sopenharmony_ci	return 0;
138062306a36Sopenharmony_ci}
138162306a36Sopenharmony_ci
138262306a36Sopenharmony_cistatic int cmp_refcount_rec_by_cpos(const void *a, const void *b)
138362306a36Sopenharmony_ci{
138462306a36Sopenharmony_ci	const struct ocfs2_refcount_rec *l = a, *r = b;
138562306a36Sopenharmony_ci	u64 l_cpos = le64_to_cpu(l->r_cpos);
138662306a36Sopenharmony_ci	u64 r_cpos = le64_to_cpu(r->r_cpos);
138762306a36Sopenharmony_ci
138862306a36Sopenharmony_ci	if (l_cpos > r_cpos)
138962306a36Sopenharmony_ci		return 1;
139062306a36Sopenharmony_ci	if (l_cpos < r_cpos)
139162306a36Sopenharmony_ci		return -1;
139262306a36Sopenharmony_ci	return 0;
139362306a36Sopenharmony_ci}
139462306a36Sopenharmony_ci
139562306a36Sopenharmony_cistatic void swap_refcount_rec(void *a, void *b, int size)
139662306a36Sopenharmony_ci{
139762306a36Sopenharmony_ci	struct ocfs2_refcount_rec *l = a, *r = b;
139862306a36Sopenharmony_ci
139962306a36Sopenharmony_ci	swap(*l, *r);
140062306a36Sopenharmony_ci}
140162306a36Sopenharmony_ci
140262306a36Sopenharmony_ci/*
140362306a36Sopenharmony_ci * The refcount cpos are ordered by their 64bit cpos,
140462306a36Sopenharmony_ci * But we will use the low 32 bit to be the e_cpos in the b-tree.
140562306a36Sopenharmony_ci * So we need to make sure that this pos isn't intersected with others.
140662306a36Sopenharmony_ci *
140762306a36Sopenharmony_ci * Note: The refcount block is already sorted by their low 32 bit cpos,
140862306a36Sopenharmony_ci *       So just try the middle pos first, and we will exit when we find
140962306a36Sopenharmony_ci *       the good position.
141062306a36Sopenharmony_ci */
141162306a36Sopenharmony_cistatic int ocfs2_find_refcount_split_pos(struct ocfs2_refcount_list *rl,
141262306a36Sopenharmony_ci					 u32 *split_pos, int *split_index)
141362306a36Sopenharmony_ci{
141462306a36Sopenharmony_ci	int num_used = le16_to_cpu(rl->rl_used);
141562306a36Sopenharmony_ci	int delta, middle = num_used / 2;
141662306a36Sopenharmony_ci
141762306a36Sopenharmony_ci	for (delta = 0; delta < middle; delta++) {
141862306a36Sopenharmony_ci		/* Let's check delta earlier than middle */
141962306a36Sopenharmony_ci		if (ocfs2_refcount_rec_no_intersect(
142062306a36Sopenharmony_ci					&rl->rl_recs[middle - delta - 1],
142162306a36Sopenharmony_ci					&rl->rl_recs[middle - delta])) {
142262306a36Sopenharmony_ci			*split_index = middle - delta;
142362306a36Sopenharmony_ci			break;
142462306a36Sopenharmony_ci		}
142562306a36Sopenharmony_ci
142662306a36Sopenharmony_ci		/* For even counts, don't walk off the end */
142762306a36Sopenharmony_ci		if ((middle + delta + 1) == num_used)
142862306a36Sopenharmony_ci			continue;
142962306a36Sopenharmony_ci
143062306a36Sopenharmony_ci		/* Now try delta past middle */
143162306a36Sopenharmony_ci		if (ocfs2_refcount_rec_no_intersect(
143262306a36Sopenharmony_ci					&rl->rl_recs[middle + delta],
143362306a36Sopenharmony_ci					&rl->rl_recs[middle + delta + 1])) {
143462306a36Sopenharmony_ci			*split_index = middle + delta + 1;
143562306a36Sopenharmony_ci			break;
143662306a36Sopenharmony_ci		}
143762306a36Sopenharmony_ci	}
143862306a36Sopenharmony_ci
143962306a36Sopenharmony_ci	if (delta >= middle)
144062306a36Sopenharmony_ci		return -ENOSPC;
144162306a36Sopenharmony_ci
144262306a36Sopenharmony_ci	*split_pos = ocfs2_get_ref_rec_low_cpos(&rl->rl_recs[*split_index]);
144362306a36Sopenharmony_ci	return 0;
144462306a36Sopenharmony_ci}
144562306a36Sopenharmony_ci
144662306a36Sopenharmony_cistatic int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
144762306a36Sopenharmony_ci					    struct buffer_head *new_bh,
144862306a36Sopenharmony_ci					    u32 *split_cpos)
144962306a36Sopenharmony_ci{
145062306a36Sopenharmony_ci	int split_index = 0, num_moved, ret;
145162306a36Sopenharmony_ci	u32 cpos = 0;
145262306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb =
145362306a36Sopenharmony_ci			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
145462306a36Sopenharmony_ci	struct ocfs2_refcount_list *rl = &rb->rf_records;
145562306a36Sopenharmony_ci	struct ocfs2_refcount_block *new_rb =
145662306a36Sopenharmony_ci			(struct ocfs2_refcount_block *)new_bh->b_data;
145762306a36Sopenharmony_ci	struct ocfs2_refcount_list *new_rl = &new_rb->rf_records;
145862306a36Sopenharmony_ci
145962306a36Sopenharmony_ci	trace_ocfs2_divide_leaf_refcount_block(
146062306a36Sopenharmony_ci		(unsigned long long)ref_leaf_bh->b_blocknr,
146162306a36Sopenharmony_ci		le16_to_cpu(rl->rl_count), le16_to_cpu(rl->rl_used));
146262306a36Sopenharmony_ci
146362306a36Sopenharmony_ci	/*
146462306a36Sopenharmony_ci	 * XXX: Improvement later.
146562306a36Sopenharmony_ci	 * If we know all the high 32 bit cpos is the same, no need to sort.
146662306a36Sopenharmony_ci	 *
146762306a36Sopenharmony_ci	 * In order to make the whole process safe, we do:
146862306a36Sopenharmony_ci	 * 1. sort the entries by their low 32 bit cpos first so that we can
146962306a36Sopenharmony_ci	 *    find the split cpos easily.
147062306a36Sopenharmony_ci	 * 2. call ocfs2_insert_extent to insert the new refcount block.
147162306a36Sopenharmony_ci	 * 3. move the refcount rec to the new block.
147262306a36Sopenharmony_ci	 * 4. sort the entries by their 64 bit cpos.
147362306a36Sopenharmony_ci	 * 5. dirty the new_rb and rb.
147462306a36Sopenharmony_ci	 */
147562306a36Sopenharmony_ci	sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
147662306a36Sopenharmony_ci	     sizeof(struct ocfs2_refcount_rec),
147762306a36Sopenharmony_ci	     cmp_refcount_rec_by_low_cpos, swap_refcount_rec);
147862306a36Sopenharmony_ci
147962306a36Sopenharmony_ci	ret = ocfs2_find_refcount_split_pos(rl, &cpos, &split_index);
148062306a36Sopenharmony_ci	if (ret) {
148162306a36Sopenharmony_ci		mlog_errno(ret);
148262306a36Sopenharmony_ci		return ret;
148362306a36Sopenharmony_ci	}
148462306a36Sopenharmony_ci
148562306a36Sopenharmony_ci	new_rb->rf_cpos = cpu_to_le32(cpos);
148662306a36Sopenharmony_ci
148762306a36Sopenharmony_ci	/* move refcount records starting from split_index to the new block. */
148862306a36Sopenharmony_ci	num_moved = le16_to_cpu(rl->rl_used) - split_index;
148962306a36Sopenharmony_ci	memcpy(new_rl->rl_recs, &rl->rl_recs[split_index],
149062306a36Sopenharmony_ci	       num_moved * sizeof(struct ocfs2_refcount_rec));
149162306a36Sopenharmony_ci
149262306a36Sopenharmony_ci	/*ok, remove the entries we just moved over to the other block. */
149362306a36Sopenharmony_ci	memset(&rl->rl_recs[split_index], 0,
149462306a36Sopenharmony_ci	       num_moved * sizeof(struct ocfs2_refcount_rec));
149562306a36Sopenharmony_ci
149662306a36Sopenharmony_ci	/* change old and new rl_used accordingly. */
149762306a36Sopenharmony_ci	le16_add_cpu(&rl->rl_used, -num_moved);
149862306a36Sopenharmony_ci	new_rl->rl_used = cpu_to_le16(num_moved);
149962306a36Sopenharmony_ci
150062306a36Sopenharmony_ci	sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
150162306a36Sopenharmony_ci	     sizeof(struct ocfs2_refcount_rec),
150262306a36Sopenharmony_ci	     cmp_refcount_rec_by_cpos, swap_refcount_rec);
150362306a36Sopenharmony_ci
150462306a36Sopenharmony_ci	sort(&new_rl->rl_recs, le16_to_cpu(new_rl->rl_used),
150562306a36Sopenharmony_ci	     sizeof(struct ocfs2_refcount_rec),
150662306a36Sopenharmony_ci	     cmp_refcount_rec_by_cpos, swap_refcount_rec);
150762306a36Sopenharmony_ci
150862306a36Sopenharmony_ci	*split_cpos = cpos;
150962306a36Sopenharmony_ci	return 0;
151062306a36Sopenharmony_ci}
151162306a36Sopenharmony_ci
151262306a36Sopenharmony_cistatic int ocfs2_new_leaf_refcount_block(handle_t *handle,
151362306a36Sopenharmony_ci					 struct ocfs2_caching_info *ci,
151462306a36Sopenharmony_ci					 struct buffer_head *ref_root_bh,
151562306a36Sopenharmony_ci					 struct buffer_head *ref_leaf_bh,
151662306a36Sopenharmony_ci					 struct ocfs2_alloc_context *meta_ac)
151762306a36Sopenharmony_ci{
151862306a36Sopenharmony_ci	int ret;
151962306a36Sopenharmony_ci	u16 suballoc_bit_start;
152062306a36Sopenharmony_ci	u32 num_got, new_cpos;
152162306a36Sopenharmony_ci	u64 suballoc_loc, blkno;
152262306a36Sopenharmony_ci	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
152362306a36Sopenharmony_ci	struct ocfs2_refcount_block *root_rb =
152462306a36Sopenharmony_ci			(struct ocfs2_refcount_block *)ref_root_bh->b_data;
152562306a36Sopenharmony_ci	struct buffer_head *new_bh = NULL;
152662306a36Sopenharmony_ci	struct ocfs2_refcount_block *new_rb;
152762306a36Sopenharmony_ci	struct ocfs2_extent_tree ref_et;
152862306a36Sopenharmony_ci
152962306a36Sopenharmony_ci	BUG_ON(!(le32_to_cpu(root_rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL));
153062306a36Sopenharmony_ci
153162306a36Sopenharmony_ci	ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
153262306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
153362306a36Sopenharmony_ci	if (ret) {
153462306a36Sopenharmony_ci		mlog_errno(ret);
153562306a36Sopenharmony_ci		goto out;
153662306a36Sopenharmony_ci	}
153762306a36Sopenharmony_ci
153862306a36Sopenharmony_ci	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
153962306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
154062306a36Sopenharmony_ci	if (ret) {
154162306a36Sopenharmony_ci		mlog_errno(ret);
154262306a36Sopenharmony_ci		goto out;
154362306a36Sopenharmony_ci	}
154462306a36Sopenharmony_ci
154562306a36Sopenharmony_ci	ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
154662306a36Sopenharmony_ci				   &suballoc_bit_start, &num_got,
154762306a36Sopenharmony_ci				   &blkno);
154862306a36Sopenharmony_ci	if (ret) {
154962306a36Sopenharmony_ci		mlog_errno(ret);
155062306a36Sopenharmony_ci		goto out;
155162306a36Sopenharmony_ci	}
155262306a36Sopenharmony_ci
155362306a36Sopenharmony_ci	new_bh = sb_getblk(sb, blkno);
155462306a36Sopenharmony_ci	if (new_bh == NULL) {
155562306a36Sopenharmony_ci		ret = -ENOMEM;
155662306a36Sopenharmony_ci		mlog_errno(ret);
155762306a36Sopenharmony_ci		goto out;
155862306a36Sopenharmony_ci	}
155962306a36Sopenharmony_ci	ocfs2_set_new_buffer_uptodate(ci, new_bh);
156062306a36Sopenharmony_ci
156162306a36Sopenharmony_ci	ret = ocfs2_journal_access_rb(handle, ci, new_bh,
156262306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_CREATE);
156362306a36Sopenharmony_ci	if (ret) {
156462306a36Sopenharmony_ci		mlog_errno(ret);
156562306a36Sopenharmony_ci		goto out;
156662306a36Sopenharmony_ci	}
156762306a36Sopenharmony_ci
156862306a36Sopenharmony_ci	/* Initialize ocfs2_refcount_block. */
156962306a36Sopenharmony_ci	new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
157062306a36Sopenharmony_ci	memset(new_rb, 0, sb->s_blocksize);
157162306a36Sopenharmony_ci	strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
157262306a36Sopenharmony_ci	new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
157362306a36Sopenharmony_ci	new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
157462306a36Sopenharmony_ci	new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
157562306a36Sopenharmony_ci	new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
157662306a36Sopenharmony_ci	new_rb->rf_blkno = cpu_to_le64(blkno);
157762306a36Sopenharmony_ci	new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr);
157862306a36Sopenharmony_ci	new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL);
157962306a36Sopenharmony_ci	new_rb->rf_records.rl_count =
158062306a36Sopenharmony_ci				cpu_to_le16(ocfs2_refcount_recs_per_rb(sb));
158162306a36Sopenharmony_ci	new_rb->rf_generation = root_rb->rf_generation;
158262306a36Sopenharmony_ci
158362306a36Sopenharmony_ci	ret = ocfs2_divide_leaf_refcount_block(ref_leaf_bh, new_bh, &new_cpos);
158462306a36Sopenharmony_ci	if (ret) {
158562306a36Sopenharmony_ci		mlog_errno(ret);
158662306a36Sopenharmony_ci		goto out;
158762306a36Sopenharmony_ci	}
158862306a36Sopenharmony_ci
158962306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, ref_leaf_bh);
159062306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, new_bh);
159162306a36Sopenharmony_ci
159262306a36Sopenharmony_ci	ocfs2_init_refcount_extent_tree(&ref_et, ci, ref_root_bh);
159362306a36Sopenharmony_ci
159462306a36Sopenharmony_ci	trace_ocfs2_new_leaf_refcount_block(
159562306a36Sopenharmony_ci			(unsigned long long)new_bh->b_blocknr, new_cpos);
159662306a36Sopenharmony_ci
159762306a36Sopenharmony_ci	/* Insert the new leaf block with the specific offset cpos. */
159862306a36Sopenharmony_ci	ret = ocfs2_insert_extent(handle, &ref_et, new_cpos, new_bh->b_blocknr,
159962306a36Sopenharmony_ci				  1, 0, meta_ac);
160062306a36Sopenharmony_ci	if (ret)
160162306a36Sopenharmony_ci		mlog_errno(ret);
160262306a36Sopenharmony_ci
160362306a36Sopenharmony_ciout:
160462306a36Sopenharmony_ci	brelse(new_bh);
160562306a36Sopenharmony_ci	return ret;
160662306a36Sopenharmony_ci}
160762306a36Sopenharmony_ci
160862306a36Sopenharmony_cistatic int ocfs2_expand_refcount_tree(handle_t *handle,
160962306a36Sopenharmony_ci				      struct ocfs2_caching_info *ci,
161062306a36Sopenharmony_ci				      struct buffer_head *ref_root_bh,
161162306a36Sopenharmony_ci				      struct buffer_head *ref_leaf_bh,
161262306a36Sopenharmony_ci				      struct ocfs2_alloc_context *meta_ac)
161362306a36Sopenharmony_ci{
161462306a36Sopenharmony_ci	int ret;
161562306a36Sopenharmony_ci	struct buffer_head *expand_bh = NULL;
161662306a36Sopenharmony_ci
161762306a36Sopenharmony_ci	if (ref_root_bh == ref_leaf_bh) {
161862306a36Sopenharmony_ci		/*
161962306a36Sopenharmony_ci		 * the old root bh hasn't been expanded to a b-tree,
162062306a36Sopenharmony_ci		 * so expand it first.
162162306a36Sopenharmony_ci		 */
162262306a36Sopenharmony_ci		ret = ocfs2_expand_inline_ref_root(handle, ci, ref_root_bh,
162362306a36Sopenharmony_ci						   &expand_bh, meta_ac);
162462306a36Sopenharmony_ci		if (ret) {
162562306a36Sopenharmony_ci			mlog_errno(ret);
162662306a36Sopenharmony_ci			goto out;
162762306a36Sopenharmony_ci		}
162862306a36Sopenharmony_ci	} else {
162962306a36Sopenharmony_ci		expand_bh = ref_leaf_bh;
163062306a36Sopenharmony_ci		get_bh(expand_bh);
163162306a36Sopenharmony_ci	}
163262306a36Sopenharmony_ci
163362306a36Sopenharmony_ci
163462306a36Sopenharmony_ci	/* Now add a new refcount block into the tree.*/
163562306a36Sopenharmony_ci	ret = ocfs2_new_leaf_refcount_block(handle, ci, ref_root_bh,
163662306a36Sopenharmony_ci					    expand_bh, meta_ac);
163762306a36Sopenharmony_ci	if (ret)
163862306a36Sopenharmony_ci		mlog_errno(ret);
163962306a36Sopenharmony_ciout:
164062306a36Sopenharmony_ci	brelse(expand_bh);
164162306a36Sopenharmony_ci	return ret;
164262306a36Sopenharmony_ci}
164362306a36Sopenharmony_ci
164462306a36Sopenharmony_ci/*
164562306a36Sopenharmony_ci * Adjust the extent rec in b-tree representing ref_leaf_bh.
164662306a36Sopenharmony_ci *
164762306a36Sopenharmony_ci * Only called when we have inserted a new refcount rec at index 0
164862306a36Sopenharmony_ci * which means ocfs2_extent_rec.e_cpos may need some change.
164962306a36Sopenharmony_ci */
165062306a36Sopenharmony_cistatic int ocfs2_adjust_refcount_rec(handle_t *handle,
165162306a36Sopenharmony_ci				     struct ocfs2_caching_info *ci,
165262306a36Sopenharmony_ci				     struct buffer_head *ref_root_bh,
165362306a36Sopenharmony_ci				     struct buffer_head *ref_leaf_bh,
165462306a36Sopenharmony_ci				     struct ocfs2_refcount_rec *rec)
165562306a36Sopenharmony_ci{
165662306a36Sopenharmony_ci	int ret = 0, i;
165762306a36Sopenharmony_ci	u32 new_cpos, old_cpos;
165862306a36Sopenharmony_ci	struct ocfs2_path *path = NULL;
165962306a36Sopenharmony_ci	struct ocfs2_extent_tree et;
166062306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb =
166162306a36Sopenharmony_ci		(struct ocfs2_refcount_block *)ref_root_bh->b_data;
166262306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
166362306a36Sopenharmony_ci
166462306a36Sopenharmony_ci	if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL))
166562306a36Sopenharmony_ci		goto out;
166662306a36Sopenharmony_ci
166762306a36Sopenharmony_ci	rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
166862306a36Sopenharmony_ci	old_cpos = le32_to_cpu(rb->rf_cpos);
166962306a36Sopenharmony_ci	new_cpos = le64_to_cpu(rec->r_cpos) & OCFS2_32BIT_POS_MASK;
167062306a36Sopenharmony_ci	if (old_cpos <= new_cpos)
167162306a36Sopenharmony_ci		goto out;
167262306a36Sopenharmony_ci
167362306a36Sopenharmony_ci	ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
167462306a36Sopenharmony_ci
167562306a36Sopenharmony_ci	path = ocfs2_new_path_from_et(&et);
167662306a36Sopenharmony_ci	if (!path) {
167762306a36Sopenharmony_ci		ret = -ENOMEM;
167862306a36Sopenharmony_ci		mlog_errno(ret);
167962306a36Sopenharmony_ci		goto out;
168062306a36Sopenharmony_ci	}
168162306a36Sopenharmony_ci
168262306a36Sopenharmony_ci	ret = ocfs2_find_path(ci, path, old_cpos);
168362306a36Sopenharmony_ci	if (ret) {
168462306a36Sopenharmony_ci		mlog_errno(ret);
168562306a36Sopenharmony_ci		goto out;
168662306a36Sopenharmony_ci	}
168762306a36Sopenharmony_ci
168862306a36Sopenharmony_ci	/*
168962306a36Sopenharmony_ci	 * 2 more credits, one for the leaf refcount block, one for
169062306a36Sopenharmony_ci	 * the extent block contains the extent rec.
169162306a36Sopenharmony_ci	 */
169262306a36Sopenharmony_ci	ret = ocfs2_extend_trans(handle, 2);
169362306a36Sopenharmony_ci	if (ret < 0) {
169462306a36Sopenharmony_ci		mlog_errno(ret);
169562306a36Sopenharmony_ci		goto out;
169662306a36Sopenharmony_ci	}
169762306a36Sopenharmony_ci
169862306a36Sopenharmony_ci	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
169962306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
170062306a36Sopenharmony_ci	if (ret < 0) {
170162306a36Sopenharmony_ci		mlog_errno(ret);
170262306a36Sopenharmony_ci		goto out;
170362306a36Sopenharmony_ci	}
170462306a36Sopenharmony_ci
170562306a36Sopenharmony_ci	ret = ocfs2_journal_access_eb(handle, ci, path_leaf_bh(path),
170662306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
170762306a36Sopenharmony_ci	if (ret < 0) {
170862306a36Sopenharmony_ci		mlog_errno(ret);
170962306a36Sopenharmony_ci		goto out;
171062306a36Sopenharmony_ci	}
171162306a36Sopenharmony_ci
171262306a36Sopenharmony_ci	/* change the leaf extent block first. */
171362306a36Sopenharmony_ci	el = path_leaf_el(path);
171462306a36Sopenharmony_ci
171562306a36Sopenharmony_ci	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++)
171662306a36Sopenharmony_ci		if (le32_to_cpu(el->l_recs[i].e_cpos) == old_cpos)
171762306a36Sopenharmony_ci			break;
171862306a36Sopenharmony_ci
171962306a36Sopenharmony_ci	BUG_ON(i == le16_to_cpu(el->l_next_free_rec));
172062306a36Sopenharmony_ci
172162306a36Sopenharmony_ci	el->l_recs[i].e_cpos = cpu_to_le32(new_cpos);
172262306a36Sopenharmony_ci
172362306a36Sopenharmony_ci	/* change the r_cpos in the leaf block. */
172462306a36Sopenharmony_ci	rb->rf_cpos = cpu_to_le32(new_cpos);
172562306a36Sopenharmony_ci
172662306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, path_leaf_bh(path));
172762306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, ref_leaf_bh);
172862306a36Sopenharmony_ci
172962306a36Sopenharmony_ciout:
173062306a36Sopenharmony_ci	ocfs2_free_path(path);
173162306a36Sopenharmony_ci	return ret;
173262306a36Sopenharmony_ci}
173362306a36Sopenharmony_ci
173462306a36Sopenharmony_cistatic int ocfs2_insert_refcount_rec(handle_t *handle,
173562306a36Sopenharmony_ci				     struct ocfs2_caching_info *ci,
173662306a36Sopenharmony_ci				     struct buffer_head *ref_root_bh,
173762306a36Sopenharmony_ci				     struct buffer_head *ref_leaf_bh,
173862306a36Sopenharmony_ci				     struct ocfs2_refcount_rec *rec,
173962306a36Sopenharmony_ci				     int index, int merge,
174062306a36Sopenharmony_ci				     struct ocfs2_alloc_context *meta_ac)
174162306a36Sopenharmony_ci{
174262306a36Sopenharmony_ci	int ret;
174362306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb =
174462306a36Sopenharmony_ci			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
174562306a36Sopenharmony_ci	struct ocfs2_refcount_list *rf_list = &rb->rf_records;
174662306a36Sopenharmony_ci	struct buffer_head *new_bh = NULL;
174762306a36Sopenharmony_ci
174862306a36Sopenharmony_ci	BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL);
174962306a36Sopenharmony_ci
175062306a36Sopenharmony_ci	if (rf_list->rl_used == rf_list->rl_count) {
175162306a36Sopenharmony_ci		u64 cpos = le64_to_cpu(rec->r_cpos);
175262306a36Sopenharmony_ci		u32 len = le32_to_cpu(rec->r_clusters);
175362306a36Sopenharmony_ci
175462306a36Sopenharmony_ci		ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh,
175562306a36Sopenharmony_ci						 ref_leaf_bh, meta_ac);
175662306a36Sopenharmony_ci		if (ret) {
175762306a36Sopenharmony_ci			mlog_errno(ret);
175862306a36Sopenharmony_ci			goto out;
175962306a36Sopenharmony_ci		}
176062306a36Sopenharmony_ci
176162306a36Sopenharmony_ci		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
176262306a36Sopenharmony_ci					     cpos, len, NULL, &index,
176362306a36Sopenharmony_ci					     &new_bh);
176462306a36Sopenharmony_ci		if (ret) {
176562306a36Sopenharmony_ci			mlog_errno(ret);
176662306a36Sopenharmony_ci			goto out;
176762306a36Sopenharmony_ci		}
176862306a36Sopenharmony_ci
176962306a36Sopenharmony_ci		ref_leaf_bh = new_bh;
177062306a36Sopenharmony_ci		rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
177162306a36Sopenharmony_ci		rf_list = &rb->rf_records;
177262306a36Sopenharmony_ci	}
177362306a36Sopenharmony_ci
177462306a36Sopenharmony_ci	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
177562306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
177662306a36Sopenharmony_ci	if (ret) {
177762306a36Sopenharmony_ci		mlog_errno(ret);
177862306a36Sopenharmony_ci		goto out;
177962306a36Sopenharmony_ci	}
178062306a36Sopenharmony_ci
178162306a36Sopenharmony_ci	if (index < le16_to_cpu(rf_list->rl_used))
178262306a36Sopenharmony_ci		memmove(&rf_list->rl_recs[index + 1],
178362306a36Sopenharmony_ci			&rf_list->rl_recs[index],
178462306a36Sopenharmony_ci			(le16_to_cpu(rf_list->rl_used) - index) *
178562306a36Sopenharmony_ci			 sizeof(struct ocfs2_refcount_rec));
178662306a36Sopenharmony_ci
178762306a36Sopenharmony_ci	trace_ocfs2_insert_refcount_rec(
178862306a36Sopenharmony_ci		(unsigned long long)ref_leaf_bh->b_blocknr, index,
178962306a36Sopenharmony_ci		(unsigned long long)le64_to_cpu(rec->r_cpos),
179062306a36Sopenharmony_ci		le32_to_cpu(rec->r_clusters), le32_to_cpu(rec->r_refcount));
179162306a36Sopenharmony_ci
179262306a36Sopenharmony_ci	rf_list->rl_recs[index] = *rec;
179362306a36Sopenharmony_ci
179462306a36Sopenharmony_ci	le16_add_cpu(&rf_list->rl_used, 1);
179562306a36Sopenharmony_ci
179662306a36Sopenharmony_ci	if (merge)
179762306a36Sopenharmony_ci		ocfs2_refcount_rec_merge(rb, index);
179862306a36Sopenharmony_ci
179962306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, ref_leaf_bh);
180062306a36Sopenharmony_ci
180162306a36Sopenharmony_ci	if (index == 0) {
180262306a36Sopenharmony_ci		ret = ocfs2_adjust_refcount_rec(handle, ci,
180362306a36Sopenharmony_ci						ref_root_bh,
180462306a36Sopenharmony_ci						ref_leaf_bh, rec);
180562306a36Sopenharmony_ci		if (ret)
180662306a36Sopenharmony_ci			mlog_errno(ret);
180762306a36Sopenharmony_ci	}
180862306a36Sopenharmony_ciout:
180962306a36Sopenharmony_ci	brelse(new_bh);
181062306a36Sopenharmony_ci	return ret;
181162306a36Sopenharmony_ci}
181262306a36Sopenharmony_ci
/*
 * Split the refcount_rec indexed by "index" in ref_leaf_bh.
 * This is much simpler than our b-tree code.
 * split_rec is the new refcount rec we want to insert.
 * If split_rec->r_refcount > 0, we are changing the refcount (in case we
 * increase a refcount or decrease a refcount to non-zero).
 * If split_rec->r_refcount == 0, we are punching a hole in the current
 * refcount rec (in case we decrease a refcount to zero).
 */
182262306a36Sopenharmony_cistatic int ocfs2_split_refcount_rec(handle_t *handle,
182362306a36Sopenharmony_ci				    struct ocfs2_caching_info *ci,
182462306a36Sopenharmony_ci				    struct buffer_head *ref_root_bh,
182562306a36Sopenharmony_ci				    struct buffer_head *ref_leaf_bh,
182662306a36Sopenharmony_ci				    struct ocfs2_refcount_rec *split_rec,
182762306a36Sopenharmony_ci				    int index, int merge,
182862306a36Sopenharmony_ci				    struct ocfs2_alloc_context *meta_ac,
182962306a36Sopenharmony_ci				    struct ocfs2_cached_dealloc_ctxt *dealloc)
183062306a36Sopenharmony_ci{
183162306a36Sopenharmony_ci	int ret, recs_need;
183262306a36Sopenharmony_ci	u32 len;
183362306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb =
183462306a36Sopenharmony_ci			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
183562306a36Sopenharmony_ci	struct ocfs2_refcount_list *rf_list = &rb->rf_records;
183662306a36Sopenharmony_ci	struct ocfs2_refcount_rec *orig_rec = &rf_list->rl_recs[index];
183762306a36Sopenharmony_ci	struct ocfs2_refcount_rec *tail_rec = NULL;
183862306a36Sopenharmony_ci	struct buffer_head *new_bh = NULL;
183962306a36Sopenharmony_ci
184062306a36Sopenharmony_ci	BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL);
184162306a36Sopenharmony_ci
184262306a36Sopenharmony_ci	trace_ocfs2_split_refcount_rec(le64_to_cpu(orig_rec->r_cpos),
184362306a36Sopenharmony_ci		le32_to_cpu(orig_rec->r_clusters),
184462306a36Sopenharmony_ci		le32_to_cpu(orig_rec->r_refcount),
184562306a36Sopenharmony_ci		le64_to_cpu(split_rec->r_cpos),
184662306a36Sopenharmony_ci		le32_to_cpu(split_rec->r_clusters),
184762306a36Sopenharmony_ci		le32_to_cpu(split_rec->r_refcount));
184862306a36Sopenharmony_ci
184962306a36Sopenharmony_ci	/*
185062306a36Sopenharmony_ci	 * If we just need to split the header or tail clusters,
185162306a36Sopenharmony_ci	 * no more recs are needed, just split is OK.
185262306a36Sopenharmony_ci	 * Otherwise we at least need one new recs.
185362306a36Sopenharmony_ci	 */
185462306a36Sopenharmony_ci	if (!split_rec->r_refcount &&
185562306a36Sopenharmony_ci	    (split_rec->r_cpos == orig_rec->r_cpos ||
185662306a36Sopenharmony_ci	     le64_to_cpu(split_rec->r_cpos) +
185762306a36Sopenharmony_ci	     le32_to_cpu(split_rec->r_clusters) ==
185862306a36Sopenharmony_ci	     le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters)))
185962306a36Sopenharmony_ci		recs_need = 0;
186062306a36Sopenharmony_ci	else
186162306a36Sopenharmony_ci		recs_need = 1;
186262306a36Sopenharmony_ci
186362306a36Sopenharmony_ci	/*
186462306a36Sopenharmony_ci	 * We need one more rec if we split in the middle and the new rec have
186562306a36Sopenharmony_ci	 * some refcount in it.
186662306a36Sopenharmony_ci	 */
186762306a36Sopenharmony_ci	if (split_rec->r_refcount &&
186862306a36Sopenharmony_ci	    (split_rec->r_cpos != orig_rec->r_cpos &&
186962306a36Sopenharmony_ci	     le64_to_cpu(split_rec->r_cpos) +
187062306a36Sopenharmony_ci	     le32_to_cpu(split_rec->r_clusters) !=
187162306a36Sopenharmony_ci	     le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters)))
187262306a36Sopenharmony_ci		recs_need++;
187362306a36Sopenharmony_ci
187462306a36Sopenharmony_ci	/* If the leaf block don't have enough record, expand it. */
187562306a36Sopenharmony_ci	if (le16_to_cpu(rf_list->rl_used) + recs_need >
187662306a36Sopenharmony_ci					 le16_to_cpu(rf_list->rl_count)) {
187762306a36Sopenharmony_ci		struct ocfs2_refcount_rec tmp_rec;
187862306a36Sopenharmony_ci		u64 cpos = le64_to_cpu(orig_rec->r_cpos);
187962306a36Sopenharmony_ci		len = le32_to_cpu(orig_rec->r_clusters);
188062306a36Sopenharmony_ci		ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh,
188162306a36Sopenharmony_ci						 ref_leaf_bh, meta_ac);
188262306a36Sopenharmony_ci		if (ret) {
188362306a36Sopenharmony_ci			mlog_errno(ret);
188462306a36Sopenharmony_ci			goto out;
188562306a36Sopenharmony_ci		}
188662306a36Sopenharmony_ci
188762306a36Sopenharmony_ci		/*
188862306a36Sopenharmony_ci		 * We have to re-get it since now cpos may be moved to
188962306a36Sopenharmony_ci		 * another leaf block.
189062306a36Sopenharmony_ci		 */
189162306a36Sopenharmony_ci		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
189262306a36Sopenharmony_ci					     cpos, len, &tmp_rec, &index,
189362306a36Sopenharmony_ci					     &new_bh);
189462306a36Sopenharmony_ci		if (ret) {
189562306a36Sopenharmony_ci			mlog_errno(ret);
189662306a36Sopenharmony_ci			goto out;
189762306a36Sopenharmony_ci		}
189862306a36Sopenharmony_ci
189962306a36Sopenharmony_ci		ref_leaf_bh = new_bh;
190062306a36Sopenharmony_ci		rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
190162306a36Sopenharmony_ci		rf_list = &rb->rf_records;
190262306a36Sopenharmony_ci		orig_rec = &rf_list->rl_recs[index];
190362306a36Sopenharmony_ci	}
190462306a36Sopenharmony_ci
190562306a36Sopenharmony_ci	ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
190662306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
190762306a36Sopenharmony_ci	if (ret) {
190862306a36Sopenharmony_ci		mlog_errno(ret);
190962306a36Sopenharmony_ci		goto out;
191062306a36Sopenharmony_ci	}
191162306a36Sopenharmony_ci
191262306a36Sopenharmony_ci	/*
191362306a36Sopenharmony_ci	 * We have calculated out how many new records we need and store
191462306a36Sopenharmony_ci	 * in recs_need, so spare enough space first by moving the records
191562306a36Sopenharmony_ci	 * after "index" to the end.
191662306a36Sopenharmony_ci	 */
191762306a36Sopenharmony_ci	if (index != le16_to_cpu(rf_list->rl_used) - 1)
191862306a36Sopenharmony_ci		memmove(&rf_list->rl_recs[index + 1 + recs_need],
191962306a36Sopenharmony_ci			&rf_list->rl_recs[index + 1],
192062306a36Sopenharmony_ci			(le16_to_cpu(rf_list->rl_used) - index - 1) *
192162306a36Sopenharmony_ci			 sizeof(struct ocfs2_refcount_rec));
192262306a36Sopenharmony_ci
192362306a36Sopenharmony_ci	len = (le64_to_cpu(orig_rec->r_cpos) +
192462306a36Sopenharmony_ci	      le32_to_cpu(orig_rec->r_clusters)) -
192562306a36Sopenharmony_ci	      (le64_to_cpu(split_rec->r_cpos) +
192662306a36Sopenharmony_ci	      le32_to_cpu(split_rec->r_clusters));
192762306a36Sopenharmony_ci
192862306a36Sopenharmony_ci	/*
192962306a36Sopenharmony_ci	 * If we have "len", the we will split in the tail and move it
193062306a36Sopenharmony_ci	 * to the end of the space we have just spared.
193162306a36Sopenharmony_ci	 */
193262306a36Sopenharmony_ci	if (len) {
193362306a36Sopenharmony_ci		tail_rec = &rf_list->rl_recs[index + recs_need];
193462306a36Sopenharmony_ci
193562306a36Sopenharmony_ci		memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec));
193662306a36Sopenharmony_ci		le64_add_cpu(&tail_rec->r_cpos,
193762306a36Sopenharmony_ci			     le32_to_cpu(tail_rec->r_clusters) - len);
193862306a36Sopenharmony_ci		tail_rec->r_clusters = cpu_to_le32(len);
193962306a36Sopenharmony_ci	}
194062306a36Sopenharmony_ci
194162306a36Sopenharmony_ci	/*
194262306a36Sopenharmony_ci	 * If the split pos isn't the same as the original one, we need to
194362306a36Sopenharmony_ci	 * split in the head.
194462306a36Sopenharmony_ci	 *
194562306a36Sopenharmony_ci	 * Note: We have the chance that split_rec.r_refcount = 0,
194662306a36Sopenharmony_ci	 * recs_need = 0 and len > 0, which means we just cut the head from
194762306a36Sopenharmony_ci	 * the orig_rec and in that case we have done some modification in
194862306a36Sopenharmony_ci	 * orig_rec above, so the check for r_cpos is faked.
194962306a36Sopenharmony_ci	 */
195062306a36Sopenharmony_ci	if (split_rec->r_cpos != orig_rec->r_cpos && tail_rec != orig_rec) {
195162306a36Sopenharmony_ci		len = le64_to_cpu(split_rec->r_cpos) -
195262306a36Sopenharmony_ci		      le64_to_cpu(orig_rec->r_cpos);
195362306a36Sopenharmony_ci		orig_rec->r_clusters = cpu_to_le32(len);
195462306a36Sopenharmony_ci		index++;
195562306a36Sopenharmony_ci	}
195662306a36Sopenharmony_ci
195762306a36Sopenharmony_ci	le16_add_cpu(&rf_list->rl_used, recs_need);
195862306a36Sopenharmony_ci
195962306a36Sopenharmony_ci	if (split_rec->r_refcount) {
196062306a36Sopenharmony_ci		rf_list->rl_recs[index] = *split_rec;
196162306a36Sopenharmony_ci		trace_ocfs2_split_refcount_rec_insert(
196262306a36Sopenharmony_ci			(unsigned long long)ref_leaf_bh->b_blocknr, index,
196362306a36Sopenharmony_ci			(unsigned long long)le64_to_cpu(split_rec->r_cpos),
196462306a36Sopenharmony_ci			le32_to_cpu(split_rec->r_clusters),
196562306a36Sopenharmony_ci			le32_to_cpu(split_rec->r_refcount));
196662306a36Sopenharmony_ci
196762306a36Sopenharmony_ci		if (merge)
196862306a36Sopenharmony_ci			ocfs2_refcount_rec_merge(rb, index);
196962306a36Sopenharmony_ci	}
197062306a36Sopenharmony_ci
197162306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, ref_leaf_bh);
197262306a36Sopenharmony_ci
197362306a36Sopenharmony_ciout:
197462306a36Sopenharmony_ci	brelse(new_bh);
197562306a36Sopenharmony_ci	return ret;
197662306a36Sopenharmony_ci}
197762306a36Sopenharmony_ci
197862306a36Sopenharmony_cistatic int __ocfs2_increase_refcount(handle_t *handle,
197962306a36Sopenharmony_ci				     struct ocfs2_caching_info *ci,
198062306a36Sopenharmony_ci				     struct buffer_head *ref_root_bh,
198162306a36Sopenharmony_ci				     u64 cpos, u32 len, int merge,
198262306a36Sopenharmony_ci				     struct ocfs2_alloc_context *meta_ac,
198362306a36Sopenharmony_ci				     struct ocfs2_cached_dealloc_ctxt *dealloc)
198462306a36Sopenharmony_ci{
198562306a36Sopenharmony_ci	int ret = 0, index;
198662306a36Sopenharmony_ci	struct buffer_head *ref_leaf_bh = NULL;
198762306a36Sopenharmony_ci	struct ocfs2_refcount_rec rec;
198862306a36Sopenharmony_ci	unsigned int set_len = 0;
198962306a36Sopenharmony_ci
199062306a36Sopenharmony_ci	trace_ocfs2_increase_refcount_begin(
199162306a36Sopenharmony_ci	     (unsigned long long)ocfs2_metadata_cache_owner(ci),
199262306a36Sopenharmony_ci	     (unsigned long long)cpos, len);
199362306a36Sopenharmony_ci
199462306a36Sopenharmony_ci	while (len) {
199562306a36Sopenharmony_ci		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
199662306a36Sopenharmony_ci					     cpos, len, &rec, &index,
199762306a36Sopenharmony_ci					     &ref_leaf_bh);
199862306a36Sopenharmony_ci		if (ret) {
199962306a36Sopenharmony_ci			mlog_errno(ret);
200062306a36Sopenharmony_ci			goto out;
200162306a36Sopenharmony_ci		}
200262306a36Sopenharmony_ci
200362306a36Sopenharmony_ci		set_len = le32_to_cpu(rec.r_clusters);
200462306a36Sopenharmony_ci
200562306a36Sopenharmony_ci		/*
200662306a36Sopenharmony_ci		 * Here we may meet with 3 situations:
200762306a36Sopenharmony_ci		 *
200862306a36Sopenharmony_ci		 * 1. If we find an already existing record, and the length
200962306a36Sopenharmony_ci		 *    is the same, cool, we just need to increase the r_refcount
201062306a36Sopenharmony_ci		 *    and it is OK.
201162306a36Sopenharmony_ci		 * 2. If we find a hole, just insert it with r_refcount = 1.
201262306a36Sopenharmony_ci		 * 3. If we are in the middle of one extent record, split
201362306a36Sopenharmony_ci		 *    it.
201462306a36Sopenharmony_ci		 */
201562306a36Sopenharmony_ci		if (rec.r_refcount && le64_to_cpu(rec.r_cpos) == cpos &&
201662306a36Sopenharmony_ci		    set_len <= len) {
201762306a36Sopenharmony_ci			trace_ocfs2_increase_refcount_change(
201862306a36Sopenharmony_ci				(unsigned long long)cpos, set_len,
201962306a36Sopenharmony_ci				le32_to_cpu(rec.r_refcount));
202062306a36Sopenharmony_ci			ret = ocfs2_change_refcount_rec(handle, ci,
202162306a36Sopenharmony_ci							ref_leaf_bh, index,
202262306a36Sopenharmony_ci							merge, 1);
202362306a36Sopenharmony_ci			if (ret) {
202462306a36Sopenharmony_ci				mlog_errno(ret);
202562306a36Sopenharmony_ci				goto out;
202662306a36Sopenharmony_ci			}
202762306a36Sopenharmony_ci		} else if (!rec.r_refcount) {
202862306a36Sopenharmony_ci			rec.r_refcount = cpu_to_le32(1);
202962306a36Sopenharmony_ci
203062306a36Sopenharmony_ci			trace_ocfs2_increase_refcount_insert(
203162306a36Sopenharmony_ci			     (unsigned long long)le64_to_cpu(rec.r_cpos),
203262306a36Sopenharmony_ci			     set_len);
203362306a36Sopenharmony_ci			ret = ocfs2_insert_refcount_rec(handle, ci, ref_root_bh,
203462306a36Sopenharmony_ci							ref_leaf_bh,
203562306a36Sopenharmony_ci							&rec, index,
203662306a36Sopenharmony_ci							merge, meta_ac);
203762306a36Sopenharmony_ci			if (ret) {
203862306a36Sopenharmony_ci				mlog_errno(ret);
203962306a36Sopenharmony_ci				goto out;
204062306a36Sopenharmony_ci			}
204162306a36Sopenharmony_ci		} else  {
204262306a36Sopenharmony_ci			set_len = min((u64)(cpos + len),
204362306a36Sopenharmony_ci				      le64_to_cpu(rec.r_cpos) + set_len) - cpos;
204462306a36Sopenharmony_ci			rec.r_cpos = cpu_to_le64(cpos);
204562306a36Sopenharmony_ci			rec.r_clusters = cpu_to_le32(set_len);
204662306a36Sopenharmony_ci			le32_add_cpu(&rec.r_refcount, 1);
204762306a36Sopenharmony_ci
204862306a36Sopenharmony_ci			trace_ocfs2_increase_refcount_split(
204962306a36Sopenharmony_ci			     (unsigned long long)le64_to_cpu(rec.r_cpos),
205062306a36Sopenharmony_ci			     set_len, le32_to_cpu(rec.r_refcount));
205162306a36Sopenharmony_ci			ret = ocfs2_split_refcount_rec(handle, ci,
205262306a36Sopenharmony_ci						       ref_root_bh, ref_leaf_bh,
205362306a36Sopenharmony_ci						       &rec, index, merge,
205462306a36Sopenharmony_ci						       meta_ac, dealloc);
205562306a36Sopenharmony_ci			if (ret) {
205662306a36Sopenharmony_ci				mlog_errno(ret);
205762306a36Sopenharmony_ci				goto out;
205862306a36Sopenharmony_ci			}
205962306a36Sopenharmony_ci		}
206062306a36Sopenharmony_ci
206162306a36Sopenharmony_ci		cpos += set_len;
206262306a36Sopenharmony_ci		len -= set_len;
206362306a36Sopenharmony_ci		brelse(ref_leaf_bh);
206462306a36Sopenharmony_ci		ref_leaf_bh = NULL;
206562306a36Sopenharmony_ci	}
206662306a36Sopenharmony_ci
206762306a36Sopenharmony_ciout:
206862306a36Sopenharmony_ci	brelse(ref_leaf_bh);
206962306a36Sopenharmony_ci	return ret;
207062306a36Sopenharmony_ci}
207162306a36Sopenharmony_ci
207262306a36Sopenharmony_cistatic int ocfs2_remove_refcount_extent(handle_t *handle,
207362306a36Sopenharmony_ci				struct ocfs2_caching_info *ci,
207462306a36Sopenharmony_ci				struct buffer_head *ref_root_bh,
207562306a36Sopenharmony_ci				struct buffer_head *ref_leaf_bh,
207662306a36Sopenharmony_ci				struct ocfs2_alloc_context *meta_ac,
207762306a36Sopenharmony_ci				struct ocfs2_cached_dealloc_ctxt *dealloc)
207862306a36Sopenharmony_ci{
207962306a36Sopenharmony_ci	int ret;
208062306a36Sopenharmony_ci	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
208162306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb =
208262306a36Sopenharmony_ci			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
208362306a36Sopenharmony_ci	struct ocfs2_extent_tree et;
208462306a36Sopenharmony_ci
208562306a36Sopenharmony_ci	BUG_ON(rb->rf_records.rl_used);
208662306a36Sopenharmony_ci
208762306a36Sopenharmony_ci	trace_ocfs2_remove_refcount_extent(
208862306a36Sopenharmony_ci		(unsigned long long)ocfs2_metadata_cache_owner(ci),
208962306a36Sopenharmony_ci		(unsigned long long)ref_leaf_bh->b_blocknr,
209062306a36Sopenharmony_ci		le32_to_cpu(rb->rf_cpos));
209162306a36Sopenharmony_ci
209262306a36Sopenharmony_ci	ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
209362306a36Sopenharmony_ci	ret = ocfs2_remove_extent(handle, &et, le32_to_cpu(rb->rf_cpos),
209462306a36Sopenharmony_ci				  1, meta_ac, dealloc);
209562306a36Sopenharmony_ci	if (ret) {
209662306a36Sopenharmony_ci		mlog_errno(ret);
209762306a36Sopenharmony_ci		goto out;
209862306a36Sopenharmony_ci	}
209962306a36Sopenharmony_ci
210062306a36Sopenharmony_ci	ocfs2_remove_from_cache(ci, ref_leaf_bh);
210162306a36Sopenharmony_ci
210262306a36Sopenharmony_ci	/*
210362306a36Sopenharmony_ci	 * add the freed block to the dealloc so that it will be freed
210462306a36Sopenharmony_ci	 * when we run dealloc.
210562306a36Sopenharmony_ci	 */
210662306a36Sopenharmony_ci	ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE,
210762306a36Sopenharmony_ci					le16_to_cpu(rb->rf_suballoc_slot),
210862306a36Sopenharmony_ci					le64_to_cpu(rb->rf_suballoc_loc),
210962306a36Sopenharmony_ci					le64_to_cpu(rb->rf_blkno),
211062306a36Sopenharmony_ci					le16_to_cpu(rb->rf_suballoc_bit));
211162306a36Sopenharmony_ci	if (ret) {
211262306a36Sopenharmony_ci		mlog_errno(ret);
211362306a36Sopenharmony_ci		goto out;
211462306a36Sopenharmony_ci	}
211562306a36Sopenharmony_ci
211662306a36Sopenharmony_ci	ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
211762306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
211862306a36Sopenharmony_ci	if (ret) {
211962306a36Sopenharmony_ci		mlog_errno(ret);
212062306a36Sopenharmony_ci		goto out;
212162306a36Sopenharmony_ci	}
212262306a36Sopenharmony_ci
212362306a36Sopenharmony_ci	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
212462306a36Sopenharmony_ci
212562306a36Sopenharmony_ci	le32_add_cpu(&rb->rf_clusters, -1);
212662306a36Sopenharmony_ci
212762306a36Sopenharmony_ci	/*
212862306a36Sopenharmony_ci	 * check whether we need to restore the root refcount block if
212962306a36Sopenharmony_ci	 * there is no leaf extent block at atll.
213062306a36Sopenharmony_ci	 */
213162306a36Sopenharmony_ci	if (!rb->rf_list.l_next_free_rec) {
213262306a36Sopenharmony_ci		BUG_ON(rb->rf_clusters);
213362306a36Sopenharmony_ci
213462306a36Sopenharmony_ci		trace_ocfs2_restore_refcount_block(
213562306a36Sopenharmony_ci		     (unsigned long long)ref_root_bh->b_blocknr);
213662306a36Sopenharmony_ci
213762306a36Sopenharmony_ci		rb->rf_flags = 0;
213862306a36Sopenharmony_ci		rb->rf_parent = 0;
213962306a36Sopenharmony_ci		rb->rf_cpos = 0;
214062306a36Sopenharmony_ci		memset(&rb->rf_records, 0, sb->s_blocksize -
214162306a36Sopenharmony_ci		       offsetof(struct ocfs2_refcount_block, rf_records));
214262306a36Sopenharmony_ci		rb->rf_records.rl_count =
214362306a36Sopenharmony_ci				cpu_to_le16(ocfs2_refcount_recs_per_rb(sb));
214462306a36Sopenharmony_ci	}
214562306a36Sopenharmony_ci
214662306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, ref_root_bh);
214762306a36Sopenharmony_ci
214862306a36Sopenharmony_ciout:
214962306a36Sopenharmony_ci	return ret;
215062306a36Sopenharmony_ci}
215162306a36Sopenharmony_ci
215262306a36Sopenharmony_ciint ocfs2_increase_refcount(handle_t *handle,
215362306a36Sopenharmony_ci			    struct ocfs2_caching_info *ci,
215462306a36Sopenharmony_ci			    struct buffer_head *ref_root_bh,
215562306a36Sopenharmony_ci			    u64 cpos, u32 len,
215662306a36Sopenharmony_ci			    struct ocfs2_alloc_context *meta_ac,
215762306a36Sopenharmony_ci			    struct ocfs2_cached_dealloc_ctxt *dealloc)
215862306a36Sopenharmony_ci{
215962306a36Sopenharmony_ci	return __ocfs2_increase_refcount(handle, ci, ref_root_bh,
216062306a36Sopenharmony_ci					 cpos, len, 1,
216162306a36Sopenharmony_ci					 meta_ac, dealloc);
216262306a36Sopenharmony_ci}
216362306a36Sopenharmony_ci
216462306a36Sopenharmony_cistatic int ocfs2_decrease_refcount_rec(handle_t *handle,
216562306a36Sopenharmony_ci				struct ocfs2_caching_info *ci,
216662306a36Sopenharmony_ci				struct buffer_head *ref_root_bh,
216762306a36Sopenharmony_ci				struct buffer_head *ref_leaf_bh,
216862306a36Sopenharmony_ci				int index, u64 cpos, unsigned int len,
216962306a36Sopenharmony_ci				struct ocfs2_alloc_context *meta_ac,
217062306a36Sopenharmony_ci				struct ocfs2_cached_dealloc_ctxt *dealloc)
217162306a36Sopenharmony_ci{
217262306a36Sopenharmony_ci	int ret;
217362306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb =
217462306a36Sopenharmony_ci			(struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
217562306a36Sopenharmony_ci	struct ocfs2_refcount_rec *rec = &rb->rf_records.rl_recs[index];
217662306a36Sopenharmony_ci
217762306a36Sopenharmony_ci	BUG_ON(cpos < le64_to_cpu(rec->r_cpos));
217862306a36Sopenharmony_ci	BUG_ON(cpos + len >
217962306a36Sopenharmony_ci	       le64_to_cpu(rec->r_cpos) + le32_to_cpu(rec->r_clusters));
218062306a36Sopenharmony_ci
218162306a36Sopenharmony_ci	trace_ocfs2_decrease_refcount_rec(
218262306a36Sopenharmony_ci		(unsigned long long)ocfs2_metadata_cache_owner(ci),
218362306a36Sopenharmony_ci		(unsigned long long)cpos, len);
218462306a36Sopenharmony_ci
218562306a36Sopenharmony_ci	if (cpos == le64_to_cpu(rec->r_cpos) &&
218662306a36Sopenharmony_ci	    len == le32_to_cpu(rec->r_clusters))
218762306a36Sopenharmony_ci		ret = ocfs2_change_refcount_rec(handle, ci,
218862306a36Sopenharmony_ci						ref_leaf_bh, index, 1, -1);
218962306a36Sopenharmony_ci	else {
219062306a36Sopenharmony_ci		struct ocfs2_refcount_rec split = *rec;
219162306a36Sopenharmony_ci		split.r_cpos = cpu_to_le64(cpos);
219262306a36Sopenharmony_ci		split.r_clusters = cpu_to_le32(len);
219362306a36Sopenharmony_ci
219462306a36Sopenharmony_ci		le32_add_cpu(&split.r_refcount, -1);
219562306a36Sopenharmony_ci
219662306a36Sopenharmony_ci		ret = ocfs2_split_refcount_rec(handle, ci,
219762306a36Sopenharmony_ci					       ref_root_bh, ref_leaf_bh,
219862306a36Sopenharmony_ci					       &split, index, 1,
219962306a36Sopenharmony_ci					       meta_ac, dealloc);
220062306a36Sopenharmony_ci	}
220162306a36Sopenharmony_ci
220262306a36Sopenharmony_ci	if (ret) {
220362306a36Sopenharmony_ci		mlog_errno(ret);
220462306a36Sopenharmony_ci		goto out;
220562306a36Sopenharmony_ci	}
220662306a36Sopenharmony_ci
220762306a36Sopenharmony_ci	/* Remove the leaf refcount block if it contains no refcount record. */
220862306a36Sopenharmony_ci	if (!rb->rf_records.rl_used && ref_leaf_bh != ref_root_bh) {
220962306a36Sopenharmony_ci		ret = ocfs2_remove_refcount_extent(handle, ci, ref_root_bh,
221062306a36Sopenharmony_ci						   ref_leaf_bh, meta_ac,
221162306a36Sopenharmony_ci						   dealloc);
221262306a36Sopenharmony_ci		if (ret)
221362306a36Sopenharmony_ci			mlog_errno(ret);
221462306a36Sopenharmony_ci	}
221562306a36Sopenharmony_ci
221662306a36Sopenharmony_ciout:
221762306a36Sopenharmony_ci	return ret;
221862306a36Sopenharmony_ci}
221962306a36Sopenharmony_ci
222062306a36Sopenharmony_cistatic int __ocfs2_decrease_refcount(handle_t *handle,
222162306a36Sopenharmony_ci				     struct ocfs2_caching_info *ci,
222262306a36Sopenharmony_ci				     struct buffer_head *ref_root_bh,
222362306a36Sopenharmony_ci				     u64 cpos, u32 len,
222462306a36Sopenharmony_ci				     struct ocfs2_alloc_context *meta_ac,
222562306a36Sopenharmony_ci				     struct ocfs2_cached_dealloc_ctxt *dealloc,
222662306a36Sopenharmony_ci				     int delete)
222762306a36Sopenharmony_ci{
222862306a36Sopenharmony_ci	int ret = 0, index = 0;
222962306a36Sopenharmony_ci	struct ocfs2_refcount_rec rec;
223062306a36Sopenharmony_ci	unsigned int r_count = 0, r_len;
223162306a36Sopenharmony_ci	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
223262306a36Sopenharmony_ci	struct buffer_head *ref_leaf_bh = NULL;
223362306a36Sopenharmony_ci
223462306a36Sopenharmony_ci	trace_ocfs2_decrease_refcount(
223562306a36Sopenharmony_ci		(unsigned long long)ocfs2_metadata_cache_owner(ci),
223662306a36Sopenharmony_ci		(unsigned long long)cpos, len, delete);
223762306a36Sopenharmony_ci
223862306a36Sopenharmony_ci	while (len) {
223962306a36Sopenharmony_ci		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
224062306a36Sopenharmony_ci					     cpos, len, &rec, &index,
224162306a36Sopenharmony_ci					     &ref_leaf_bh);
224262306a36Sopenharmony_ci		if (ret) {
224362306a36Sopenharmony_ci			mlog_errno(ret);
224462306a36Sopenharmony_ci			goto out;
224562306a36Sopenharmony_ci		}
224662306a36Sopenharmony_ci
224762306a36Sopenharmony_ci		r_count = le32_to_cpu(rec.r_refcount);
224862306a36Sopenharmony_ci		BUG_ON(r_count == 0);
224962306a36Sopenharmony_ci		if (!delete)
225062306a36Sopenharmony_ci			BUG_ON(r_count > 1);
225162306a36Sopenharmony_ci
225262306a36Sopenharmony_ci		r_len = min((u64)(cpos + len), le64_to_cpu(rec.r_cpos) +
225362306a36Sopenharmony_ci			      le32_to_cpu(rec.r_clusters)) - cpos;
225462306a36Sopenharmony_ci
225562306a36Sopenharmony_ci		ret = ocfs2_decrease_refcount_rec(handle, ci, ref_root_bh,
225662306a36Sopenharmony_ci						  ref_leaf_bh, index,
225762306a36Sopenharmony_ci						  cpos, r_len,
225862306a36Sopenharmony_ci						  meta_ac, dealloc);
225962306a36Sopenharmony_ci		if (ret) {
226062306a36Sopenharmony_ci			mlog_errno(ret);
226162306a36Sopenharmony_ci			goto out;
226262306a36Sopenharmony_ci		}
226362306a36Sopenharmony_ci
226462306a36Sopenharmony_ci		if (le32_to_cpu(rec.r_refcount) == 1 && delete) {
226562306a36Sopenharmony_ci			ret = ocfs2_cache_cluster_dealloc(dealloc,
226662306a36Sopenharmony_ci					  ocfs2_clusters_to_blocks(sb, cpos),
226762306a36Sopenharmony_ci							  r_len);
226862306a36Sopenharmony_ci			if (ret) {
226962306a36Sopenharmony_ci				mlog_errno(ret);
227062306a36Sopenharmony_ci				goto out;
227162306a36Sopenharmony_ci			}
227262306a36Sopenharmony_ci		}
227362306a36Sopenharmony_ci
227462306a36Sopenharmony_ci		cpos += r_len;
227562306a36Sopenharmony_ci		len -= r_len;
227662306a36Sopenharmony_ci		brelse(ref_leaf_bh);
227762306a36Sopenharmony_ci		ref_leaf_bh = NULL;
227862306a36Sopenharmony_ci	}
227962306a36Sopenharmony_ci
228062306a36Sopenharmony_ciout:
228162306a36Sopenharmony_ci	brelse(ref_leaf_bh);
228262306a36Sopenharmony_ci	return ret;
228362306a36Sopenharmony_ci}
228462306a36Sopenharmony_ci
228562306a36Sopenharmony_ci/* Caller must hold refcount tree lock. */
228662306a36Sopenharmony_ciint ocfs2_decrease_refcount(struct inode *inode,
228762306a36Sopenharmony_ci			    handle_t *handle, u32 cpos, u32 len,
228862306a36Sopenharmony_ci			    struct ocfs2_alloc_context *meta_ac,
228962306a36Sopenharmony_ci			    struct ocfs2_cached_dealloc_ctxt *dealloc,
229062306a36Sopenharmony_ci			    int delete)
229162306a36Sopenharmony_ci{
229262306a36Sopenharmony_ci	int ret;
229362306a36Sopenharmony_ci	u64 ref_blkno;
229462306a36Sopenharmony_ci	struct buffer_head *ref_root_bh = NULL;
229562306a36Sopenharmony_ci	struct ocfs2_refcount_tree *tree;
229662306a36Sopenharmony_ci
229762306a36Sopenharmony_ci	BUG_ON(!ocfs2_is_refcount_inode(inode));
229862306a36Sopenharmony_ci
229962306a36Sopenharmony_ci	ret = ocfs2_get_refcount_block(inode, &ref_blkno);
230062306a36Sopenharmony_ci	if (ret) {
230162306a36Sopenharmony_ci		mlog_errno(ret);
230262306a36Sopenharmony_ci		goto out;
230362306a36Sopenharmony_ci	}
230462306a36Sopenharmony_ci
230562306a36Sopenharmony_ci	ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, &tree);
230662306a36Sopenharmony_ci	if (ret) {
230762306a36Sopenharmony_ci		mlog_errno(ret);
230862306a36Sopenharmony_ci		goto out;
230962306a36Sopenharmony_ci	}
231062306a36Sopenharmony_ci
231162306a36Sopenharmony_ci	ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
231262306a36Sopenharmony_ci					&ref_root_bh);
231362306a36Sopenharmony_ci	if (ret) {
231462306a36Sopenharmony_ci		mlog_errno(ret);
231562306a36Sopenharmony_ci		goto out;
231662306a36Sopenharmony_ci	}
231762306a36Sopenharmony_ci
231862306a36Sopenharmony_ci	ret = __ocfs2_decrease_refcount(handle, &tree->rf_ci, ref_root_bh,
231962306a36Sopenharmony_ci					cpos, len, meta_ac, dealloc, delete);
232062306a36Sopenharmony_ci	if (ret)
232162306a36Sopenharmony_ci		mlog_errno(ret);
232262306a36Sopenharmony_ciout:
232362306a36Sopenharmony_ci	brelse(ref_root_bh);
232462306a36Sopenharmony_ci	return ret;
232562306a36Sopenharmony_ci}
232662306a36Sopenharmony_ci
232762306a36Sopenharmony_ci/*
232862306a36Sopenharmony_ci * Mark the already-existing extent at cpos as refcounted for len clusters.
232962306a36Sopenharmony_ci * This adds the refcount extent flag.
233062306a36Sopenharmony_ci *
233162306a36Sopenharmony_ci * If the existing extent is larger than the request, initiate a
233262306a36Sopenharmony_ci * split. An attempt will be made at merging with adjacent extents.
233362306a36Sopenharmony_ci *
233462306a36Sopenharmony_ci * The caller is responsible for passing down meta_ac if we'll need it.
233562306a36Sopenharmony_ci */
233662306a36Sopenharmony_cistatic int ocfs2_mark_extent_refcounted(struct inode *inode,
233762306a36Sopenharmony_ci				struct ocfs2_extent_tree *et,
233862306a36Sopenharmony_ci				handle_t *handle, u32 cpos,
233962306a36Sopenharmony_ci				u32 len, u32 phys,
234062306a36Sopenharmony_ci				struct ocfs2_alloc_context *meta_ac,
234162306a36Sopenharmony_ci				struct ocfs2_cached_dealloc_ctxt *dealloc)
234262306a36Sopenharmony_ci{
234362306a36Sopenharmony_ci	int ret;
234462306a36Sopenharmony_ci
234562306a36Sopenharmony_ci	trace_ocfs2_mark_extent_refcounted(OCFS2_I(inode)->ip_blkno,
234662306a36Sopenharmony_ci					   cpos, len, phys);
234762306a36Sopenharmony_ci
234862306a36Sopenharmony_ci	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
234962306a36Sopenharmony_ci		ret = ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n",
235062306a36Sopenharmony_ci				  inode->i_ino);
235162306a36Sopenharmony_ci		goto out;
235262306a36Sopenharmony_ci	}
235362306a36Sopenharmony_ci
235462306a36Sopenharmony_ci	ret = ocfs2_change_extent_flag(handle, et, cpos,
235562306a36Sopenharmony_ci				       len, phys, meta_ac, dealloc,
235662306a36Sopenharmony_ci				       OCFS2_EXT_REFCOUNTED, 0);
235762306a36Sopenharmony_ci	if (ret)
235862306a36Sopenharmony_ci		mlog_errno(ret);
235962306a36Sopenharmony_ci
236062306a36Sopenharmony_ciout:
236162306a36Sopenharmony_ci	return ret;
236262306a36Sopenharmony_ci}
236362306a36Sopenharmony_ci
236462306a36Sopenharmony_ci/*
236562306a36Sopenharmony_ci * Given some contiguous physical clusters, calculate what we need
236662306a36Sopenharmony_ci * for modifying their refcount.
236762306a36Sopenharmony_ci */
236862306a36Sopenharmony_cistatic int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
236962306a36Sopenharmony_ci					    struct ocfs2_caching_info *ci,
237062306a36Sopenharmony_ci					    struct buffer_head *ref_root_bh,
237162306a36Sopenharmony_ci					    u64 start_cpos,
237262306a36Sopenharmony_ci					    u32 clusters,
237362306a36Sopenharmony_ci					    int *meta_add,
237462306a36Sopenharmony_ci					    int *credits)
237562306a36Sopenharmony_ci{
237662306a36Sopenharmony_ci	int ret = 0, index, ref_blocks = 0, recs_add = 0;
237762306a36Sopenharmony_ci	u64 cpos = start_cpos;
237862306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb;
237962306a36Sopenharmony_ci	struct ocfs2_refcount_rec rec;
238062306a36Sopenharmony_ci	struct buffer_head *ref_leaf_bh = NULL, *prev_bh = NULL;
238162306a36Sopenharmony_ci	u32 len;
238262306a36Sopenharmony_ci
238362306a36Sopenharmony_ci	while (clusters) {
238462306a36Sopenharmony_ci		ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
238562306a36Sopenharmony_ci					     cpos, clusters, &rec,
238662306a36Sopenharmony_ci					     &index, &ref_leaf_bh);
238762306a36Sopenharmony_ci		if (ret) {
238862306a36Sopenharmony_ci			mlog_errno(ret);
238962306a36Sopenharmony_ci			goto out;
239062306a36Sopenharmony_ci		}
239162306a36Sopenharmony_ci
239262306a36Sopenharmony_ci		if (ref_leaf_bh != prev_bh) {
239362306a36Sopenharmony_ci			/*
239462306a36Sopenharmony_ci			 * Now we encounter a new leaf block, so calculate
239562306a36Sopenharmony_ci			 * whether we need to extend the old leaf.
239662306a36Sopenharmony_ci			 */
239762306a36Sopenharmony_ci			if (prev_bh) {
239862306a36Sopenharmony_ci				rb = (struct ocfs2_refcount_block *)
239962306a36Sopenharmony_ci							prev_bh->b_data;
240062306a36Sopenharmony_ci
240162306a36Sopenharmony_ci				if (le16_to_cpu(rb->rf_records.rl_used) +
240262306a36Sopenharmony_ci				    recs_add >
240362306a36Sopenharmony_ci				    le16_to_cpu(rb->rf_records.rl_count))
240462306a36Sopenharmony_ci					ref_blocks++;
240562306a36Sopenharmony_ci			}
240662306a36Sopenharmony_ci
240762306a36Sopenharmony_ci			recs_add = 0;
240862306a36Sopenharmony_ci			*credits += 1;
240962306a36Sopenharmony_ci			brelse(prev_bh);
241062306a36Sopenharmony_ci			prev_bh = ref_leaf_bh;
241162306a36Sopenharmony_ci			get_bh(prev_bh);
241262306a36Sopenharmony_ci		}
241362306a36Sopenharmony_ci
241462306a36Sopenharmony_ci		trace_ocfs2_calc_refcount_meta_credits_iterate(
241562306a36Sopenharmony_ci				recs_add, (unsigned long long)cpos, clusters,
241662306a36Sopenharmony_ci				(unsigned long long)le64_to_cpu(rec.r_cpos),
241762306a36Sopenharmony_ci				le32_to_cpu(rec.r_clusters),
241862306a36Sopenharmony_ci				le32_to_cpu(rec.r_refcount), index);
241962306a36Sopenharmony_ci
242062306a36Sopenharmony_ci		len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) +
242162306a36Sopenharmony_ci			  le32_to_cpu(rec.r_clusters)) - cpos;
242262306a36Sopenharmony_ci		/*
242362306a36Sopenharmony_ci		 * We record all the records which will be inserted to the
242462306a36Sopenharmony_ci		 * same refcount block, so that we can tell exactly whether
242562306a36Sopenharmony_ci		 * we need a new refcount block or not.
242662306a36Sopenharmony_ci		 *
242762306a36Sopenharmony_ci		 * If we will insert a new one, this is easy and only happens
242862306a36Sopenharmony_ci		 * during adding refcounted flag to the extent, so we don't
242962306a36Sopenharmony_ci		 * have a chance of spliting. We just need one record.
243062306a36Sopenharmony_ci		 *
243162306a36Sopenharmony_ci		 * If the refcount rec already exists, that would be a little
243262306a36Sopenharmony_ci		 * complicated. we may have to:
243362306a36Sopenharmony_ci		 * 1) split at the beginning if the start pos isn't aligned.
243462306a36Sopenharmony_ci		 *    we need 1 more record in this case.
243562306a36Sopenharmony_ci		 * 2) split int the end if the end pos isn't aligned.
243662306a36Sopenharmony_ci		 *    we need 1 more record in this case.
243762306a36Sopenharmony_ci		 * 3) split in the middle because of file system fragmentation.
243862306a36Sopenharmony_ci		 *    we need 2 more records in this case(we can't detect this
243962306a36Sopenharmony_ci		 *    beforehand, so always think of the worst case).
244062306a36Sopenharmony_ci		 */
244162306a36Sopenharmony_ci		if (rec.r_refcount) {
244262306a36Sopenharmony_ci			recs_add += 2;
244362306a36Sopenharmony_ci			/* Check whether we need a split at the beginning. */
244462306a36Sopenharmony_ci			if (cpos == start_cpos &&
244562306a36Sopenharmony_ci			    cpos != le64_to_cpu(rec.r_cpos))
244662306a36Sopenharmony_ci				recs_add++;
244762306a36Sopenharmony_ci
244862306a36Sopenharmony_ci			/* Check whether we need a split in the end. */
244962306a36Sopenharmony_ci			if (cpos + clusters < le64_to_cpu(rec.r_cpos) +
245062306a36Sopenharmony_ci			    le32_to_cpu(rec.r_clusters))
245162306a36Sopenharmony_ci				recs_add++;
245262306a36Sopenharmony_ci		} else
245362306a36Sopenharmony_ci			recs_add++;
245462306a36Sopenharmony_ci
245562306a36Sopenharmony_ci		brelse(ref_leaf_bh);
245662306a36Sopenharmony_ci		ref_leaf_bh = NULL;
245762306a36Sopenharmony_ci		clusters -= len;
245862306a36Sopenharmony_ci		cpos += len;
245962306a36Sopenharmony_ci	}
246062306a36Sopenharmony_ci
246162306a36Sopenharmony_ci	if (prev_bh) {
246262306a36Sopenharmony_ci		rb = (struct ocfs2_refcount_block *)prev_bh->b_data;
246362306a36Sopenharmony_ci
246462306a36Sopenharmony_ci		if (le16_to_cpu(rb->rf_records.rl_used) + recs_add >
246562306a36Sopenharmony_ci		    le16_to_cpu(rb->rf_records.rl_count))
246662306a36Sopenharmony_ci			ref_blocks++;
246762306a36Sopenharmony_ci
246862306a36Sopenharmony_ci		*credits += 1;
246962306a36Sopenharmony_ci	}
247062306a36Sopenharmony_ci
247162306a36Sopenharmony_ci	if (!ref_blocks)
247262306a36Sopenharmony_ci		goto out;
247362306a36Sopenharmony_ci
247462306a36Sopenharmony_ci	*meta_add += ref_blocks;
247562306a36Sopenharmony_ci	*credits += ref_blocks;
247662306a36Sopenharmony_ci
247762306a36Sopenharmony_ci	/*
247862306a36Sopenharmony_ci	 * So we may need ref_blocks to insert into the tree.
247962306a36Sopenharmony_ci	 * That also means we need to change the b-tree and add that number
248062306a36Sopenharmony_ci	 * of records since we never merge them.
248162306a36Sopenharmony_ci	 * We need one more block for expansion since the new created leaf
248262306a36Sopenharmony_ci	 * block is also full and needs split.
248362306a36Sopenharmony_ci	 */
248462306a36Sopenharmony_ci	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
248562306a36Sopenharmony_ci	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) {
248662306a36Sopenharmony_ci		struct ocfs2_extent_tree et;
248762306a36Sopenharmony_ci
248862306a36Sopenharmony_ci		ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
248962306a36Sopenharmony_ci		*meta_add += ocfs2_extend_meta_needed(et.et_root_el);
249062306a36Sopenharmony_ci		*credits += ocfs2_calc_extend_credits(sb,
249162306a36Sopenharmony_ci						      et.et_root_el);
249262306a36Sopenharmony_ci	} else {
249362306a36Sopenharmony_ci		*credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
249462306a36Sopenharmony_ci		*meta_add += 1;
249562306a36Sopenharmony_ci	}
249662306a36Sopenharmony_ci
249762306a36Sopenharmony_ciout:
249862306a36Sopenharmony_ci
249962306a36Sopenharmony_ci	trace_ocfs2_calc_refcount_meta_credits(
250062306a36Sopenharmony_ci		(unsigned long long)start_cpos, clusters,
250162306a36Sopenharmony_ci		*meta_add, *credits);
250262306a36Sopenharmony_ci	brelse(ref_leaf_bh);
250362306a36Sopenharmony_ci	brelse(prev_bh);
250462306a36Sopenharmony_ci	return ret;
250562306a36Sopenharmony_ci}
250662306a36Sopenharmony_ci
250762306a36Sopenharmony_ci/*
250862306a36Sopenharmony_ci * For refcount tree, we will decrease some contiguous clusters
250962306a36Sopenharmony_ci * refcount count, so just go through it to see how many blocks
251062306a36Sopenharmony_ci * we gonna touch and whether we need to create new blocks.
251162306a36Sopenharmony_ci *
251262306a36Sopenharmony_ci * Normally the refcount blocks store these refcount should be
251362306a36Sopenharmony_ci * contiguous also, so that we can get the number easily.
251462306a36Sopenharmony_ci * We will at most add split 2 refcount records and 2 more
251562306a36Sopenharmony_ci * refcount blocks, so just check it in a rough way.
251662306a36Sopenharmony_ci *
251762306a36Sopenharmony_ci * Caller must hold refcount tree lock.
251862306a36Sopenharmony_ci */
251962306a36Sopenharmony_ciint ocfs2_prepare_refcount_change_for_del(struct inode *inode,
252062306a36Sopenharmony_ci					  u64 refcount_loc,
252162306a36Sopenharmony_ci					  u64 phys_blkno,
252262306a36Sopenharmony_ci					  u32 clusters,
252362306a36Sopenharmony_ci					  int *credits,
252462306a36Sopenharmony_ci					  int *ref_blocks)
252562306a36Sopenharmony_ci{
252662306a36Sopenharmony_ci	int ret;
252762306a36Sopenharmony_ci	struct buffer_head *ref_root_bh = NULL;
252862306a36Sopenharmony_ci	struct ocfs2_refcount_tree *tree;
252962306a36Sopenharmony_ci	u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno);
253062306a36Sopenharmony_ci
253162306a36Sopenharmony_ci	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
253262306a36Sopenharmony_ci		ret = ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n",
253362306a36Sopenharmony_ci				  inode->i_ino);
253462306a36Sopenharmony_ci		goto out;
253562306a36Sopenharmony_ci	}
253662306a36Sopenharmony_ci
253762306a36Sopenharmony_ci	BUG_ON(!ocfs2_is_refcount_inode(inode));
253862306a36Sopenharmony_ci
253962306a36Sopenharmony_ci	ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
254062306a36Sopenharmony_ci				      refcount_loc, &tree);
254162306a36Sopenharmony_ci	if (ret) {
254262306a36Sopenharmony_ci		mlog_errno(ret);
254362306a36Sopenharmony_ci		goto out;
254462306a36Sopenharmony_ci	}
254562306a36Sopenharmony_ci
254662306a36Sopenharmony_ci	ret = ocfs2_read_refcount_block(&tree->rf_ci, refcount_loc,
254762306a36Sopenharmony_ci					&ref_root_bh);
254862306a36Sopenharmony_ci	if (ret) {
254962306a36Sopenharmony_ci		mlog_errno(ret);
255062306a36Sopenharmony_ci		goto out;
255162306a36Sopenharmony_ci	}
255262306a36Sopenharmony_ci
255362306a36Sopenharmony_ci	ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
255462306a36Sopenharmony_ci					       &tree->rf_ci,
255562306a36Sopenharmony_ci					       ref_root_bh,
255662306a36Sopenharmony_ci					       start_cpos, clusters,
255762306a36Sopenharmony_ci					       ref_blocks, credits);
255862306a36Sopenharmony_ci	if (ret) {
255962306a36Sopenharmony_ci		mlog_errno(ret);
256062306a36Sopenharmony_ci		goto out;
256162306a36Sopenharmony_ci	}
256262306a36Sopenharmony_ci
256362306a36Sopenharmony_ci	trace_ocfs2_prepare_refcount_change_for_del(*ref_blocks, *credits);
256462306a36Sopenharmony_ci
256562306a36Sopenharmony_ciout:
256662306a36Sopenharmony_ci	brelse(ref_root_bh);
256762306a36Sopenharmony_ci	return ret;
256862306a36Sopenharmony_ci}
256962306a36Sopenharmony_ci
/* Byte size (1 MiB) used as the contiguity unit for CoW regions. */
#define	MAX_CONTIG_BYTES	1048576

/*
 * Express MAX_CONTIG_BYTES as a cluster count for this super block, as
 * computed by ocfs2_clusters_for_bytes().
 */
static inline unsigned int ocfs2_cow_contig_clusters(struct super_block *sb)
{
	unsigned int nr_clusters = ocfs2_clusters_for_bytes(sb,
							    MAX_CONTIG_BYTES);

	return nr_clusters;
}
257662306a36Sopenharmony_ci
/*
 * Bitwise complement of (contig_clusters - 1).  ANDing a cluster count with
 * this rounds it down to a multiple of contig_clusters, assuming
 * contig_clusters is a power of two (not checked here).
 */
static inline unsigned int ocfs2_cow_contig_mask(struct super_block *sb)
{
	unsigned int contig = ocfs2_cow_contig_clusters(sb);

	return ~(contig - 1);
}
258162306a36Sopenharmony_ci
/*
 * For an extent beginning at 'start' and an I/O beginning at 'cpos', return
 * the offset of the form start + (n * contig_clusters) that lies closest to
 * cpos without exceeding it.
 *
 * This lets the caller break the extent at a multiple of contig_clusters.
 * It is a bug to call this with start > cpos.
 */
static inline unsigned int ocfs2_cow_align_start(struct super_block *sb,
						 unsigned int start,
						 unsigned int cpos)
{
	unsigned int delta;

	BUG_ON(start > cpos);

	/* Distance into the extent, rounded down by the contig mask. */
	delta = cpos - start;
	delta &= ocfs2_cow_contig_mask(sb);

	return start + delta;
}
259762306a36Sopenharmony_ci
259862306a36Sopenharmony_ci/*
259962306a36Sopenharmony_ci * Given a cluster count of len, pad it out so that it is a multiple
260062306a36Sopenharmony_ci * of contig_clusters.
260162306a36Sopenharmony_ci */
260262306a36Sopenharmony_cistatic inline unsigned int ocfs2_cow_align_length(struct super_block *sb,
260362306a36Sopenharmony_ci						  unsigned int len)
260462306a36Sopenharmony_ci{
260562306a36Sopenharmony_ci	unsigned int padded =
260662306a36Sopenharmony_ci		(len + (ocfs2_cow_contig_clusters(sb) - 1)) &
260762306a36Sopenharmony_ci		ocfs2_cow_contig_mask(sb);
260862306a36Sopenharmony_ci
260962306a36Sopenharmony_ci	/* Did we wrap? */
261062306a36Sopenharmony_ci	if (padded < len)
261162306a36Sopenharmony_ci		padded = UINT_MAX;
261262306a36Sopenharmony_ci
261362306a36Sopenharmony_ci	return padded;
261462306a36Sopenharmony_ci}
261562306a36Sopenharmony_ci
261662306a36Sopenharmony_ci/*
261762306a36Sopenharmony_ci * Calculate out the start and number of virtual clusters we need to CoW.
261862306a36Sopenharmony_ci *
261962306a36Sopenharmony_ci * cpos is vitual start cluster position we want to do CoW in a
262062306a36Sopenharmony_ci * file and write_len is the cluster length.
262162306a36Sopenharmony_ci * max_cpos is the place where we want to stop CoW intentionally.
262262306a36Sopenharmony_ci *
262362306a36Sopenharmony_ci * Normal we will start CoW from the beginning of extent record cotaining cpos.
262462306a36Sopenharmony_ci * We try to break up extents on boundaries of MAX_CONTIG_BYTES so that we
262562306a36Sopenharmony_ci * get good I/O from the resulting extent tree.
262662306a36Sopenharmony_ci */
262762306a36Sopenharmony_cistatic int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
262862306a36Sopenharmony_ci					   struct ocfs2_extent_list *el,
262962306a36Sopenharmony_ci					   u32 cpos,
263062306a36Sopenharmony_ci					   u32 write_len,
263162306a36Sopenharmony_ci					   u32 max_cpos,
263262306a36Sopenharmony_ci					   u32 *cow_start,
263362306a36Sopenharmony_ci					   u32 *cow_len)
263462306a36Sopenharmony_ci{
263562306a36Sopenharmony_ci	int ret = 0;
263662306a36Sopenharmony_ci	int tree_height = le16_to_cpu(el->l_tree_depth), i;
263762306a36Sopenharmony_ci	struct buffer_head *eb_bh = NULL;
263862306a36Sopenharmony_ci	struct ocfs2_extent_block *eb = NULL;
263962306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
264062306a36Sopenharmony_ci	unsigned int want_clusters, rec_end = 0;
264162306a36Sopenharmony_ci	int contig_clusters = ocfs2_cow_contig_clusters(inode->i_sb);
264262306a36Sopenharmony_ci	int leaf_clusters;
264362306a36Sopenharmony_ci
264462306a36Sopenharmony_ci	BUG_ON(cpos + write_len > max_cpos);
264562306a36Sopenharmony_ci
264662306a36Sopenharmony_ci	if (tree_height > 0) {
264762306a36Sopenharmony_ci		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, cpos, &eb_bh);
264862306a36Sopenharmony_ci		if (ret) {
264962306a36Sopenharmony_ci			mlog_errno(ret);
265062306a36Sopenharmony_ci			goto out;
265162306a36Sopenharmony_ci		}
265262306a36Sopenharmony_ci
265362306a36Sopenharmony_ci		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
265462306a36Sopenharmony_ci		el = &eb->h_list;
265562306a36Sopenharmony_ci
265662306a36Sopenharmony_ci		if (el->l_tree_depth) {
265762306a36Sopenharmony_ci			ret = ocfs2_error(inode->i_sb,
265862306a36Sopenharmony_ci					  "Inode %lu has non zero tree depth in leaf block %llu\n",
265962306a36Sopenharmony_ci					  inode->i_ino,
266062306a36Sopenharmony_ci					  (unsigned long long)eb_bh->b_blocknr);
266162306a36Sopenharmony_ci			goto out;
266262306a36Sopenharmony_ci		}
266362306a36Sopenharmony_ci	}
266462306a36Sopenharmony_ci
266562306a36Sopenharmony_ci	*cow_len = 0;
266662306a36Sopenharmony_ci	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
266762306a36Sopenharmony_ci		rec = &el->l_recs[i];
266862306a36Sopenharmony_ci
266962306a36Sopenharmony_ci		if (ocfs2_is_empty_extent(rec)) {
267062306a36Sopenharmony_ci			mlog_bug_on_msg(i != 0, "Inode %lu has empty record in "
267162306a36Sopenharmony_ci					"index %d\n", inode->i_ino, i);
267262306a36Sopenharmony_ci			continue;
267362306a36Sopenharmony_ci		}
267462306a36Sopenharmony_ci
267562306a36Sopenharmony_ci		if (le32_to_cpu(rec->e_cpos) +
267662306a36Sopenharmony_ci		    le16_to_cpu(rec->e_leaf_clusters) <= cpos)
267762306a36Sopenharmony_ci			continue;
267862306a36Sopenharmony_ci
267962306a36Sopenharmony_ci		if (*cow_len == 0) {
268062306a36Sopenharmony_ci			/*
268162306a36Sopenharmony_ci			 * We should find a refcounted record in the
268262306a36Sopenharmony_ci			 * first pass.
268362306a36Sopenharmony_ci			 */
268462306a36Sopenharmony_ci			BUG_ON(!(rec->e_flags & OCFS2_EXT_REFCOUNTED));
268562306a36Sopenharmony_ci			*cow_start = le32_to_cpu(rec->e_cpos);
268662306a36Sopenharmony_ci		}
268762306a36Sopenharmony_ci
268862306a36Sopenharmony_ci		/*
268962306a36Sopenharmony_ci		 * If we encounter a hole, a non-refcounted record or
269062306a36Sopenharmony_ci		 * pass the max_cpos, stop the search.
269162306a36Sopenharmony_ci		 */
269262306a36Sopenharmony_ci		if ((!(rec->e_flags & OCFS2_EXT_REFCOUNTED)) ||
269362306a36Sopenharmony_ci		    (*cow_len && rec_end != le32_to_cpu(rec->e_cpos)) ||
269462306a36Sopenharmony_ci		    (max_cpos <= le32_to_cpu(rec->e_cpos)))
269562306a36Sopenharmony_ci			break;
269662306a36Sopenharmony_ci
269762306a36Sopenharmony_ci		leaf_clusters = le16_to_cpu(rec->e_leaf_clusters);
269862306a36Sopenharmony_ci		rec_end = le32_to_cpu(rec->e_cpos) + leaf_clusters;
269962306a36Sopenharmony_ci		if (rec_end > max_cpos) {
270062306a36Sopenharmony_ci			rec_end = max_cpos;
270162306a36Sopenharmony_ci			leaf_clusters = rec_end - le32_to_cpu(rec->e_cpos);
270262306a36Sopenharmony_ci		}
270362306a36Sopenharmony_ci
270462306a36Sopenharmony_ci		/*
270562306a36Sopenharmony_ci		 * How many clusters do we actually need from
270662306a36Sopenharmony_ci		 * this extent?  First we see how many we actually
270762306a36Sopenharmony_ci		 * need to complete the write.  If that's smaller
270862306a36Sopenharmony_ci		 * than contig_clusters, we try for contig_clusters.
270962306a36Sopenharmony_ci		 */
271062306a36Sopenharmony_ci		if (!*cow_len)
271162306a36Sopenharmony_ci			want_clusters = write_len;
271262306a36Sopenharmony_ci		else
271362306a36Sopenharmony_ci			want_clusters = (cpos + write_len) -
271462306a36Sopenharmony_ci				(*cow_start + *cow_len);
271562306a36Sopenharmony_ci		if (want_clusters < contig_clusters)
271662306a36Sopenharmony_ci			want_clusters = contig_clusters;
271762306a36Sopenharmony_ci
271862306a36Sopenharmony_ci		/*
271962306a36Sopenharmony_ci		 * If the write does not cover the whole extent, we
272062306a36Sopenharmony_ci		 * need to calculate how we're going to split the extent.
272162306a36Sopenharmony_ci		 * We try to do it on contig_clusters boundaries.
272262306a36Sopenharmony_ci		 *
272362306a36Sopenharmony_ci		 * Any extent smaller than contig_clusters will be
272462306a36Sopenharmony_ci		 * CoWed in its entirety.
272562306a36Sopenharmony_ci		 */
272662306a36Sopenharmony_ci		if (leaf_clusters <= contig_clusters)
272762306a36Sopenharmony_ci			*cow_len += leaf_clusters;
272862306a36Sopenharmony_ci		else if (*cow_len || (*cow_start == cpos)) {
272962306a36Sopenharmony_ci			/*
273062306a36Sopenharmony_ci			 * This extent needs to be CoW'd from its
273162306a36Sopenharmony_ci			 * beginning, so all we have to do is compute
273262306a36Sopenharmony_ci			 * how many clusters to grab.  We align
273362306a36Sopenharmony_ci			 * want_clusters to the edge of contig_clusters
273462306a36Sopenharmony_ci			 * to get better I/O.
273562306a36Sopenharmony_ci			 */
273662306a36Sopenharmony_ci			want_clusters = ocfs2_cow_align_length(inode->i_sb,
273762306a36Sopenharmony_ci							       want_clusters);
273862306a36Sopenharmony_ci
273962306a36Sopenharmony_ci			if (leaf_clusters < want_clusters)
274062306a36Sopenharmony_ci				*cow_len += leaf_clusters;
274162306a36Sopenharmony_ci			else
274262306a36Sopenharmony_ci				*cow_len += want_clusters;
274362306a36Sopenharmony_ci		} else if ((*cow_start + contig_clusters) >=
274462306a36Sopenharmony_ci			   (cpos + write_len)) {
274562306a36Sopenharmony_ci			/*
274662306a36Sopenharmony_ci			 * Breaking off contig_clusters at the front
274762306a36Sopenharmony_ci			 * of the extent will cover our write.  That's
274862306a36Sopenharmony_ci			 * easy.
274962306a36Sopenharmony_ci			 */
275062306a36Sopenharmony_ci			*cow_len = contig_clusters;
275162306a36Sopenharmony_ci		} else if ((rec_end - cpos) <= contig_clusters) {
275262306a36Sopenharmony_ci			/*
275362306a36Sopenharmony_ci			 * Breaking off contig_clusters at the tail of
275462306a36Sopenharmony_ci			 * this extent will cover cpos.
275562306a36Sopenharmony_ci			 */
275662306a36Sopenharmony_ci			*cow_start = rec_end - contig_clusters;
275762306a36Sopenharmony_ci			*cow_len = contig_clusters;
275862306a36Sopenharmony_ci		} else if ((rec_end - cpos) <= want_clusters) {
275962306a36Sopenharmony_ci			/*
276062306a36Sopenharmony_ci			 * While we can't fit the entire write in this
276162306a36Sopenharmony_ci			 * extent, we know that the write goes from cpos
276262306a36Sopenharmony_ci			 * to the end of the extent.  Break that off.
276362306a36Sopenharmony_ci			 * We try to break it at some multiple of
276462306a36Sopenharmony_ci			 * contig_clusters from the front of the extent.
276562306a36Sopenharmony_ci			 * Failing that (ie, cpos is within
276662306a36Sopenharmony_ci			 * contig_clusters of the front), we'll CoW the
276762306a36Sopenharmony_ci			 * entire extent.
276862306a36Sopenharmony_ci			 */
276962306a36Sopenharmony_ci			*cow_start = ocfs2_cow_align_start(inode->i_sb,
277062306a36Sopenharmony_ci							   *cow_start, cpos);
277162306a36Sopenharmony_ci			*cow_len = rec_end - *cow_start;
277262306a36Sopenharmony_ci		} else {
277362306a36Sopenharmony_ci			/*
277462306a36Sopenharmony_ci			 * Ok, the entire write lives in the middle of
277562306a36Sopenharmony_ci			 * this extent.  Let's try to slice the extent up
277662306a36Sopenharmony_ci			 * nicely.  Optimally, our CoW region starts at
277762306a36Sopenharmony_ci			 * m*contig_clusters from the beginning of the
277862306a36Sopenharmony_ci			 * extent and goes for n*contig_clusters,
277962306a36Sopenharmony_ci			 * covering the entire write.
278062306a36Sopenharmony_ci			 */
278162306a36Sopenharmony_ci			*cow_start = ocfs2_cow_align_start(inode->i_sb,
278262306a36Sopenharmony_ci							   *cow_start, cpos);
278362306a36Sopenharmony_ci
278462306a36Sopenharmony_ci			want_clusters = (cpos + write_len) - *cow_start;
278562306a36Sopenharmony_ci			want_clusters = ocfs2_cow_align_length(inode->i_sb,
278662306a36Sopenharmony_ci							       want_clusters);
278762306a36Sopenharmony_ci			if (*cow_start + want_clusters <= rec_end)
278862306a36Sopenharmony_ci				*cow_len = want_clusters;
278962306a36Sopenharmony_ci			else
279062306a36Sopenharmony_ci				*cow_len = rec_end - *cow_start;
279162306a36Sopenharmony_ci		}
279262306a36Sopenharmony_ci
279362306a36Sopenharmony_ci		/* Have we covered our entire write yet? */
279462306a36Sopenharmony_ci		if ((*cow_start + *cow_len) >= (cpos + write_len))
279562306a36Sopenharmony_ci			break;
279662306a36Sopenharmony_ci
279762306a36Sopenharmony_ci		/*
279862306a36Sopenharmony_ci		 * If we reach the end of the extent block and don't get enough
279962306a36Sopenharmony_ci		 * clusters, continue with the next extent block if possible.
280062306a36Sopenharmony_ci		 */
280162306a36Sopenharmony_ci		if (i + 1 == le16_to_cpu(el->l_next_free_rec) &&
280262306a36Sopenharmony_ci		    eb && eb->h_next_leaf_blk) {
280362306a36Sopenharmony_ci			brelse(eb_bh);
280462306a36Sopenharmony_ci			eb_bh = NULL;
280562306a36Sopenharmony_ci
280662306a36Sopenharmony_ci			ret = ocfs2_read_extent_block(INODE_CACHE(inode),
280762306a36Sopenharmony_ci					       le64_to_cpu(eb->h_next_leaf_blk),
280862306a36Sopenharmony_ci					       &eb_bh);
280962306a36Sopenharmony_ci			if (ret) {
281062306a36Sopenharmony_ci				mlog_errno(ret);
281162306a36Sopenharmony_ci				goto out;
281262306a36Sopenharmony_ci			}
281362306a36Sopenharmony_ci
281462306a36Sopenharmony_ci			eb = (struct ocfs2_extent_block *) eb_bh->b_data;
281562306a36Sopenharmony_ci			el = &eb->h_list;
281662306a36Sopenharmony_ci			i = -1;
281762306a36Sopenharmony_ci		}
281862306a36Sopenharmony_ci	}
281962306a36Sopenharmony_ci
282062306a36Sopenharmony_ciout:
282162306a36Sopenharmony_ci	brelse(eb_bh);
282262306a36Sopenharmony_ci	return ret;
282362306a36Sopenharmony_ci}
282462306a36Sopenharmony_ci
282562306a36Sopenharmony_ci/*
282662306a36Sopenharmony_ci * Prepare meta_ac, data_ac and calculate credits when we want to add some
282762306a36Sopenharmony_ci * num_clusters in data_tree "et" and change the refcount for the old
282862306a36Sopenharmony_ci * clusters(starting form p_cluster) in the refcount tree.
282962306a36Sopenharmony_ci *
283062306a36Sopenharmony_ci * Note:
283162306a36Sopenharmony_ci * 1. since we may split the old tree, so we at most will need num_clusters + 2
283262306a36Sopenharmony_ci *    more new leaf records.
283362306a36Sopenharmony_ci * 2. In some case, we may not need to reserve new clusters(e.g, reflink), so
283462306a36Sopenharmony_ci *    just give data_ac = NULL.
283562306a36Sopenharmony_ci */
283662306a36Sopenharmony_cistatic int ocfs2_lock_refcount_allocators(struct super_block *sb,
283762306a36Sopenharmony_ci					u32 p_cluster, u32 num_clusters,
283862306a36Sopenharmony_ci					struct ocfs2_extent_tree *et,
283962306a36Sopenharmony_ci					struct ocfs2_caching_info *ref_ci,
284062306a36Sopenharmony_ci					struct buffer_head *ref_root_bh,
284162306a36Sopenharmony_ci					struct ocfs2_alloc_context **meta_ac,
284262306a36Sopenharmony_ci					struct ocfs2_alloc_context **data_ac,
284362306a36Sopenharmony_ci					int *credits)
284462306a36Sopenharmony_ci{
284562306a36Sopenharmony_ci	int ret = 0, meta_add = 0;
284662306a36Sopenharmony_ci	int num_free_extents = ocfs2_num_free_extents(et);
284762306a36Sopenharmony_ci
284862306a36Sopenharmony_ci	if (num_free_extents < 0) {
284962306a36Sopenharmony_ci		ret = num_free_extents;
285062306a36Sopenharmony_ci		mlog_errno(ret);
285162306a36Sopenharmony_ci		goto out;
285262306a36Sopenharmony_ci	}
285362306a36Sopenharmony_ci
285462306a36Sopenharmony_ci	if (num_free_extents < num_clusters + 2)
285562306a36Sopenharmony_ci		meta_add =
285662306a36Sopenharmony_ci			ocfs2_extend_meta_needed(et->et_root_el);
285762306a36Sopenharmony_ci
285862306a36Sopenharmony_ci	*credits += ocfs2_calc_extend_credits(sb, et->et_root_el);
285962306a36Sopenharmony_ci
286062306a36Sopenharmony_ci	ret = ocfs2_calc_refcount_meta_credits(sb, ref_ci, ref_root_bh,
286162306a36Sopenharmony_ci					       p_cluster, num_clusters,
286262306a36Sopenharmony_ci					       &meta_add, credits);
286362306a36Sopenharmony_ci	if (ret) {
286462306a36Sopenharmony_ci		mlog_errno(ret);
286562306a36Sopenharmony_ci		goto out;
286662306a36Sopenharmony_ci	}
286762306a36Sopenharmony_ci
286862306a36Sopenharmony_ci	trace_ocfs2_lock_refcount_allocators(meta_add, *credits);
286962306a36Sopenharmony_ci	ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(sb), meta_add,
287062306a36Sopenharmony_ci						meta_ac);
287162306a36Sopenharmony_ci	if (ret) {
287262306a36Sopenharmony_ci		mlog_errno(ret);
287362306a36Sopenharmony_ci		goto out;
287462306a36Sopenharmony_ci	}
287562306a36Sopenharmony_ci
287662306a36Sopenharmony_ci	if (data_ac) {
287762306a36Sopenharmony_ci		ret = ocfs2_reserve_clusters(OCFS2_SB(sb), num_clusters,
287862306a36Sopenharmony_ci					     data_ac);
287962306a36Sopenharmony_ci		if (ret)
288062306a36Sopenharmony_ci			mlog_errno(ret);
288162306a36Sopenharmony_ci	}
288262306a36Sopenharmony_ci
288362306a36Sopenharmony_ciout:
288462306a36Sopenharmony_ci	if (ret) {
288562306a36Sopenharmony_ci		if (*meta_ac) {
288662306a36Sopenharmony_ci			ocfs2_free_alloc_context(*meta_ac);
288762306a36Sopenharmony_ci			*meta_ac = NULL;
288862306a36Sopenharmony_ci		}
288962306a36Sopenharmony_ci	}
289062306a36Sopenharmony_ci
289162306a36Sopenharmony_ci	return ret;
289262306a36Sopenharmony_ci}
289362306a36Sopenharmony_ci
289462306a36Sopenharmony_cistatic int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
289562306a36Sopenharmony_ci{
289662306a36Sopenharmony_ci	BUG_ON(buffer_dirty(bh));
289762306a36Sopenharmony_ci
289862306a36Sopenharmony_ci	clear_buffer_mapped(bh);
289962306a36Sopenharmony_ci
290062306a36Sopenharmony_ci	return 0;
290162306a36Sopenharmony_ci}
290262306a36Sopenharmony_ci
290362306a36Sopenharmony_ciint ocfs2_duplicate_clusters_by_page(handle_t *handle,
290462306a36Sopenharmony_ci				     struct inode *inode,
290562306a36Sopenharmony_ci				     u32 cpos, u32 old_cluster,
290662306a36Sopenharmony_ci				     u32 new_cluster, u32 new_len)
290762306a36Sopenharmony_ci{
290862306a36Sopenharmony_ci	int ret = 0, partial;
290962306a36Sopenharmony_ci	struct super_block *sb = inode->i_sb;
291062306a36Sopenharmony_ci	u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
291162306a36Sopenharmony_ci	struct page *page;
291262306a36Sopenharmony_ci	pgoff_t page_index;
291362306a36Sopenharmony_ci	unsigned int from, to;
291462306a36Sopenharmony_ci	loff_t offset, end, map_end;
291562306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
291662306a36Sopenharmony_ci
291762306a36Sopenharmony_ci	trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster,
291862306a36Sopenharmony_ci					       new_cluster, new_len);
291962306a36Sopenharmony_ci
292062306a36Sopenharmony_ci	offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
292162306a36Sopenharmony_ci	end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
292262306a36Sopenharmony_ci	/*
292362306a36Sopenharmony_ci	 * We only duplicate pages until we reach the page contains i_size - 1.
292462306a36Sopenharmony_ci	 * So trim 'end' to i_size.
292562306a36Sopenharmony_ci	 */
292662306a36Sopenharmony_ci	if (end > i_size_read(inode))
292762306a36Sopenharmony_ci		end = i_size_read(inode);
292862306a36Sopenharmony_ci
292962306a36Sopenharmony_ci	while (offset < end) {
293062306a36Sopenharmony_ci		page_index = offset >> PAGE_SHIFT;
293162306a36Sopenharmony_ci		map_end = ((loff_t)page_index + 1) << PAGE_SHIFT;
293262306a36Sopenharmony_ci		if (map_end > end)
293362306a36Sopenharmony_ci			map_end = end;
293462306a36Sopenharmony_ci
293562306a36Sopenharmony_ci		/* from, to is the offset within the page. */
293662306a36Sopenharmony_ci		from = offset & (PAGE_SIZE - 1);
293762306a36Sopenharmony_ci		to = PAGE_SIZE;
293862306a36Sopenharmony_ci		if (map_end & (PAGE_SIZE - 1))
293962306a36Sopenharmony_ci			to = map_end & (PAGE_SIZE - 1);
294062306a36Sopenharmony_ci
294162306a36Sopenharmony_ciretry:
294262306a36Sopenharmony_ci		page = find_or_create_page(mapping, page_index, GFP_NOFS);
294362306a36Sopenharmony_ci		if (!page) {
294462306a36Sopenharmony_ci			ret = -ENOMEM;
294562306a36Sopenharmony_ci			mlog_errno(ret);
294662306a36Sopenharmony_ci			break;
294762306a36Sopenharmony_ci		}
294862306a36Sopenharmony_ci
294962306a36Sopenharmony_ci		/*
295062306a36Sopenharmony_ci		 * In case PAGE_SIZE <= CLUSTER_SIZE, we do not expect a dirty
295162306a36Sopenharmony_ci		 * page, so write it back.
295262306a36Sopenharmony_ci		 */
295362306a36Sopenharmony_ci		if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) {
295462306a36Sopenharmony_ci			if (PageDirty(page)) {
295562306a36Sopenharmony_ci				unlock_page(page);
295662306a36Sopenharmony_ci				put_page(page);
295762306a36Sopenharmony_ci
295862306a36Sopenharmony_ci				ret = filemap_write_and_wait_range(mapping,
295962306a36Sopenharmony_ci						offset, map_end - 1);
296062306a36Sopenharmony_ci				goto retry;
296162306a36Sopenharmony_ci			}
296262306a36Sopenharmony_ci		}
296362306a36Sopenharmony_ci
296462306a36Sopenharmony_ci		if (!PageUptodate(page)) {
296562306a36Sopenharmony_ci			struct folio *folio = page_folio(page);
296662306a36Sopenharmony_ci
296762306a36Sopenharmony_ci			ret = block_read_full_folio(folio, ocfs2_get_block);
296862306a36Sopenharmony_ci			if (ret) {
296962306a36Sopenharmony_ci				mlog_errno(ret);
297062306a36Sopenharmony_ci				goto unlock;
297162306a36Sopenharmony_ci			}
297262306a36Sopenharmony_ci			folio_lock(folio);
297362306a36Sopenharmony_ci		}
297462306a36Sopenharmony_ci
297562306a36Sopenharmony_ci		if (page_has_buffers(page)) {
297662306a36Sopenharmony_ci			ret = walk_page_buffers(handle, page_buffers(page),
297762306a36Sopenharmony_ci						from, to, &partial,
297862306a36Sopenharmony_ci						ocfs2_clear_cow_buffer);
297962306a36Sopenharmony_ci			if (ret) {
298062306a36Sopenharmony_ci				mlog_errno(ret);
298162306a36Sopenharmony_ci				goto unlock;
298262306a36Sopenharmony_ci			}
298362306a36Sopenharmony_ci		}
298462306a36Sopenharmony_ci
298562306a36Sopenharmony_ci		ocfs2_map_and_dirty_page(inode,
298662306a36Sopenharmony_ci					 handle, from, to,
298762306a36Sopenharmony_ci					 page, 0, &new_block);
298862306a36Sopenharmony_ci		mark_page_accessed(page);
298962306a36Sopenharmony_ciunlock:
299062306a36Sopenharmony_ci		unlock_page(page);
299162306a36Sopenharmony_ci		put_page(page);
299262306a36Sopenharmony_ci		page = NULL;
299362306a36Sopenharmony_ci		offset = map_end;
299462306a36Sopenharmony_ci		if (ret)
299562306a36Sopenharmony_ci			break;
299662306a36Sopenharmony_ci	}
299762306a36Sopenharmony_ci
299862306a36Sopenharmony_ci	return ret;
299962306a36Sopenharmony_ci}
300062306a36Sopenharmony_ci
300162306a36Sopenharmony_ciint ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
300262306a36Sopenharmony_ci				    struct inode *inode,
300362306a36Sopenharmony_ci				    u32 cpos, u32 old_cluster,
300462306a36Sopenharmony_ci				    u32 new_cluster, u32 new_len)
300562306a36Sopenharmony_ci{
300662306a36Sopenharmony_ci	int ret = 0;
300762306a36Sopenharmony_ci	struct super_block *sb = inode->i_sb;
300862306a36Sopenharmony_ci	struct ocfs2_caching_info *ci = INODE_CACHE(inode);
300962306a36Sopenharmony_ci	int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
301062306a36Sopenharmony_ci	u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster);
301162306a36Sopenharmony_ci	u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
301262306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(sb);
301362306a36Sopenharmony_ci	struct buffer_head *old_bh = NULL;
301462306a36Sopenharmony_ci	struct buffer_head *new_bh = NULL;
301562306a36Sopenharmony_ci
301662306a36Sopenharmony_ci	trace_ocfs2_duplicate_clusters_by_page(cpos, old_cluster,
301762306a36Sopenharmony_ci					       new_cluster, new_len);
301862306a36Sopenharmony_ci
301962306a36Sopenharmony_ci	for (i = 0; i < blocks; i++, old_block++, new_block++) {
302062306a36Sopenharmony_ci		new_bh = sb_getblk(osb->sb, new_block);
302162306a36Sopenharmony_ci		if (new_bh == NULL) {
302262306a36Sopenharmony_ci			ret = -ENOMEM;
302362306a36Sopenharmony_ci			mlog_errno(ret);
302462306a36Sopenharmony_ci			break;
302562306a36Sopenharmony_ci		}
302662306a36Sopenharmony_ci
302762306a36Sopenharmony_ci		ocfs2_set_new_buffer_uptodate(ci, new_bh);
302862306a36Sopenharmony_ci
302962306a36Sopenharmony_ci		ret = ocfs2_read_block(ci, old_block, &old_bh, NULL);
303062306a36Sopenharmony_ci		if (ret) {
303162306a36Sopenharmony_ci			mlog_errno(ret);
303262306a36Sopenharmony_ci			break;
303362306a36Sopenharmony_ci		}
303462306a36Sopenharmony_ci
303562306a36Sopenharmony_ci		ret = ocfs2_journal_access(handle, ci, new_bh,
303662306a36Sopenharmony_ci					   OCFS2_JOURNAL_ACCESS_CREATE);
303762306a36Sopenharmony_ci		if (ret) {
303862306a36Sopenharmony_ci			mlog_errno(ret);
303962306a36Sopenharmony_ci			break;
304062306a36Sopenharmony_ci		}
304162306a36Sopenharmony_ci
304262306a36Sopenharmony_ci		memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize);
304362306a36Sopenharmony_ci		ocfs2_journal_dirty(handle, new_bh);
304462306a36Sopenharmony_ci
304562306a36Sopenharmony_ci		brelse(new_bh);
304662306a36Sopenharmony_ci		brelse(old_bh);
304762306a36Sopenharmony_ci		new_bh = NULL;
304862306a36Sopenharmony_ci		old_bh = NULL;
304962306a36Sopenharmony_ci	}
305062306a36Sopenharmony_ci
305162306a36Sopenharmony_ci	brelse(new_bh);
305262306a36Sopenharmony_ci	brelse(old_bh);
305362306a36Sopenharmony_ci	return ret;
305462306a36Sopenharmony_ci}
305562306a36Sopenharmony_ci
305662306a36Sopenharmony_cistatic int ocfs2_clear_ext_refcount(handle_t *handle,
305762306a36Sopenharmony_ci				    struct ocfs2_extent_tree *et,
305862306a36Sopenharmony_ci				    u32 cpos, u32 p_cluster, u32 len,
305962306a36Sopenharmony_ci				    unsigned int ext_flags,
306062306a36Sopenharmony_ci				    struct ocfs2_alloc_context *meta_ac,
306162306a36Sopenharmony_ci				    struct ocfs2_cached_dealloc_ctxt *dealloc)
306262306a36Sopenharmony_ci{
306362306a36Sopenharmony_ci	int ret, index;
306462306a36Sopenharmony_ci	struct ocfs2_extent_rec replace_rec;
306562306a36Sopenharmony_ci	struct ocfs2_path *path = NULL;
306662306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
306762306a36Sopenharmony_ci	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
306862306a36Sopenharmony_ci	u64 ino = ocfs2_metadata_cache_owner(et->et_ci);
306962306a36Sopenharmony_ci
307062306a36Sopenharmony_ci	trace_ocfs2_clear_ext_refcount((unsigned long long)ino,
307162306a36Sopenharmony_ci				       cpos, len, p_cluster, ext_flags);
307262306a36Sopenharmony_ci
307362306a36Sopenharmony_ci	memset(&replace_rec, 0, sizeof(replace_rec));
307462306a36Sopenharmony_ci	replace_rec.e_cpos = cpu_to_le32(cpos);
307562306a36Sopenharmony_ci	replace_rec.e_leaf_clusters = cpu_to_le16(len);
307662306a36Sopenharmony_ci	replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(sb,
307762306a36Sopenharmony_ci								   p_cluster));
307862306a36Sopenharmony_ci	replace_rec.e_flags = ext_flags;
307962306a36Sopenharmony_ci	replace_rec.e_flags &= ~OCFS2_EXT_REFCOUNTED;
308062306a36Sopenharmony_ci
308162306a36Sopenharmony_ci	path = ocfs2_new_path_from_et(et);
308262306a36Sopenharmony_ci	if (!path) {
308362306a36Sopenharmony_ci		ret = -ENOMEM;
308462306a36Sopenharmony_ci		mlog_errno(ret);
308562306a36Sopenharmony_ci		goto out;
308662306a36Sopenharmony_ci	}
308762306a36Sopenharmony_ci
308862306a36Sopenharmony_ci	ret = ocfs2_find_path(et->et_ci, path, cpos);
308962306a36Sopenharmony_ci	if (ret) {
309062306a36Sopenharmony_ci		mlog_errno(ret);
309162306a36Sopenharmony_ci		goto out;
309262306a36Sopenharmony_ci	}
309362306a36Sopenharmony_ci
309462306a36Sopenharmony_ci	el = path_leaf_el(path);
309562306a36Sopenharmony_ci
309662306a36Sopenharmony_ci	index = ocfs2_search_extent_list(el, cpos);
309762306a36Sopenharmony_ci	if (index == -1) {
309862306a36Sopenharmony_ci		ret = ocfs2_error(sb,
309962306a36Sopenharmony_ci				  "Inode %llu has an extent at cpos %u which can no longer be found\n",
310062306a36Sopenharmony_ci				  (unsigned long long)ino, cpos);
310162306a36Sopenharmony_ci		goto out;
310262306a36Sopenharmony_ci	}
310362306a36Sopenharmony_ci
310462306a36Sopenharmony_ci	ret = ocfs2_split_extent(handle, et, path, index,
310562306a36Sopenharmony_ci				 &replace_rec, meta_ac, dealloc);
310662306a36Sopenharmony_ci	if (ret)
310762306a36Sopenharmony_ci		mlog_errno(ret);
310862306a36Sopenharmony_ci
310962306a36Sopenharmony_ciout:
311062306a36Sopenharmony_ci	ocfs2_free_path(path);
311162306a36Sopenharmony_ci	return ret;
311262306a36Sopenharmony_ci}
311362306a36Sopenharmony_ci
311462306a36Sopenharmony_cistatic int ocfs2_replace_clusters(handle_t *handle,
311562306a36Sopenharmony_ci				  struct ocfs2_cow_context *context,
311662306a36Sopenharmony_ci				  u32 cpos, u32 old,
311762306a36Sopenharmony_ci				  u32 new, u32 len,
311862306a36Sopenharmony_ci				  unsigned int ext_flags)
311962306a36Sopenharmony_ci{
312062306a36Sopenharmony_ci	int ret;
312162306a36Sopenharmony_ci	struct ocfs2_caching_info *ci = context->data_et.et_ci;
312262306a36Sopenharmony_ci	u64 ino = ocfs2_metadata_cache_owner(ci);
312362306a36Sopenharmony_ci
312462306a36Sopenharmony_ci	trace_ocfs2_replace_clusters((unsigned long long)ino,
312562306a36Sopenharmony_ci				     cpos, old, new, len, ext_flags);
312662306a36Sopenharmony_ci
312762306a36Sopenharmony_ci	/*If the old clusters is unwritten, no need to duplicate. */
312862306a36Sopenharmony_ci	if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
312962306a36Sopenharmony_ci		ret = context->cow_duplicate_clusters(handle, context->inode,
313062306a36Sopenharmony_ci						      cpos, old, new, len);
313162306a36Sopenharmony_ci		if (ret) {
313262306a36Sopenharmony_ci			mlog_errno(ret);
313362306a36Sopenharmony_ci			goto out;
313462306a36Sopenharmony_ci		}
313562306a36Sopenharmony_ci	}
313662306a36Sopenharmony_ci
313762306a36Sopenharmony_ci	ret = ocfs2_clear_ext_refcount(handle, &context->data_et,
313862306a36Sopenharmony_ci				       cpos, new, len, ext_flags,
313962306a36Sopenharmony_ci				       context->meta_ac, &context->dealloc);
314062306a36Sopenharmony_ci	if (ret)
314162306a36Sopenharmony_ci		mlog_errno(ret);
314262306a36Sopenharmony_ciout:
314362306a36Sopenharmony_ci	return ret;
314462306a36Sopenharmony_ci}
314562306a36Sopenharmony_ci
314662306a36Sopenharmony_ciint ocfs2_cow_sync_writeback(struct super_block *sb,
314762306a36Sopenharmony_ci			     struct inode *inode,
314862306a36Sopenharmony_ci			     u32 cpos, u32 num_clusters)
314962306a36Sopenharmony_ci{
315062306a36Sopenharmony_ci	int ret;
315162306a36Sopenharmony_ci	loff_t start, end;
315262306a36Sopenharmony_ci
315362306a36Sopenharmony_ci	if (ocfs2_should_order_data(inode))
315462306a36Sopenharmony_ci		return 0;
315562306a36Sopenharmony_ci
315662306a36Sopenharmony_ci	start = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
315762306a36Sopenharmony_ci	end = start + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits) - 1;
315862306a36Sopenharmony_ci
315962306a36Sopenharmony_ci	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
316062306a36Sopenharmony_ci	if (ret < 0)
316162306a36Sopenharmony_ci		mlog_errno(ret);
316262306a36Sopenharmony_ci
316362306a36Sopenharmony_ci	return ret;
316462306a36Sopenharmony_ci}
316562306a36Sopenharmony_ci
316662306a36Sopenharmony_cistatic int ocfs2_di_get_clusters(struct ocfs2_cow_context *context,
316762306a36Sopenharmony_ci				 u32 v_cluster, u32 *p_cluster,
316862306a36Sopenharmony_ci				 u32 *num_clusters,
316962306a36Sopenharmony_ci				 unsigned int *extent_flags)
317062306a36Sopenharmony_ci{
317162306a36Sopenharmony_ci	return ocfs2_get_clusters(context->inode, v_cluster, p_cluster,
317262306a36Sopenharmony_ci				  num_clusters, extent_flags);
317362306a36Sopenharmony_ci}
317462306a36Sopenharmony_ci
317562306a36Sopenharmony_cistatic int ocfs2_make_clusters_writable(struct super_block *sb,
317662306a36Sopenharmony_ci					struct ocfs2_cow_context *context,
317762306a36Sopenharmony_ci					u32 cpos, u32 p_cluster,
317862306a36Sopenharmony_ci					u32 num_clusters, unsigned int e_flags)
317962306a36Sopenharmony_ci{
318062306a36Sopenharmony_ci	int ret, delete, index, credits =  0;
318162306a36Sopenharmony_ci	u32 new_bit, new_len, orig_num_clusters;
318262306a36Sopenharmony_ci	unsigned int set_len;
318362306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(sb);
318462306a36Sopenharmony_ci	handle_t *handle;
318562306a36Sopenharmony_ci	struct buffer_head *ref_leaf_bh = NULL;
318662306a36Sopenharmony_ci	struct ocfs2_caching_info *ref_ci = &context->ref_tree->rf_ci;
318762306a36Sopenharmony_ci	struct ocfs2_refcount_rec rec;
318862306a36Sopenharmony_ci
318962306a36Sopenharmony_ci	trace_ocfs2_make_clusters_writable(cpos, p_cluster,
319062306a36Sopenharmony_ci					   num_clusters, e_flags);
319162306a36Sopenharmony_ci
319262306a36Sopenharmony_ci	ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters,
319362306a36Sopenharmony_ci					     &context->data_et,
319462306a36Sopenharmony_ci					     ref_ci,
319562306a36Sopenharmony_ci					     context->ref_root_bh,
319662306a36Sopenharmony_ci					     &context->meta_ac,
319762306a36Sopenharmony_ci					     &context->data_ac, &credits);
319862306a36Sopenharmony_ci	if (ret) {
319962306a36Sopenharmony_ci		mlog_errno(ret);
320062306a36Sopenharmony_ci		return ret;
320162306a36Sopenharmony_ci	}
320262306a36Sopenharmony_ci
320362306a36Sopenharmony_ci	if (context->post_refcount)
320462306a36Sopenharmony_ci		credits += context->post_refcount->credits;
320562306a36Sopenharmony_ci
320662306a36Sopenharmony_ci	credits += context->extra_credits;
320762306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, credits);
320862306a36Sopenharmony_ci	if (IS_ERR(handle)) {
320962306a36Sopenharmony_ci		ret = PTR_ERR(handle);
321062306a36Sopenharmony_ci		mlog_errno(ret);
321162306a36Sopenharmony_ci		goto out;
321262306a36Sopenharmony_ci	}
321362306a36Sopenharmony_ci
321462306a36Sopenharmony_ci	orig_num_clusters = num_clusters;
321562306a36Sopenharmony_ci
321662306a36Sopenharmony_ci	while (num_clusters) {
321762306a36Sopenharmony_ci		ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
321862306a36Sopenharmony_ci					     p_cluster, num_clusters,
321962306a36Sopenharmony_ci					     &rec, &index, &ref_leaf_bh);
322062306a36Sopenharmony_ci		if (ret) {
322162306a36Sopenharmony_ci			mlog_errno(ret);
322262306a36Sopenharmony_ci			goto out_commit;
322362306a36Sopenharmony_ci		}
322462306a36Sopenharmony_ci
322562306a36Sopenharmony_ci		BUG_ON(!rec.r_refcount);
322662306a36Sopenharmony_ci		set_len = min((u64)p_cluster + num_clusters,
322762306a36Sopenharmony_ci			      le64_to_cpu(rec.r_cpos) +
322862306a36Sopenharmony_ci			      le32_to_cpu(rec.r_clusters)) - p_cluster;
322962306a36Sopenharmony_ci
323062306a36Sopenharmony_ci		/*
323162306a36Sopenharmony_ci		 * There are many different situation here.
323262306a36Sopenharmony_ci		 * 1. If refcount == 1, remove the flag and don't COW.
323362306a36Sopenharmony_ci		 * 2. If refcount > 1, allocate clusters.
323462306a36Sopenharmony_ci		 *    Here we may not allocate r_len once at a time, so continue
323562306a36Sopenharmony_ci		 *    until we reach num_clusters.
323662306a36Sopenharmony_ci		 */
323762306a36Sopenharmony_ci		if (le32_to_cpu(rec.r_refcount) == 1) {
323862306a36Sopenharmony_ci			delete = 0;
323962306a36Sopenharmony_ci			ret = ocfs2_clear_ext_refcount(handle,
324062306a36Sopenharmony_ci						       &context->data_et,
324162306a36Sopenharmony_ci						       cpos, p_cluster,
324262306a36Sopenharmony_ci						       set_len, e_flags,
324362306a36Sopenharmony_ci						       context->meta_ac,
324462306a36Sopenharmony_ci						       &context->dealloc);
324562306a36Sopenharmony_ci			if (ret) {
324662306a36Sopenharmony_ci				mlog_errno(ret);
324762306a36Sopenharmony_ci				goto out_commit;
324862306a36Sopenharmony_ci			}
324962306a36Sopenharmony_ci		} else {
325062306a36Sopenharmony_ci			delete = 1;
325162306a36Sopenharmony_ci
325262306a36Sopenharmony_ci			ret = __ocfs2_claim_clusters(handle,
325362306a36Sopenharmony_ci						     context->data_ac,
325462306a36Sopenharmony_ci						     1, set_len,
325562306a36Sopenharmony_ci						     &new_bit, &new_len);
325662306a36Sopenharmony_ci			if (ret) {
325762306a36Sopenharmony_ci				mlog_errno(ret);
325862306a36Sopenharmony_ci				goto out_commit;
325962306a36Sopenharmony_ci			}
326062306a36Sopenharmony_ci
326162306a36Sopenharmony_ci			ret = ocfs2_replace_clusters(handle, context,
326262306a36Sopenharmony_ci						     cpos, p_cluster, new_bit,
326362306a36Sopenharmony_ci						     new_len, e_flags);
326462306a36Sopenharmony_ci			if (ret) {
326562306a36Sopenharmony_ci				mlog_errno(ret);
326662306a36Sopenharmony_ci				goto out_commit;
326762306a36Sopenharmony_ci			}
326862306a36Sopenharmony_ci			set_len = new_len;
326962306a36Sopenharmony_ci		}
327062306a36Sopenharmony_ci
327162306a36Sopenharmony_ci		ret = __ocfs2_decrease_refcount(handle, ref_ci,
327262306a36Sopenharmony_ci						context->ref_root_bh,
327362306a36Sopenharmony_ci						p_cluster, set_len,
327462306a36Sopenharmony_ci						context->meta_ac,
327562306a36Sopenharmony_ci						&context->dealloc, delete);
327662306a36Sopenharmony_ci		if (ret) {
327762306a36Sopenharmony_ci			mlog_errno(ret);
327862306a36Sopenharmony_ci			goto out_commit;
327962306a36Sopenharmony_ci		}
328062306a36Sopenharmony_ci
328162306a36Sopenharmony_ci		cpos += set_len;
328262306a36Sopenharmony_ci		p_cluster += set_len;
328362306a36Sopenharmony_ci		num_clusters -= set_len;
328462306a36Sopenharmony_ci		brelse(ref_leaf_bh);
328562306a36Sopenharmony_ci		ref_leaf_bh = NULL;
328662306a36Sopenharmony_ci	}
328762306a36Sopenharmony_ci
328862306a36Sopenharmony_ci	/* handle any post_cow action. */
328962306a36Sopenharmony_ci	if (context->post_refcount && context->post_refcount->func) {
329062306a36Sopenharmony_ci		ret = context->post_refcount->func(context->inode, handle,
329162306a36Sopenharmony_ci						context->post_refcount->para);
329262306a36Sopenharmony_ci		if (ret) {
329362306a36Sopenharmony_ci			mlog_errno(ret);
329462306a36Sopenharmony_ci			goto out_commit;
329562306a36Sopenharmony_ci		}
329662306a36Sopenharmony_ci	}
329762306a36Sopenharmony_ci
329862306a36Sopenharmony_ci	/*
329962306a36Sopenharmony_ci	 * Here we should write the new page out first if we are
330062306a36Sopenharmony_ci	 * in write-back mode.
330162306a36Sopenharmony_ci	 */
330262306a36Sopenharmony_ci	if (context->get_clusters == ocfs2_di_get_clusters) {
330362306a36Sopenharmony_ci		ret = ocfs2_cow_sync_writeback(sb, context->inode, cpos,
330462306a36Sopenharmony_ci					       orig_num_clusters);
330562306a36Sopenharmony_ci		if (ret)
330662306a36Sopenharmony_ci			mlog_errno(ret);
330762306a36Sopenharmony_ci	}
330862306a36Sopenharmony_ci
330962306a36Sopenharmony_ciout_commit:
331062306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
331162306a36Sopenharmony_ci
331262306a36Sopenharmony_ciout:
331362306a36Sopenharmony_ci	if (context->data_ac) {
331462306a36Sopenharmony_ci		ocfs2_free_alloc_context(context->data_ac);
331562306a36Sopenharmony_ci		context->data_ac = NULL;
331662306a36Sopenharmony_ci	}
331762306a36Sopenharmony_ci	if (context->meta_ac) {
331862306a36Sopenharmony_ci		ocfs2_free_alloc_context(context->meta_ac);
331962306a36Sopenharmony_ci		context->meta_ac = NULL;
332062306a36Sopenharmony_ci	}
332162306a36Sopenharmony_ci	brelse(ref_leaf_bh);
332262306a36Sopenharmony_ci
332362306a36Sopenharmony_ci	return ret;
332462306a36Sopenharmony_ci}
332562306a36Sopenharmony_ci
332662306a36Sopenharmony_cistatic int ocfs2_replace_cow(struct ocfs2_cow_context *context)
332762306a36Sopenharmony_ci{
332862306a36Sopenharmony_ci	int ret = 0;
332962306a36Sopenharmony_ci	struct inode *inode = context->inode;
333062306a36Sopenharmony_ci	u32 cow_start = context->cow_start, cow_len = context->cow_len;
333162306a36Sopenharmony_ci	u32 p_cluster, num_clusters;
333262306a36Sopenharmony_ci	unsigned int ext_flags;
333362306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
333462306a36Sopenharmony_ci
333562306a36Sopenharmony_ci	if (!ocfs2_refcount_tree(osb)) {
333662306a36Sopenharmony_ci		return ocfs2_error(inode->i_sb, "Inode %lu want to use refcount tree, but the feature bit is not set in the super block\n",
333762306a36Sopenharmony_ci				   inode->i_ino);
333862306a36Sopenharmony_ci	}
333962306a36Sopenharmony_ci
334062306a36Sopenharmony_ci	ocfs2_init_dealloc_ctxt(&context->dealloc);
334162306a36Sopenharmony_ci
334262306a36Sopenharmony_ci	while (cow_len) {
334362306a36Sopenharmony_ci		ret = context->get_clusters(context, cow_start, &p_cluster,
334462306a36Sopenharmony_ci					    &num_clusters, &ext_flags);
334562306a36Sopenharmony_ci		if (ret) {
334662306a36Sopenharmony_ci			mlog_errno(ret);
334762306a36Sopenharmony_ci			break;
334862306a36Sopenharmony_ci		}
334962306a36Sopenharmony_ci
335062306a36Sopenharmony_ci		BUG_ON(!(ext_flags & OCFS2_EXT_REFCOUNTED));
335162306a36Sopenharmony_ci
335262306a36Sopenharmony_ci		if (cow_len < num_clusters)
335362306a36Sopenharmony_ci			num_clusters = cow_len;
335462306a36Sopenharmony_ci
335562306a36Sopenharmony_ci		ret = ocfs2_make_clusters_writable(inode->i_sb, context,
335662306a36Sopenharmony_ci						   cow_start, p_cluster,
335762306a36Sopenharmony_ci						   num_clusters, ext_flags);
335862306a36Sopenharmony_ci		if (ret) {
335962306a36Sopenharmony_ci			mlog_errno(ret);
336062306a36Sopenharmony_ci			break;
336162306a36Sopenharmony_ci		}
336262306a36Sopenharmony_ci
336362306a36Sopenharmony_ci		cow_len -= num_clusters;
336462306a36Sopenharmony_ci		cow_start += num_clusters;
336562306a36Sopenharmony_ci	}
336662306a36Sopenharmony_ci
336762306a36Sopenharmony_ci	if (ocfs2_dealloc_has_cluster(&context->dealloc)) {
336862306a36Sopenharmony_ci		ocfs2_schedule_truncate_log_flush(osb, 1);
336962306a36Sopenharmony_ci		ocfs2_run_deallocs(osb, &context->dealloc);
337062306a36Sopenharmony_ci	}
337162306a36Sopenharmony_ci
337262306a36Sopenharmony_ci	return ret;
337362306a36Sopenharmony_ci}
337462306a36Sopenharmony_ci
337562306a36Sopenharmony_ci/*
337662306a36Sopenharmony_ci * Starting at cpos, try to CoW write_len clusters.  Don't CoW
337762306a36Sopenharmony_ci * past max_cpos.  This will stop when it runs into a hole or an
337862306a36Sopenharmony_ci * unrefcounted extent.
337962306a36Sopenharmony_ci */
338062306a36Sopenharmony_cistatic int ocfs2_refcount_cow_hunk(struct inode *inode,
338162306a36Sopenharmony_ci				   struct buffer_head *di_bh,
338262306a36Sopenharmony_ci				   u32 cpos, u32 write_len, u32 max_cpos)
338362306a36Sopenharmony_ci{
338462306a36Sopenharmony_ci	int ret;
338562306a36Sopenharmony_ci	u32 cow_start = 0, cow_len = 0;
338662306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
338762306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
338862306a36Sopenharmony_ci	struct buffer_head *ref_root_bh = NULL;
338962306a36Sopenharmony_ci	struct ocfs2_refcount_tree *ref_tree;
339062306a36Sopenharmony_ci	struct ocfs2_cow_context *context = NULL;
339162306a36Sopenharmony_ci
339262306a36Sopenharmony_ci	BUG_ON(!ocfs2_is_refcount_inode(inode));
339362306a36Sopenharmony_ci
339462306a36Sopenharmony_ci	ret = ocfs2_refcount_cal_cow_clusters(inode, &di->id2.i_list,
339562306a36Sopenharmony_ci					      cpos, write_len, max_cpos,
339662306a36Sopenharmony_ci					      &cow_start, &cow_len);
339762306a36Sopenharmony_ci	if (ret) {
339862306a36Sopenharmony_ci		mlog_errno(ret);
339962306a36Sopenharmony_ci		goto out;
340062306a36Sopenharmony_ci	}
340162306a36Sopenharmony_ci
340262306a36Sopenharmony_ci	trace_ocfs2_refcount_cow_hunk(OCFS2_I(inode)->ip_blkno,
340362306a36Sopenharmony_ci				      cpos, write_len, max_cpos,
340462306a36Sopenharmony_ci				      cow_start, cow_len);
340562306a36Sopenharmony_ci
340662306a36Sopenharmony_ci	BUG_ON(cow_len == 0);
340762306a36Sopenharmony_ci
340862306a36Sopenharmony_ci	context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
340962306a36Sopenharmony_ci	if (!context) {
341062306a36Sopenharmony_ci		ret = -ENOMEM;
341162306a36Sopenharmony_ci		mlog_errno(ret);
341262306a36Sopenharmony_ci		goto out;
341362306a36Sopenharmony_ci	}
341462306a36Sopenharmony_ci
341562306a36Sopenharmony_ci	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
341662306a36Sopenharmony_ci				       1, &ref_tree, &ref_root_bh);
341762306a36Sopenharmony_ci	if (ret) {
341862306a36Sopenharmony_ci		mlog_errno(ret);
341962306a36Sopenharmony_ci		goto out;
342062306a36Sopenharmony_ci	}
342162306a36Sopenharmony_ci
342262306a36Sopenharmony_ci	context->inode = inode;
342362306a36Sopenharmony_ci	context->cow_start = cow_start;
342462306a36Sopenharmony_ci	context->cow_len = cow_len;
342562306a36Sopenharmony_ci	context->ref_tree = ref_tree;
342662306a36Sopenharmony_ci	context->ref_root_bh = ref_root_bh;
342762306a36Sopenharmony_ci	context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
342862306a36Sopenharmony_ci	context->get_clusters = ocfs2_di_get_clusters;
342962306a36Sopenharmony_ci
343062306a36Sopenharmony_ci	ocfs2_init_dinode_extent_tree(&context->data_et,
343162306a36Sopenharmony_ci				      INODE_CACHE(inode), di_bh);
343262306a36Sopenharmony_ci
343362306a36Sopenharmony_ci	ret = ocfs2_replace_cow(context);
343462306a36Sopenharmony_ci	if (ret)
343562306a36Sopenharmony_ci		mlog_errno(ret);
343662306a36Sopenharmony_ci
343762306a36Sopenharmony_ci	/*
343862306a36Sopenharmony_ci	 * truncate the extent map here since no matter whether we meet with
343962306a36Sopenharmony_ci	 * any error during the action, we shouldn't trust cached extent map
344062306a36Sopenharmony_ci	 * any more.
344162306a36Sopenharmony_ci	 */
344262306a36Sopenharmony_ci	ocfs2_extent_map_trunc(inode, cow_start);
344362306a36Sopenharmony_ci
344462306a36Sopenharmony_ci	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
344562306a36Sopenharmony_ci	brelse(ref_root_bh);
344662306a36Sopenharmony_ciout:
344762306a36Sopenharmony_ci	kfree(context);
344862306a36Sopenharmony_ci	return ret;
344962306a36Sopenharmony_ci}
345062306a36Sopenharmony_ci
345162306a36Sopenharmony_ci/*
345262306a36Sopenharmony_ci * CoW any and all clusters between cpos and cpos+write_len.
345362306a36Sopenharmony_ci * Don't CoW past max_cpos.  If this returns successfully, all
345462306a36Sopenharmony_ci * clusters between cpos and cpos+write_len are safe to modify.
345562306a36Sopenharmony_ci */
345662306a36Sopenharmony_ciint ocfs2_refcount_cow(struct inode *inode,
345762306a36Sopenharmony_ci		       struct buffer_head *di_bh,
345862306a36Sopenharmony_ci		       u32 cpos, u32 write_len, u32 max_cpos)
345962306a36Sopenharmony_ci{
346062306a36Sopenharmony_ci	int ret = 0;
346162306a36Sopenharmony_ci	u32 p_cluster, num_clusters;
346262306a36Sopenharmony_ci	unsigned int ext_flags;
346362306a36Sopenharmony_ci
346462306a36Sopenharmony_ci	while (write_len) {
346562306a36Sopenharmony_ci		ret = ocfs2_get_clusters(inode, cpos, &p_cluster,
346662306a36Sopenharmony_ci					 &num_clusters, &ext_flags);
346762306a36Sopenharmony_ci		if (ret) {
346862306a36Sopenharmony_ci			mlog_errno(ret);
346962306a36Sopenharmony_ci			break;
347062306a36Sopenharmony_ci		}
347162306a36Sopenharmony_ci
347262306a36Sopenharmony_ci		if (write_len < num_clusters)
347362306a36Sopenharmony_ci			num_clusters = write_len;
347462306a36Sopenharmony_ci
347562306a36Sopenharmony_ci		if (ext_flags & OCFS2_EXT_REFCOUNTED) {
347662306a36Sopenharmony_ci			ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
347762306a36Sopenharmony_ci						      num_clusters, max_cpos);
347862306a36Sopenharmony_ci			if (ret) {
347962306a36Sopenharmony_ci				mlog_errno(ret);
348062306a36Sopenharmony_ci				break;
348162306a36Sopenharmony_ci			}
348262306a36Sopenharmony_ci		}
348362306a36Sopenharmony_ci
348462306a36Sopenharmony_ci		write_len -= num_clusters;
348562306a36Sopenharmony_ci		cpos += num_clusters;
348662306a36Sopenharmony_ci	}
348762306a36Sopenharmony_ci
348862306a36Sopenharmony_ci	return ret;
348962306a36Sopenharmony_ci}
349062306a36Sopenharmony_ci
349162306a36Sopenharmony_cistatic int ocfs2_xattr_value_get_clusters(struct ocfs2_cow_context *context,
349262306a36Sopenharmony_ci					  u32 v_cluster, u32 *p_cluster,
349362306a36Sopenharmony_ci					  u32 *num_clusters,
349462306a36Sopenharmony_ci					  unsigned int *extent_flags)
349562306a36Sopenharmony_ci{
349662306a36Sopenharmony_ci	struct inode *inode = context->inode;
349762306a36Sopenharmony_ci	struct ocfs2_xattr_value_root *xv = context->cow_object;
349862306a36Sopenharmony_ci
349962306a36Sopenharmony_ci	return ocfs2_xattr_get_clusters(inode, v_cluster, p_cluster,
350062306a36Sopenharmony_ci					num_clusters, &xv->xr_list,
350162306a36Sopenharmony_ci					extent_flags);
350262306a36Sopenharmony_ci}
350362306a36Sopenharmony_ci
350462306a36Sopenharmony_ci/*
350562306a36Sopenharmony_ci * Given a xattr value root, calculate the most meta/credits we need for
350662306a36Sopenharmony_ci * refcount tree change if we truncate it to 0.
350762306a36Sopenharmony_ci */
350862306a36Sopenharmony_ciint ocfs2_refcounted_xattr_delete_need(struct inode *inode,
350962306a36Sopenharmony_ci				       struct ocfs2_caching_info *ref_ci,
351062306a36Sopenharmony_ci				       struct buffer_head *ref_root_bh,
351162306a36Sopenharmony_ci				       struct ocfs2_xattr_value_root *xv,
351262306a36Sopenharmony_ci				       int *meta_add, int *credits)
351362306a36Sopenharmony_ci{
351462306a36Sopenharmony_ci	int ret = 0, index, ref_blocks = 0;
351562306a36Sopenharmony_ci	u32 p_cluster, num_clusters;
351662306a36Sopenharmony_ci	u32 cpos = 0, clusters = le32_to_cpu(xv->xr_clusters);
351762306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb;
351862306a36Sopenharmony_ci	struct ocfs2_refcount_rec rec;
351962306a36Sopenharmony_ci	struct buffer_head *ref_leaf_bh = NULL;
352062306a36Sopenharmony_ci
352162306a36Sopenharmony_ci	while (cpos < clusters) {
352262306a36Sopenharmony_ci		ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
352362306a36Sopenharmony_ci					       &num_clusters, &xv->xr_list,
352462306a36Sopenharmony_ci					       NULL);
352562306a36Sopenharmony_ci		if (ret) {
352662306a36Sopenharmony_ci			mlog_errno(ret);
352762306a36Sopenharmony_ci			goto out;
352862306a36Sopenharmony_ci		}
352962306a36Sopenharmony_ci
353062306a36Sopenharmony_ci		cpos += num_clusters;
353162306a36Sopenharmony_ci
353262306a36Sopenharmony_ci		while (num_clusters) {
353362306a36Sopenharmony_ci			ret = ocfs2_get_refcount_rec(ref_ci, ref_root_bh,
353462306a36Sopenharmony_ci						     p_cluster, num_clusters,
353562306a36Sopenharmony_ci						     &rec, &index,
353662306a36Sopenharmony_ci						     &ref_leaf_bh);
353762306a36Sopenharmony_ci			if (ret) {
353862306a36Sopenharmony_ci				mlog_errno(ret);
353962306a36Sopenharmony_ci				goto out;
354062306a36Sopenharmony_ci			}
354162306a36Sopenharmony_ci
354262306a36Sopenharmony_ci			BUG_ON(!rec.r_refcount);
354362306a36Sopenharmony_ci
354462306a36Sopenharmony_ci			rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
354562306a36Sopenharmony_ci
354662306a36Sopenharmony_ci			/*
354762306a36Sopenharmony_ci			 * We really don't know whether the other clusters is in
354862306a36Sopenharmony_ci			 * this refcount block or not, so just take the worst
354962306a36Sopenharmony_ci			 * case that all the clusters are in this block and each
355062306a36Sopenharmony_ci			 * one will split a refcount rec, so totally we need
355162306a36Sopenharmony_ci			 * clusters * 2 new refcount rec.
355262306a36Sopenharmony_ci			 */
355362306a36Sopenharmony_ci			if (le16_to_cpu(rb->rf_records.rl_used) + clusters * 2 >
355462306a36Sopenharmony_ci			    le16_to_cpu(rb->rf_records.rl_count))
355562306a36Sopenharmony_ci				ref_blocks++;
355662306a36Sopenharmony_ci
355762306a36Sopenharmony_ci			*credits += 1;
355862306a36Sopenharmony_ci			brelse(ref_leaf_bh);
355962306a36Sopenharmony_ci			ref_leaf_bh = NULL;
356062306a36Sopenharmony_ci
356162306a36Sopenharmony_ci			if (num_clusters <= le32_to_cpu(rec.r_clusters))
356262306a36Sopenharmony_ci				break;
356362306a36Sopenharmony_ci			else
356462306a36Sopenharmony_ci				num_clusters -= le32_to_cpu(rec.r_clusters);
356562306a36Sopenharmony_ci			p_cluster += num_clusters;
356662306a36Sopenharmony_ci		}
356762306a36Sopenharmony_ci	}
356862306a36Sopenharmony_ci
356962306a36Sopenharmony_ci	*meta_add += ref_blocks;
357062306a36Sopenharmony_ci	if (!ref_blocks)
357162306a36Sopenharmony_ci		goto out;
357262306a36Sopenharmony_ci
357362306a36Sopenharmony_ci	rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
357462306a36Sopenharmony_ci	if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
357562306a36Sopenharmony_ci		*credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
357662306a36Sopenharmony_ci	else {
357762306a36Sopenharmony_ci		struct ocfs2_extent_tree et;
357862306a36Sopenharmony_ci
357962306a36Sopenharmony_ci		ocfs2_init_refcount_extent_tree(&et, ref_ci, ref_root_bh);
358062306a36Sopenharmony_ci		*credits += ocfs2_calc_extend_credits(inode->i_sb,
358162306a36Sopenharmony_ci						      et.et_root_el);
358262306a36Sopenharmony_ci	}
358362306a36Sopenharmony_ci
358462306a36Sopenharmony_ciout:
358562306a36Sopenharmony_ci	brelse(ref_leaf_bh);
358662306a36Sopenharmony_ci	return ret;
358762306a36Sopenharmony_ci}
358862306a36Sopenharmony_ci
358962306a36Sopenharmony_ci/*
359062306a36Sopenharmony_ci * Do CoW for xattr.
359162306a36Sopenharmony_ci */
359262306a36Sopenharmony_ciint ocfs2_refcount_cow_xattr(struct inode *inode,
359362306a36Sopenharmony_ci			     struct ocfs2_dinode *di,
359462306a36Sopenharmony_ci			     struct ocfs2_xattr_value_buf *vb,
359562306a36Sopenharmony_ci			     struct ocfs2_refcount_tree *ref_tree,
359662306a36Sopenharmony_ci			     struct buffer_head *ref_root_bh,
359762306a36Sopenharmony_ci			     u32 cpos, u32 write_len,
359862306a36Sopenharmony_ci			     struct ocfs2_post_refcount *post)
359962306a36Sopenharmony_ci{
360062306a36Sopenharmony_ci	int ret;
360162306a36Sopenharmony_ci	struct ocfs2_xattr_value_root *xv = vb->vb_xv;
360262306a36Sopenharmony_ci	struct ocfs2_cow_context *context = NULL;
360362306a36Sopenharmony_ci	u32 cow_start, cow_len;
360462306a36Sopenharmony_ci
360562306a36Sopenharmony_ci	BUG_ON(!ocfs2_is_refcount_inode(inode));
360662306a36Sopenharmony_ci
360762306a36Sopenharmony_ci	ret = ocfs2_refcount_cal_cow_clusters(inode, &xv->xr_list,
360862306a36Sopenharmony_ci					      cpos, write_len, UINT_MAX,
360962306a36Sopenharmony_ci					      &cow_start, &cow_len);
361062306a36Sopenharmony_ci	if (ret) {
361162306a36Sopenharmony_ci		mlog_errno(ret);
361262306a36Sopenharmony_ci		goto out;
361362306a36Sopenharmony_ci	}
361462306a36Sopenharmony_ci
361562306a36Sopenharmony_ci	BUG_ON(cow_len == 0);
361662306a36Sopenharmony_ci
361762306a36Sopenharmony_ci	context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
361862306a36Sopenharmony_ci	if (!context) {
361962306a36Sopenharmony_ci		ret = -ENOMEM;
362062306a36Sopenharmony_ci		mlog_errno(ret);
362162306a36Sopenharmony_ci		goto out;
362262306a36Sopenharmony_ci	}
362362306a36Sopenharmony_ci
362462306a36Sopenharmony_ci	context->inode = inode;
362562306a36Sopenharmony_ci	context->cow_start = cow_start;
362662306a36Sopenharmony_ci	context->cow_len = cow_len;
362762306a36Sopenharmony_ci	context->ref_tree = ref_tree;
362862306a36Sopenharmony_ci	context->ref_root_bh = ref_root_bh;
362962306a36Sopenharmony_ci	context->cow_object = xv;
363062306a36Sopenharmony_ci
363162306a36Sopenharmony_ci	context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd;
363262306a36Sopenharmony_ci	/* We need the extra credits for duplicate_clusters by jbd. */
363362306a36Sopenharmony_ci	context->extra_credits =
363462306a36Sopenharmony_ci		ocfs2_clusters_to_blocks(inode->i_sb, 1) * cow_len;
363562306a36Sopenharmony_ci	context->get_clusters = ocfs2_xattr_value_get_clusters;
363662306a36Sopenharmony_ci	context->post_refcount = post;
363762306a36Sopenharmony_ci
363862306a36Sopenharmony_ci	ocfs2_init_xattr_value_extent_tree(&context->data_et,
363962306a36Sopenharmony_ci					   INODE_CACHE(inode), vb);
364062306a36Sopenharmony_ci
364162306a36Sopenharmony_ci	ret = ocfs2_replace_cow(context);
364262306a36Sopenharmony_ci	if (ret)
364362306a36Sopenharmony_ci		mlog_errno(ret);
364462306a36Sopenharmony_ci
364562306a36Sopenharmony_ciout:
364662306a36Sopenharmony_ci	kfree(context);
364762306a36Sopenharmony_ci	return ret;
364862306a36Sopenharmony_ci}
364962306a36Sopenharmony_ci
365062306a36Sopenharmony_ci/*
365162306a36Sopenharmony_ci * Insert a new extent into refcount tree and mark a extent rec
365262306a36Sopenharmony_ci * as refcounted in the dinode tree.
365362306a36Sopenharmony_ci */
365462306a36Sopenharmony_ciint ocfs2_add_refcount_flag(struct inode *inode,
365562306a36Sopenharmony_ci			    struct ocfs2_extent_tree *data_et,
365662306a36Sopenharmony_ci			    struct ocfs2_caching_info *ref_ci,
365762306a36Sopenharmony_ci			    struct buffer_head *ref_root_bh,
365862306a36Sopenharmony_ci			    u32 cpos, u32 p_cluster, u32 num_clusters,
365962306a36Sopenharmony_ci			    struct ocfs2_cached_dealloc_ctxt *dealloc,
366062306a36Sopenharmony_ci			    struct ocfs2_post_refcount *post)
366162306a36Sopenharmony_ci{
366262306a36Sopenharmony_ci	int ret;
366362306a36Sopenharmony_ci	handle_t *handle;
366462306a36Sopenharmony_ci	int credits = 1, ref_blocks = 0;
366562306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
366662306a36Sopenharmony_ci	struct ocfs2_alloc_context *meta_ac = NULL;
366762306a36Sopenharmony_ci
366862306a36Sopenharmony_ci	/* We need to be able to handle at least an extent tree split. */
366962306a36Sopenharmony_ci	ref_blocks = ocfs2_extend_meta_needed(data_et->et_root_el);
367062306a36Sopenharmony_ci
367162306a36Sopenharmony_ci	ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
367262306a36Sopenharmony_ci					       ref_ci, ref_root_bh,
367362306a36Sopenharmony_ci					       p_cluster, num_clusters,
367462306a36Sopenharmony_ci					       &ref_blocks, &credits);
367562306a36Sopenharmony_ci	if (ret) {
367662306a36Sopenharmony_ci		mlog_errno(ret);
367762306a36Sopenharmony_ci		goto out;
367862306a36Sopenharmony_ci	}
367962306a36Sopenharmony_ci
368062306a36Sopenharmony_ci	trace_ocfs2_add_refcount_flag(ref_blocks, credits);
368162306a36Sopenharmony_ci
368262306a36Sopenharmony_ci	if (ref_blocks) {
368362306a36Sopenharmony_ci		ret = ocfs2_reserve_new_metadata_blocks(osb,
368462306a36Sopenharmony_ci							ref_blocks, &meta_ac);
368562306a36Sopenharmony_ci		if (ret) {
368662306a36Sopenharmony_ci			mlog_errno(ret);
368762306a36Sopenharmony_ci			goto out;
368862306a36Sopenharmony_ci		}
368962306a36Sopenharmony_ci	}
369062306a36Sopenharmony_ci
369162306a36Sopenharmony_ci	if (post)
369262306a36Sopenharmony_ci		credits += post->credits;
369362306a36Sopenharmony_ci
369462306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, credits);
369562306a36Sopenharmony_ci	if (IS_ERR(handle)) {
369662306a36Sopenharmony_ci		ret = PTR_ERR(handle);
369762306a36Sopenharmony_ci		mlog_errno(ret);
369862306a36Sopenharmony_ci		goto out;
369962306a36Sopenharmony_ci	}
370062306a36Sopenharmony_ci
370162306a36Sopenharmony_ci	ret = ocfs2_mark_extent_refcounted(inode, data_et, handle,
370262306a36Sopenharmony_ci					   cpos, num_clusters, p_cluster,
370362306a36Sopenharmony_ci					   meta_ac, dealloc);
370462306a36Sopenharmony_ci	if (ret) {
370562306a36Sopenharmony_ci		mlog_errno(ret);
370662306a36Sopenharmony_ci		goto out_commit;
370762306a36Sopenharmony_ci	}
370862306a36Sopenharmony_ci
370962306a36Sopenharmony_ci	ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
371062306a36Sopenharmony_ci					p_cluster, num_clusters, 0,
371162306a36Sopenharmony_ci					meta_ac, dealloc);
371262306a36Sopenharmony_ci	if (ret) {
371362306a36Sopenharmony_ci		mlog_errno(ret);
371462306a36Sopenharmony_ci		goto out_commit;
371562306a36Sopenharmony_ci	}
371662306a36Sopenharmony_ci
371762306a36Sopenharmony_ci	if (post && post->func) {
371862306a36Sopenharmony_ci		ret = post->func(inode, handle, post->para);
371962306a36Sopenharmony_ci		if (ret)
372062306a36Sopenharmony_ci			mlog_errno(ret);
372162306a36Sopenharmony_ci	}
372262306a36Sopenharmony_ci
372362306a36Sopenharmony_ciout_commit:
372462306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
372562306a36Sopenharmony_ciout:
372662306a36Sopenharmony_ci	if (meta_ac)
372762306a36Sopenharmony_ci		ocfs2_free_alloc_context(meta_ac);
372862306a36Sopenharmony_ci	return ret;
372962306a36Sopenharmony_ci}
373062306a36Sopenharmony_ci
373162306a36Sopenharmony_cistatic int ocfs2_change_ctime(struct inode *inode,
373262306a36Sopenharmony_ci			      struct buffer_head *di_bh)
373362306a36Sopenharmony_ci{
373462306a36Sopenharmony_ci	int ret;
373562306a36Sopenharmony_ci	handle_t *handle;
373662306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
373762306a36Sopenharmony_ci
373862306a36Sopenharmony_ci	handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb),
373962306a36Sopenharmony_ci				   OCFS2_INODE_UPDATE_CREDITS);
374062306a36Sopenharmony_ci	if (IS_ERR(handle)) {
374162306a36Sopenharmony_ci		ret = PTR_ERR(handle);
374262306a36Sopenharmony_ci		mlog_errno(ret);
374362306a36Sopenharmony_ci		goto out;
374462306a36Sopenharmony_ci	}
374562306a36Sopenharmony_ci
374662306a36Sopenharmony_ci	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
374762306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
374862306a36Sopenharmony_ci	if (ret) {
374962306a36Sopenharmony_ci		mlog_errno(ret);
375062306a36Sopenharmony_ci		goto out_commit;
375162306a36Sopenharmony_ci	}
375262306a36Sopenharmony_ci
375362306a36Sopenharmony_ci	inode_set_ctime_current(inode);
375462306a36Sopenharmony_ci	di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
375562306a36Sopenharmony_ci	di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
375662306a36Sopenharmony_ci
375762306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, di_bh);
375862306a36Sopenharmony_ci
375962306a36Sopenharmony_ciout_commit:
376062306a36Sopenharmony_ci	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
376162306a36Sopenharmony_ciout:
376262306a36Sopenharmony_ci	return ret;
376362306a36Sopenharmony_ci}
376462306a36Sopenharmony_ci
376562306a36Sopenharmony_cistatic int ocfs2_attach_refcount_tree(struct inode *inode,
376662306a36Sopenharmony_ci				      struct buffer_head *di_bh)
376762306a36Sopenharmony_ci{
376862306a36Sopenharmony_ci	int ret, data_changed = 0;
376962306a36Sopenharmony_ci	struct buffer_head *ref_root_bh = NULL;
377062306a36Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
377162306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
377262306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
377362306a36Sopenharmony_ci	struct ocfs2_refcount_tree *ref_tree;
377462306a36Sopenharmony_ci	unsigned int ext_flags;
377562306a36Sopenharmony_ci	loff_t size;
377662306a36Sopenharmony_ci	u32 cpos, num_clusters, clusters, p_cluster;
377762306a36Sopenharmony_ci	struct ocfs2_cached_dealloc_ctxt dealloc;
377862306a36Sopenharmony_ci	struct ocfs2_extent_tree di_et;
377962306a36Sopenharmony_ci
378062306a36Sopenharmony_ci	ocfs2_init_dealloc_ctxt(&dealloc);
378162306a36Sopenharmony_ci
378262306a36Sopenharmony_ci	if (!ocfs2_is_refcount_inode(inode)) {
378362306a36Sopenharmony_ci		ret = ocfs2_create_refcount_tree(inode, di_bh);
378462306a36Sopenharmony_ci		if (ret) {
378562306a36Sopenharmony_ci			mlog_errno(ret);
378662306a36Sopenharmony_ci			goto out;
378762306a36Sopenharmony_ci		}
378862306a36Sopenharmony_ci	}
378962306a36Sopenharmony_ci
379062306a36Sopenharmony_ci	BUG_ON(!di->i_refcount_loc);
379162306a36Sopenharmony_ci	ret = ocfs2_lock_refcount_tree(osb,
379262306a36Sopenharmony_ci				       le64_to_cpu(di->i_refcount_loc), 1,
379362306a36Sopenharmony_ci				       &ref_tree, &ref_root_bh);
379462306a36Sopenharmony_ci	if (ret) {
379562306a36Sopenharmony_ci		mlog_errno(ret);
379662306a36Sopenharmony_ci		goto out;
379762306a36Sopenharmony_ci	}
379862306a36Sopenharmony_ci
379962306a36Sopenharmony_ci	if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
380062306a36Sopenharmony_ci		goto attach_xattr;
380162306a36Sopenharmony_ci
380262306a36Sopenharmony_ci	ocfs2_init_dinode_extent_tree(&di_et, INODE_CACHE(inode), di_bh);
380362306a36Sopenharmony_ci
380462306a36Sopenharmony_ci	size = i_size_read(inode);
380562306a36Sopenharmony_ci	clusters = ocfs2_clusters_for_bytes(inode->i_sb, size);
380662306a36Sopenharmony_ci
380762306a36Sopenharmony_ci	cpos = 0;
380862306a36Sopenharmony_ci	while (cpos < clusters) {
380962306a36Sopenharmony_ci		ret = ocfs2_get_clusters(inode, cpos, &p_cluster,
381062306a36Sopenharmony_ci					 &num_clusters, &ext_flags);
381162306a36Sopenharmony_ci		if (ret) {
381262306a36Sopenharmony_ci			mlog_errno(ret);
381362306a36Sopenharmony_ci			goto unlock;
381462306a36Sopenharmony_ci		}
381562306a36Sopenharmony_ci		if (p_cluster && !(ext_flags & OCFS2_EXT_REFCOUNTED)) {
381662306a36Sopenharmony_ci			ret = ocfs2_add_refcount_flag(inode, &di_et,
381762306a36Sopenharmony_ci						      &ref_tree->rf_ci,
381862306a36Sopenharmony_ci						      ref_root_bh, cpos,
381962306a36Sopenharmony_ci						      p_cluster, num_clusters,
382062306a36Sopenharmony_ci						      &dealloc, NULL);
382162306a36Sopenharmony_ci			if (ret) {
382262306a36Sopenharmony_ci				mlog_errno(ret);
382362306a36Sopenharmony_ci				goto unlock;
382462306a36Sopenharmony_ci			}
382562306a36Sopenharmony_ci
382662306a36Sopenharmony_ci			data_changed = 1;
382762306a36Sopenharmony_ci		}
382862306a36Sopenharmony_ci		cpos += num_clusters;
382962306a36Sopenharmony_ci	}
383062306a36Sopenharmony_ci
383162306a36Sopenharmony_ciattach_xattr:
383262306a36Sopenharmony_ci	if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
383362306a36Sopenharmony_ci		ret = ocfs2_xattr_attach_refcount_tree(inode, di_bh,
383462306a36Sopenharmony_ci						       &ref_tree->rf_ci,
383562306a36Sopenharmony_ci						       ref_root_bh,
383662306a36Sopenharmony_ci						       &dealloc);
383762306a36Sopenharmony_ci		if (ret) {
383862306a36Sopenharmony_ci			mlog_errno(ret);
383962306a36Sopenharmony_ci			goto unlock;
384062306a36Sopenharmony_ci		}
384162306a36Sopenharmony_ci	}
384262306a36Sopenharmony_ci
384362306a36Sopenharmony_ci	if (data_changed) {
384462306a36Sopenharmony_ci		ret = ocfs2_change_ctime(inode, di_bh);
384562306a36Sopenharmony_ci		if (ret)
384662306a36Sopenharmony_ci			mlog_errno(ret);
384762306a36Sopenharmony_ci	}
384862306a36Sopenharmony_ci
384962306a36Sopenharmony_ciunlock:
385062306a36Sopenharmony_ci	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
385162306a36Sopenharmony_ci	brelse(ref_root_bh);
385262306a36Sopenharmony_ci
385362306a36Sopenharmony_ci	if (!ret && ocfs2_dealloc_has_cluster(&dealloc)) {
385462306a36Sopenharmony_ci		ocfs2_schedule_truncate_log_flush(osb, 1);
385562306a36Sopenharmony_ci		ocfs2_run_deallocs(osb, &dealloc);
385662306a36Sopenharmony_ci	}
385762306a36Sopenharmony_ciout:
385862306a36Sopenharmony_ci	/*
385962306a36Sopenharmony_ci	 * Empty the extent map so that we may get the right extent
386062306a36Sopenharmony_ci	 * record from the disk.
386162306a36Sopenharmony_ci	 */
386262306a36Sopenharmony_ci	ocfs2_extent_map_trunc(inode, 0);
386362306a36Sopenharmony_ci
386462306a36Sopenharmony_ci	return ret;
386562306a36Sopenharmony_ci}
386662306a36Sopenharmony_ci
386762306a36Sopenharmony_cistatic int ocfs2_add_refcounted_extent(struct inode *inode,
386862306a36Sopenharmony_ci				   struct ocfs2_extent_tree *et,
386962306a36Sopenharmony_ci				   struct ocfs2_caching_info *ref_ci,
387062306a36Sopenharmony_ci				   struct buffer_head *ref_root_bh,
387162306a36Sopenharmony_ci				   u32 cpos, u32 p_cluster, u32 num_clusters,
387262306a36Sopenharmony_ci				   unsigned int ext_flags,
387362306a36Sopenharmony_ci				   struct ocfs2_cached_dealloc_ctxt *dealloc)
387462306a36Sopenharmony_ci{
387562306a36Sopenharmony_ci	int ret;
387662306a36Sopenharmony_ci	handle_t *handle;
387762306a36Sopenharmony_ci	int credits = 0;
387862306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
387962306a36Sopenharmony_ci	struct ocfs2_alloc_context *meta_ac = NULL;
388062306a36Sopenharmony_ci
388162306a36Sopenharmony_ci	ret = ocfs2_lock_refcount_allocators(inode->i_sb,
388262306a36Sopenharmony_ci					     p_cluster, num_clusters,
388362306a36Sopenharmony_ci					     et, ref_ci,
388462306a36Sopenharmony_ci					     ref_root_bh, &meta_ac,
388562306a36Sopenharmony_ci					     NULL, &credits);
388662306a36Sopenharmony_ci	if (ret) {
388762306a36Sopenharmony_ci		mlog_errno(ret);
388862306a36Sopenharmony_ci		goto out;
388962306a36Sopenharmony_ci	}
389062306a36Sopenharmony_ci
389162306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, credits);
389262306a36Sopenharmony_ci	if (IS_ERR(handle)) {
389362306a36Sopenharmony_ci		ret = PTR_ERR(handle);
389462306a36Sopenharmony_ci		mlog_errno(ret);
389562306a36Sopenharmony_ci		goto out;
389662306a36Sopenharmony_ci	}
389762306a36Sopenharmony_ci
389862306a36Sopenharmony_ci	ret = ocfs2_insert_extent(handle, et, cpos,
389962306a36Sopenharmony_ci			ocfs2_clusters_to_blocks(inode->i_sb, p_cluster),
390062306a36Sopenharmony_ci			num_clusters, ext_flags, meta_ac);
390162306a36Sopenharmony_ci	if (ret) {
390262306a36Sopenharmony_ci		mlog_errno(ret);
390362306a36Sopenharmony_ci		goto out_commit;
390462306a36Sopenharmony_ci	}
390562306a36Sopenharmony_ci
390662306a36Sopenharmony_ci	ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
390762306a36Sopenharmony_ci				      p_cluster, num_clusters,
390862306a36Sopenharmony_ci				      meta_ac, dealloc);
390962306a36Sopenharmony_ci	if (ret) {
391062306a36Sopenharmony_ci		mlog_errno(ret);
391162306a36Sopenharmony_ci		goto out_commit;
391262306a36Sopenharmony_ci	}
391362306a36Sopenharmony_ci
391462306a36Sopenharmony_ci	ret = dquot_alloc_space_nodirty(inode,
391562306a36Sopenharmony_ci		ocfs2_clusters_to_bytes(osb->sb, num_clusters));
391662306a36Sopenharmony_ci	if (ret)
391762306a36Sopenharmony_ci		mlog_errno(ret);
391862306a36Sopenharmony_ci
391962306a36Sopenharmony_ciout_commit:
392062306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
392162306a36Sopenharmony_ciout:
392262306a36Sopenharmony_ci	if (meta_ac)
392362306a36Sopenharmony_ci		ocfs2_free_alloc_context(meta_ac);
392462306a36Sopenharmony_ci	return ret;
392562306a36Sopenharmony_ci}
392662306a36Sopenharmony_ci
392762306a36Sopenharmony_cistatic int ocfs2_duplicate_inline_data(struct inode *s_inode,
392862306a36Sopenharmony_ci				       struct buffer_head *s_bh,
392962306a36Sopenharmony_ci				       struct inode *t_inode,
393062306a36Sopenharmony_ci				       struct buffer_head *t_bh)
393162306a36Sopenharmony_ci{
393262306a36Sopenharmony_ci	int ret;
393362306a36Sopenharmony_ci	handle_t *handle;
393462306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(s_inode->i_sb);
393562306a36Sopenharmony_ci	struct ocfs2_dinode *s_di = (struct ocfs2_dinode *)s_bh->b_data;
393662306a36Sopenharmony_ci	struct ocfs2_dinode *t_di = (struct ocfs2_dinode *)t_bh->b_data;
393762306a36Sopenharmony_ci
393862306a36Sopenharmony_ci	BUG_ON(!(OCFS2_I(s_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));
393962306a36Sopenharmony_ci
394062306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
394162306a36Sopenharmony_ci	if (IS_ERR(handle)) {
394262306a36Sopenharmony_ci		ret = PTR_ERR(handle);
394362306a36Sopenharmony_ci		mlog_errno(ret);
394462306a36Sopenharmony_ci		goto out;
394562306a36Sopenharmony_ci	}
394662306a36Sopenharmony_ci
394762306a36Sopenharmony_ci	ret = ocfs2_journal_access_di(handle, INODE_CACHE(t_inode), t_bh,
394862306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
394962306a36Sopenharmony_ci	if (ret) {
395062306a36Sopenharmony_ci		mlog_errno(ret);
395162306a36Sopenharmony_ci		goto out_commit;
395262306a36Sopenharmony_ci	}
395362306a36Sopenharmony_ci
395462306a36Sopenharmony_ci	t_di->id2.i_data.id_count = s_di->id2.i_data.id_count;
395562306a36Sopenharmony_ci	memcpy(t_di->id2.i_data.id_data, s_di->id2.i_data.id_data,
395662306a36Sopenharmony_ci	       le16_to_cpu(s_di->id2.i_data.id_count));
395762306a36Sopenharmony_ci	spin_lock(&OCFS2_I(t_inode)->ip_lock);
395862306a36Sopenharmony_ci	OCFS2_I(t_inode)->ip_dyn_features |= OCFS2_INLINE_DATA_FL;
395962306a36Sopenharmony_ci	t_di->i_dyn_features = cpu_to_le16(OCFS2_I(t_inode)->ip_dyn_features);
396062306a36Sopenharmony_ci	spin_unlock(&OCFS2_I(t_inode)->ip_lock);
396162306a36Sopenharmony_ci
396262306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, t_bh);
396362306a36Sopenharmony_ci
396462306a36Sopenharmony_ciout_commit:
396562306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
396662306a36Sopenharmony_ciout:
396762306a36Sopenharmony_ci	return ret;
396862306a36Sopenharmony_ci}
396962306a36Sopenharmony_ci
397062306a36Sopenharmony_cistatic int ocfs2_duplicate_extent_list(struct inode *s_inode,
397162306a36Sopenharmony_ci				struct inode *t_inode,
397262306a36Sopenharmony_ci				struct buffer_head *t_bh,
397362306a36Sopenharmony_ci				struct ocfs2_caching_info *ref_ci,
397462306a36Sopenharmony_ci				struct buffer_head *ref_root_bh,
397562306a36Sopenharmony_ci				struct ocfs2_cached_dealloc_ctxt *dealloc)
397662306a36Sopenharmony_ci{
397762306a36Sopenharmony_ci	int ret = 0;
397862306a36Sopenharmony_ci	u32 p_cluster, num_clusters, clusters, cpos;
397962306a36Sopenharmony_ci	loff_t size;
398062306a36Sopenharmony_ci	unsigned int ext_flags;
398162306a36Sopenharmony_ci	struct ocfs2_extent_tree et;
398262306a36Sopenharmony_ci
398362306a36Sopenharmony_ci	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(t_inode), t_bh);
398462306a36Sopenharmony_ci
398562306a36Sopenharmony_ci	size = i_size_read(s_inode);
398662306a36Sopenharmony_ci	clusters = ocfs2_clusters_for_bytes(s_inode->i_sb, size);
398762306a36Sopenharmony_ci
398862306a36Sopenharmony_ci	cpos = 0;
398962306a36Sopenharmony_ci	while (cpos < clusters) {
399062306a36Sopenharmony_ci		ret = ocfs2_get_clusters(s_inode, cpos, &p_cluster,
399162306a36Sopenharmony_ci					 &num_clusters, &ext_flags);
399262306a36Sopenharmony_ci		if (ret) {
399362306a36Sopenharmony_ci			mlog_errno(ret);
399462306a36Sopenharmony_ci			goto out;
399562306a36Sopenharmony_ci		}
399662306a36Sopenharmony_ci		if (p_cluster) {
399762306a36Sopenharmony_ci			ret = ocfs2_add_refcounted_extent(t_inode, &et,
399862306a36Sopenharmony_ci							  ref_ci, ref_root_bh,
399962306a36Sopenharmony_ci							  cpos, p_cluster,
400062306a36Sopenharmony_ci							  num_clusters,
400162306a36Sopenharmony_ci							  ext_flags,
400262306a36Sopenharmony_ci							  dealloc);
400362306a36Sopenharmony_ci			if (ret) {
400462306a36Sopenharmony_ci				mlog_errno(ret);
400562306a36Sopenharmony_ci				goto out;
400662306a36Sopenharmony_ci			}
400762306a36Sopenharmony_ci		}
400862306a36Sopenharmony_ci
400962306a36Sopenharmony_ci		cpos += num_clusters;
401062306a36Sopenharmony_ci	}
401162306a36Sopenharmony_ci
401262306a36Sopenharmony_ciout:
401362306a36Sopenharmony_ci	return ret;
401462306a36Sopenharmony_ci}
401562306a36Sopenharmony_ci
401662306a36Sopenharmony_ci/*
401762306a36Sopenharmony_ci * change the new file's attributes to the src.
401862306a36Sopenharmony_ci *
401962306a36Sopenharmony_ci * reflink creates a snapshot of a file, that means the attributes
402062306a36Sopenharmony_ci * must be identical except for three exceptions - nlink, ino, and ctime.
402162306a36Sopenharmony_ci */
402262306a36Sopenharmony_cistatic int ocfs2_complete_reflink(struct inode *s_inode,
402362306a36Sopenharmony_ci				  struct buffer_head *s_bh,
402462306a36Sopenharmony_ci				  struct inode *t_inode,
402562306a36Sopenharmony_ci				  struct buffer_head *t_bh,
402662306a36Sopenharmony_ci				  bool preserve)
402762306a36Sopenharmony_ci{
402862306a36Sopenharmony_ci	int ret;
402962306a36Sopenharmony_ci	handle_t *handle;
403062306a36Sopenharmony_ci	struct ocfs2_dinode *s_di = (struct ocfs2_dinode *)s_bh->b_data;
403162306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)t_bh->b_data;
403262306a36Sopenharmony_ci	loff_t size = i_size_read(s_inode);
403362306a36Sopenharmony_ci
403462306a36Sopenharmony_ci	handle = ocfs2_start_trans(OCFS2_SB(t_inode->i_sb),
403562306a36Sopenharmony_ci				   OCFS2_INODE_UPDATE_CREDITS);
403662306a36Sopenharmony_ci	if (IS_ERR(handle)) {
403762306a36Sopenharmony_ci		ret = PTR_ERR(handle);
403862306a36Sopenharmony_ci		mlog_errno(ret);
403962306a36Sopenharmony_ci		return ret;
404062306a36Sopenharmony_ci	}
404162306a36Sopenharmony_ci
404262306a36Sopenharmony_ci	ret = ocfs2_journal_access_di(handle, INODE_CACHE(t_inode), t_bh,
404362306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
404462306a36Sopenharmony_ci	if (ret) {
404562306a36Sopenharmony_ci		mlog_errno(ret);
404662306a36Sopenharmony_ci		goto out_commit;
404762306a36Sopenharmony_ci	}
404862306a36Sopenharmony_ci
404962306a36Sopenharmony_ci	spin_lock(&OCFS2_I(t_inode)->ip_lock);
405062306a36Sopenharmony_ci	OCFS2_I(t_inode)->ip_clusters = OCFS2_I(s_inode)->ip_clusters;
405162306a36Sopenharmony_ci	OCFS2_I(t_inode)->ip_attr = OCFS2_I(s_inode)->ip_attr;
405262306a36Sopenharmony_ci	OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features;
405362306a36Sopenharmony_ci	spin_unlock(&OCFS2_I(t_inode)->ip_lock);
405462306a36Sopenharmony_ci	i_size_write(t_inode, size);
405562306a36Sopenharmony_ci	t_inode->i_blocks = s_inode->i_blocks;
405662306a36Sopenharmony_ci
405762306a36Sopenharmony_ci	di->i_xattr_inline_size = s_di->i_xattr_inline_size;
405862306a36Sopenharmony_ci	di->i_clusters = s_di->i_clusters;
405962306a36Sopenharmony_ci	di->i_size = s_di->i_size;
406062306a36Sopenharmony_ci	di->i_dyn_features = s_di->i_dyn_features;
406162306a36Sopenharmony_ci	di->i_attr = s_di->i_attr;
406262306a36Sopenharmony_ci
406362306a36Sopenharmony_ci	if (preserve) {
406462306a36Sopenharmony_ci		t_inode->i_uid = s_inode->i_uid;
406562306a36Sopenharmony_ci		t_inode->i_gid = s_inode->i_gid;
406662306a36Sopenharmony_ci		t_inode->i_mode = s_inode->i_mode;
406762306a36Sopenharmony_ci		di->i_uid = s_di->i_uid;
406862306a36Sopenharmony_ci		di->i_gid = s_di->i_gid;
406962306a36Sopenharmony_ci		di->i_mode = s_di->i_mode;
407062306a36Sopenharmony_ci
407162306a36Sopenharmony_ci		/*
407262306a36Sopenharmony_ci		 * update time.
407362306a36Sopenharmony_ci		 * we want mtime to appear identical to the source and
407462306a36Sopenharmony_ci		 * update ctime.
407562306a36Sopenharmony_ci		 */
407662306a36Sopenharmony_ci		inode_set_ctime_current(t_inode);
407762306a36Sopenharmony_ci
407862306a36Sopenharmony_ci		di->i_ctime = cpu_to_le64(inode_get_ctime(t_inode).tv_sec);
407962306a36Sopenharmony_ci		di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(t_inode).tv_nsec);
408062306a36Sopenharmony_ci
408162306a36Sopenharmony_ci		t_inode->i_mtime = s_inode->i_mtime;
408262306a36Sopenharmony_ci		di->i_mtime = s_di->i_mtime;
408362306a36Sopenharmony_ci		di->i_mtime_nsec = s_di->i_mtime_nsec;
408462306a36Sopenharmony_ci	}
408562306a36Sopenharmony_ci
408662306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, t_bh);
408762306a36Sopenharmony_ci
408862306a36Sopenharmony_ciout_commit:
408962306a36Sopenharmony_ci	ocfs2_commit_trans(OCFS2_SB(t_inode->i_sb), handle);
409062306a36Sopenharmony_ci	return ret;
409162306a36Sopenharmony_ci}
409262306a36Sopenharmony_ci
409362306a36Sopenharmony_cistatic int ocfs2_create_reflink_node(struct inode *s_inode,
409462306a36Sopenharmony_ci				     struct buffer_head *s_bh,
409562306a36Sopenharmony_ci				     struct inode *t_inode,
409662306a36Sopenharmony_ci				     struct buffer_head *t_bh,
409762306a36Sopenharmony_ci				     bool preserve)
409862306a36Sopenharmony_ci{
409962306a36Sopenharmony_ci	int ret;
410062306a36Sopenharmony_ci	struct buffer_head *ref_root_bh = NULL;
410162306a36Sopenharmony_ci	struct ocfs2_cached_dealloc_ctxt dealloc;
410262306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(s_inode->i_sb);
410362306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)s_bh->b_data;
410462306a36Sopenharmony_ci	struct ocfs2_refcount_tree *ref_tree;
410562306a36Sopenharmony_ci
410662306a36Sopenharmony_ci	ocfs2_init_dealloc_ctxt(&dealloc);
410762306a36Sopenharmony_ci
410862306a36Sopenharmony_ci	ret = ocfs2_set_refcount_tree(t_inode, t_bh,
410962306a36Sopenharmony_ci				      le64_to_cpu(di->i_refcount_loc));
411062306a36Sopenharmony_ci	if (ret) {
411162306a36Sopenharmony_ci		mlog_errno(ret);
411262306a36Sopenharmony_ci		goto out;
411362306a36Sopenharmony_ci	}
411462306a36Sopenharmony_ci
411562306a36Sopenharmony_ci	if (OCFS2_I(s_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
411662306a36Sopenharmony_ci		ret = ocfs2_duplicate_inline_data(s_inode, s_bh,
411762306a36Sopenharmony_ci						  t_inode, t_bh);
411862306a36Sopenharmony_ci		if (ret)
411962306a36Sopenharmony_ci			mlog_errno(ret);
412062306a36Sopenharmony_ci		goto out;
412162306a36Sopenharmony_ci	}
412262306a36Sopenharmony_ci
412362306a36Sopenharmony_ci	ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
412462306a36Sopenharmony_ci				       1, &ref_tree, &ref_root_bh);
412562306a36Sopenharmony_ci	if (ret) {
412662306a36Sopenharmony_ci		mlog_errno(ret);
412762306a36Sopenharmony_ci		goto out;
412862306a36Sopenharmony_ci	}
412962306a36Sopenharmony_ci
413062306a36Sopenharmony_ci	ret = ocfs2_duplicate_extent_list(s_inode, t_inode, t_bh,
413162306a36Sopenharmony_ci					  &ref_tree->rf_ci, ref_root_bh,
413262306a36Sopenharmony_ci					  &dealloc);
413362306a36Sopenharmony_ci	if (ret) {
413462306a36Sopenharmony_ci		mlog_errno(ret);
413562306a36Sopenharmony_ci		goto out_unlock_refcount;
413662306a36Sopenharmony_ci	}
413762306a36Sopenharmony_ci
413862306a36Sopenharmony_ciout_unlock_refcount:
413962306a36Sopenharmony_ci	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
414062306a36Sopenharmony_ci	brelse(ref_root_bh);
414162306a36Sopenharmony_ciout:
414262306a36Sopenharmony_ci	if (ocfs2_dealloc_has_cluster(&dealloc)) {
414362306a36Sopenharmony_ci		ocfs2_schedule_truncate_log_flush(osb, 1);
414462306a36Sopenharmony_ci		ocfs2_run_deallocs(osb, &dealloc);
414562306a36Sopenharmony_ci	}
414662306a36Sopenharmony_ci
414762306a36Sopenharmony_ci	return ret;
414862306a36Sopenharmony_ci}
414962306a36Sopenharmony_ci
415062306a36Sopenharmony_cistatic int __ocfs2_reflink(struct dentry *old_dentry,
415162306a36Sopenharmony_ci			   struct buffer_head *old_bh,
415262306a36Sopenharmony_ci			   struct inode *new_inode,
415362306a36Sopenharmony_ci			   bool preserve)
415462306a36Sopenharmony_ci{
415562306a36Sopenharmony_ci	int ret;
415662306a36Sopenharmony_ci	struct inode *inode = d_inode(old_dentry);
415762306a36Sopenharmony_ci	struct buffer_head *new_bh = NULL;
415862306a36Sopenharmony_ci
415962306a36Sopenharmony_ci	if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
416062306a36Sopenharmony_ci		ret = -EINVAL;
416162306a36Sopenharmony_ci		mlog_errno(ret);
416262306a36Sopenharmony_ci		goto out;
416362306a36Sopenharmony_ci	}
416462306a36Sopenharmony_ci
416562306a36Sopenharmony_ci	ret = filemap_fdatawrite(inode->i_mapping);
416662306a36Sopenharmony_ci	if (ret) {
416762306a36Sopenharmony_ci		mlog_errno(ret);
416862306a36Sopenharmony_ci		goto out;
416962306a36Sopenharmony_ci	}
417062306a36Sopenharmony_ci
417162306a36Sopenharmony_ci	ret = ocfs2_attach_refcount_tree(inode, old_bh);
417262306a36Sopenharmony_ci	if (ret) {
417362306a36Sopenharmony_ci		mlog_errno(ret);
417462306a36Sopenharmony_ci		goto out;
417562306a36Sopenharmony_ci	}
417662306a36Sopenharmony_ci
417762306a36Sopenharmony_ci	inode_lock_nested(new_inode, I_MUTEX_CHILD);
417862306a36Sopenharmony_ci	ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1,
417962306a36Sopenharmony_ci				      OI_LS_REFLINK_TARGET);
418062306a36Sopenharmony_ci	if (ret) {
418162306a36Sopenharmony_ci		mlog_errno(ret);
418262306a36Sopenharmony_ci		goto out_unlock;
418362306a36Sopenharmony_ci	}
418462306a36Sopenharmony_ci
418562306a36Sopenharmony_ci	ret = ocfs2_create_reflink_node(inode, old_bh,
418662306a36Sopenharmony_ci					new_inode, new_bh, preserve);
418762306a36Sopenharmony_ci	if (ret) {
418862306a36Sopenharmony_ci		mlog_errno(ret);
418962306a36Sopenharmony_ci		goto inode_unlock;
419062306a36Sopenharmony_ci	}
419162306a36Sopenharmony_ci
419262306a36Sopenharmony_ci	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
419362306a36Sopenharmony_ci		ret = ocfs2_reflink_xattrs(inode, old_bh,
419462306a36Sopenharmony_ci					   new_inode, new_bh,
419562306a36Sopenharmony_ci					   preserve);
419662306a36Sopenharmony_ci		if (ret) {
419762306a36Sopenharmony_ci			mlog_errno(ret);
419862306a36Sopenharmony_ci			goto inode_unlock;
419962306a36Sopenharmony_ci		}
420062306a36Sopenharmony_ci	}
420162306a36Sopenharmony_ci
420262306a36Sopenharmony_ci	ret = ocfs2_complete_reflink(inode, old_bh,
420362306a36Sopenharmony_ci				     new_inode, new_bh, preserve);
420462306a36Sopenharmony_ci	if (ret)
420562306a36Sopenharmony_ci		mlog_errno(ret);
420662306a36Sopenharmony_ci
420762306a36Sopenharmony_ciinode_unlock:
420862306a36Sopenharmony_ci	ocfs2_inode_unlock(new_inode, 1);
420962306a36Sopenharmony_ci	brelse(new_bh);
421062306a36Sopenharmony_ciout_unlock:
421162306a36Sopenharmony_ci	inode_unlock(new_inode);
421262306a36Sopenharmony_ciout:
421362306a36Sopenharmony_ci	if (!ret) {
421462306a36Sopenharmony_ci		ret = filemap_fdatawait(inode->i_mapping);
421562306a36Sopenharmony_ci		if (ret)
421662306a36Sopenharmony_ci			mlog_errno(ret);
421762306a36Sopenharmony_ci	}
421862306a36Sopenharmony_ci	return ret;
421962306a36Sopenharmony_ci}
422062306a36Sopenharmony_ci
422162306a36Sopenharmony_cistatic int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
422262306a36Sopenharmony_ci			 struct dentry *new_dentry, bool preserve)
422362306a36Sopenharmony_ci{
422462306a36Sopenharmony_ci	int error, had_lock;
422562306a36Sopenharmony_ci	struct inode *inode = d_inode(old_dentry);
422662306a36Sopenharmony_ci	struct buffer_head *old_bh = NULL;
422762306a36Sopenharmony_ci	struct inode *new_orphan_inode = NULL;
422862306a36Sopenharmony_ci	struct ocfs2_lock_holder oh;
422962306a36Sopenharmony_ci
423062306a36Sopenharmony_ci	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
423162306a36Sopenharmony_ci		return -EOPNOTSUPP;
423262306a36Sopenharmony_ci
423362306a36Sopenharmony_ci
423462306a36Sopenharmony_ci	error = ocfs2_create_inode_in_orphan(dir, inode->i_mode,
423562306a36Sopenharmony_ci					     &new_orphan_inode);
423662306a36Sopenharmony_ci	if (error) {
423762306a36Sopenharmony_ci		mlog_errno(error);
423862306a36Sopenharmony_ci		goto out;
423962306a36Sopenharmony_ci	}
424062306a36Sopenharmony_ci
424162306a36Sopenharmony_ci	error = ocfs2_rw_lock(inode, 1);
424262306a36Sopenharmony_ci	if (error) {
424362306a36Sopenharmony_ci		mlog_errno(error);
424462306a36Sopenharmony_ci		goto out;
424562306a36Sopenharmony_ci	}
424662306a36Sopenharmony_ci
424762306a36Sopenharmony_ci	error = ocfs2_inode_lock(inode, &old_bh, 1);
424862306a36Sopenharmony_ci	if (error) {
424962306a36Sopenharmony_ci		mlog_errno(error);
425062306a36Sopenharmony_ci		ocfs2_rw_unlock(inode, 1);
425162306a36Sopenharmony_ci		goto out;
425262306a36Sopenharmony_ci	}
425362306a36Sopenharmony_ci
425462306a36Sopenharmony_ci	down_write(&OCFS2_I(inode)->ip_xattr_sem);
425562306a36Sopenharmony_ci	down_write(&OCFS2_I(inode)->ip_alloc_sem);
425662306a36Sopenharmony_ci	error = __ocfs2_reflink(old_dentry, old_bh,
425762306a36Sopenharmony_ci				new_orphan_inode, preserve);
425862306a36Sopenharmony_ci	up_write(&OCFS2_I(inode)->ip_alloc_sem);
425962306a36Sopenharmony_ci	up_write(&OCFS2_I(inode)->ip_xattr_sem);
426062306a36Sopenharmony_ci
426162306a36Sopenharmony_ci	ocfs2_inode_unlock(inode, 1);
426262306a36Sopenharmony_ci	ocfs2_rw_unlock(inode, 1);
426362306a36Sopenharmony_ci	brelse(old_bh);
426462306a36Sopenharmony_ci
426562306a36Sopenharmony_ci	if (error) {
426662306a36Sopenharmony_ci		mlog_errno(error);
426762306a36Sopenharmony_ci		goto out;
426862306a36Sopenharmony_ci	}
426962306a36Sopenharmony_ci
427062306a36Sopenharmony_ci	had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1,
427162306a36Sopenharmony_ci					    &oh);
427262306a36Sopenharmony_ci	if (had_lock < 0) {
427362306a36Sopenharmony_ci		error = had_lock;
427462306a36Sopenharmony_ci		mlog_errno(error);
427562306a36Sopenharmony_ci		goto out;
427662306a36Sopenharmony_ci	}
427762306a36Sopenharmony_ci
427862306a36Sopenharmony_ci	/* If the security isn't preserved, we need to re-initialize them. */
427962306a36Sopenharmony_ci	if (!preserve) {
428062306a36Sopenharmony_ci		error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
428162306a36Sopenharmony_ci						    &new_dentry->d_name);
428262306a36Sopenharmony_ci		if (error)
428362306a36Sopenharmony_ci			mlog_errno(error);
428462306a36Sopenharmony_ci	}
428562306a36Sopenharmony_ci	if (!error) {
428662306a36Sopenharmony_ci		error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
428762306a36Sopenharmony_ci						       new_dentry);
428862306a36Sopenharmony_ci		if (error)
428962306a36Sopenharmony_ci			mlog_errno(error);
429062306a36Sopenharmony_ci	}
429162306a36Sopenharmony_ci	ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock);
429262306a36Sopenharmony_ci
429362306a36Sopenharmony_ciout:
429462306a36Sopenharmony_ci	if (new_orphan_inode) {
429562306a36Sopenharmony_ci		/*
429662306a36Sopenharmony_ci		 * We need to open_unlock the inode no matter whether we
429762306a36Sopenharmony_ci		 * succeed or not, so that other nodes can delete it later.
429862306a36Sopenharmony_ci		 */
429962306a36Sopenharmony_ci		ocfs2_open_unlock(new_orphan_inode);
430062306a36Sopenharmony_ci		if (error)
430162306a36Sopenharmony_ci			iput(new_orphan_inode);
430262306a36Sopenharmony_ci	}
430362306a36Sopenharmony_ci
430462306a36Sopenharmony_ci	return error;
430562306a36Sopenharmony_ci}
430662306a36Sopenharmony_ci
430762306a36Sopenharmony_ci/*
430862306a36Sopenharmony_ci * Below here are the bits used by OCFS2_IOC_REFLINK() to fake
430962306a36Sopenharmony_ci * sys_reflink().  This will go away when vfs_reflink() exists in
431062306a36Sopenharmony_ci * fs/namei.c.
431162306a36Sopenharmony_ci */
431262306a36Sopenharmony_ci
431362306a36Sopenharmony_ci/* copied from may_create in VFS. */
431462306a36Sopenharmony_cistatic inline int ocfs2_may_create(struct inode *dir, struct dentry *child)
431562306a36Sopenharmony_ci{
431662306a36Sopenharmony_ci	if (d_really_is_positive(child))
431762306a36Sopenharmony_ci		return -EEXIST;
431862306a36Sopenharmony_ci	if (IS_DEADDIR(dir))
431962306a36Sopenharmony_ci		return -ENOENT;
432062306a36Sopenharmony_ci	return inode_permission(&nop_mnt_idmap, dir, MAY_WRITE | MAY_EXEC);
432162306a36Sopenharmony_ci}
432262306a36Sopenharmony_ci
432362306a36Sopenharmony_ci/**
432462306a36Sopenharmony_ci * ocfs2_vfs_reflink - Create a reference-counted link
432562306a36Sopenharmony_ci *
432662306a36Sopenharmony_ci * @old_dentry:        source dentry + inode
432762306a36Sopenharmony_ci * @dir:       directory to create the target
432862306a36Sopenharmony_ci * @new_dentry:        target dentry
432962306a36Sopenharmony_ci * @preserve:  if true, preserve all file attributes
433062306a36Sopenharmony_ci */
433162306a36Sopenharmony_cistatic int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
433262306a36Sopenharmony_ci			     struct dentry *new_dentry, bool preserve)
433362306a36Sopenharmony_ci{
433462306a36Sopenharmony_ci	struct inode *inode = d_inode(old_dentry);
433562306a36Sopenharmony_ci	int error;
433662306a36Sopenharmony_ci
433762306a36Sopenharmony_ci	if (!inode)
433862306a36Sopenharmony_ci		return -ENOENT;
433962306a36Sopenharmony_ci
434062306a36Sopenharmony_ci	error = ocfs2_may_create(dir, new_dentry);
434162306a36Sopenharmony_ci	if (error)
434262306a36Sopenharmony_ci		return error;
434362306a36Sopenharmony_ci
434462306a36Sopenharmony_ci	if (dir->i_sb != inode->i_sb)
434562306a36Sopenharmony_ci		return -EXDEV;
434662306a36Sopenharmony_ci
434762306a36Sopenharmony_ci	/*
434862306a36Sopenharmony_ci	 * A reflink to an append-only or immutable file cannot be created.
434962306a36Sopenharmony_ci	 */
435062306a36Sopenharmony_ci	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
435162306a36Sopenharmony_ci		return -EPERM;
435262306a36Sopenharmony_ci
435362306a36Sopenharmony_ci	/* Only regular files can be reflinked. */
435462306a36Sopenharmony_ci	if (!S_ISREG(inode->i_mode))
435562306a36Sopenharmony_ci		return -EPERM;
435662306a36Sopenharmony_ci
435762306a36Sopenharmony_ci	/*
435862306a36Sopenharmony_ci	 * If the caller wants to preserve ownership, they require the
435962306a36Sopenharmony_ci	 * rights to do so.
436062306a36Sopenharmony_ci	 */
436162306a36Sopenharmony_ci	if (preserve) {
436262306a36Sopenharmony_ci		if (!uid_eq(current_fsuid(), inode->i_uid) && !capable(CAP_CHOWN))
436362306a36Sopenharmony_ci			return -EPERM;
436462306a36Sopenharmony_ci		if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN))
436562306a36Sopenharmony_ci			return -EPERM;
436662306a36Sopenharmony_ci	}
436762306a36Sopenharmony_ci
436862306a36Sopenharmony_ci	/*
436962306a36Sopenharmony_ci	 * If the caller is modifying any aspect of the attributes, they
437062306a36Sopenharmony_ci	 * are not creating a snapshot.  They need read permission on the
437162306a36Sopenharmony_ci	 * file.
437262306a36Sopenharmony_ci	 */
437362306a36Sopenharmony_ci	if (!preserve) {
437462306a36Sopenharmony_ci		error = inode_permission(&nop_mnt_idmap, inode, MAY_READ);
437562306a36Sopenharmony_ci		if (error)
437662306a36Sopenharmony_ci			return error;
437762306a36Sopenharmony_ci	}
437862306a36Sopenharmony_ci
437962306a36Sopenharmony_ci	inode_lock(inode);
438062306a36Sopenharmony_ci	error = dquot_initialize(dir);
438162306a36Sopenharmony_ci	if (!error)
438262306a36Sopenharmony_ci		error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
438362306a36Sopenharmony_ci	inode_unlock(inode);
438462306a36Sopenharmony_ci	if (!error)
438562306a36Sopenharmony_ci		fsnotify_create(dir, new_dentry);
438662306a36Sopenharmony_ci	return error;
438762306a36Sopenharmony_ci}
438862306a36Sopenharmony_ci/*
438962306a36Sopenharmony_ci * Most codes are copied from sys_linkat.
439062306a36Sopenharmony_ci */
439162306a36Sopenharmony_ciint ocfs2_reflink_ioctl(struct inode *inode,
439262306a36Sopenharmony_ci			const char __user *oldname,
439362306a36Sopenharmony_ci			const char __user *newname,
439462306a36Sopenharmony_ci			bool preserve)
439562306a36Sopenharmony_ci{
439662306a36Sopenharmony_ci	struct dentry *new_dentry;
439762306a36Sopenharmony_ci	struct path old_path, new_path;
439862306a36Sopenharmony_ci	int error;
439962306a36Sopenharmony_ci
440062306a36Sopenharmony_ci	if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
440162306a36Sopenharmony_ci		return -EOPNOTSUPP;
440262306a36Sopenharmony_ci
440362306a36Sopenharmony_ci	error = user_path_at(AT_FDCWD, oldname, 0, &old_path);
440462306a36Sopenharmony_ci	if (error) {
440562306a36Sopenharmony_ci		mlog_errno(error);
440662306a36Sopenharmony_ci		return error;
440762306a36Sopenharmony_ci	}
440862306a36Sopenharmony_ci
440962306a36Sopenharmony_ci	new_dentry = user_path_create(AT_FDCWD, newname, &new_path, 0);
441062306a36Sopenharmony_ci	error = PTR_ERR(new_dentry);
441162306a36Sopenharmony_ci	if (IS_ERR(new_dentry)) {
441262306a36Sopenharmony_ci		mlog_errno(error);
441362306a36Sopenharmony_ci		goto out;
441462306a36Sopenharmony_ci	}
441562306a36Sopenharmony_ci
441662306a36Sopenharmony_ci	error = -EXDEV;
441762306a36Sopenharmony_ci	if (old_path.mnt != new_path.mnt) {
441862306a36Sopenharmony_ci		mlog_errno(error);
441962306a36Sopenharmony_ci		goto out_dput;
442062306a36Sopenharmony_ci	}
442162306a36Sopenharmony_ci
442262306a36Sopenharmony_ci	error = ocfs2_vfs_reflink(old_path.dentry,
442362306a36Sopenharmony_ci				  d_inode(new_path.dentry),
442462306a36Sopenharmony_ci				  new_dentry, preserve);
442562306a36Sopenharmony_ciout_dput:
442662306a36Sopenharmony_ci	done_path_create(&new_path, new_dentry);
442762306a36Sopenharmony_ciout:
442862306a36Sopenharmony_ci	path_put(&old_path);
442962306a36Sopenharmony_ci
443062306a36Sopenharmony_ci	return error;
443162306a36Sopenharmony_ci}
443262306a36Sopenharmony_ci
443362306a36Sopenharmony_ci/* Update destination inode size, if necessary. */
443462306a36Sopenharmony_ciint ocfs2_reflink_update_dest(struct inode *dest,
443562306a36Sopenharmony_ci			      struct buffer_head *d_bh,
443662306a36Sopenharmony_ci			      loff_t newlen)
443762306a36Sopenharmony_ci{
443862306a36Sopenharmony_ci	handle_t *handle;
443962306a36Sopenharmony_ci	int ret;
444062306a36Sopenharmony_ci
444162306a36Sopenharmony_ci	dest->i_blocks = ocfs2_inode_sector_count(dest);
444262306a36Sopenharmony_ci
444362306a36Sopenharmony_ci	if (newlen <= i_size_read(dest))
444462306a36Sopenharmony_ci		return 0;
444562306a36Sopenharmony_ci
444662306a36Sopenharmony_ci	handle = ocfs2_start_trans(OCFS2_SB(dest->i_sb),
444762306a36Sopenharmony_ci				   OCFS2_INODE_UPDATE_CREDITS);
444862306a36Sopenharmony_ci	if (IS_ERR(handle)) {
444962306a36Sopenharmony_ci		ret = PTR_ERR(handle);
445062306a36Sopenharmony_ci		mlog_errno(ret);
445162306a36Sopenharmony_ci		return ret;
445262306a36Sopenharmony_ci	}
445362306a36Sopenharmony_ci
445462306a36Sopenharmony_ci	/* Extend i_size if needed. */
445562306a36Sopenharmony_ci	spin_lock(&OCFS2_I(dest)->ip_lock);
445662306a36Sopenharmony_ci	if (newlen > i_size_read(dest))
445762306a36Sopenharmony_ci		i_size_write(dest, newlen);
445862306a36Sopenharmony_ci	spin_unlock(&OCFS2_I(dest)->ip_lock);
445962306a36Sopenharmony_ci	dest->i_mtime = inode_set_ctime_current(dest);
446062306a36Sopenharmony_ci
446162306a36Sopenharmony_ci	ret = ocfs2_mark_inode_dirty(handle, dest, d_bh);
446262306a36Sopenharmony_ci	if (ret) {
446362306a36Sopenharmony_ci		mlog_errno(ret);
446462306a36Sopenharmony_ci		goto out_commit;
446562306a36Sopenharmony_ci	}
446662306a36Sopenharmony_ci
446762306a36Sopenharmony_ciout_commit:
446862306a36Sopenharmony_ci	ocfs2_commit_trans(OCFS2_SB(dest->i_sb), handle);
446962306a36Sopenharmony_ci	return ret;
447062306a36Sopenharmony_ci}
447162306a36Sopenharmony_ci
447262306a36Sopenharmony_ci/* Remap the range pos_in:len in s_inode to pos_out:len in t_inode. */
447362306a36Sopenharmony_cistatic loff_t ocfs2_reflink_remap_extent(struct inode *s_inode,
447462306a36Sopenharmony_ci					 struct buffer_head *s_bh,
447562306a36Sopenharmony_ci					 loff_t pos_in,
447662306a36Sopenharmony_ci					 struct inode *t_inode,
447762306a36Sopenharmony_ci					 struct buffer_head *t_bh,
447862306a36Sopenharmony_ci					 loff_t pos_out,
447962306a36Sopenharmony_ci					 loff_t len,
448062306a36Sopenharmony_ci					 struct ocfs2_cached_dealloc_ctxt *dealloc)
448162306a36Sopenharmony_ci{
448262306a36Sopenharmony_ci	struct ocfs2_extent_tree s_et;
448362306a36Sopenharmony_ci	struct ocfs2_extent_tree t_et;
448462306a36Sopenharmony_ci	struct ocfs2_dinode *dis;
448562306a36Sopenharmony_ci	struct buffer_head *ref_root_bh = NULL;
448662306a36Sopenharmony_ci	struct ocfs2_refcount_tree *ref_tree;
448762306a36Sopenharmony_ci	struct ocfs2_super *osb;
448862306a36Sopenharmony_ci	loff_t remapped_bytes = 0;
448962306a36Sopenharmony_ci	loff_t pstart, plen;
449062306a36Sopenharmony_ci	u32 p_cluster, num_clusters, slast, spos, tpos, remapped_clus = 0;
449162306a36Sopenharmony_ci	unsigned int ext_flags;
449262306a36Sopenharmony_ci	int ret = 0;
449362306a36Sopenharmony_ci
449462306a36Sopenharmony_ci	osb = OCFS2_SB(s_inode->i_sb);
449562306a36Sopenharmony_ci	dis = (struct ocfs2_dinode *)s_bh->b_data;
449662306a36Sopenharmony_ci	ocfs2_init_dinode_extent_tree(&s_et, INODE_CACHE(s_inode), s_bh);
449762306a36Sopenharmony_ci	ocfs2_init_dinode_extent_tree(&t_et, INODE_CACHE(t_inode), t_bh);
449862306a36Sopenharmony_ci
449962306a36Sopenharmony_ci	spos = ocfs2_bytes_to_clusters(s_inode->i_sb, pos_in);
450062306a36Sopenharmony_ci	tpos = ocfs2_bytes_to_clusters(t_inode->i_sb, pos_out);
450162306a36Sopenharmony_ci	slast = ocfs2_clusters_for_bytes(s_inode->i_sb, pos_in + len);
450262306a36Sopenharmony_ci
450362306a36Sopenharmony_ci	while (spos < slast) {
450462306a36Sopenharmony_ci		if (fatal_signal_pending(current)) {
450562306a36Sopenharmony_ci			ret = -EINTR;
450662306a36Sopenharmony_ci			goto out;
450762306a36Sopenharmony_ci		}
450862306a36Sopenharmony_ci
450962306a36Sopenharmony_ci		/* Look up the extent. */
451062306a36Sopenharmony_ci		ret = ocfs2_get_clusters(s_inode, spos, &p_cluster,
451162306a36Sopenharmony_ci					 &num_clusters, &ext_flags);
451262306a36Sopenharmony_ci		if (ret) {
451362306a36Sopenharmony_ci			mlog_errno(ret);
451462306a36Sopenharmony_ci			goto out;
451562306a36Sopenharmony_ci		}
451662306a36Sopenharmony_ci
451762306a36Sopenharmony_ci		num_clusters = min_t(u32, num_clusters, slast - spos);
451862306a36Sopenharmony_ci
451962306a36Sopenharmony_ci		/* Punch out the dest range. */
452062306a36Sopenharmony_ci		pstart = ocfs2_clusters_to_bytes(t_inode->i_sb, tpos);
452162306a36Sopenharmony_ci		plen = ocfs2_clusters_to_bytes(t_inode->i_sb, num_clusters);
452262306a36Sopenharmony_ci		ret = ocfs2_remove_inode_range(t_inode, t_bh, pstart, plen);
452362306a36Sopenharmony_ci		if (ret) {
452462306a36Sopenharmony_ci			mlog_errno(ret);
452562306a36Sopenharmony_ci			goto out;
452662306a36Sopenharmony_ci		}
452762306a36Sopenharmony_ci
452862306a36Sopenharmony_ci		if (p_cluster == 0)
452962306a36Sopenharmony_ci			goto next_loop;
453062306a36Sopenharmony_ci
453162306a36Sopenharmony_ci		/* Lock the refcount btree... */
453262306a36Sopenharmony_ci		ret = ocfs2_lock_refcount_tree(osb,
453362306a36Sopenharmony_ci					       le64_to_cpu(dis->i_refcount_loc),
453462306a36Sopenharmony_ci					       1, &ref_tree, &ref_root_bh);
453562306a36Sopenharmony_ci		if (ret) {
453662306a36Sopenharmony_ci			mlog_errno(ret);
453762306a36Sopenharmony_ci			goto out;
453862306a36Sopenharmony_ci		}
453962306a36Sopenharmony_ci
454062306a36Sopenharmony_ci		/* Mark s_inode's extent as refcounted. */
454162306a36Sopenharmony_ci		if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) {
454262306a36Sopenharmony_ci			ret = ocfs2_add_refcount_flag(s_inode, &s_et,
454362306a36Sopenharmony_ci						      &ref_tree->rf_ci,
454462306a36Sopenharmony_ci						      ref_root_bh, spos,
454562306a36Sopenharmony_ci						      p_cluster, num_clusters,
454662306a36Sopenharmony_ci						      dealloc, NULL);
454762306a36Sopenharmony_ci			if (ret) {
454862306a36Sopenharmony_ci				mlog_errno(ret);
454962306a36Sopenharmony_ci				goto out_unlock_refcount;
455062306a36Sopenharmony_ci			}
455162306a36Sopenharmony_ci		}
455262306a36Sopenharmony_ci
455362306a36Sopenharmony_ci		/* Map in the new extent. */
455462306a36Sopenharmony_ci		ext_flags |= OCFS2_EXT_REFCOUNTED;
455562306a36Sopenharmony_ci		ret = ocfs2_add_refcounted_extent(t_inode, &t_et,
455662306a36Sopenharmony_ci						  &ref_tree->rf_ci,
455762306a36Sopenharmony_ci						  ref_root_bh,
455862306a36Sopenharmony_ci						  tpos, p_cluster,
455962306a36Sopenharmony_ci						  num_clusters,
456062306a36Sopenharmony_ci						  ext_flags,
456162306a36Sopenharmony_ci						  dealloc);
456262306a36Sopenharmony_ci		if (ret) {
456362306a36Sopenharmony_ci			mlog_errno(ret);
456462306a36Sopenharmony_ci			goto out_unlock_refcount;
456562306a36Sopenharmony_ci		}
456662306a36Sopenharmony_ci
456762306a36Sopenharmony_ci		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
456862306a36Sopenharmony_ci		brelse(ref_root_bh);
456962306a36Sopenharmony_cinext_loop:
457062306a36Sopenharmony_ci		spos += num_clusters;
457162306a36Sopenharmony_ci		tpos += num_clusters;
457262306a36Sopenharmony_ci		remapped_clus += num_clusters;
457362306a36Sopenharmony_ci	}
457462306a36Sopenharmony_ci
457562306a36Sopenharmony_ci	goto out;
457662306a36Sopenharmony_ciout_unlock_refcount:
457762306a36Sopenharmony_ci	ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
457862306a36Sopenharmony_ci	brelse(ref_root_bh);
457962306a36Sopenharmony_ciout:
458062306a36Sopenharmony_ci	remapped_bytes = ocfs2_clusters_to_bytes(t_inode->i_sb, remapped_clus);
458162306a36Sopenharmony_ci	remapped_bytes = min_t(loff_t, len, remapped_bytes);
458262306a36Sopenharmony_ci
458362306a36Sopenharmony_ci	return remapped_bytes > 0 ? remapped_bytes : ret;
458462306a36Sopenharmony_ci}
458562306a36Sopenharmony_ci
458662306a36Sopenharmony_ci/* Set up refcount tree and remap s_inode to t_inode. */
458762306a36Sopenharmony_ciloff_t ocfs2_reflink_remap_blocks(struct inode *s_inode,
458862306a36Sopenharmony_ci				  struct buffer_head *s_bh,
458962306a36Sopenharmony_ci				  loff_t pos_in,
459062306a36Sopenharmony_ci				  struct inode *t_inode,
459162306a36Sopenharmony_ci				  struct buffer_head *t_bh,
459262306a36Sopenharmony_ci				  loff_t pos_out,
459362306a36Sopenharmony_ci				  loff_t len)
459462306a36Sopenharmony_ci{
459562306a36Sopenharmony_ci	struct ocfs2_cached_dealloc_ctxt dealloc;
459662306a36Sopenharmony_ci	struct ocfs2_super *osb;
459762306a36Sopenharmony_ci	struct ocfs2_dinode *dis;
459862306a36Sopenharmony_ci	struct ocfs2_dinode *dit;
459962306a36Sopenharmony_ci	loff_t ret;
460062306a36Sopenharmony_ci
460162306a36Sopenharmony_ci	osb = OCFS2_SB(s_inode->i_sb);
460262306a36Sopenharmony_ci	dis = (struct ocfs2_dinode *)s_bh->b_data;
460362306a36Sopenharmony_ci	dit = (struct ocfs2_dinode *)t_bh->b_data;
460462306a36Sopenharmony_ci	ocfs2_init_dealloc_ctxt(&dealloc);
460562306a36Sopenharmony_ci
460662306a36Sopenharmony_ci	/*
460762306a36Sopenharmony_ci	 * If we're reflinking the entire file and the source is inline
460862306a36Sopenharmony_ci	 * data, just copy the contents.
460962306a36Sopenharmony_ci	 */
461062306a36Sopenharmony_ci	if (pos_in == pos_out && pos_in == 0 && len == i_size_read(s_inode) &&
461162306a36Sopenharmony_ci	    i_size_read(t_inode) <= len &&
461262306a36Sopenharmony_ci	    (OCFS2_I(s_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)) {
461362306a36Sopenharmony_ci		ret = ocfs2_duplicate_inline_data(s_inode, s_bh, t_inode, t_bh);
461462306a36Sopenharmony_ci		if (ret)
461562306a36Sopenharmony_ci			mlog_errno(ret);
461662306a36Sopenharmony_ci		goto out;
461762306a36Sopenharmony_ci	}
461862306a36Sopenharmony_ci
461962306a36Sopenharmony_ci	/*
462062306a36Sopenharmony_ci	 * If both inodes belong to two different refcount groups then
462162306a36Sopenharmony_ci	 * forget it because we don't know how (or want) to go merging
462262306a36Sopenharmony_ci	 * refcount trees.
462362306a36Sopenharmony_ci	 */
462462306a36Sopenharmony_ci	ret = -EOPNOTSUPP;
462562306a36Sopenharmony_ci	if (ocfs2_is_refcount_inode(s_inode) &&
462662306a36Sopenharmony_ci	    ocfs2_is_refcount_inode(t_inode) &&
462762306a36Sopenharmony_ci	    le64_to_cpu(dis->i_refcount_loc) !=
462862306a36Sopenharmony_ci	    le64_to_cpu(dit->i_refcount_loc))
462962306a36Sopenharmony_ci		goto out;
463062306a36Sopenharmony_ci
463162306a36Sopenharmony_ci	/* Neither inode has a refcount tree.  Add one to s_inode. */
463262306a36Sopenharmony_ci	if (!ocfs2_is_refcount_inode(s_inode) &&
463362306a36Sopenharmony_ci	    !ocfs2_is_refcount_inode(t_inode)) {
463462306a36Sopenharmony_ci		ret = ocfs2_create_refcount_tree(s_inode, s_bh);
463562306a36Sopenharmony_ci		if (ret) {
463662306a36Sopenharmony_ci			mlog_errno(ret);
463762306a36Sopenharmony_ci			goto out;
463862306a36Sopenharmony_ci		}
463962306a36Sopenharmony_ci	}
464062306a36Sopenharmony_ci
464162306a36Sopenharmony_ci	/* Ensure that both inodes end up with the same refcount tree. */
464262306a36Sopenharmony_ci	if (!ocfs2_is_refcount_inode(s_inode)) {
464362306a36Sopenharmony_ci		ret = ocfs2_set_refcount_tree(s_inode, s_bh,
464462306a36Sopenharmony_ci					      le64_to_cpu(dit->i_refcount_loc));
464562306a36Sopenharmony_ci		if (ret) {
464662306a36Sopenharmony_ci			mlog_errno(ret);
464762306a36Sopenharmony_ci			goto out;
464862306a36Sopenharmony_ci		}
464962306a36Sopenharmony_ci	}
465062306a36Sopenharmony_ci	if (!ocfs2_is_refcount_inode(t_inode)) {
465162306a36Sopenharmony_ci		ret = ocfs2_set_refcount_tree(t_inode, t_bh,
465262306a36Sopenharmony_ci					      le64_to_cpu(dis->i_refcount_loc));
465362306a36Sopenharmony_ci		if (ret) {
465462306a36Sopenharmony_ci			mlog_errno(ret);
465562306a36Sopenharmony_ci			goto out;
465662306a36Sopenharmony_ci		}
465762306a36Sopenharmony_ci	}
465862306a36Sopenharmony_ci
465962306a36Sopenharmony_ci	/* Turn off inline data in the dest file. */
466062306a36Sopenharmony_ci	if (OCFS2_I(t_inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
466162306a36Sopenharmony_ci		ret = ocfs2_convert_inline_data_to_extents(t_inode, t_bh);
466262306a36Sopenharmony_ci		if (ret) {
466362306a36Sopenharmony_ci			mlog_errno(ret);
466462306a36Sopenharmony_ci			goto out;
466562306a36Sopenharmony_ci		}
466662306a36Sopenharmony_ci	}
466762306a36Sopenharmony_ci
466862306a36Sopenharmony_ci	/* Actually remap extents now. */
466962306a36Sopenharmony_ci	ret = ocfs2_reflink_remap_extent(s_inode, s_bh, pos_in, t_inode, t_bh,
467062306a36Sopenharmony_ci					 pos_out, len, &dealloc);
467162306a36Sopenharmony_ci	if (ret < 0) {
467262306a36Sopenharmony_ci		mlog_errno(ret);
467362306a36Sopenharmony_ci		goto out;
467462306a36Sopenharmony_ci	}
467562306a36Sopenharmony_ci
467662306a36Sopenharmony_ciout:
467762306a36Sopenharmony_ci	if (ocfs2_dealloc_has_cluster(&dealloc)) {
467862306a36Sopenharmony_ci		ocfs2_schedule_truncate_log_flush(osb, 1);
467962306a36Sopenharmony_ci		ocfs2_run_deallocs(osb, &dealloc);
468062306a36Sopenharmony_ci	}
468162306a36Sopenharmony_ci
468262306a36Sopenharmony_ci	return ret;
468362306a36Sopenharmony_ci}
468462306a36Sopenharmony_ci
468562306a36Sopenharmony_ci/* Lock an inode and grab a bh pointing to the inode. */
468662306a36Sopenharmony_ciint ocfs2_reflink_inodes_lock(struct inode *s_inode,
468762306a36Sopenharmony_ci			      struct buffer_head **bh_s,
468862306a36Sopenharmony_ci			      struct inode *t_inode,
468962306a36Sopenharmony_ci			      struct buffer_head **bh_t)
469062306a36Sopenharmony_ci{
469162306a36Sopenharmony_ci	struct inode *inode1 = s_inode;
469262306a36Sopenharmony_ci	struct inode *inode2 = t_inode;
469362306a36Sopenharmony_ci	struct ocfs2_inode_info *oi1;
469462306a36Sopenharmony_ci	struct ocfs2_inode_info *oi2;
469562306a36Sopenharmony_ci	struct buffer_head *bh1 = NULL;
469662306a36Sopenharmony_ci	struct buffer_head *bh2 = NULL;
469762306a36Sopenharmony_ci	bool same_inode = (s_inode == t_inode);
469862306a36Sopenharmony_ci	bool need_swap = (inode1->i_ino > inode2->i_ino);
469962306a36Sopenharmony_ci	int status;
470062306a36Sopenharmony_ci
470162306a36Sopenharmony_ci	/* First grab the VFS and rw locks. */
470262306a36Sopenharmony_ci	lock_two_nondirectories(s_inode, t_inode);
470362306a36Sopenharmony_ci	if (need_swap)
470462306a36Sopenharmony_ci		swap(inode1, inode2);
470562306a36Sopenharmony_ci
470662306a36Sopenharmony_ci	status = ocfs2_rw_lock(inode1, 1);
470762306a36Sopenharmony_ci	if (status) {
470862306a36Sopenharmony_ci		mlog_errno(status);
470962306a36Sopenharmony_ci		goto out_i1;
471062306a36Sopenharmony_ci	}
471162306a36Sopenharmony_ci	if (!same_inode) {
471262306a36Sopenharmony_ci		status = ocfs2_rw_lock(inode2, 1);
471362306a36Sopenharmony_ci		if (status) {
471462306a36Sopenharmony_ci			mlog_errno(status);
471562306a36Sopenharmony_ci			goto out_i2;
471662306a36Sopenharmony_ci		}
471762306a36Sopenharmony_ci	}
471862306a36Sopenharmony_ci
471962306a36Sopenharmony_ci	/* Now go for the cluster locks */
472062306a36Sopenharmony_ci	oi1 = OCFS2_I(inode1);
472162306a36Sopenharmony_ci	oi2 = OCFS2_I(inode2);
472262306a36Sopenharmony_ci
472362306a36Sopenharmony_ci	trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno,
472462306a36Sopenharmony_ci				(unsigned long long)oi2->ip_blkno);
472562306a36Sopenharmony_ci
472662306a36Sopenharmony_ci	/* We always want to lock the one with the lower lockid first. */
472762306a36Sopenharmony_ci	if (oi1->ip_blkno > oi2->ip_blkno)
472862306a36Sopenharmony_ci		mlog_errno(-ENOLCK);
472962306a36Sopenharmony_ci
473062306a36Sopenharmony_ci	/* lock id1 */
473162306a36Sopenharmony_ci	status = ocfs2_inode_lock_nested(inode1, &bh1, 1,
473262306a36Sopenharmony_ci					 OI_LS_REFLINK_TARGET);
473362306a36Sopenharmony_ci	if (status < 0) {
473462306a36Sopenharmony_ci		if (status != -ENOENT)
473562306a36Sopenharmony_ci			mlog_errno(status);
473662306a36Sopenharmony_ci		goto out_rw2;
473762306a36Sopenharmony_ci	}
473862306a36Sopenharmony_ci
473962306a36Sopenharmony_ci	/* lock id2 */
474062306a36Sopenharmony_ci	if (!same_inode) {
474162306a36Sopenharmony_ci		status = ocfs2_inode_lock_nested(inode2, &bh2, 1,
474262306a36Sopenharmony_ci						 OI_LS_REFLINK_TARGET);
474362306a36Sopenharmony_ci		if (status < 0) {
474462306a36Sopenharmony_ci			if (status != -ENOENT)
474562306a36Sopenharmony_ci				mlog_errno(status);
474662306a36Sopenharmony_ci			goto out_cl1;
474762306a36Sopenharmony_ci		}
474862306a36Sopenharmony_ci	} else {
474962306a36Sopenharmony_ci		bh2 = bh1;
475062306a36Sopenharmony_ci	}
475162306a36Sopenharmony_ci
475262306a36Sopenharmony_ci	/*
475362306a36Sopenharmony_ci	 * If we swapped inode order above, we have to swap the buffer heads
475462306a36Sopenharmony_ci	 * before passing them back to the caller.
475562306a36Sopenharmony_ci	 */
475662306a36Sopenharmony_ci	if (need_swap)
475762306a36Sopenharmony_ci		swap(bh1, bh2);
475862306a36Sopenharmony_ci	*bh_s = bh1;
475962306a36Sopenharmony_ci	*bh_t = bh2;
476062306a36Sopenharmony_ci
476162306a36Sopenharmony_ci	trace_ocfs2_double_lock_end(
476262306a36Sopenharmony_ci			(unsigned long long)oi1->ip_blkno,
476362306a36Sopenharmony_ci			(unsigned long long)oi2->ip_blkno);
476462306a36Sopenharmony_ci
476562306a36Sopenharmony_ci	return 0;
476662306a36Sopenharmony_ci
476762306a36Sopenharmony_ciout_cl1:
476862306a36Sopenharmony_ci	ocfs2_inode_unlock(inode1, 1);
476962306a36Sopenharmony_ci	brelse(bh1);
477062306a36Sopenharmony_ciout_rw2:
477162306a36Sopenharmony_ci	ocfs2_rw_unlock(inode2, 1);
477262306a36Sopenharmony_ciout_i2:
477362306a36Sopenharmony_ci	ocfs2_rw_unlock(inode1, 1);
477462306a36Sopenharmony_ciout_i1:
477562306a36Sopenharmony_ci	unlock_two_nondirectories(s_inode, t_inode);
477662306a36Sopenharmony_ci	return status;
477762306a36Sopenharmony_ci}
477862306a36Sopenharmony_ci
/*
 * Unlock both inodes and release buffers.
 *
 * For each distinct inode the cluster inode lock and the rw lock are
 * dropped and its buffer head is released; the source is handled first.
 * When @s_inode and @t_inode are the same inode this is done only once
 * and @t_bh is not released.  The VFS locks are dropped last.
 */
void ocfs2_reflink_inodes_unlock(struct inode *s_inode,
				 struct buffer_head *s_bh,
				 struct inode *t_inode,
				 struct buffer_head *t_bh)
{
	struct inode *inodes[2] = { s_inode, t_inode };
	struct buffer_head *bhs[2] = { s_bh, t_bh };
	int nr = (s_inode == t_inode) ? 1 : 2;
	int i;

	for (i = 0; i < nr; i++) {
		ocfs2_inode_unlock(inodes[i], 1);
		ocfs2_rw_unlock(inodes[i], 1);
		brelse(bhs[i]);
	}

	unlock_two_nondirectories(s_inode, t_inode);
}
4795