162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * alloc.c
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Extent allocs and frees
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
862306a36Sopenharmony_ci */
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <linux/fs.h>
1162306a36Sopenharmony_ci#include <linux/types.h>
1262306a36Sopenharmony_ci#include <linux/slab.h>
1362306a36Sopenharmony_ci#include <linux/highmem.h>
1462306a36Sopenharmony_ci#include <linux/swap.h>
1562306a36Sopenharmony_ci#include <linux/quotaops.h>
1662306a36Sopenharmony_ci#include <linux/blkdev.h>
1762306a36Sopenharmony_ci#include <linux/sched/signal.h>
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci#include <cluster/masklog.h>
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_ci#include "ocfs2.h"
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_ci#include "alloc.h"
2462306a36Sopenharmony_ci#include "aops.h"
2562306a36Sopenharmony_ci#include "blockcheck.h"
2662306a36Sopenharmony_ci#include "dlmglue.h"
2762306a36Sopenharmony_ci#include "extent_map.h"
2862306a36Sopenharmony_ci#include "inode.h"
2962306a36Sopenharmony_ci#include "journal.h"
3062306a36Sopenharmony_ci#include "localalloc.h"
3162306a36Sopenharmony_ci#include "suballoc.h"
3262306a36Sopenharmony_ci#include "sysfile.h"
3362306a36Sopenharmony_ci#include "file.h"
3462306a36Sopenharmony_ci#include "super.h"
3562306a36Sopenharmony_ci#include "uptodate.h"
3662306a36Sopenharmony_ci#include "xattr.h"
3762306a36Sopenharmony_ci#include "refcounttree.h"
3862306a36Sopenharmony_ci#include "ocfs2_trace.h"
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci#include "buffer_head_io.h"
4162306a36Sopenharmony_ci
/*
 * Contiguity classification returned by ocfs2_extent_rec_contig() and
 * the ->eo_extent_contig() tree operation.
 */
enum ocfs2_contig_type {
	CONTIG_NONE      = 0,
	CONTIG_LEFT      = 1,
	CONTIG_RIGHT     = 2,
	CONTIG_LEFTRIGHT = 3,
};
4862306a36Sopenharmony_ci
/* Forward declaration; called from ocfs2_et_extent_contig(). */
static enum ocfs2_contig_type
ocfs2_extent_rec_contig(struct super_block *sb,
			struct ocfs2_extent_rec *ext,
			struct ocfs2_extent_rec *insert_rec);
5362306a36Sopenharmony_ci/*
5462306a36Sopenharmony_ci * Operations for a specific extent tree type.
5562306a36Sopenharmony_ci *
5662306a36Sopenharmony_ci * To implement an on-disk btree (extent tree) type in ocfs2, add
5762306a36Sopenharmony_ci * an ocfs2_extent_tree_operations structure and the matching
5862306a36Sopenharmony_ci * ocfs2_init_<thingy>_extent_tree() function.  That's pretty much it
5962306a36Sopenharmony_ci * for the allocation portion of the extent tree.
6062306a36Sopenharmony_ci */
6162306a36Sopenharmony_cistruct ocfs2_extent_tree_operations {
6262306a36Sopenharmony_ci	/*
6362306a36Sopenharmony_ci	 * last_eb_blk is the block number of the right most leaf extent
6462306a36Sopenharmony_ci	 * block.  Most on-disk structures containing an extent tree store
6562306a36Sopenharmony_ci	 * this value for fast access.  The ->eo_set_last_eb_blk() and
6662306a36Sopenharmony_ci	 * ->eo_get_last_eb_blk() operations access this value.  They are
6762306a36Sopenharmony_ci	 *  both required.
6862306a36Sopenharmony_ci	 */
6962306a36Sopenharmony_ci	void (*eo_set_last_eb_blk)(struct ocfs2_extent_tree *et,
7062306a36Sopenharmony_ci				   u64 blkno);
7162306a36Sopenharmony_ci	u64 (*eo_get_last_eb_blk)(struct ocfs2_extent_tree *et);
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci	/*
7462306a36Sopenharmony_ci	 * The on-disk structure usually keeps track of how many total
7562306a36Sopenharmony_ci	 * clusters are stored in this extent tree.  This function updates
7662306a36Sopenharmony_ci	 * that value.  new_clusters is the delta, and must be
7762306a36Sopenharmony_ci	 * added to the total.  Required.
7862306a36Sopenharmony_ci	 */
7962306a36Sopenharmony_ci	void (*eo_update_clusters)(struct ocfs2_extent_tree *et,
8062306a36Sopenharmony_ci				   u32 new_clusters);
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci	/*
8362306a36Sopenharmony_ci	 * If this extent tree is supported by an extent map, insert
8462306a36Sopenharmony_ci	 * a record into the map.
8562306a36Sopenharmony_ci	 */
8662306a36Sopenharmony_ci	void (*eo_extent_map_insert)(struct ocfs2_extent_tree *et,
8762306a36Sopenharmony_ci				     struct ocfs2_extent_rec *rec);
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci	/*
9062306a36Sopenharmony_ci	 * If this extent tree is supported by an extent map, truncate the
9162306a36Sopenharmony_ci	 * map to clusters,
9262306a36Sopenharmony_ci	 */
9362306a36Sopenharmony_ci	void (*eo_extent_map_truncate)(struct ocfs2_extent_tree *et,
9462306a36Sopenharmony_ci				       u32 clusters);
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_ci	/*
9762306a36Sopenharmony_ci	 * If ->eo_insert_check() exists, it is called before rec is
9862306a36Sopenharmony_ci	 * inserted into the extent tree.  It is optional.
9962306a36Sopenharmony_ci	 */
10062306a36Sopenharmony_ci	int (*eo_insert_check)(struct ocfs2_extent_tree *et,
10162306a36Sopenharmony_ci			       struct ocfs2_extent_rec *rec);
10262306a36Sopenharmony_ci	int (*eo_sanity_check)(struct ocfs2_extent_tree *et);
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ci	/*
10562306a36Sopenharmony_ci	 * --------------------------------------------------------------
10662306a36Sopenharmony_ci	 * The remaining are internal to ocfs2_extent_tree and don't have
10762306a36Sopenharmony_ci	 * accessor functions
10862306a36Sopenharmony_ci	 */
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci	/*
11162306a36Sopenharmony_ci	 * ->eo_fill_root_el() takes et->et_object and sets et->et_root_el.
11262306a36Sopenharmony_ci	 * It is required.
11362306a36Sopenharmony_ci	 */
11462306a36Sopenharmony_ci	void (*eo_fill_root_el)(struct ocfs2_extent_tree *et);
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci	/*
11762306a36Sopenharmony_ci	 * ->eo_fill_max_leaf_clusters sets et->et_max_leaf_clusters if
11862306a36Sopenharmony_ci	 * it exists.  If it does not, et->et_max_leaf_clusters is set
11962306a36Sopenharmony_ci	 * to 0 (unlimited).  Optional.
12062306a36Sopenharmony_ci	 */
12162306a36Sopenharmony_ci	void (*eo_fill_max_leaf_clusters)(struct ocfs2_extent_tree *et);
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	/*
12462306a36Sopenharmony_ci	 * ->eo_extent_contig test whether the 2 ocfs2_extent_rec
12562306a36Sopenharmony_ci	 * are contiguous or not. Optional. Don't need to set it if use
12662306a36Sopenharmony_ci	 * ocfs2_extent_rec as the tree leaf.
12762306a36Sopenharmony_ci	 */
12862306a36Sopenharmony_ci	enum ocfs2_contig_type
12962306a36Sopenharmony_ci		(*eo_extent_contig)(struct ocfs2_extent_tree *et,
13062306a36Sopenharmony_ci				    struct ocfs2_extent_rec *ext,
13162306a36Sopenharmony_ci				    struct ocfs2_extent_rec *insert_rec);
13262306a36Sopenharmony_ci};
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci/*
13662306a36Sopenharmony_ci * Pre-declare ocfs2_dinode_et_ops so we can use it as a sanity check
13762306a36Sopenharmony_ci * in the methods.
13862306a36Sopenharmony_ci */
13962306a36Sopenharmony_cistatic u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et);
14062306a36Sopenharmony_cistatic void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
14162306a36Sopenharmony_ci					 u64 blkno);
14262306a36Sopenharmony_cistatic void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
14362306a36Sopenharmony_ci					 u32 clusters);
14462306a36Sopenharmony_cistatic void ocfs2_dinode_extent_map_insert(struct ocfs2_extent_tree *et,
14562306a36Sopenharmony_ci					   struct ocfs2_extent_rec *rec);
14662306a36Sopenharmony_cistatic void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
14762306a36Sopenharmony_ci					     u32 clusters);
14862306a36Sopenharmony_cistatic int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
14962306a36Sopenharmony_ci				     struct ocfs2_extent_rec *rec);
15062306a36Sopenharmony_cistatic int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et);
15162306a36Sopenharmony_cistatic void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et);
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_cistatic int ocfs2_reuse_blk_from_dealloc(handle_t *handle,
15462306a36Sopenharmony_ci					struct ocfs2_extent_tree *et,
15562306a36Sopenharmony_ci					struct buffer_head **new_eb_bh,
15662306a36Sopenharmony_ci					int blk_wanted, int *blk_given);
15762306a36Sopenharmony_cistatic int ocfs2_is_dealloc_empty(struct ocfs2_extent_tree *et);
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_cistatic const struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
16062306a36Sopenharmony_ci	.eo_set_last_eb_blk	= ocfs2_dinode_set_last_eb_blk,
16162306a36Sopenharmony_ci	.eo_get_last_eb_blk	= ocfs2_dinode_get_last_eb_blk,
16262306a36Sopenharmony_ci	.eo_update_clusters	= ocfs2_dinode_update_clusters,
16362306a36Sopenharmony_ci	.eo_extent_map_insert	= ocfs2_dinode_extent_map_insert,
16462306a36Sopenharmony_ci	.eo_extent_map_truncate	= ocfs2_dinode_extent_map_truncate,
16562306a36Sopenharmony_ci	.eo_insert_check	= ocfs2_dinode_insert_check,
16662306a36Sopenharmony_ci	.eo_sanity_check	= ocfs2_dinode_sanity_check,
16762306a36Sopenharmony_ci	.eo_fill_root_el	= ocfs2_dinode_fill_root_el,
16862306a36Sopenharmony_ci};
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_cistatic void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
17162306a36Sopenharmony_ci					 u64 blkno)
17262306a36Sopenharmony_ci{
17362306a36Sopenharmony_ci	struct ocfs2_dinode *di = et->et_object;
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci	BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
17662306a36Sopenharmony_ci	di->i_last_eb_blk = cpu_to_le64(blkno);
17762306a36Sopenharmony_ci}
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_cistatic u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et)
18062306a36Sopenharmony_ci{
18162306a36Sopenharmony_ci	struct ocfs2_dinode *di = et->et_object;
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci	BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
18462306a36Sopenharmony_ci	return le64_to_cpu(di->i_last_eb_blk);
18562306a36Sopenharmony_ci}
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_cistatic void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
18862306a36Sopenharmony_ci					 u32 clusters)
18962306a36Sopenharmony_ci{
19062306a36Sopenharmony_ci	struct ocfs2_inode_info *oi = cache_info_to_inode(et->et_ci);
19162306a36Sopenharmony_ci	struct ocfs2_dinode *di = et->et_object;
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_ci	le32_add_cpu(&di->i_clusters, clusters);
19462306a36Sopenharmony_ci	spin_lock(&oi->ip_lock);
19562306a36Sopenharmony_ci	oi->ip_clusters = le32_to_cpu(di->i_clusters);
19662306a36Sopenharmony_ci	spin_unlock(&oi->ip_lock);
19762306a36Sopenharmony_ci}
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_cistatic void ocfs2_dinode_extent_map_insert(struct ocfs2_extent_tree *et,
20062306a36Sopenharmony_ci					   struct ocfs2_extent_rec *rec)
20162306a36Sopenharmony_ci{
20262306a36Sopenharmony_ci	struct inode *inode = &cache_info_to_inode(et->et_ci)->vfs_inode;
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci	ocfs2_extent_map_insert_rec(inode, rec);
20562306a36Sopenharmony_ci}
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_cistatic void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
20862306a36Sopenharmony_ci					     u32 clusters)
20962306a36Sopenharmony_ci{
21062306a36Sopenharmony_ci	struct inode *inode = &cache_info_to_inode(et->et_ci)->vfs_inode;
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_ci	ocfs2_extent_map_trunc(inode, clusters);
21362306a36Sopenharmony_ci}
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_cistatic int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
21662306a36Sopenharmony_ci				     struct ocfs2_extent_rec *rec)
21762306a36Sopenharmony_ci{
21862306a36Sopenharmony_ci	struct ocfs2_inode_info *oi = cache_info_to_inode(et->et_ci);
21962306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(oi->vfs_inode.i_sb);
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci	BUG_ON(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL);
22262306a36Sopenharmony_ci	mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
22362306a36Sopenharmony_ci			(oi->ip_clusters != le32_to_cpu(rec->e_cpos)),
22462306a36Sopenharmony_ci			"Device %s, asking for sparse allocation: inode %llu, "
22562306a36Sopenharmony_ci			"cpos %u, clusters %u\n",
22662306a36Sopenharmony_ci			osb->dev_str,
22762306a36Sopenharmony_ci			(unsigned long long)oi->ip_blkno,
22862306a36Sopenharmony_ci			rec->e_cpos, oi->ip_clusters);
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_ci	return 0;
23162306a36Sopenharmony_ci}
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_cistatic int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et)
23462306a36Sopenharmony_ci{
23562306a36Sopenharmony_ci	struct ocfs2_dinode *di = et->et_object;
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ci	BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
23862306a36Sopenharmony_ci	BUG_ON(!OCFS2_IS_VALID_DINODE(di));
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ci	return 0;
24162306a36Sopenharmony_ci}
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_cistatic void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et)
24462306a36Sopenharmony_ci{
24562306a36Sopenharmony_ci	struct ocfs2_dinode *di = et->et_object;
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci	et->et_root_el = &di->id2.i_list;
24862306a36Sopenharmony_ci}
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_cistatic void ocfs2_xattr_value_fill_root_el(struct ocfs2_extent_tree *et)
25262306a36Sopenharmony_ci{
25362306a36Sopenharmony_ci	struct ocfs2_xattr_value_buf *vb = et->et_object;
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_ci	et->et_root_el = &vb->vb_xv->xr_list;
25662306a36Sopenharmony_ci}
25762306a36Sopenharmony_ci
25862306a36Sopenharmony_cistatic void ocfs2_xattr_value_set_last_eb_blk(struct ocfs2_extent_tree *et,
25962306a36Sopenharmony_ci					      u64 blkno)
26062306a36Sopenharmony_ci{
26162306a36Sopenharmony_ci	struct ocfs2_xattr_value_buf *vb = et->et_object;
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_ci	vb->vb_xv->xr_last_eb_blk = cpu_to_le64(blkno);
26462306a36Sopenharmony_ci}
26562306a36Sopenharmony_ci
26662306a36Sopenharmony_cistatic u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et)
26762306a36Sopenharmony_ci{
26862306a36Sopenharmony_ci	struct ocfs2_xattr_value_buf *vb = et->et_object;
26962306a36Sopenharmony_ci
27062306a36Sopenharmony_ci	return le64_to_cpu(vb->vb_xv->xr_last_eb_blk);
27162306a36Sopenharmony_ci}
27262306a36Sopenharmony_ci
27362306a36Sopenharmony_cistatic void ocfs2_xattr_value_update_clusters(struct ocfs2_extent_tree *et,
27462306a36Sopenharmony_ci					      u32 clusters)
27562306a36Sopenharmony_ci{
27662306a36Sopenharmony_ci	struct ocfs2_xattr_value_buf *vb = et->et_object;
27762306a36Sopenharmony_ci
27862306a36Sopenharmony_ci	le32_add_cpu(&vb->vb_xv->xr_clusters, clusters);
27962306a36Sopenharmony_ci}
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_cistatic const struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = {
28262306a36Sopenharmony_ci	.eo_set_last_eb_blk	= ocfs2_xattr_value_set_last_eb_blk,
28362306a36Sopenharmony_ci	.eo_get_last_eb_blk	= ocfs2_xattr_value_get_last_eb_blk,
28462306a36Sopenharmony_ci	.eo_update_clusters	= ocfs2_xattr_value_update_clusters,
28562306a36Sopenharmony_ci	.eo_fill_root_el	= ocfs2_xattr_value_fill_root_el,
28662306a36Sopenharmony_ci};
28762306a36Sopenharmony_ci
28862306a36Sopenharmony_cistatic void ocfs2_xattr_tree_fill_root_el(struct ocfs2_extent_tree *et)
28962306a36Sopenharmony_ci{
29062306a36Sopenharmony_ci	struct ocfs2_xattr_block *xb = et->et_object;
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci	et->et_root_el = &xb->xb_attrs.xb_root.xt_list;
29362306a36Sopenharmony_ci}
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_cistatic void ocfs2_xattr_tree_fill_max_leaf_clusters(struct ocfs2_extent_tree *et)
29662306a36Sopenharmony_ci{
29762306a36Sopenharmony_ci	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
29862306a36Sopenharmony_ci	et->et_max_leaf_clusters =
29962306a36Sopenharmony_ci		ocfs2_clusters_for_bytes(sb, OCFS2_MAX_XATTR_TREE_LEAF_SIZE);
30062306a36Sopenharmony_ci}
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_cistatic void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
30362306a36Sopenharmony_ci					     u64 blkno)
30462306a36Sopenharmony_ci{
30562306a36Sopenharmony_ci	struct ocfs2_xattr_block *xb = et->et_object;
30662306a36Sopenharmony_ci	struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_ci	xt->xt_last_eb_blk = cpu_to_le64(blkno);
30962306a36Sopenharmony_ci}
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_cistatic u64 ocfs2_xattr_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
31262306a36Sopenharmony_ci{
31362306a36Sopenharmony_ci	struct ocfs2_xattr_block *xb = et->et_object;
31462306a36Sopenharmony_ci	struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	return le64_to_cpu(xt->xt_last_eb_blk);
31762306a36Sopenharmony_ci}
31862306a36Sopenharmony_ci
31962306a36Sopenharmony_cistatic void ocfs2_xattr_tree_update_clusters(struct ocfs2_extent_tree *et,
32062306a36Sopenharmony_ci					     u32 clusters)
32162306a36Sopenharmony_ci{
32262306a36Sopenharmony_ci	struct ocfs2_xattr_block *xb = et->et_object;
32362306a36Sopenharmony_ci
32462306a36Sopenharmony_ci	le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, clusters);
32562306a36Sopenharmony_ci}
32662306a36Sopenharmony_ci
32762306a36Sopenharmony_cistatic const struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
32862306a36Sopenharmony_ci	.eo_set_last_eb_blk	= ocfs2_xattr_tree_set_last_eb_blk,
32962306a36Sopenharmony_ci	.eo_get_last_eb_blk	= ocfs2_xattr_tree_get_last_eb_blk,
33062306a36Sopenharmony_ci	.eo_update_clusters	= ocfs2_xattr_tree_update_clusters,
33162306a36Sopenharmony_ci	.eo_fill_root_el	= ocfs2_xattr_tree_fill_root_el,
33262306a36Sopenharmony_ci	.eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters,
33362306a36Sopenharmony_ci};
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_cistatic void ocfs2_dx_root_set_last_eb_blk(struct ocfs2_extent_tree *et,
33662306a36Sopenharmony_ci					  u64 blkno)
33762306a36Sopenharmony_ci{
33862306a36Sopenharmony_ci	struct ocfs2_dx_root_block *dx_root = et->et_object;
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci	dx_root->dr_last_eb_blk = cpu_to_le64(blkno);
34162306a36Sopenharmony_ci}
34262306a36Sopenharmony_ci
34362306a36Sopenharmony_cistatic u64 ocfs2_dx_root_get_last_eb_blk(struct ocfs2_extent_tree *et)
34462306a36Sopenharmony_ci{
34562306a36Sopenharmony_ci	struct ocfs2_dx_root_block *dx_root = et->et_object;
34662306a36Sopenharmony_ci
34762306a36Sopenharmony_ci	return le64_to_cpu(dx_root->dr_last_eb_blk);
34862306a36Sopenharmony_ci}
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_cistatic void ocfs2_dx_root_update_clusters(struct ocfs2_extent_tree *et,
35162306a36Sopenharmony_ci					  u32 clusters)
35262306a36Sopenharmony_ci{
35362306a36Sopenharmony_ci	struct ocfs2_dx_root_block *dx_root = et->et_object;
35462306a36Sopenharmony_ci
35562306a36Sopenharmony_ci	le32_add_cpu(&dx_root->dr_clusters, clusters);
35662306a36Sopenharmony_ci}
35762306a36Sopenharmony_ci
35862306a36Sopenharmony_cistatic int ocfs2_dx_root_sanity_check(struct ocfs2_extent_tree *et)
35962306a36Sopenharmony_ci{
36062306a36Sopenharmony_ci	struct ocfs2_dx_root_block *dx_root = et->et_object;
36162306a36Sopenharmony_ci
36262306a36Sopenharmony_ci	BUG_ON(!OCFS2_IS_VALID_DX_ROOT(dx_root));
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci	return 0;
36562306a36Sopenharmony_ci}
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_cistatic void ocfs2_dx_root_fill_root_el(struct ocfs2_extent_tree *et)
36862306a36Sopenharmony_ci{
36962306a36Sopenharmony_ci	struct ocfs2_dx_root_block *dx_root = et->et_object;
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_ci	et->et_root_el = &dx_root->dr_list;
37262306a36Sopenharmony_ci}
37362306a36Sopenharmony_ci
37462306a36Sopenharmony_cistatic const struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
37562306a36Sopenharmony_ci	.eo_set_last_eb_blk	= ocfs2_dx_root_set_last_eb_blk,
37662306a36Sopenharmony_ci	.eo_get_last_eb_blk	= ocfs2_dx_root_get_last_eb_blk,
37762306a36Sopenharmony_ci	.eo_update_clusters	= ocfs2_dx_root_update_clusters,
37862306a36Sopenharmony_ci	.eo_sanity_check	= ocfs2_dx_root_sanity_check,
37962306a36Sopenharmony_ci	.eo_fill_root_el	= ocfs2_dx_root_fill_root_el,
38062306a36Sopenharmony_ci};
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_cistatic void ocfs2_refcount_tree_fill_root_el(struct ocfs2_extent_tree *et)
38362306a36Sopenharmony_ci{
38462306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb = et->et_object;
38562306a36Sopenharmony_ci
38662306a36Sopenharmony_ci	et->et_root_el = &rb->rf_list;
38762306a36Sopenharmony_ci}
38862306a36Sopenharmony_ci
38962306a36Sopenharmony_cistatic void ocfs2_refcount_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
39062306a36Sopenharmony_ci						u64 blkno)
39162306a36Sopenharmony_ci{
39262306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb = et->et_object;
39362306a36Sopenharmony_ci
39462306a36Sopenharmony_ci	rb->rf_last_eb_blk = cpu_to_le64(blkno);
39562306a36Sopenharmony_ci}
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_cistatic u64 ocfs2_refcount_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
39862306a36Sopenharmony_ci{
39962306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb = et->et_object;
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_ci	return le64_to_cpu(rb->rf_last_eb_blk);
40262306a36Sopenharmony_ci}
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_cistatic void ocfs2_refcount_tree_update_clusters(struct ocfs2_extent_tree *et,
40562306a36Sopenharmony_ci						u32 clusters)
40662306a36Sopenharmony_ci{
40762306a36Sopenharmony_ci	struct ocfs2_refcount_block *rb = et->et_object;
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_ci	le32_add_cpu(&rb->rf_clusters, clusters);
41062306a36Sopenharmony_ci}
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_cistatic enum ocfs2_contig_type
41362306a36Sopenharmony_ciocfs2_refcount_tree_extent_contig(struct ocfs2_extent_tree *et,
41462306a36Sopenharmony_ci				  struct ocfs2_extent_rec *ext,
41562306a36Sopenharmony_ci				  struct ocfs2_extent_rec *insert_rec)
41662306a36Sopenharmony_ci{
41762306a36Sopenharmony_ci	return CONTIG_NONE;
41862306a36Sopenharmony_ci}
41962306a36Sopenharmony_ci
42062306a36Sopenharmony_cistatic const struct ocfs2_extent_tree_operations ocfs2_refcount_tree_et_ops = {
42162306a36Sopenharmony_ci	.eo_set_last_eb_blk	= ocfs2_refcount_tree_set_last_eb_blk,
42262306a36Sopenharmony_ci	.eo_get_last_eb_blk	= ocfs2_refcount_tree_get_last_eb_blk,
42362306a36Sopenharmony_ci	.eo_update_clusters	= ocfs2_refcount_tree_update_clusters,
42462306a36Sopenharmony_ci	.eo_fill_root_el	= ocfs2_refcount_tree_fill_root_el,
42562306a36Sopenharmony_ci	.eo_extent_contig	= ocfs2_refcount_tree_extent_contig,
42662306a36Sopenharmony_ci};
42762306a36Sopenharmony_ci
42862306a36Sopenharmony_cistatic void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
42962306a36Sopenharmony_ci				     struct ocfs2_caching_info *ci,
43062306a36Sopenharmony_ci				     struct buffer_head *bh,
43162306a36Sopenharmony_ci				     ocfs2_journal_access_func access,
43262306a36Sopenharmony_ci				     void *obj,
43362306a36Sopenharmony_ci				     const struct ocfs2_extent_tree_operations *ops)
43462306a36Sopenharmony_ci{
43562306a36Sopenharmony_ci	et->et_ops = ops;
43662306a36Sopenharmony_ci	et->et_root_bh = bh;
43762306a36Sopenharmony_ci	et->et_ci = ci;
43862306a36Sopenharmony_ci	et->et_root_journal_access = access;
43962306a36Sopenharmony_ci	if (!obj)
44062306a36Sopenharmony_ci		obj = (void *)bh->b_data;
44162306a36Sopenharmony_ci	et->et_object = obj;
44262306a36Sopenharmony_ci	et->et_dealloc = NULL;
44362306a36Sopenharmony_ci
44462306a36Sopenharmony_ci	et->et_ops->eo_fill_root_el(et);
44562306a36Sopenharmony_ci	if (!et->et_ops->eo_fill_max_leaf_clusters)
44662306a36Sopenharmony_ci		et->et_max_leaf_clusters = 0;
44762306a36Sopenharmony_ci	else
44862306a36Sopenharmony_ci		et->et_ops->eo_fill_max_leaf_clusters(et);
44962306a36Sopenharmony_ci}
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_civoid ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
45262306a36Sopenharmony_ci				   struct ocfs2_caching_info *ci,
45362306a36Sopenharmony_ci				   struct buffer_head *bh)
45462306a36Sopenharmony_ci{
45562306a36Sopenharmony_ci	__ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_di,
45662306a36Sopenharmony_ci				 NULL, &ocfs2_dinode_et_ops);
45762306a36Sopenharmony_ci}
45862306a36Sopenharmony_ci
45962306a36Sopenharmony_civoid ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
46062306a36Sopenharmony_ci				       struct ocfs2_caching_info *ci,
46162306a36Sopenharmony_ci				       struct buffer_head *bh)
46262306a36Sopenharmony_ci{
46362306a36Sopenharmony_ci	__ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_xb,
46462306a36Sopenharmony_ci				 NULL, &ocfs2_xattr_tree_et_ops);
46562306a36Sopenharmony_ci}
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_civoid ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
46862306a36Sopenharmony_ci					struct ocfs2_caching_info *ci,
46962306a36Sopenharmony_ci					struct ocfs2_xattr_value_buf *vb)
47062306a36Sopenharmony_ci{
47162306a36Sopenharmony_ci	__ocfs2_init_extent_tree(et, ci, vb->vb_bh, vb->vb_access, vb,
47262306a36Sopenharmony_ci				 &ocfs2_xattr_value_et_ops);
47362306a36Sopenharmony_ci}
47462306a36Sopenharmony_ci
47562306a36Sopenharmony_civoid ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
47662306a36Sopenharmony_ci				    struct ocfs2_caching_info *ci,
47762306a36Sopenharmony_ci				    struct buffer_head *bh)
47862306a36Sopenharmony_ci{
47962306a36Sopenharmony_ci	__ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_dr,
48062306a36Sopenharmony_ci				 NULL, &ocfs2_dx_root_et_ops);
48162306a36Sopenharmony_ci}
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_civoid ocfs2_init_refcount_extent_tree(struct ocfs2_extent_tree *et,
48462306a36Sopenharmony_ci				     struct ocfs2_caching_info *ci,
48562306a36Sopenharmony_ci				     struct buffer_head *bh)
48662306a36Sopenharmony_ci{
48762306a36Sopenharmony_ci	__ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_rb,
48862306a36Sopenharmony_ci				 NULL, &ocfs2_refcount_tree_et_ops);
48962306a36Sopenharmony_ci}
49062306a36Sopenharmony_ci
49162306a36Sopenharmony_cistatic inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
49262306a36Sopenharmony_ci					    u64 new_last_eb_blk)
49362306a36Sopenharmony_ci{
49462306a36Sopenharmony_ci	et->et_ops->eo_set_last_eb_blk(et, new_last_eb_blk);
49562306a36Sopenharmony_ci}
49662306a36Sopenharmony_ci
49762306a36Sopenharmony_cistatic inline u64 ocfs2_et_get_last_eb_blk(struct ocfs2_extent_tree *et)
49862306a36Sopenharmony_ci{
49962306a36Sopenharmony_ci	return et->et_ops->eo_get_last_eb_blk(et);
50062306a36Sopenharmony_ci}
50162306a36Sopenharmony_ci
50262306a36Sopenharmony_cistatic inline void ocfs2_et_update_clusters(struct ocfs2_extent_tree *et,
50362306a36Sopenharmony_ci					    u32 clusters)
50462306a36Sopenharmony_ci{
50562306a36Sopenharmony_ci	et->et_ops->eo_update_clusters(et, clusters);
50662306a36Sopenharmony_ci}
50762306a36Sopenharmony_ci
50862306a36Sopenharmony_cistatic inline void ocfs2_et_extent_map_insert(struct ocfs2_extent_tree *et,
50962306a36Sopenharmony_ci					      struct ocfs2_extent_rec *rec)
51062306a36Sopenharmony_ci{
51162306a36Sopenharmony_ci	if (et->et_ops->eo_extent_map_insert)
51262306a36Sopenharmony_ci		et->et_ops->eo_extent_map_insert(et, rec);
51362306a36Sopenharmony_ci}
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_cistatic inline void ocfs2_et_extent_map_truncate(struct ocfs2_extent_tree *et,
51662306a36Sopenharmony_ci						u32 clusters)
51762306a36Sopenharmony_ci{
51862306a36Sopenharmony_ci	if (et->et_ops->eo_extent_map_truncate)
51962306a36Sopenharmony_ci		et->et_ops->eo_extent_map_truncate(et, clusters);
52062306a36Sopenharmony_ci}
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_cistatic inline int ocfs2_et_root_journal_access(handle_t *handle,
52362306a36Sopenharmony_ci					       struct ocfs2_extent_tree *et,
52462306a36Sopenharmony_ci					       int type)
52562306a36Sopenharmony_ci{
52662306a36Sopenharmony_ci	return et->et_root_journal_access(handle, et->et_ci, et->et_root_bh,
52762306a36Sopenharmony_ci					  type);
52862306a36Sopenharmony_ci}
52962306a36Sopenharmony_ci
53062306a36Sopenharmony_cistatic inline enum ocfs2_contig_type
53162306a36Sopenharmony_ci	ocfs2_et_extent_contig(struct ocfs2_extent_tree *et,
53262306a36Sopenharmony_ci			       struct ocfs2_extent_rec *rec,
53362306a36Sopenharmony_ci			       struct ocfs2_extent_rec *insert_rec)
53462306a36Sopenharmony_ci{
53562306a36Sopenharmony_ci	if (et->et_ops->eo_extent_contig)
53662306a36Sopenharmony_ci		return et->et_ops->eo_extent_contig(et, rec, insert_rec);
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci	return ocfs2_extent_rec_contig(
53962306a36Sopenharmony_ci				ocfs2_metadata_cache_get_super(et->et_ci),
54062306a36Sopenharmony_ci				rec, insert_rec);
54162306a36Sopenharmony_ci}
54262306a36Sopenharmony_ci
54362306a36Sopenharmony_cistatic inline int ocfs2_et_insert_check(struct ocfs2_extent_tree *et,
54462306a36Sopenharmony_ci					struct ocfs2_extent_rec *rec)
54562306a36Sopenharmony_ci{
54662306a36Sopenharmony_ci	int ret = 0;
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_ci	if (et->et_ops->eo_insert_check)
54962306a36Sopenharmony_ci		ret = et->et_ops->eo_insert_check(et, rec);
55062306a36Sopenharmony_ci	return ret;
55162306a36Sopenharmony_ci}
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_cistatic inline int ocfs2_et_sanity_check(struct ocfs2_extent_tree *et)
55462306a36Sopenharmony_ci{
55562306a36Sopenharmony_ci	int ret = 0;
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci	if (et->et_ops->eo_sanity_check)
55862306a36Sopenharmony_ci		ret = et->et_ops->eo_sanity_check(et);
55962306a36Sopenharmony_ci	return ret;
56062306a36Sopenharmony_ci}
56162306a36Sopenharmony_ci
56262306a36Sopenharmony_cistatic int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
56362306a36Sopenharmony_ci					 struct ocfs2_extent_block *eb);
56462306a36Sopenharmony_cistatic void ocfs2_adjust_rightmost_records(handle_t *handle,
56562306a36Sopenharmony_ci					   struct ocfs2_extent_tree *et,
56662306a36Sopenharmony_ci					   struct ocfs2_path *path,
56762306a36Sopenharmony_ci					   struct ocfs2_extent_rec *insert_rec);
56862306a36Sopenharmony_ci/*
56962306a36Sopenharmony_ci * Reset the actual path elements so that we can re-use the structure
57062306a36Sopenharmony_ci * to build another path. Generally, this involves freeing the buffer
57162306a36Sopenharmony_ci * heads.
57262306a36Sopenharmony_ci */
57362306a36Sopenharmony_civoid ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
57462306a36Sopenharmony_ci{
57562306a36Sopenharmony_ci	int i, start = 0, depth = 0;
57662306a36Sopenharmony_ci	struct ocfs2_path_item *node;
57762306a36Sopenharmony_ci
57862306a36Sopenharmony_ci	if (keep_root)
57962306a36Sopenharmony_ci		start = 1;
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_ci	for(i = start; i < path_num_items(path); i++) {
58262306a36Sopenharmony_ci		node = &path->p_node[i];
58362306a36Sopenharmony_ci
58462306a36Sopenharmony_ci		brelse(node->bh);
58562306a36Sopenharmony_ci		node->bh = NULL;
58662306a36Sopenharmony_ci		node->el = NULL;
58762306a36Sopenharmony_ci	}
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ci	/*
59062306a36Sopenharmony_ci	 * Tree depth may change during truncate, or insert. If we're
59162306a36Sopenharmony_ci	 * keeping the root extent list, then make sure that our path
59262306a36Sopenharmony_ci	 * structure reflects the proper depth.
59362306a36Sopenharmony_ci	 */
59462306a36Sopenharmony_ci	if (keep_root)
59562306a36Sopenharmony_ci		depth = le16_to_cpu(path_root_el(path)->l_tree_depth);
59662306a36Sopenharmony_ci	else
59762306a36Sopenharmony_ci		path_root_access(path) = NULL;
59862306a36Sopenharmony_ci
59962306a36Sopenharmony_ci	path->p_tree_depth = depth;
60062306a36Sopenharmony_ci}
60162306a36Sopenharmony_ci
/*
 * Fully reset a path (root included) and free the structure itself.
 * A NULL path is accepted and ignored.
 */
void ocfs2_free_path(struct ocfs2_path *path)
{
	if (!path)
		return;

	ocfs2_reinit_path(path, 0);
	kfree(path);
}
60962306a36Sopenharmony_ci
61062306a36Sopenharmony_ci/*
61162306a36Sopenharmony_ci * All the elements of src into dest. After this call, src could be freed
61262306a36Sopenharmony_ci * without affecting dest.
61362306a36Sopenharmony_ci *
61462306a36Sopenharmony_ci * Both paths should have the same root. Any non-root elements of dest
61562306a36Sopenharmony_ci * will be freed.
61662306a36Sopenharmony_ci */
61762306a36Sopenharmony_cistatic void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src)
61862306a36Sopenharmony_ci{
61962306a36Sopenharmony_ci	int i;
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci	BUG_ON(path_root_bh(dest) != path_root_bh(src));
62262306a36Sopenharmony_ci	BUG_ON(path_root_el(dest) != path_root_el(src));
62362306a36Sopenharmony_ci	BUG_ON(path_root_access(dest) != path_root_access(src));
62462306a36Sopenharmony_ci
62562306a36Sopenharmony_ci	ocfs2_reinit_path(dest, 1);
62662306a36Sopenharmony_ci
62762306a36Sopenharmony_ci	for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) {
62862306a36Sopenharmony_ci		dest->p_node[i].bh = src->p_node[i].bh;
62962306a36Sopenharmony_ci		dest->p_node[i].el = src->p_node[i].el;
63062306a36Sopenharmony_ci
63162306a36Sopenharmony_ci		if (dest->p_node[i].bh)
63262306a36Sopenharmony_ci			get_bh(dest->p_node[i].bh);
63362306a36Sopenharmony_ci	}
63462306a36Sopenharmony_ci}
63562306a36Sopenharmony_ci
63662306a36Sopenharmony_ci/*
63762306a36Sopenharmony_ci * Make the *dest path the same as src and re-initialize src path to
63862306a36Sopenharmony_ci * have a root only.
63962306a36Sopenharmony_ci */
64062306a36Sopenharmony_cistatic void ocfs2_mv_path(struct ocfs2_path *dest, struct ocfs2_path *src)
64162306a36Sopenharmony_ci{
64262306a36Sopenharmony_ci	int i;
64362306a36Sopenharmony_ci
64462306a36Sopenharmony_ci	BUG_ON(path_root_bh(dest) != path_root_bh(src));
64562306a36Sopenharmony_ci	BUG_ON(path_root_access(dest) != path_root_access(src));
64662306a36Sopenharmony_ci
64762306a36Sopenharmony_ci	for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) {
64862306a36Sopenharmony_ci		brelse(dest->p_node[i].bh);
64962306a36Sopenharmony_ci
65062306a36Sopenharmony_ci		dest->p_node[i].bh = src->p_node[i].bh;
65162306a36Sopenharmony_ci		dest->p_node[i].el = src->p_node[i].el;
65262306a36Sopenharmony_ci
65362306a36Sopenharmony_ci		src->p_node[i].bh = NULL;
65462306a36Sopenharmony_ci		src->p_node[i].el = NULL;
65562306a36Sopenharmony_ci	}
65662306a36Sopenharmony_ci}
65762306a36Sopenharmony_ci
65862306a36Sopenharmony_ci/*
65962306a36Sopenharmony_ci * Insert an extent block at given index.
66062306a36Sopenharmony_ci *
66162306a36Sopenharmony_ci * This will not take an additional reference on eb_bh.
66262306a36Sopenharmony_ci */
66362306a36Sopenharmony_cistatic inline void ocfs2_path_insert_eb(struct ocfs2_path *path, int index,
66462306a36Sopenharmony_ci					struct buffer_head *eb_bh)
66562306a36Sopenharmony_ci{
66662306a36Sopenharmony_ci	struct ocfs2_extent_block *eb = (struct ocfs2_extent_block *)eb_bh->b_data;
66762306a36Sopenharmony_ci
66862306a36Sopenharmony_ci	/*
66962306a36Sopenharmony_ci	 * Right now, no root bh is an extent block, so this helps
67062306a36Sopenharmony_ci	 * catch code errors with dinode trees. The assertion can be
67162306a36Sopenharmony_ci	 * safely removed if we ever need to insert extent block
67262306a36Sopenharmony_ci	 * structures at the root.
67362306a36Sopenharmony_ci	 */
67462306a36Sopenharmony_ci	BUG_ON(index == 0);
67562306a36Sopenharmony_ci
67662306a36Sopenharmony_ci	path->p_node[index].bh = eb_bh;
67762306a36Sopenharmony_ci	path->p_node[index].el = &eb->h_list;
67862306a36Sopenharmony_ci}
67962306a36Sopenharmony_ci
68062306a36Sopenharmony_cistatic struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
68162306a36Sopenharmony_ci					 struct ocfs2_extent_list *root_el,
68262306a36Sopenharmony_ci					 ocfs2_journal_access_func access)
68362306a36Sopenharmony_ci{
68462306a36Sopenharmony_ci	struct ocfs2_path *path;
68562306a36Sopenharmony_ci
68662306a36Sopenharmony_ci	BUG_ON(le16_to_cpu(root_el->l_tree_depth) >= OCFS2_MAX_PATH_DEPTH);
68762306a36Sopenharmony_ci
68862306a36Sopenharmony_ci	path = kzalloc(sizeof(*path), GFP_NOFS);
68962306a36Sopenharmony_ci	if (path) {
69062306a36Sopenharmony_ci		path->p_tree_depth = le16_to_cpu(root_el->l_tree_depth);
69162306a36Sopenharmony_ci		get_bh(root_bh);
69262306a36Sopenharmony_ci		path_root_bh(path) = root_bh;
69362306a36Sopenharmony_ci		path_root_el(path) = root_el;
69462306a36Sopenharmony_ci		path_root_access(path) = access;
69562306a36Sopenharmony_ci	}
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ci	return path;
69862306a36Sopenharmony_ci}
69962306a36Sopenharmony_ci
70062306a36Sopenharmony_cistruct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path)
70162306a36Sopenharmony_ci{
70262306a36Sopenharmony_ci	return ocfs2_new_path(path_root_bh(path), path_root_el(path),
70362306a36Sopenharmony_ci			      path_root_access(path));
70462306a36Sopenharmony_ci}
70562306a36Sopenharmony_ci
70662306a36Sopenharmony_cistruct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
70762306a36Sopenharmony_ci{
70862306a36Sopenharmony_ci	return ocfs2_new_path(et->et_root_bh, et->et_root_el,
70962306a36Sopenharmony_ci			      et->et_root_journal_access);
71062306a36Sopenharmony_ci}
71162306a36Sopenharmony_ci
71262306a36Sopenharmony_ci/*
71362306a36Sopenharmony_ci * Journal the buffer at depth idx.  All idx>0 are extent_blocks,
71462306a36Sopenharmony_ci * otherwise it's the root_access function.
71562306a36Sopenharmony_ci *
71662306a36Sopenharmony_ci * I don't like the way this function's name looks next to
71762306a36Sopenharmony_ci * ocfs2_journal_access_path(), but I don't have a better one.
71862306a36Sopenharmony_ci */
71962306a36Sopenharmony_ciint ocfs2_path_bh_journal_access(handle_t *handle,
72062306a36Sopenharmony_ci				 struct ocfs2_caching_info *ci,
72162306a36Sopenharmony_ci				 struct ocfs2_path *path,
72262306a36Sopenharmony_ci				 int idx)
72362306a36Sopenharmony_ci{
72462306a36Sopenharmony_ci	ocfs2_journal_access_func access = path_root_access(path);
72562306a36Sopenharmony_ci
72662306a36Sopenharmony_ci	if (!access)
72762306a36Sopenharmony_ci		access = ocfs2_journal_access;
72862306a36Sopenharmony_ci
72962306a36Sopenharmony_ci	if (idx)
73062306a36Sopenharmony_ci		access = ocfs2_journal_access_eb;
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci	return access(handle, ci, path->p_node[idx].bh,
73362306a36Sopenharmony_ci		      OCFS2_JOURNAL_ACCESS_WRITE);
73462306a36Sopenharmony_ci}
73562306a36Sopenharmony_ci
73662306a36Sopenharmony_ci/*
73762306a36Sopenharmony_ci * Convenience function to journal all components in a path.
73862306a36Sopenharmony_ci */
73962306a36Sopenharmony_ciint ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
74062306a36Sopenharmony_ci			      handle_t *handle,
74162306a36Sopenharmony_ci			      struct ocfs2_path *path)
74262306a36Sopenharmony_ci{
74362306a36Sopenharmony_ci	int i, ret = 0;
74462306a36Sopenharmony_ci
74562306a36Sopenharmony_ci	if (!path)
74662306a36Sopenharmony_ci		goto out;
74762306a36Sopenharmony_ci
74862306a36Sopenharmony_ci	for(i = 0; i < path_num_items(path); i++) {
74962306a36Sopenharmony_ci		ret = ocfs2_path_bh_journal_access(handle, ci, path, i);
75062306a36Sopenharmony_ci		if (ret < 0) {
75162306a36Sopenharmony_ci			mlog_errno(ret);
75262306a36Sopenharmony_ci			goto out;
75362306a36Sopenharmony_ci		}
75462306a36Sopenharmony_ci	}
75562306a36Sopenharmony_ci
75662306a36Sopenharmony_ciout:
75762306a36Sopenharmony_ci	return ret;
75862306a36Sopenharmony_ci}
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_ci/*
76162306a36Sopenharmony_ci * Return the index of the extent record which contains cluster #v_cluster.
76262306a36Sopenharmony_ci * -1 is returned if it was not found.
76362306a36Sopenharmony_ci *
76462306a36Sopenharmony_ci * Should work fine on interior and exterior nodes.
76562306a36Sopenharmony_ci */
76662306a36Sopenharmony_ciint ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster)
76762306a36Sopenharmony_ci{
76862306a36Sopenharmony_ci	int ret = -1;
76962306a36Sopenharmony_ci	int i;
77062306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
77162306a36Sopenharmony_ci	u32 rec_end, rec_start, clusters;
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_ci	for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
77462306a36Sopenharmony_ci		rec = &el->l_recs[i];
77562306a36Sopenharmony_ci
77662306a36Sopenharmony_ci		rec_start = le32_to_cpu(rec->e_cpos);
77762306a36Sopenharmony_ci		clusters = ocfs2_rec_clusters(el, rec);
77862306a36Sopenharmony_ci
77962306a36Sopenharmony_ci		rec_end = rec_start + clusters;
78062306a36Sopenharmony_ci
78162306a36Sopenharmony_ci		if (v_cluster >= rec_start && v_cluster < rec_end) {
78262306a36Sopenharmony_ci			ret = i;
78362306a36Sopenharmony_ci			break;
78462306a36Sopenharmony_ci		}
78562306a36Sopenharmony_ci	}
78662306a36Sopenharmony_ci
78762306a36Sopenharmony_ci	return ret;
78862306a36Sopenharmony_ci}
78962306a36Sopenharmony_ci
79062306a36Sopenharmony_ci/*
79162306a36Sopenharmony_ci * NOTE: ocfs2_block_extent_contig(), ocfs2_extents_adjacent() and
79262306a36Sopenharmony_ci * ocfs2_extent_rec_contig only work properly against leaf nodes!
79362306a36Sopenharmony_ci */
79462306a36Sopenharmony_cistatic int ocfs2_block_extent_contig(struct super_block *sb,
79562306a36Sopenharmony_ci				     struct ocfs2_extent_rec *ext,
79662306a36Sopenharmony_ci				     u64 blkno)
79762306a36Sopenharmony_ci{
79862306a36Sopenharmony_ci	u64 blk_end = le64_to_cpu(ext->e_blkno);
79962306a36Sopenharmony_ci
80062306a36Sopenharmony_ci	blk_end += ocfs2_clusters_to_blocks(sb,
80162306a36Sopenharmony_ci				    le16_to_cpu(ext->e_leaf_clusters));
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ci	return blkno == blk_end;
80462306a36Sopenharmony_ci}
80562306a36Sopenharmony_ci
80662306a36Sopenharmony_cistatic int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left,
80762306a36Sopenharmony_ci				  struct ocfs2_extent_rec *right)
80862306a36Sopenharmony_ci{
80962306a36Sopenharmony_ci	u32 left_range;
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_ci	left_range = le32_to_cpu(left->e_cpos) +
81262306a36Sopenharmony_ci		le16_to_cpu(left->e_leaf_clusters);
81362306a36Sopenharmony_ci
81462306a36Sopenharmony_ci	return (left_range == le32_to_cpu(right->e_cpos));
81562306a36Sopenharmony_ci}
81662306a36Sopenharmony_ci
81762306a36Sopenharmony_cistatic enum ocfs2_contig_type
81862306a36Sopenharmony_ci	ocfs2_extent_rec_contig(struct super_block *sb,
81962306a36Sopenharmony_ci				struct ocfs2_extent_rec *ext,
82062306a36Sopenharmony_ci				struct ocfs2_extent_rec *insert_rec)
82162306a36Sopenharmony_ci{
82262306a36Sopenharmony_ci	u64 blkno = le64_to_cpu(insert_rec->e_blkno);
82362306a36Sopenharmony_ci
82462306a36Sopenharmony_ci	/*
82562306a36Sopenharmony_ci	 * Refuse to coalesce extent records with different flag
82662306a36Sopenharmony_ci	 * fields - we don't want to mix unwritten extents with user
82762306a36Sopenharmony_ci	 * data.
82862306a36Sopenharmony_ci	 */
82962306a36Sopenharmony_ci	if (ext->e_flags != insert_rec->e_flags)
83062306a36Sopenharmony_ci		return CONTIG_NONE;
83162306a36Sopenharmony_ci
83262306a36Sopenharmony_ci	if (ocfs2_extents_adjacent(ext, insert_rec) &&
83362306a36Sopenharmony_ci	    ocfs2_block_extent_contig(sb, ext, blkno))
83462306a36Sopenharmony_ci			return CONTIG_RIGHT;
83562306a36Sopenharmony_ci
83662306a36Sopenharmony_ci	blkno = le64_to_cpu(ext->e_blkno);
83762306a36Sopenharmony_ci	if (ocfs2_extents_adjacent(insert_rec, ext) &&
83862306a36Sopenharmony_ci	    ocfs2_block_extent_contig(sb, insert_rec, blkno))
83962306a36Sopenharmony_ci		return CONTIG_LEFT;
84062306a36Sopenharmony_ci
84162306a36Sopenharmony_ci	return CONTIG_NONE;
84262306a36Sopenharmony_ci}
84362306a36Sopenharmony_ci
/*
 * NOTE: We can have pretty much any combination of contiguousness and
 * appending.
 *
 * APPEND_TAIL is mostly useful because it tells us that the path to
 * that leaf will have to be updated.
 */
enum ocfs2_append_type {
	APPEND_NONE = 0,
	APPEND_TAIL = 1,
};

/* Kind of split, if any, recorded for an insert (see ins_split). */
enum ocfs2_split_type {
	SPLIT_NONE  = 0,
	SPLIT_LEFT  = 1,
	SPLIT_RIGHT = 2,
};
86162306a36Sopenharmony_ci
86262306a36Sopenharmony_cistruct ocfs2_insert_type {
86362306a36Sopenharmony_ci	enum ocfs2_split_type	ins_split;
86462306a36Sopenharmony_ci	enum ocfs2_append_type	ins_appending;
86562306a36Sopenharmony_ci	enum ocfs2_contig_type	ins_contig;
86662306a36Sopenharmony_ci	int			ins_contig_index;
86762306a36Sopenharmony_ci	int			ins_tree_depth;
86862306a36Sopenharmony_ci};
86962306a36Sopenharmony_ci
87062306a36Sopenharmony_cistruct ocfs2_merge_ctxt {
87162306a36Sopenharmony_ci	enum ocfs2_contig_type	c_contig_type;
87262306a36Sopenharmony_ci	int			c_has_empty_extent;
87362306a36Sopenharmony_ci	int			c_split_covers_rec;
87462306a36Sopenharmony_ci};
87562306a36Sopenharmony_ci
87662306a36Sopenharmony_cistatic int ocfs2_validate_extent_block(struct super_block *sb,
87762306a36Sopenharmony_ci				       struct buffer_head *bh)
87862306a36Sopenharmony_ci{
87962306a36Sopenharmony_ci	int rc;
88062306a36Sopenharmony_ci	struct ocfs2_extent_block *eb =
88162306a36Sopenharmony_ci		(struct ocfs2_extent_block *)bh->b_data;
88262306a36Sopenharmony_ci
88362306a36Sopenharmony_ci	trace_ocfs2_validate_extent_block((unsigned long long)bh->b_blocknr);
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci	BUG_ON(!buffer_uptodate(bh));
88662306a36Sopenharmony_ci
88762306a36Sopenharmony_ci	/*
88862306a36Sopenharmony_ci	 * If the ecc fails, we return the error but otherwise
88962306a36Sopenharmony_ci	 * leave the filesystem running.  We know any error is
89062306a36Sopenharmony_ci	 * local to this block.
89162306a36Sopenharmony_ci	 */
89262306a36Sopenharmony_ci	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &eb->h_check);
89362306a36Sopenharmony_ci	if (rc) {
89462306a36Sopenharmony_ci		mlog(ML_ERROR, "Checksum failed for extent block %llu\n",
89562306a36Sopenharmony_ci		     (unsigned long long)bh->b_blocknr);
89662306a36Sopenharmony_ci		return rc;
89762306a36Sopenharmony_ci	}
89862306a36Sopenharmony_ci
89962306a36Sopenharmony_ci	/*
90062306a36Sopenharmony_ci	 * Errors after here are fatal.
90162306a36Sopenharmony_ci	 */
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_ci	if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
90462306a36Sopenharmony_ci		rc = ocfs2_error(sb,
90562306a36Sopenharmony_ci				 "Extent block #%llu has bad signature %.*s\n",
90662306a36Sopenharmony_ci				 (unsigned long long)bh->b_blocknr, 7,
90762306a36Sopenharmony_ci				 eb->h_signature);
90862306a36Sopenharmony_ci		goto bail;
90962306a36Sopenharmony_ci	}
91062306a36Sopenharmony_ci
91162306a36Sopenharmony_ci	if (le64_to_cpu(eb->h_blkno) != bh->b_blocknr) {
91262306a36Sopenharmony_ci		rc = ocfs2_error(sb,
91362306a36Sopenharmony_ci				 "Extent block #%llu has an invalid h_blkno of %llu\n",
91462306a36Sopenharmony_ci				 (unsigned long long)bh->b_blocknr,
91562306a36Sopenharmony_ci				 (unsigned long long)le64_to_cpu(eb->h_blkno));
91662306a36Sopenharmony_ci		goto bail;
91762306a36Sopenharmony_ci	}
91862306a36Sopenharmony_ci
91962306a36Sopenharmony_ci	if (le32_to_cpu(eb->h_fs_generation) != OCFS2_SB(sb)->fs_generation)
92062306a36Sopenharmony_ci		rc = ocfs2_error(sb,
92162306a36Sopenharmony_ci				 "Extent block #%llu has an invalid h_fs_generation of #%u\n",
92262306a36Sopenharmony_ci				 (unsigned long long)bh->b_blocknr,
92362306a36Sopenharmony_ci				 le32_to_cpu(eb->h_fs_generation));
92462306a36Sopenharmony_cibail:
92562306a36Sopenharmony_ci	return rc;
92662306a36Sopenharmony_ci}
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_ciint ocfs2_read_extent_block(struct ocfs2_caching_info *ci, u64 eb_blkno,
92962306a36Sopenharmony_ci			    struct buffer_head **bh)
93062306a36Sopenharmony_ci{
93162306a36Sopenharmony_ci	int rc;
93262306a36Sopenharmony_ci	struct buffer_head *tmp = *bh;
93362306a36Sopenharmony_ci
93462306a36Sopenharmony_ci	rc = ocfs2_read_block(ci, eb_blkno, &tmp,
93562306a36Sopenharmony_ci			      ocfs2_validate_extent_block);
93662306a36Sopenharmony_ci
93762306a36Sopenharmony_ci	/* If ocfs2_read_block() got us a new bh, pass it up. */
93862306a36Sopenharmony_ci	if (!rc && !*bh)
93962306a36Sopenharmony_ci		*bh = tmp;
94062306a36Sopenharmony_ci
94162306a36Sopenharmony_ci	return rc;
94262306a36Sopenharmony_ci}
94362306a36Sopenharmony_ci
94462306a36Sopenharmony_ci
94562306a36Sopenharmony_ci/*
94662306a36Sopenharmony_ci * How many free extents have we got before we need more meta data?
94762306a36Sopenharmony_ci */
94862306a36Sopenharmony_ciint ocfs2_num_free_extents(struct ocfs2_extent_tree *et)
94962306a36Sopenharmony_ci{
95062306a36Sopenharmony_ci	int retval;
95162306a36Sopenharmony_ci	struct ocfs2_extent_list *el = NULL;
95262306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
95362306a36Sopenharmony_ci	struct buffer_head *eb_bh = NULL;
95462306a36Sopenharmony_ci	u64 last_eb_blk = 0;
95562306a36Sopenharmony_ci
95662306a36Sopenharmony_ci	el = et->et_root_el;
95762306a36Sopenharmony_ci	last_eb_blk = ocfs2_et_get_last_eb_blk(et);
95862306a36Sopenharmony_ci
95962306a36Sopenharmony_ci	if (last_eb_blk) {
96062306a36Sopenharmony_ci		retval = ocfs2_read_extent_block(et->et_ci, last_eb_blk,
96162306a36Sopenharmony_ci						 &eb_bh);
96262306a36Sopenharmony_ci		if (retval < 0) {
96362306a36Sopenharmony_ci			mlog_errno(retval);
96462306a36Sopenharmony_ci			goto bail;
96562306a36Sopenharmony_ci		}
96662306a36Sopenharmony_ci		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
96762306a36Sopenharmony_ci		el = &eb->h_list;
96862306a36Sopenharmony_ci	}
96962306a36Sopenharmony_ci
97062306a36Sopenharmony_ci	BUG_ON(el->l_tree_depth != 0);
97162306a36Sopenharmony_ci
97262306a36Sopenharmony_ci	retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec);
97362306a36Sopenharmony_cibail:
97462306a36Sopenharmony_ci	brelse(eb_bh);
97562306a36Sopenharmony_ci
97662306a36Sopenharmony_ci	trace_ocfs2_num_free_extents(retval);
97762306a36Sopenharmony_ci	return retval;
97862306a36Sopenharmony_ci}
97962306a36Sopenharmony_ci
98062306a36Sopenharmony_ci/* expects array to already be allocated
98162306a36Sopenharmony_ci *
98262306a36Sopenharmony_ci * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and
98362306a36Sopenharmony_ci * l_count for you
98462306a36Sopenharmony_ci */
98562306a36Sopenharmony_cistatic int ocfs2_create_new_meta_bhs(handle_t *handle,
98662306a36Sopenharmony_ci				     struct ocfs2_extent_tree *et,
98762306a36Sopenharmony_ci				     int wanted,
98862306a36Sopenharmony_ci				     struct ocfs2_alloc_context *meta_ac,
98962306a36Sopenharmony_ci				     struct buffer_head *bhs[])
99062306a36Sopenharmony_ci{
99162306a36Sopenharmony_ci	int count, status, i;
99262306a36Sopenharmony_ci	u16 suballoc_bit_start;
99362306a36Sopenharmony_ci	u32 num_got;
99462306a36Sopenharmony_ci	u64 suballoc_loc, first_blkno;
99562306a36Sopenharmony_ci	struct ocfs2_super *osb =
99662306a36Sopenharmony_ci		OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
99762306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
99862306a36Sopenharmony_ci
99962306a36Sopenharmony_ci	count = 0;
100062306a36Sopenharmony_ci	while (count < wanted) {
100162306a36Sopenharmony_ci		status = ocfs2_claim_metadata(handle,
100262306a36Sopenharmony_ci					      meta_ac,
100362306a36Sopenharmony_ci					      wanted - count,
100462306a36Sopenharmony_ci					      &suballoc_loc,
100562306a36Sopenharmony_ci					      &suballoc_bit_start,
100662306a36Sopenharmony_ci					      &num_got,
100762306a36Sopenharmony_ci					      &first_blkno);
100862306a36Sopenharmony_ci		if (status < 0) {
100962306a36Sopenharmony_ci			mlog_errno(status);
101062306a36Sopenharmony_ci			goto bail;
101162306a36Sopenharmony_ci		}
101262306a36Sopenharmony_ci
101362306a36Sopenharmony_ci		for(i = count;  i < (num_got + count); i++) {
101462306a36Sopenharmony_ci			bhs[i] = sb_getblk(osb->sb, first_blkno);
101562306a36Sopenharmony_ci			if (bhs[i] == NULL) {
101662306a36Sopenharmony_ci				status = -ENOMEM;
101762306a36Sopenharmony_ci				mlog_errno(status);
101862306a36Sopenharmony_ci				goto bail;
101962306a36Sopenharmony_ci			}
102062306a36Sopenharmony_ci			ocfs2_set_new_buffer_uptodate(et->et_ci, bhs[i]);
102162306a36Sopenharmony_ci
102262306a36Sopenharmony_ci			status = ocfs2_journal_access_eb(handle, et->et_ci,
102362306a36Sopenharmony_ci							 bhs[i],
102462306a36Sopenharmony_ci							 OCFS2_JOURNAL_ACCESS_CREATE);
102562306a36Sopenharmony_ci			if (status < 0) {
102662306a36Sopenharmony_ci				mlog_errno(status);
102762306a36Sopenharmony_ci				goto bail;
102862306a36Sopenharmony_ci			}
102962306a36Sopenharmony_ci
103062306a36Sopenharmony_ci			memset(bhs[i]->b_data, 0, osb->sb->s_blocksize);
103162306a36Sopenharmony_ci			eb = (struct ocfs2_extent_block *) bhs[i]->b_data;
103262306a36Sopenharmony_ci			/* Ok, setup the minimal stuff here. */
103362306a36Sopenharmony_ci			strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
103462306a36Sopenharmony_ci			eb->h_blkno = cpu_to_le64(first_blkno);
103562306a36Sopenharmony_ci			eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
103662306a36Sopenharmony_ci			eb->h_suballoc_slot =
103762306a36Sopenharmony_ci				cpu_to_le16(meta_ac->ac_alloc_slot);
103862306a36Sopenharmony_ci			eb->h_suballoc_loc = cpu_to_le64(suballoc_loc);
103962306a36Sopenharmony_ci			eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
104062306a36Sopenharmony_ci			eb->h_list.l_count =
104162306a36Sopenharmony_ci				cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
104262306a36Sopenharmony_ci
104362306a36Sopenharmony_ci			suballoc_bit_start++;
104462306a36Sopenharmony_ci			first_blkno++;
104562306a36Sopenharmony_ci
104662306a36Sopenharmony_ci			/* We'll also be dirtied by the caller, so
104762306a36Sopenharmony_ci			 * this isn't absolutely necessary. */
104862306a36Sopenharmony_ci			ocfs2_journal_dirty(handle, bhs[i]);
104962306a36Sopenharmony_ci		}
105062306a36Sopenharmony_ci
105162306a36Sopenharmony_ci		count += num_got;
105262306a36Sopenharmony_ci	}
105362306a36Sopenharmony_ci
105462306a36Sopenharmony_ci	status = 0;
105562306a36Sopenharmony_cibail:
105662306a36Sopenharmony_ci	if (status < 0) {
105762306a36Sopenharmony_ci		for(i = 0; i < wanted; i++) {
105862306a36Sopenharmony_ci			brelse(bhs[i]);
105962306a36Sopenharmony_ci			bhs[i] = NULL;
106062306a36Sopenharmony_ci		}
106162306a36Sopenharmony_ci	}
106262306a36Sopenharmony_ci	return status;
106362306a36Sopenharmony_ci}
106462306a36Sopenharmony_ci
106562306a36Sopenharmony_ci/*
106662306a36Sopenharmony_ci * Helper function for ocfs2_add_branch() and ocfs2_shift_tree_depth().
106762306a36Sopenharmony_ci *
106862306a36Sopenharmony_ci * Returns the sum of the rightmost extent rec logical offset and
106962306a36Sopenharmony_ci * cluster count.
107062306a36Sopenharmony_ci *
107162306a36Sopenharmony_ci * ocfs2_add_branch() uses this to determine what logical cluster
107262306a36Sopenharmony_ci * value should be populated into the leftmost new branch records.
107362306a36Sopenharmony_ci *
107462306a36Sopenharmony_ci * ocfs2_shift_tree_depth() uses this to determine the # clusters
107562306a36Sopenharmony_ci * value for the new topmost tree record.
107662306a36Sopenharmony_ci */
107762306a36Sopenharmony_cistatic inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list  *el)
107862306a36Sopenharmony_ci{
107962306a36Sopenharmony_ci	int i;
108062306a36Sopenharmony_ci
108162306a36Sopenharmony_ci	i = le16_to_cpu(el->l_next_free_rec) - 1;
108262306a36Sopenharmony_ci
108362306a36Sopenharmony_ci	return le32_to_cpu(el->l_recs[i].e_cpos) +
108462306a36Sopenharmony_ci		ocfs2_rec_clusters(el, &el->l_recs[i]);
108562306a36Sopenharmony_ci}
108662306a36Sopenharmony_ci
108762306a36Sopenharmony_ci/*
108862306a36Sopenharmony_ci * Change range of the branches in the right most path according to the leaf
108962306a36Sopenharmony_ci * extent block's rightmost record.
109062306a36Sopenharmony_ci */
109162306a36Sopenharmony_cistatic int ocfs2_adjust_rightmost_branch(handle_t *handle,
109262306a36Sopenharmony_ci					 struct ocfs2_extent_tree *et)
109362306a36Sopenharmony_ci{
109462306a36Sopenharmony_ci	int status;
109562306a36Sopenharmony_ci	struct ocfs2_path *path = NULL;
109662306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
109762306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
109862306a36Sopenharmony_ci
109962306a36Sopenharmony_ci	path = ocfs2_new_path_from_et(et);
110062306a36Sopenharmony_ci	if (!path) {
110162306a36Sopenharmony_ci		status = -ENOMEM;
110262306a36Sopenharmony_ci		return status;
110362306a36Sopenharmony_ci	}
110462306a36Sopenharmony_ci
110562306a36Sopenharmony_ci	status = ocfs2_find_path(et->et_ci, path, UINT_MAX);
110662306a36Sopenharmony_ci	if (status < 0) {
110762306a36Sopenharmony_ci		mlog_errno(status);
110862306a36Sopenharmony_ci		goto out;
110962306a36Sopenharmony_ci	}
111062306a36Sopenharmony_ci
111162306a36Sopenharmony_ci	status = ocfs2_extend_trans(handle, path_num_items(path));
111262306a36Sopenharmony_ci	if (status < 0) {
111362306a36Sopenharmony_ci		mlog_errno(status);
111462306a36Sopenharmony_ci		goto out;
111562306a36Sopenharmony_ci	}
111662306a36Sopenharmony_ci
111762306a36Sopenharmony_ci	status = ocfs2_journal_access_path(et->et_ci, handle, path);
111862306a36Sopenharmony_ci	if (status < 0) {
111962306a36Sopenharmony_ci		mlog_errno(status);
112062306a36Sopenharmony_ci		goto out;
112162306a36Sopenharmony_ci	}
112262306a36Sopenharmony_ci
112362306a36Sopenharmony_ci	el = path_leaf_el(path);
112462306a36Sopenharmony_ci	rec = &el->l_recs[le16_to_cpu(el->l_next_free_rec) - 1];
112562306a36Sopenharmony_ci
112662306a36Sopenharmony_ci	ocfs2_adjust_rightmost_records(handle, et, path, rec);
112762306a36Sopenharmony_ci
112862306a36Sopenharmony_ciout:
112962306a36Sopenharmony_ci	ocfs2_free_path(path);
113062306a36Sopenharmony_ci	return status;
113162306a36Sopenharmony_ci}
113262306a36Sopenharmony_ci
/*
 * Add an entire tree branch to our inode. eb_bh is the extent block
 * to start at, if we don't want to start the branch at the root
 * structure.
 *
 * last_eb_bh is required as we have to update its next_leaf pointer
 * for the new last extent block.
 *
 * The new branch will be 'empty' in the sense that every block will
 * contain a single record with cluster count == 0.
 */
114462306a36Sopenharmony_cistatic int ocfs2_add_branch(handle_t *handle,
114562306a36Sopenharmony_ci			    struct ocfs2_extent_tree *et,
114662306a36Sopenharmony_ci			    struct buffer_head *eb_bh,
114762306a36Sopenharmony_ci			    struct buffer_head **last_eb_bh,
114862306a36Sopenharmony_ci			    struct ocfs2_alloc_context *meta_ac)
114962306a36Sopenharmony_ci{
115062306a36Sopenharmony_ci	int status, new_blocks, i, block_given = 0;
115162306a36Sopenharmony_ci	u64 next_blkno, new_last_eb_blk;
115262306a36Sopenharmony_ci	struct buffer_head *bh;
115362306a36Sopenharmony_ci	struct buffer_head **new_eb_bhs = NULL;
115462306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
115562306a36Sopenharmony_ci	struct ocfs2_extent_list  *eb_el;
115662306a36Sopenharmony_ci	struct ocfs2_extent_list  *el;
115762306a36Sopenharmony_ci	u32 new_cpos, root_end;
115862306a36Sopenharmony_ci
115962306a36Sopenharmony_ci	BUG_ON(!last_eb_bh || !*last_eb_bh);
116062306a36Sopenharmony_ci
116162306a36Sopenharmony_ci	if (eb_bh) {
116262306a36Sopenharmony_ci		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
116362306a36Sopenharmony_ci		el = &eb->h_list;
116462306a36Sopenharmony_ci	} else
116562306a36Sopenharmony_ci		el = et->et_root_el;
116662306a36Sopenharmony_ci
116762306a36Sopenharmony_ci	/* we never add a branch to a leaf. */
116862306a36Sopenharmony_ci	BUG_ON(!el->l_tree_depth);
116962306a36Sopenharmony_ci
117062306a36Sopenharmony_ci	new_blocks = le16_to_cpu(el->l_tree_depth);
117162306a36Sopenharmony_ci
117262306a36Sopenharmony_ci	eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
117362306a36Sopenharmony_ci	new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list);
117462306a36Sopenharmony_ci	root_end = ocfs2_sum_rightmost_rec(et->et_root_el);
117562306a36Sopenharmony_ci
	/*
	 * If there is a gap between the root end and the real end
	 * of the rightmost leaf block, we need to remove the gap
	 * between new_cpos and root_end first so that the tree
	 * is consistent after we add a new branch (it will start
	 * from new_cpos).
	 */
118362306a36Sopenharmony_ci	if (root_end > new_cpos) {
118462306a36Sopenharmony_ci		trace_ocfs2_adjust_rightmost_branch(
118562306a36Sopenharmony_ci			(unsigned long long)
118662306a36Sopenharmony_ci			ocfs2_metadata_cache_owner(et->et_ci),
118762306a36Sopenharmony_ci			root_end, new_cpos);
118862306a36Sopenharmony_ci
118962306a36Sopenharmony_ci		status = ocfs2_adjust_rightmost_branch(handle, et);
119062306a36Sopenharmony_ci		if (status) {
119162306a36Sopenharmony_ci			mlog_errno(status);
119262306a36Sopenharmony_ci			goto bail;
119362306a36Sopenharmony_ci		}
119462306a36Sopenharmony_ci	}
119562306a36Sopenharmony_ci
119662306a36Sopenharmony_ci	/* allocate the number of new eb blocks we need */
119762306a36Sopenharmony_ci	new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *),
119862306a36Sopenharmony_ci			     GFP_KERNEL);
119962306a36Sopenharmony_ci	if (!new_eb_bhs) {
120062306a36Sopenharmony_ci		status = -ENOMEM;
120162306a36Sopenharmony_ci		mlog_errno(status);
120262306a36Sopenharmony_ci		goto bail;
120362306a36Sopenharmony_ci	}
120462306a36Sopenharmony_ci
	/* First, try to reuse blocks from dealloc, since we have already
	 * estimated how many extent blocks we may use.
	 */
120862306a36Sopenharmony_ci	if (!ocfs2_is_dealloc_empty(et)) {
120962306a36Sopenharmony_ci		status = ocfs2_reuse_blk_from_dealloc(handle, et,
121062306a36Sopenharmony_ci						      new_eb_bhs, new_blocks,
121162306a36Sopenharmony_ci						      &block_given);
121262306a36Sopenharmony_ci		if (status < 0) {
121362306a36Sopenharmony_ci			mlog_errno(status);
121462306a36Sopenharmony_ci			goto bail;
121562306a36Sopenharmony_ci		}
121662306a36Sopenharmony_ci	}
121762306a36Sopenharmony_ci
121862306a36Sopenharmony_ci	BUG_ON(block_given > new_blocks);
121962306a36Sopenharmony_ci
122062306a36Sopenharmony_ci	if (block_given < new_blocks) {
122162306a36Sopenharmony_ci		BUG_ON(!meta_ac);
122262306a36Sopenharmony_ci		status = ocfs2_create_new_meta_bhs(handle, et,
122362306a36Sopenharmony_ci						   new_blocks - block_given,
122462306a36Sopenharmony_ci						   meta_ac,
122562306a36Sopenharmony_ci						   &new_eb_bhs[block_given]);
122662306a36Sopenharmony_ci		if (status < 0) {
122762306a36Sopenharmony_ci			mlog_errno(status);
122862306a36Sopenharmony_ci			goto bail;
122962306a36Sopenharmony_ci		}
123062306a36Sopenharmony_ci	}
123162306a36Sopenharmony_ci
123262306a36Sopenharmony_ci	/* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
123362306a36Sopenharmony_ci	 * linked with the rest of the tree.
123462306a36Sopenharmony_ci	 * conversely, new_eb_bhs[0] is the new bottommost leaf.
123562306a36Sopenharmony_ci	 *
123662306a36Sopenharmony_ci	 * when we leave the loop, new_last_eb_blk will point to the
123762306a36Sopenharmony_ci	 * newest leaf, and next_blkno will point to the topmost extent
123862306a36Sopenharmony_ci	 * block. */
123962306a36Sopenharmony_ci	next_blkno = new_last_eb_blk = 0;
124062306a36Sopenharmony_ci	for(i = 0; i < new_blocks; i++) {
124162306a36Sopenharmony_ci		bh = new_eb_bhs[i];
124262306a36Sopenharmony_ci		eb = (struct ocfs2_extent_block *) bh->b_data;
124362306a36Sopenharmony_ci		/* ocfs2_create_new_meta_bhs() should create it right! */
124462306a36Sopenharmony_ci		BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
124562306a36Sopenharmony_ci		eb_el = &eb->h_list;
124662306a36Sopenharmony_ci
124762306a36Sopenharmony_ci		status = ocfs2_journal_access_eb(handle, et->et_ci, bh,
124862306a36Sopenharmony_ci						 OCFS2_JOURNAL_ACCESS_CREATE);
124962306a36Sopenharmony_ci		if (status < 0) {
125062306a36Sopenharmony_ci			mlog_errno(status);
125162306a36Sopenharmony_ci			goto bail;
125262306a36Sopenharmony_ci		}
125362306a36Sopenharmony_ci
125462306a36Sopenharmony_ci		eb->h_next_leaf_blk = 0;
125562306a36Sopenharmony_ci		eb_el->l_tree_depth = cpu_to_le16(i);
125662306a36Sopenharmony_ci		eb_el->l_next_free_rec = cpu_to_le16(1);
125762306a36Sopenharmony_ci		/*
125862306a36Sopenharmony_ci		 * This actually counts as an empty extent as
125962306a36Sopenharmony_ci		 * c_clusters == 0
126062306a36Sopenharmony_ci		 */
126162306a36Sopenharmony_ci		eb_el->l_recs[0].e_cpos = cpu_to_le32(new_cpos);
126262306a36Sopenharmony_ci		eb_el->l_recs[0].e_blkno = cpu_to_le64(next_blkno);
126362306a36Sopenharmony_ci		/*
126462306a36Sopenharmony_ci		 * eb_el isn't always an interior node, but even leaf
126562306a36Sopenharmony_ci		 * nodes want a zero'd flags and reserved field so
126662306a36Sopenharmony_ci		 * this gets the whole 32 bits regardless of use.
126762306a36Sopenharmony_ci		 */
126862306a36Sopenharmony_ci		eb_el->l_recs[0].e_int_clusters = cpu_to_le32(0);
126962306a36Sopenharmony_ci		if (!eb_el->l_tree_depth)
127062306a36Sopenharmony_ci			new_last_eb_blk = le64_to_cpu(eb->h_blkno);
127162306a36Sopenharmony_ci
127262306a36Sopenharmony_ci		ocfs2_journal_dirty(handle, bh);
127362306a36Sopenharmony_ci		next_blkno = le64_to_cpu(eb->h_blkno);
127462306a36Sopenharmony_ci	}
127562306a36Sopenharmony_ci
127662306a36Sopenharmony_ci	/* This is a bit hairy. We want to update up to three blocks
127762306a36Sopenharmony_ci	 * here without leaving any of them in an inconsistent state
127862306a36Sopenharmony_ci	 * in case of error. We don't have to worry about
127962306a36Sopenharmony_ci	 * journal_dirty erroring as it won't unless we've aborted the
128062306a36Sopenharmony_ci	 * handle (in which case we would never be here) so reserving
128162306a36Sopenharmony_ci	 * the write with journal_access is all we need to do. */
128262306a36Sopenharmony_ci	status = ocfs2_journal_access_eb(handle, et->et_ci, *last_eb_bh,
128362306a36Sopenharmony_ci					 OCFS2_JOURNAL_ACCESS_WRITE);
128462306a36Sopenharmony_ci	if (status < 0) {
128562306a36Sopenharmony_ci		mlog_errno(status);
128662306a36Sopenharmony_ci		goto bail;
128762306a36Sopenharmony_ci	}
128862306a36Sopenharmony_ci	status = ocfs2_et_root_journal_access(handle, et,
128962306a36Sopenharmony_ci					      OCFS2_JOURNAL_ACCESS_WRITE);
129062306a36Sopenharmony_ci	if (status < 0) {
129162306a36Sopenharmony_ci		mlog_errno(status);
129262306a36Sopenharmony_ci		goto bail;
129362306a36Sopenharmony_ci	}
129462306a36Sopenharmony_ci	if (eb_bh) {
129562306a36Sopenharmony_ci		status = ocfs2_journal_access_eb(handle, et->et_ci, eb_bh,
129662306a36Sopenharmony_ci						 OCFS2_JOURNAL_ACCESS_WRITE);
129762306a36Sopenharmony_ci		if (status < 0) {
129862306a36Sopenharmony_ci			mlog_errno(status);
129962306a36Sopenharmony_ci			goto bail;
130062306a36Sopenharmony_ci		}
130162306a36Sopenharmony_ci	}
130262306a36Sopenharmony_ci
130362306a36Sopenharmony_ci	/* Link the new branch into the rest of the tree (el will
130462306a36Sopenharmony_ci	 * either be on the root_bh, or the extent block passed in). */
130562306a36Sopenharmony_ci	i = le16_to_cpu(el->l_next_free_rec);
130662306a36Sopenharmony_ci	el->l_recs[i].e_blkno = cpu_to_le64(next_blkno);
130762306a36Sopenharmony_ci	el->l_recs[i].e_cpos = cpu_to_le32(new_cpos);
130862306a36Sopenharmony_ci	el->l_recs[i].e_int_clusters = 0;
130962306a36Sopenharmony_ci	le16_add_cpu(&el->l_next_free_rec, 1);
131062306a36Sopenharmony_ci
131162306a36Sopenharmony_ci	/* fe needs a new last extent block pointer, as does the
131262306a36Sopenharmony_ci	 * next_leaf on the previously last-extent-block. */
131362306a36Sopenharmony_ci	ocfs2_et_set_last_eb_blk(et, new_last_eb_blk);
131462306a36Sopenharmony_ci
131562306a36Sopenharmony_ci	eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
131662306a36Sopenharmony_ci	eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
131762306a36Sopenharmony_ci
131862306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, *last_eb_bh);
131962306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, et->et_root_bh);
132062306a36Sopenharmony_ci	if (eb_bh)
132162306a36Sopenharmony_ci		ocfs2_journal_dirty(handle, eb_bh);
132262306a36Sopenharmony_ci
132362306a36Sopenharmony_ci	/*
132462306a36Sopenharmony_ci	 * Some callers want to track the rightmost leaf so pass it
132562306a36Sopenharmony_ci	 * back here.
132662306a36Sopenharmony_ci	 */
132762306a36Sopenharmony_ci	brelse(*last_eb_bh);
132862306a36Sopenharmony_ci	get_bh(new_eb_bhs[0]);
132962306a36Sopenharmony_ci	*last_eb_bh = new_eb_bhs[0];
133062306a36Sopenharmony_ci
133162306a36Sopenharmony_ci	status = 0;
133262306a36Sopenharmony_cibail:
133362306a36Sopenharmony_ci	if (new_eb_bhs) {
133462306a36Sopenharmony_ci		for (i = 0; i < new_blocks; i++)
133562306a36Sopenharmony_ci			brelse(new_eb_bhs[i]);
133662306a36Sopenharmony_ci		kfree(new_eb_bhs);
133762306a36Sopenharmony_ci	}
133862306a36Sopenharmony_ci
133962306a36Sopenharmony_ci	return status;
134062306a36Sopenharmony_ci}
134162306a36Sopenharmony_ci
134262306a36Sopenharmony_ci/*
134362306a36Sopenharmony_ci * adds another level to the allocation tree.
134462306a36Sopenharmony_ci * returns back the new extent block so you can add a branch to it
134562306a36Sopenharmony_ci * after this call.
 *
 * One extent block is obtained: reused from the dealloc context when
 * ocfs2_is_dealloc_empty(et) is false, otherwise allocated from meta_ac
 * (BUG() if meta_ac is NULL in that case). The root list's tree depth,
 * l_next_free_rec and in-use records are copied into the new block; the
 * root is then left one level deeper with a single record at cpos 0
 * that points at the new block.
 *
 * On success 0 is returned and *ret_new_eb_bh is set to the new block's
 * buffer_head. On failure a negative status is returned, *ret_new_eb_bh
 * is left untouched and whatever buffer was obtained is passed to
 * brelse() here.
134662306a36Sopenharmony_ci */
134762306a36Sopenharmony_cistatic int ocfs2_shift_tree_depth(handle_t *handle,
134862306a36Sopenharmony_ci				  struct ocfs2_extent_tree *et,
134962306a36Sopenharmony_ci				  struct ocfs2_alloc_context *meta_ac,
135062306a36Sopenharmony_ci				  struct buffer_head **ret_new_eb_bh)
135162306a36Sopenharmony_ci{
135262306a36Sopenharmony_ci	int status, i, block_given = 0;
135362306a36Sopenharmony_ci	u32 new_clusters;
135462306a36Sopenharmony_ci	struct buffer_head *new_eb_bh = NULL;
135562306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
135662306a36Sopenharmony_ci	struct ocfs2_extent_list  *root_el;
135762306a36Sopenharmony_ci	struct ocfs2_extent_list  *eb_el;
135862306a36Sopenharmony_ci
	/*
	 * Exactly one block is needed. block_given is only an output of
	 * the reuse helper; it is not consulted again in this function.
	 */
135962306a36Sopenharmony_ci	if (!ocfs2_is_dealloc_empty(et)) {
136062306a36Sopenharmony_ci		status = ocfs2_reuse_blk_from_dealloc(handle, et,
136162306a36Sopenharmony_ci						      &new_eb_bh, 1,
136262306a36Sopenharmony_ci						      &block_given);
136362306a36Sopenharmony_ci	} else if (meta_ac) {
136462306a36Sopenharmony_ci		status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac,
136562306a36Sopenharmony_ci						   &new_eb_bh);
136662306a36Sopenharmony_ci
136762306a36Sopenharmony_ci	} else {
136862306a36Sopenharmony_ci		BUG();
136962306a36Sopenharmony_ci	}
137062306a36Sopenharmony_ci
137162306a36Sopenharmony_ci	if (status < 0) {
137262306a36Sopenharmony_ci		mlog_errno(status);
137362306a36Sopenharmony_ci		goto bail;
137462306a36Sopenharmony_ci	}
137562306a36Sopenharmony_ci
137662306a36Sopenharmony_ci	eb = (struct ocfs2_extent_block *) new_eb_bh->b_data;
137762306a36Sopenharmony_ci	/* ocfs2_create_new_meta_bhs() should create it right! */
137862306a36Sopenharmony_ci	BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
137962306a36Sopenharmony_ci
138062306a36Sopenharmony_ci	eb_el = &eb->h_list;
138162306a36Sopenharmony_ci	root_el = et->et_root_el;
138262306a36Sopenharmony_ci
	/* Reserve journal access to the new block before writing into it. */
138362306a36Sopenharmony_ci	status = ocfs2_journal_access_eb(handle, et->et_ci, new_eb_bh,
138462306a36Sopenharmony_ci					 OCFS2_JOURNAL_ACCESS_CREATE);
138562306a36Sopenharmony_ci	if (status < 0) {
138662306a36Sopenharmony_ci		mlog_errno(status);
138762306a36Sopenharmony_ci		goto bail;
138862306a36Sopenharmony_ci	}
138962306a36Sopenharmony_ci
139062306a36Sopenharmony_ci	/* copy the root extent list data into the new extent block */
139162306a36Sopenharmony_ci	eb_el->l_tree_depth = root_el->l_tree_depth;
139262306a36Sopenharmony_ci	eb_el->l_next_free_rec = root_el->l_next_free_rec;
139362306a36Sopenharmony_ci	for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++)
139462306a36Sopenharmony_ci		eb_el->l_recs[i] = root_el->l_recs[i];
139562306a36Sopenharmony_ci
139662306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, new_eb_bh);
139762306a36Sopenharmony_ci
	/* The root is only touched once journal access to it is granted. */
139862306a36Sopenharmony_ci	status = ocfs2_et_root_journal_access(handle, et,
139962306a36Sopenharmony_ci					      OCFS2_JOURNAL_ACCESS_WRITE);
140062306a36Sopenharmony_ci	if (status < 0) {
140162306a36Sopenharmony_ci		mlog_errno(status);
140262306a36Sopenharmony_ci		goto bail;
140362306a36Sopenharmony_ci	}
140462306a36Sopenharmony_ci
	/*
	 * Computed from the copied list; stored below as the cluster
	 * count of the root's single remaining record.
	 */
140562306a36Sopenharmony_ci	new_clusters = ocfs2_sum_rightmost_rec(eb_el);
140662306a36Sopenharmony_ci
140762306a36Sopenharmony_ci	/* update root_bh now */
140862306a36Sopenharmony_ci	le16_add_cpu(&root_el->l_tree_depth, 1);
140962306a36Sopenharmony_ci	root_el->l_recs[0].e_cpos = 0;
141062306a36Sopenharmony_ci	root_el->l_recs[0].e_blkno = eb->h_blkno;
141162306a36Sopenharmony_ci	root_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters);
	/* Zero every other previously in-use root record. */
141262306a36Sopenharmony_ci	for (i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++)
141362306a36Sopenharmony_ci		memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
141462306a36Sopenharmony_ci	root_el->l_next_free_rec = cpu_to_le16(1);
141562306a36Sopenharmony_ci
141662306a36Sopenharmony_ci	/* If this is our 1st tree depth shift, then last_eb_blk
141762306a36Sopenharmony_ci	 * becomes the allocated extent block */
141862306a36Sopenharmony_ci	if (root_el->l_tree_depth == cpu_to_le16(1))
141962306a36Sopenharmony_ci		ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
142062306a36Sopenharmony_ci
142162306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, et->et_root_bh);
142262306a36Sopenharmony_ci
	/*
	 * Hand the buffer to the caller and clear the local pointer so
	 * the brelse() at bail is passed NULL on the success path.
	 */
142362306a36Sopenharmony_ci	*ret_new_eb_bh = new_eb_bh;
142462306a36Sopenharmony_ci	new_eb_bh = NULL;
142562306a36Sopenharmony_ci	status = 0;
142662306a36Sopenharmony_cibail:
142762306a36Sopenharmony_ci	brelse(new_eb_bh);
142862306a36Sopenharmony_ci
142962306a36Sopenharmony_ci	return status;
143062306a36Sopenharmony_ci}
143162306a36Sopenharmony_ci
143262306a36Sopenharmony_ci/*
143362306a36Sopenharmony_ci * Should only be called when there is no space left in any of the
143462306a36Sopenharmony_ci * leaf nodes. What we want to do is find the lowest tree depth
143562306a36Sopenharmony_ci * non-leaf extent block with room for new records. There are three
143662306a36Sopenharmony_ci * valid results of this search:
143762306a36Sopenharmony_ci *
143862306a36Sopenharmony_ci * 1) a lowest extent block is found, then we pass it back in
143962306a36Sopenharmony_ci *    *target_bh and return '0'
144062306a36Sopenharmony_ci *
144162306a36Sopenharmony_ci * 2) the search fails to find anything, but the root_el has room. We
144262306a36Sopenharmony_ci *    pass NULL back in *target_bh, but still return '0'
144362306a36Sopenharmony_ci *
144462306a36Sopenharmony_ci * 3) the search fails to find anything AND the root_el is full, in
144562306a36Sopenharmony_ci *    which case we return > 0
144662306a36Sopenharmony_ci *
144762306a36Sopenharmony_ci * return status < 0 indicates an error.
 *
 * The walk starts at the root list and, while the current list's tree
 * depth is greater than 1, follows its last in-use record down to the
 * next extent block. Every block visited that still has a free record
 * slot replaces the previous candidate, so the one left at the end is
 * the deepest.
 *
 * A buffer passed back in *target_bh carries its own reference (taken
 * with get_bh()); ocfs2_grow_tree() drops it with brelse(). On error
 * *target_bh is NULL and any candidate reference taken during the walk
 * is released here.
144862306a36Sopenharmony_ci */
144962306a36Sopenharmony_cistatic int ocfs2_find_branch_target(struct ocfs2_extent_tree *et,
145062306a36Sopenharmony_ci				    struct buffer_head **target_bh)
145162306a36Sopenharmony_ci{
145262306a36Sopenharmony_ci	int status = 0, i;
145362306a36Sopenharmony_ci	u64 blkno;
145462306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
145562306a36Sopenharmony_ci	struct ocfs2_extent_list  *el;
145662306a36Sopenharmony_ci	struct buffer_head *bh = NULL;
145762306a36Sopenharmony_ci	struct buffer_head *lowest_bh = NULL;
145862306a36Sopenharmony_ci
145962306a36Sopenharmony_ci	*target_bh = NULL;
146062306a36Sopenharmony_ci
146162306a36Sopenharmony_ci	el = et->et_root_el;
146262306a36Sopenharmony_ci
146362306a36Sopenharmony_ci	while(le16_to_cpu(el->l_tree_depth) > 1) {
146462306a36Sopenharmony_ci		if (le16_to_cpu(el->l_next_free_rec) == 0) {
146562306a36Sopenharmony_ci			status = ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
146662306a36Sopenharmony_ci					"Owner %llu has empty extent list (next_free_rec == 0)\n",
146762306a36Sopenharmony_ci					(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
146862306a36Sopenharmony_ci			goto bail;
146962306a36Sopenharmony_ci		}
		/* Always descend through the last in-use record. */
147062306a36Sopenharmony_ci		i = le16_to_cpu(el->l_next_free_rec) - 1;
147162306a36Sopenharmony_ci		blkno = le64_to_cpu(el->l_recs[i].e_blkno);
147262306a36Sopenharmony_ci		if (!blkno) {
147362306a36Sopenharmony_ci			status = ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
147462306a36Sopenharmony_ci					"Owner %llu has extent list where extent # %d has no physical block start\n",
147562306a36Sopenharmony_ci					(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), i);
147662306a36Sopenharmony_ci			goto bail;
147762306a36Sopenharmony_ci		}
147862306a36Sopenharmony_ci
147962306a36Sopenharmony_ci		brelse(bh);
148062306a36Sopenharmony_ci		bh = NULL;
148162306a36Sopenharmony_ci
148262306a36Sopenharmony_ci		status = ocfs2_read_extent_block(et->et_ci, blkno, &bh);
148362306a36Sopenharmony_ci		if (status < 0) {
148462306a36Sopenharmony_ci			mlog_errno(status);
148562306a36Sopenharmony_ci			goto bail;
148662306a36Sopenharmony_ci		}
148762306a36Sopenharmony_ci
148862306a36Sopenharmony_ci		eb = (struct ocfs2_extent_block *) bh->b_data;
148962306a36Sopenharmony_ci		el = &eb->h_list;
149062306a36Sopenharmony_ci
		/*
		 * This block has room: make it the candidate. It gets a
		 * reference of its own because bh is released again on
		 * the next iteration and at bail.
		 */
149162306a36Sopenharmony_ci		if (le16_to_cpu(el->l_next_free_rec) <
149262306a36Sopenharmony_ci		    le16_to_cpu(el->l_count)) {
149362306a36Sopenharmony_ci			brelse(lowest_bh);
149462306a36Sopenharmony_ci			lowest_bh = bh;
149562306a36Sopenharmony_ci			get_bh(lowest_bh);
149662306a36Sopenharmony_ci		}
149762306a36Sopenharmony_ci	}
149862306a36Sopenharmony_ci
149962306a36Sopenharmony_ci	/* If we didn't find one and the fe doesn't have any room,
150062306a36Sopenharmony_ci	 * then return '1' */
150162306a36Sopenharmony_ci	el = et->et_root_el;
150262306a36Sopenharmony_ci	if (!lowest_bh && (el->l_next_free_rec == el->l_count))
150362306a36Sopenharmony_ci		status = 1;
150462306a36Sopenharmony_ci
150562306a36Sopenharmony_ci	*target_bh = lowest_bh;
	/* Ownership moved to the caller; keep bail from releasing it. */
	lowest_bh = NULL;
150662306a36Sopenharmony_cibail:
150762306a36Sopenharmony_ci	brelse(bh);
	/*
	 * Non-NULL only when an error interrupted the walk after a
	 * candidate had already been recorded; drop that reference so it
	 * is not leaked.
	 */
	brelse(lowest_bh);
150862306a36Sopenharmony_ci
150962306a36Sopenharmony_ci	return status;
151062306a36Sopenharmony_ci}
151162306a36Sopenharmony_ci
151262306a36Sopenharmony_ci/*
151362306a36Sopenharmony_ci * Grow a b-tree so that it has more records.
151462306a36Sopenharmony_ci *
151562306a36Sopenharmony_ci * We might shift the tree depth in which case existing paths should
151662306a36Sopenharmony_ci * be considered invalid.
151762306a36Sopenharmony_ci *
151862306a36Sopenharmony_ci * Tree depth after the grow is returned via *final_depth.
151962306a36Sopenharmony_ci *
152062306a36Sopenharmony_ci * *last_eb_bh will be updated by ocfs2_add_branch().
 *
 * final_depth may be NULL. When it is not, it is written on the error
 * paths as well, with whatever depth had been reached at that point.
 *
 * At least one of meta_ac and a non-empty dealloc context must be
 * available (BUG_ON otherwise).
 *
 * Returns a negative status from ocfs2_find_branch_target() or
 * ocfs2_shift_tree_depth() on failure; otherwise the result of the last
 * helper that ran (ocfs2_shift_tree_depth() for the depth 0 -> 1
 * shortcut, ocfs2_add_branch() in every other case).
152162306a36Sopenharmony_ci */
152262306a36Sopenharmony_cistatic int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et,
152362306a36Sopenharmony_ci			   int *final_depth, struct buffer_head **last_eb_bh,
152462306a36Sopenharmony_ci			   struct ocfs2_alloc_context *meta_ac)
152562306a36Sopenharmony_ci{
152662306a36Sopenharmony_ci	int ret, shift;
152762306a36Sopenharmony_ci	struct ocfs2_extent_list *el = et->et_root_el;
152862306a36Sopenharmony_ci	int depth = le16_to_cpu(el->l_tree_depth);
152962306a36Sopenharmony_ci	struct buffer_head *bh = NULL;
153062306a36Sopenharmony_ci
153162306a36Sopenharmony_ci	BUG_ON(meta_ac == NULL && ocfs2_is_dealloc_empty(et));
153262306a36Sopenharmony_ci
	/*
	 * shift < 0: error; shift > 0: no room anywhere, a new level is
	 * needed; shift == 0: bh is the block to branch from, or NULL
	 * when the root list itself has room.
	 */
153362306a36Sopenharmony_ci	shift = ocfs2_find_branch_target(et, &bh);
153462306a36Sopenharmony_ci	if (shift < 0) {
153562306a36Sopenharmony_ci		ret = shift;
153662306a36Sopenharmony_ci		mlog_errno(ret);
153762306a36Sopenharmony_ci		goto out;
153862306a36Sopenharmony_ci	}
153962306a36Sopenharmony_ci
154062306a36Sopenharmony_ci	/* We traveled all the way to the bottom of the allocation tree
154162306a36Sopenharmony_ci	 * and didn't find room for any more extents - we need to add
154262306a36Sopenharmony_ci	 * another tree level */
154362306a36Sopenharmony_ci	if (shift) {
154462306a36Sopenharmony_ci		BUG_ON(bh);
154562306a36Sopenharmony_ci		trace_ocfs2_grow_tree(
154662306a36Sopenharmony_ci			(unsigned long long)
154762306a36Sopenharmony_ci			ocfs2_metadata_cache_owner(et->et_ci),
154862306a36Sopenharmony_ci			depth);
154962306a36Sopenharmony_ci
155062306a36Sopenharmony_ci		/* ocfs2_shift_tree_depth will return us a buffer with
155162306a36Sopenharmony_ci		 * the new extent block (so we can pass that to
155262306a36Sopenharmony_ci		 * ocfs2_add_branch). */
155362306a36Sopenharmony_ci		ret = ocfs2_shift_tree_depth(handle, et, meta_ac, &bh);
155462306a36Sopenharmony_ci		if (ret < 0) {
155562306a36Sopenharmony_ci			mlog_errno(ret);
155662306a36Sopenharmony_ci			goto out;
155762306a36Sopenharmony_ci		}
155862306a36Sopenharmony_ci		depth++;
155962306a36Sopenharmony_ci		if (depth == 1) {
156062306a36Sopenharmony_ci			/*
156162306a36Sopenharmony_ci			 * Special case: we have room now if we shifted from
156262306a36Sopenharmony_ci			 * tree_depth 0, so no more work needs to be done.
156362306a36Sopenharmony_ci			 *
156462306a36Sopenharmony_ci			 * We won't be calling add_branch, so pass
156562306a36Sopenharmony_ci			 * back *last_eb_bh as the new leaf. At depth
156662306a36Sopenharmony_ci			 * zero, it should always be null so there's
156762306a36Sopenharmony_ci			 * no reason to brelse.
			 *
			 * get_bh() gives *last_eb_bh its own reference,
			 * since the local bh is released at out.
156862306a36Sopenharmony_ci			 */
156962306a36Sopenharmony_ci			BUG_ON(*last_eb_bh);
157062306a36Sopenharmony_ci			get_bh(bh);
157162306a36Sopenharmony_ci			*last_eb_bh = bh;
157262306a36Sopenharmony_ci			goto out;
157362306a36Sopenharmony_ci		}
157462306a36Sopenharmony_ci	}
157562306a36Sopenharmony_ci
157662306a36Sopenharmony_ci	/* call ocfs2_add_branch to add the final part of the tree with
157762306a36Sopenharmony_ci	 * the new data. */
157862306a36Sopenharmony_ci	ret = ocfs2_add_branch(handle, et, bh, last_eb_bh,
157962306a36Sopenharmony_ci			       meta_ac);
158062306a36Sopenharmony_ci	if (ret < 0)
158162306a36Sopenharmony_ci		mlog_errno(ret);
158262306a36Sopenharmony_ci
158362306a36Sopenharmony_ciout:
158462306a36Sopenharmony_ci	if (final_depth)
158562306a36Sopenharmony_ci		*final_depth = depth;
158662306a36Sopenharmony_ci	brelse(bh);
158762306a36Sopenharmony_ci	return ret;
158862306a36Sopenharmony_ci}
158962306a36Sopenharmony_ci
159062306a36Sopenharmony_ci/*
159162306a36Sopenharmony_ci * This function will discard the rightmost extent record.
 *
 * Concretely: the first l_next_free_rec records are moved one slot to
 * the right, so whatever was in l_recs[l_next_free_rec] is overwritten.
 * The BUG_ON()s require at least one in-use record and a list that is
 * not full, so that slot is always inside the list.
 *
 * l_next_free_rec is not changed and l_recs[0] keeps its old contents;
 * the caller is expected to fix both up (ocfs2_create_empty_extent()
 * does).
159262306a36Sopenharmony_ci */
159362306a36Sopenharmony_cistatic void ocfs2_shift_records_right(struct ocfs2_extent_list *el)
159462306a36Sopenharmony_ci{
159562306a36Sopenharmony_ci	int next_free = le16_to_cpu(el->l_next_free_rec);
159662306a36Sopenharmony_ci	int count = le16_to_cpu(el->l_count);
159762306a36Sopenharmony_ci	unsigned int num_bytes;
159862306a36Sopenharmony_ci
159962306a36Sopenharmony_ci	BUG_ON(!next_free);
160062306a36Sopenharmony_ci	/* This will cause us to go off the end of our extent list. */
160162306a36Sopenharmony_ci	BUG_ON(next_free >= count);
160262306a36Sopenharmony_ci
160362306a36Sopenharmony_ci	num_bytes = sizeof(struct ocfs2_extent_rec) * next_free;
160462306a36Sopenharmony_ci
	/* Source and destination overlap, hence memmove(). */
160562306a36Sopenharmony_ci	memmove(&el->l_recs[1], &el->l_recs[0], num_bytes);
160662306a36Sopenharmony_ci}
160762306a36Sopenharmony_ci
/*
 * Insert *insert_rec into the extent list el, keeping the records
 * ordered by e_cpos.
 *
 * If l_recs[0] is an empty extent it is dropped first, which frees the
 * slot the new record needs. The new record goes in front of the first
 * record whose e_cpos is greater than insert_rec's, or at the tail when
 * there is none. l_next_free_rec ends up one larger than the number of
 * records left after the empty extent (if any) was dropped.
 *
 * BUG()s if the list has no records at all, or if it is full and does
 * not start with an empty extent.
 */
160862306a36Sopenharmony_cistatic void ocfs2_rotate_leaf(struct ocfs2_extent_list *el,
160962306a36Sopenharmony_ci			      struct ocfs2_extent_rec *insert_rec)
161062306a36Sopenharmony_ci{
161162306a36Sopenharmony_ci	int i, insert_index, next_free, has_empty, num_bytes;
161262306a36Sopenharmony_ci	u32 insert_cpos = le32_to_cpu(insert_rec->e_cpos);
161362306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
161462306a36Sopenharmony_ci
161562306a36Sopenharmony_ci	next_free = le16_to_cpu(el->l_next_free_rec);
161662306a36Sopenharmony_ci	has_empty = ocfs2_is_empty_extent(&el->l_recs[0]);
161762306a36Sopenharmony_ci
161862306a36Sopenharmony_ci	BUG_ON(!next_free);
161962306a36Sopenharmony_ci
162062306a36Sopenharmony_ci	/* The tree code before us didn't allow enough room in the leaf. */
162162306a36Sopenharmony_ci	BUG_ON(el->l_next_free_rec == el->l_count && !has_empty);
162262306a36Sopenharmony_ci
162362306a36Sopenharmony_ci	/*
162462306a36Sopenharmony_ci	 * The easiest way to approach this is to just remove the
162562306a36Sopenharmony_ci	 * empty extent and temporarily decrement next_free.
162662306a36Sopenharmony_ci	 */
162762306a36Sopenharmony_ci	if (has_empty) {
162862306a36Sopenharmony_ci		/*
162962306a36Sopenharmony_ci		 * If next_free was 1 (only an empty extent), this
163062306a36Sopenharmony_ci		 * loop won't execute, which is fine. We still want
163162306a36Sopenharmony_ci		 * the decrement below to happen.
163262306a36Sopenharmony_ci		 */
163362306a36Sopenharmony_ci		for(i = 0; i < (next_free - 1); i++)
163462306a36Sopenharmony_ci			el->l_recs[i] = el->l_recs[i+1];
163562306a36Sopenharmony_ci
163662306a36Sopenharmony_ci		next_free--;
163762306a36Sopenharmony_ci	}
163862306a36Sopenharmony_ci
163962306a36Sopenharmony_ci	/*
164062306a36Sopenharmony_ci	 * Figure out what the new record index should be.
	 * If no record starts past insert_cpos the loop runs to
	 * completion and the index is next_free (append at the tail).
164162306a36Sopenharmony_ci	 */
164262306a36Sopenharmony_ci	for(i = 0; i < next_free; i++) {
164362306a36Sopenharmony_ci		rec = &el->l_recs[i];
164462306a36Sopenharmony_ci
164562306a36Sopenharmony_ci		if (insert_cpos < le32_to_cpu(rec->e_cpos))
164662306a36Sopenharmony_ci			break;
164762306a36Sopenharmony_ci	}
164862306a36Sopenharmony_ci	insert_index = i;
164962306a36Sopenharmony_ci
165062306a36Sopenharmony_ci	trace_ocfs2_rotate_leaf(insert_cpos, insert_index,
165162306a36Sopenharmony_ci				has_empty, next_free,
165262306a36Sopenharmony_ci				le16_to_cpu(el->l_count));
165362306a36Sopenharmony_ci
165462306a36Sopenharmony_ci	BUG_ON(insert_index < 0);
165562306a36Sopenharmony_ci	BUG_ON(insert_index >= le16_to_cpu(el->l_count));
165662306a36Sopenharmony_ci	BUG_ON(insert_index > next_free);
165762306a36Sopenharmony_ci
165862306a36Sopenharmony_ci	/*
165962306a36Sopenharmony_ci	 * No need to memmove if we're just adding to the tail.
166062306a36Sopenharmony_ci	 */
166162306a36Sopenharmony_ci	if (insert_index != next_free) {
166262306a36Sopenharmony_ci		BUG_ON(next_free >= le16_to_cpu(el->l_count));
166362306a36Sopenharmony_ci
		/* Open a one-record gap at insert_index. */
166462306a36Sopenharmony_ci		num_bytes = next_free - insert_index;
166562306a36Sopenharmony_ci		num_bytes *= sizeof(struct ocfs2_extent_rec);
166662306a36Sopenharmony_ci		memmove(&el->l_recs[insert_index + 1],
166762306a36Sopenharmony_ci			&el->l_recs[insert_index],
166862306a36Sopenharmony_ci			num_bytes);
166962306a36Sopenharmony_ci	}
167062306a36Sopenharmony_ci
167162306a36Sopenharmony_ci	/*
167262306a36Sopenharmony_ci	 * Either we had an empty extent, and need to re-increment or
167362306a36Sopenharmony_ci	 * there was no empty extent on a non full rightmost leaf node,
167462306a36Sopenharmony_ci	 * in which case we still need to increment.
167562306a36Sopenharmony_ci	 */
167662306a36Sopenharmony_ci	next_free++;
167762306a36Sopenharmony_ci	el->l_next_free_rec = cpu_to_le16(next_free);
167862306a36Sopenharmony_ci	/*
167962306a36Sopenharmony_ci	 * Make sure none of the math above just messed up our tree.
168062306a36Sopenharmony_ci	 */
168162306a36Sopenharmony_ci	BUG_ON(le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count));
168262306a36Sopenharmony_ci
168362306a36Sopenharmony_ci	el->l_recs[insert_index] = *insert_rec;
168462306a36Sopenharmony_ci
168562306a36Sopenharmony_ci}
168662306a36Sopenharmony_ci
/*
 * Drop the empty extent at the front of el, if there is one.
 *
 * When l_recs[0] is an empty extent, the remaining records are moved
 * one slot to the left, the slot that becomes unused at the end is
 * zeroed and l_next_free_rec is decremented. When l_recs[0] is not
 * empty the list is left untouched.
 *
 * BUG()s if the list has no records.
 */
168762306a36Sopenharmony_cistatic void ocfs2_remove_empty_extent(struct ocfs2_extent_list *el)
168862306a36Sopenharmony_ci{
168962306a36Sopenharmony_ci	int size, num_recs = le16_to_cpu(el->l_next_free_rec);
169062306a36Sopenharmony_ci
169162306a36Sopenharmony_ci	BUG_ON(num_recs == 0);
169262306a36Sopenharmony_ci
169362306a36Sopenharmony_ci	if (ocfs2_is_empty_extent(&el->l_recs[0])) {
		/* From here on num_recs is the count without the empty one. */
169462306a36Sopenharmony_ci		num_recs--;
169562306a36Sopenharmony_ci		size = num_recs * sizeof(struct ocfs2_extent_rec);
169662306a36Sopenharmony_ci		memmove(&el->l_recs[0], &el->l_recs[1], size);
		/* Clear the stale copy left behind in the old last slot. */
169762306a36Sopenharmony_ci		memset(&el->l_recs[num_recs], 0,
169862306a36Sopenharmony_ci		       sizeof(struct ocfs2_extent_rec));
169962306a36Sopenharmony_ci		el->l_next_free_rec = cpu_to_le16(num_recs);
170062306a36Sopenharmony_ci	}
170162306a36Sopenharmony_ci}
170262306a36Sopenharmony_ci
170362306a36Sopenharmony_ci/*
170462306a36Sopenharmony_ci * Create an empty extent record.
170562306a36Sopenharmony_ci *
170662306a36Sopenharmony_ci * l_next_free_rec may be updated.
170762306a36Sopenharmony_ci *
170862306a36Sopenharmony_ci * If an empty extent already exists do nothing.
 *
 * Only valid on a leaf list (BUG_ON if l_tree_depth != 0). A list with
 * no records simply gets a zeroed l_recs[0]. Otherwise the existing
 * records are shifted one slot to the right and l_recs[0] is zeroed;
 * the list must not be full in that case (mlog_bug_on_msg()).
170962306a36Sopenharmony_ci */
171062306a36Sopenharmony_cistatic void ocfs2_create_empty_extent(struct ocfs2_extent_list *el)
171162306a36Sopenharmony_ci{
171262306a36Sopenharmony_ci	int next_free = le16_to_cpu(el->l_next_free_rec);
171362306a36Sopenharmony_ci
171462306a36Sopenharmony_ci	BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
171562306a36Sopenharmony_ci
	/* Nothing to shift in a list without records. */
171662306a36Sopenharmony_ci	if (next_free == 0)
171762306a36Sopenharmony_ci		goto set_and_inc;
171862306a36Sopenharmony_ci
171962306a36Sopenharmony_ci	if (ocfs2_is_empty_extent(&el->l_recs[0]))
172062306a36Sopenharmony_ci		return;
172162306a36Sopenharmony_ci
172262306a36Sopenharmony_ci	mlog_bug_on_msg(el->l_count == el->l_next_free_rec,
172362306a36Sopenharmony_ci			"Asked to create an empty extent in a full list:\n"
172462306a36Sopenharmony_ci			"count = %u, tree depth = %u",
172562306a36Sopenharmony_ci			le16_to_cpu(el->l_count),
172662306a36Sopenharmony_ci			le16_to_cpu(el->l_tree_depth));
172762306a36Sopenharmony_ci
172862306a36Sopenharmony_ci	ocfs2_shift_records_right(el);
172962306a36Sopenharmony_ci
173062306a36Sopenharmony_ciset_and_inc:
173162306a36Sopenharmony_ci	le16_add_cpu(&el->l_next_free_rec, 1);
173262306a36Sopenharmony_ci	memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
173362306a36Sopenharmony_ci}
173462306a36Sopenharmony_ci
173562306a36Sopenharmony_ci/*
173662306a36Sopenharmony_ci * For a rotation which involves two leaf nodes, the "root node" is
173762306a36Sopenharmony_ci * the lowest level tree node which contains a path to both leafs. This
173862306a36Sopenharmony_ci * resulting set of information can be used to form a complete "subtree"
173962306a36Sopenharmony_ci *
174062306a36Sopenharmony_ci * This function is passed two full paths from the dinode down to a
174162306a36Sopenharmony_ci * pair of adjacent leaves. Its task is to figure out which path
174262306a36Sopenharmony_ci * index contains the subtree root - this can be the root index itself
174362306a36Sopenharmony_ci * in a worst-case rotation.
174462306a36Sopenharmony_ci *
174562306a36Sopenharmony_ci * The array index of the subtree root is passed back.
 *
 * Both paths must share the same root buffer (BUG_ON otherwise). The
 * paths are compared node by node, by block number, starting at index
 * 1; the value returned is the index just before the first node at
 * which they differ. Running past left->p_tree_depth without finding a
 * difference trips mlog_bug_on_msg().
174662306a36Sopenharmony_ci */
174762306a36Sopenharmony_ciint ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
174862306a36Sopenharmony_ci			    struct ocfs2_path *left,
174962306a36Sopenharmony_ci			    struct ocfs2_path *right)
175062306a36Sopenharmony_ci{
175162306a36Sopenharmony_ci	int i = 0;
175262306a36Sopenharmony_ci
175362306a36Sopenharmony_ci	/*
175462306a36Sopenharmony_ci	 * Check that the caller passed in two paths from the same tree.
175562306a36Sopenharmony_ci	 */
175662306a36Sopenharmony_ci	BUG_ON(path_root_bh(left) != path_root_bh(right));
175762306a36Sopenharmony_ci
175862306a36Sopenharmony_ci	do {
175962306a36Sopenharmony_ci		i++;
176062306a36Sopenharmony_ci
176162306a36Sopenharmony_ci		/*
176262306a36Sopenharmony_ci		 * The caller didn't pass two adjacent paths.
176362306a36Sopenharmony_ci		 */
176462306a36Sopenharmony_ci		mlog_bug_on_msg(i > left->p_tree_depth,
176562306a36Sopenharmony_ci				"Owner %llu, left depth %u, right depth %u\n"
176662306a36Sopenharmony_ci				"left leaf blk %llu, right leaf blk %llu\n",
176762306a36Sopenharmony_ci				(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
176862306a36Sopenharmony_ci				left->p_tree_depth, right->p_tree_depth,
176962306a36Sopenharmony_ci				(unsigned long long)path_leaf_bh(left)->b_blocknr,
177062306a36Sopenharmony_ci				(unsigned long long)path_leaf_bh(right)->b_blocknr);
177162306a36Sopenharmony_ci	} while (left->p_node[i].bh->b_blocknr ==
177262306a36Sopenharmony_ci		 right->p_node[i].bh->b_blocknr);
177362306a36Sopenharmony_ci
	/* i is the first index where the paths differ; step back one. */
177462306a36Sopenharmony_ci	return i - 1;
177562306a36Sopenharmony_ci}
177662306a36Sopenharmony_ci
/*
 * Callback type taken by __ocfs2_find_path() as its 'func' argument,
 * next to an opaque 'data' pointer. NOTE(review): presumably the void *
 * parameter receives that data pointer and the buffer_head is a block
 * read during the walk - confirm against the call site.
 */
177762306a36Sopenharmony_citypedef void (path_insert_t)(void *, struct buffer_head *);
177862306a36Sopenharmony_ci
177962306a36Sopenharmony_ci/*
178062306a36Sopenharmony_ci * Traverse a btree path in search of cpos, starting at root_el.
178162306a36Sopenharmony_ci *
178262306a36Sopenharmony_ci * This code can be called with a cpos larger than the tree, in which
178362306a36Sopenharmony_ci * case it will return the rightmost path.
178462306a36Sopenharmony_ci */
178562306a36Sopenharmony_cistatic int __ocfs2_find_path(struct ocfs2_caching_info *ci,
178662306a36Sopenharmony_ci			     struct ocfs2_extent_list *root_el, u32 cpos,
178762306a36Sopenharmony_ci			     path_insert_t *func, void *data)
178862306a36Sopenharmony_ci{
178962306a36Sopenharmony_ci	int i, ret = 0;
179062306a36Sopenharmony_ci	u32 range;
179162306a36Sopenharmony_ci	u64 blkno;
179262306a36Sopenharmony_ci	struct buffer_head *bh = NULL;
179362306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
179462306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
179562306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
179662306a36Sopenharmony_ci
179762306a36Sopenharmony_ci	el = root_el;
179862306a36Sopenharmony_ci	while (el->l_tree_depth) {
179962306a36Sopenharmony_ci		if (le16_to_cpu(el->l_next_free_rec) == 0) {
180062306a36Sopenharmony_ci			ocfs2_error(ocfs2_metadata_cache_get_super(ci),
180162306a36Sopenharmony_ci				    "Owner %llu has empty extent list at depth %u\n",
180262306a36Sopenharmony_ci				    (unsigned long long)ocfs2_metadata_cache_owner(ci),
180362306a36Sopenharmony_ci				    le16_to_cpu(el->l_tree_depth));
180462306a36Sopenharmony_ci			ret = -EROFS;
180562306a36Sopenharmony_ci			goto out;
180662306a36Sopenharmony_ci
180762306a36Sopenharmony_ci		}
180862306a36Sopenharmony_ci
180962306a36Sopenharmony_ci		for(i = 0; i < le16_to_cpu(el->l_next_free_rec) - 1; i++) {
181062306a36Sopenharmony_ci			rec = &el->l_recs[i];
181162306a36Sopenharmony_ci
181262306a36Sopenharmony_ci			/*
181362306a36Sopenharmony_ci			 * In the case that cpos is off the allocation
181462306a36Sopenharmony_ci			 * tree, this should just wind up returning the
181562306a36Sopenharmony_ci			 * rightmost record.
181662306a36Sopenharmony_ci			 */
181762306a36Sopenharmony_ci			range = le32_to_cpu(rec->e_cpos) +
181862306a36Sopenharmony_ci				ocfs2_rec_clusters(el, rec);
181962306a36Sopenharmony_ci			if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range)
182062306a36Sopenharmony_ci			    break;
182162306a36Sopenharmony_ci		}
182262306a36Sopenharmony_ci
182362306a36Sopenharmony_ci		blkno = le64_to_cpu(el->l_recs[i].e_blkno);
182462306a36Sopenharmony_ci		if (blkno == 0) {
182562306a36Sopenharmony_ci			ocfs2_error(ocfs2_metadata_cache_get_super(ci),
182662306a36Sopenharmony_ci				    "Owner %llu has bad blkno in extent list at depth %u (index %d)\n",
182762306a36Sopenharmony_ci				    (unsigned long long)ocfs2_metadata_cache_owner(ci),
182862306a36Sopenharmony_ci				    le16_to_cpu(el->l_tree_depth), i);
182962306a36Sopenharmony_ci			ret = -EROFS;
183062306a36Sopenharmony_ci			goto out;
183162306a36Sopenharmony_ci		}
183262306a36Sopenharmony_ci
183362306a36Sopenharmony_ci		brelse(bh);
183462306a36Sopenharmony_ci		bh = NULL;
183562306a36Sopenharmony_ci		ret = ocfs2_read_extent_block(ci, blkno, &bh);
183662306a36Sopenharmony_ci		if (ret) {
183762306a36Sopenharmony_ci			mlog_errno(ret);
183862306a36Sopenharmony_ci			goto out;
183962306a36Sopenharmony_ci		}
184062306a36Sopenharmony_ci
184162306a36Sopenharmony_ci		eb = (struct ocfs2_extent_block *) bh->b_data;
184262306a36Sopenharmony_ci		el = &eb->h_list;
184362306a36Sopenharmony_ci
184462306a36Sopenharmony_ci		if (le16_to_cpu(el->l_next_free_rec) >
184562306a36Sopenharmony_ci		    le16_to_cpu(el->l_count)) {
184662306a36Sopenharmony_ci			ocfs2_error(ocfs2_metadata_cache_get_super(ci),
184762306a36Sopenharmony_ci				    "Owner %llu has bad count in extent list at block %llu (next free=%u, count=%u)\n",
184862306a36Sopenharmony_ci				    (unsigned long long)ocfs2_metadata_cache_owner(ci),
184962306a36Sopenharmony_ci				    (unsigned long long)bh->b_blocknr,
185062306a36Sopenharmony_ci				    le16_to_cpu(el->l_next_free_rec),
185162306a36Sopenharmony_ci				    le16_to_cpu(el->l_count));
185262306a36Sopenharmony_ci			ret = -EROFS;
185362306a36Sopenharmony_ci			goto out;
185462306a36Sopenharmony_ci		}
185562306a36Sopenharmony_ci
185662306a36Sopenharmony_ci		if (func)
185762306a36Sopenharmony_ci			func(data, bh);
185862306a36Sopenharmony_ci	}
185962306a36Sopenharmony_ci
186062306a36Sopenharmony_ciout:
186162306a36Sopenharmony_ci	/*
186262306a36Sopenharmony_ci	 * Catch any trailing bh that the loop didn't handle.
186362306a36Sopenharmony_ci	 */
186462306a36Sopenharmony_ci	brelse(bh);
186562306a36Sopenharmony_ci
186662306a36Sopenharmony_ci	return ret;
186762306a36Sopenharmony_ci}
186862306a36Sopenharmony_ci
186962306a36Sopenharmony_ci/*
187062306a36Sopenharmony_ci * Given an initialized path (that is, it has a valid root extent
187162306a36Sopenharmony_ci * list), this function will traverse the btree in search of the path
187262306a36Sopenharmony_ci * which would contain cpos.
187362306a36Sopenharmony_ci *
187462306a36Sopenharmony_ci * The path traveled is recorded in the path structure.
187562306a36Sopenharmony_ci *
187662306a36Sopenharmony_ci * Note that this will not do any comparisons on leaf node extent
187762306a36Sopenharmony_ci * records, so it will work fine in the case that we just added a tree
187862306a36Sopenharmony_ci * branch.
187962306a36Sopenharmony_ci */
/* State threaded through find_path_ins() while ocfs2_find_path() walks. */
struct find_path_data {
	int index;			/* p_node slot the next block goes into */
	struct ocfs2_path *path;	/* path structure being filled in */
};
188462306a36Sopenharmony_cistatic void find_path_ins(void *data, struct buffer_head *bh)
188562306a36Sopenharmony_ci{
188662306a36Sopenharmony_ci	struct find_path_data *fp = data;
188762306a36Sopenharmony_ci
188862306a36Sopenharmony_ci	get_bh(bh);
188962306a36Sopenharmony_ci	ocfs2_path_insert_eb(fp->path, fp->index, bh);
189062306a36Sopenharmony_ci	fp->index++;
189162306a36Sopenharmony_ci}
189262306a36Sopenharmony_ciint ocfs2_find_path(struct ocfs2_caching_info *ci,
189362306a36Sopenharmony_ci		    struct ocfs2_path *path, u32 cpos)
189462306a36Sopenharmony_ci{
189562306a36Sopenharmony_ci	struct find_path_data data;
189662306a36Sopenharmony_ci
189762306a36Sopenharmony_ci	data.index = 1;
189862306a36Sopenharmony_ci	data.path = path;
189962306a36Sopenharmony_ci	return __ocfs2_find_path(ci, path_root_el(path), cpos,
190062306a36Sopenharmony_ci				 find_path_ins, &data);
190162306a36Sopenharmony_ci}
190262306a36Sopenharmony_ci
190362306a36Sopenharmony_cistatic void find_leaf_ins(void *data, struct buffer_head *bh)
190462306a36Sopenharmony_ci{
190562306a36Sopenharmony_ci	struct ocfs2_extent_block *eb =(struct ocfs2_extent_block *)bh->b_data;
190662306a36Sopenharmony_ci	struct ocfs2_extent_list *el = &eb->h_list;
190762306a36Sopenharmony_ci	struct buffer_head **ret = data;
190862306a36Sopenharmony_ci
190962306a36Sopenharmony_ci	/* We want to retain only the leaf block. */
191062306a36Sopenharmony_ci	if (le16_to_cpu(el->l_tree_depth) == 0) {
191162306a36Sopenharmony_ci		get_bh(bh);
191262306a36Sopenharmony_ci		*ret = bh;
191362306a36Sopenharmony_ci	}
191462306a36Sopenharmony_ci}
191562306a36Sopenharmony_ci/*
191662306a36Sopenharmony_ci * Find the leaf block in the tree which would contain cpos. No
191762306a36Sopenharmony_ci * checking of the actual leaf is done.
191862306a36Sopenharmony_ci *
191962306a36Sopenharmony_ci * Some paths want to call this instead of allocating a path structure
192062306a36Sopenharmony_ci * and calling ocfs2_find_path().
192162306a36Sopenharmony_ci *
192262306a36Sopenharmony_ci * This function doesn't handle non btree extent lists.
192362306a36Sopenharmony_ci */
192462306a36Sopenharmony_ciint ocfs2_find_leaf(struct ocfs2_caching_info *ci,
192562306a36Sopenharmony_ci		    struct ocfs2_extent_list *root_el, u32 cpos,
192662306a36Sopenharmony_ci		    struct buffer_head **leaf_bh)
192762306a36Sopenharmony_ci{
192862306a36Sopenharmony_ci	int ret;
192962306a36Sopenharmony_ci	struct buffer_head *bh = NULL;
193062306a36Sopenharmony_ci
193162306a36Sopenharmony_ci	ret = __ocfs2_find_path(ci, root_el, cpos, find_leaf_ins, &bh);
193262306a36Sopenharmony_ci	if (ret) {
193362306a36Sopenharmony_ci		mlog_errno(ret);
193462306a36Sopenharmony_ci		goto out;
193562306a36Sopenharmony_ci	}
193662306a36Sopenharmony_ci
193762306a36Sopenharmony_ci	*leaf_bh = bh;
193862306a36Sopenharmony_ciout:
193962306a36Sopenharmony_ci	return ret;
194062306a36Sopenharmony_ci}
194162306a36Sopenharmony_ci
194262306a36Sopenharmony_ci/*
194362306a36Sopenharmony_ci * Adjust the adjacent records (left_rec, right_rec) involved in a rotation.
194462306a36Sopenharmony_ci *
194562306a36Sopenharmony_ci * Basically, we've moved stuff around at the bottom of the tree and
194662306a36Sopenharmony_ci * we need to fix up the extent records above the changes to reflect
194762306a36Sopenharmony_ci * the new changes.
194862306a36Sopenharmony_ci *
194962306a36Sopenharmony_ci * left_rec: the record on the left.
195062306a36Sopenharmony_ci * right_rec: the record to the right of left_rec
195162306a36Sopenharmony_ci * right_child_el: is the child list pointed to by right_rec
195262306a36Sopenharmony_ci *
195362306a36Sopenharmony_ci * By definition, this only works on interior nodes.
195462306a36Sopenharmony_ci */
195562306a36Sopenharmony_cistatic void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
195662306a36Sopenharmony_ci				  struct ocfs2_extent_rec *right_rec,
195762306a36Sopenharmony_ci				  struct ocfs2_extent_list *right_child_el)
195862306a36Sopenharmony_ci{
195962306a36Sopenharmony_ci	u32 left_clusters, right_end;
196062306a36Sopenharmony_ci
196162306a36Sopenharmony_ci	/*
196262306a36Sopenharmony_ci	 * Interior nodes never have holes. Their cpos is the cpos of
196362306a36Sopenharmony_ci	 * the leftmost record in their child list. Their cluster
196462306a36Sopenharmony_ci	 * count covers the full theoretical range of their child list
196562306a36Sopenharmony_ci	 * - the range between their cpos and the cpos of the record
196662306a36Sopenharmony_ci	 * immediately to their right.
196762306a36Sopenharmony_ci	 */
196862306a36Sopenharmony_ci	left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
196962306a36Sopenharmony_ci	if (!ocfs2_rec_clusters(right_child_el, &right_child_el->l_recs[0])) {
197062306a36Sopenharmony_ci		BUG_ON(right_child_el->l_tree_depth);
197162306a36Sopenharmony_ci		BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
197262306a36Sopenharmony_ci		left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
197362306a36Sopenharmony_ci	}
197462306a36Sopenharmony_ci	left_clusters -= le32_to_cpu(left_rec->e_cpos);
197562306a36Sopenharmony_ci	left_rec->e_int_clusters = cpu_to_le32(left_clusters);
197662306a36Sopenharmony_ci
197762306a36Sopenharmony_ci	/*
197862306a36Sopenharmony_ci	 * Calculate the rightmost cluster count boundary before
197962306a36Sopenharmony_ci	 * moving cpos - we will need to adjust clusters after
198062306a36Sopenharmony_ci	 * updating e_cpos to keep the same highest cluster count.
198162306a36Sopenharmony_ci	 */
198262306a36Sopenharmony_ci	right_end = le32_to_cpu(right_rec->e_cpos);
198362306a36Sopenharmony_ci	right_end += le32_to_cpu(right_rec->e_int_clusters);
198462306a36Sopenharmony_ci
198562306a36Sopenharmony_ci	right_rec->e_cpos = left_rec->e_cpos;
198662306a36Sopenharmony_ci	le32_add_cpu(&right_rec->e_cpos, left_clusters);
198762306a36Sopenharmony_ci
198862306a36Sopenharmony_ci	right_end -= le32_to_cpu(right_rec->e_cpos);
198962306a36Sopenharmony_ci	right_rec->e_int_clusters = cpu_to_le32(right_end);
199062306a36Sopenharmony_ci}
199162306a36Sopenharmony_ci
199262306a36Sopenharmony_ci/*
199362306a36Sopenharmony_ci * Adjust the adjacent root node records involved in a
199462306a36Sopenharmony_ci * rotation. left_el_blkno is passed in as a key so that we can easily
199562306a36Sopenharmony_ci * find it's index in the root list.
199662306a36Sopenharmony_ci */
199762306a36Sopenharmony_cistatic void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el,
199862306a36Sopenharmony_ci				      struct ocfs2_extent_list *left_el,
199962306a36Sopenharmony_ci				      struct ocfs2_extent_list *right_el,
200062306a36Sopenharmony_ci				      u64 left_el_blkno)
200162306a36Sopenharmony_ci{
200262306a36Sopenharmony_ci	int i;
200362306a36Sopenharmony_ci
200462306a36Sopenharmony_ci	BUG_ON(le16_to_cpu(root_el->l_tree_depth) <=
200562306a36Sopenharmony_ci	       le16_to_cpu(left_el->l_tree_depth));
200662306a36Sopenharmony_ci
200762306a36Sopenharmony_ci	for(i = 0; i < le16_to_cpu(root_el->l_next_free_rec) - 1; i++) {
200862306a36Sopenharmony_ci		if (le64_to_cpu(root_el->l_recs[i].e_blkno) == left_el_blkno)
200962306a36Sopenharmony_ci			break;
201062306a36Sopenharmony_ci	}
201162306a36Sopenharmony_ci
201262306a36Sopenharmony_ci	/*
201362306a36Sopenharmony_ci	 * The path walking code should have never returned a root and
201462306a36Sopenharmony_ci	 * two paths which are not adjacent.
201562306a36Sopenharmony_ci	 */
201662306a36Sopenharmony_ci	BUG_ON(i >= (le16_to_cpu(root_el->l_next_free_rec) - 1));
201762306a36Sopenharmony_ci
201862306a36Sopenharmony_ci	ocfs2_adjust_adjacent_records(&root_el->l_recs[i],
201962306a36Sopenharmony_ci				      &root_el->l_recs[i + 1], right_el);
202062306a36Sopenharmony_ci}
202162306a36Sopenharmony_ci
202262306a36Sopenharmony_ci/*
202362306a36Sopenharmony_ci * We've changed a leaf block (in right_path) and need to reflect that
202462306a36Sopenharmony_ci * change back up the subtree.
202562306a36Sopenharmony_ci *
202662306a36Sopenharmony_ci * This happens in multiple places:
202762306a36Sopenharmony_ci *   - When we've moved an extent record from the left path leaf to the right
202862306a36Sopenharmony_ci *     path leaf to make room for an empty extent in the left path leaf.
202962306a36Sopenharmony_ci *   - When our insert into the right path leaf is at the leftmost edge
203062306a36Sopenharmony_ci *     and requires an update of the path immediately to it's left. This
203162306a36Sopenharmony_ci *     can occur at the end of some types of rotation and appending inserts.
203262306a36Sopenharmony_ci *   - When we've adjusted the last extent record in the left path leaf and the
203362306a36Sopenharmony_ci *     1st extent record in the right path leaf during cross extent block merge.
203462306a36Sopenharmony_ci */
203562306a36Sopenharmony_cistatic void ocfs2_complete_edge_insert(handle_t *handle,
203662306a36Sopenharmony_ci				       struct ocfs2_path *left_path,
203762306a36Sopenharmony_ci				       struct ocfs2_path *right_path,
203862306a36Sopenharmony_ci				       int subtree_index)
203962306a36Sopenharmony_ci{
204062306a36Sopenharmony_ci	int i, idx;
204162306a36Sopenharmony_ci	struct ocfs2_extent_list *el, *left_el, *right_el;
204262306a36Sopenharmony_ci	struct ocfs2_extent_rec *left_rec, *right_rec;
204362306a36Sopenharmony_ci	struct buffer_head *root_bh;
204462306a36Sopenharmony_ci
204562306a36Sopenharmony_ci	/*
204662306a36Sopenharmony_ci	 * Update the counts and position values within all the
204762306a36Sopenharmony_ci	 * interior nodes to reflect the leaf rotation we just did.
204862306a36Sopenharmony_ci	 *
204962306a36Sopenharmony_ci	 * The root node is handled below the loop.
205062306a36Sopenharmony_ci	 *
205162306a36Sopenharmony_ci	 * We begin the loop with right_el and left_el pointing to the
205262306a36Sopenharmony_ci	 * leaf lists and work our way up.
205362306a36Sopenharmony_ci	 *
205462306a36Sopenharmony_ci	 * NOTE: within this loop, left_el and right_el always refer
205562306a36Sopenharmony_ci	 * to the *child* lists.
205662306a36Sopenharmony_ci	 */
205762306a36Sopenharmony_ci	left_el = path_leaf_el(left_path);
205862306a36Sopenharmony_ci	right_el = path_leaf_el(right_path);
205962306a36Sopenharmony_ci	for(i = left_path->p_tree_depth - 1; i > subtree_index; i--) {
206062306a36Sopenharmony_ci		trace_ocfs2_complete_edge_insert(i);
206162306a36Sopenharmony_ci
206262306a36Sopenharmony_ci		/*
206362306a36Sopenharmony_ci		 * One nice property of knowing that all of these
206462306a36Sopenharmony_ci		 * nodes are below the root is that we only deal with
206562306a36Sopenharmony_ci		 * the leftmost right node record and the rightmost
206662306a36Sopenharmony_ci		 * left node record.
206762306a36Sopenharmony_ci		 */
206862306a36Sopenharmony_ci		el = left_path->p_node[i].el;
206962306a36Sopenharmony_ci		idx = le16_to_cpu(left_el->l_next_free_rec) - 1;
207062306a36Sopenharmony_ci		left_rec = &el->l_recs[idx];
207162306a36Sopenharmony_ci
207262306a36Sopenharmony_ci		el = right_path->p_node[i].el;
207362306a36Sopenharmony_ci		right_rec = &el->l_recs[0];
207462306a36Sopenharmony_ci
207562306a36Sopenharmony_ci		ocfs2_adjust_adjacent_records(left_rec, right_rec, right_el);
207662306a36Sopenharmony_ci
207762306a36Sopenharmony_ci		ocfs2_journal_dirty(handle, left_path->p_node[i].bh);
207862306a36Sopenharmony_ci		ocfs2_journal_dirty(handle, right_path->p_node[i].bh);
207962306a36Sopenharmony_ci
208062306a36Sopenharmony_ci		/*
208162306a36Sopenharmony_ci		 * Setup our list pointers now so that the current
208262306a36Sopenharmony_ci		 * parents become children in the next iteration.
208362306a36Sopenharmony_ci		 */
208462306a36Sopenharmony_ci		left_el = left_path->p_node[i].el;
208562306a36Sopenharmony_ci		right_el = right_path->p_node[i].el;
208662306a36Sopenharmony_ci	}
208762306a36Sopenharmony_ci
208862306a36Sopenharmony_ci	/*
208962306a36Sopenharmony_ci	 * At the root node, adjust the two adjacent records which
209062306a36Sopenharmony_ci	 * begin our path to the leaves.
209162306a36Sopenharmony_ci	 */
209262306a36Sopenharmony_ci
209362306a36Sopenharmony_ci	el = left_path->p_node[subtree_index].el;
209462306a36Sopenharmony_ci	left_el = left_path->p_node[subtree_index + 1].el;
209562306a36Sopenharmony_ci	right_el = right_path->p_node[subtree_index + 1].el;
209662306a36Sopenharmony_ci
209762306a36Sopenharmony_ci	ocfs2_adjust_root_records(el, left_el, right_el,
209862306a36Sopenharmony_ci				  left_path->p_node[subtree_index + 1].bh->b_blocknr);
209962306a36Sopenharmony_ci
210062306a36Sopenharmony_ci	root_bh = left_path->p_node[subtree_index].bh;
210162306a36Sopenharmony_ci
210262306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, root_bh);
210362306a36Sopenharmony_ci}
210462306a36Sopenharmony_ci
210562306a36Sopenharmony_cistatic int ocfs2_rotate_subtree_right(handle_t *handle,
210662306a36Sopenharmony_ci				      struct ocfs2_extent_tree *et,
210762306a36Sopenharmony_ci				      struct ocfs2_path *left_path,
210862306a36Sopenharmony_ci				      struct ocfs2_path *right_path,
210962306a36Sopenharmony_ci				      int subtree_index)
211062306a36Sopenharmony_ci{
211162306a36Sopenharmony_ci	int ret, i;
211262306a36Sopenharmony_ci	struct buffer_head *right_leaf_bh;
211362306a36Sopenharmony_ci	struct buffer_head *left_leaf_bh = NULL;
211462306a36Sopenharmony_ci	struct buffer_head *root_bh;
211562306a36Sopenharmony_ci	struct ocfs2_extent_list *right_el, *left_el;
211662306a36Sopenharmony_ci	struct ocfs2_extent_rec move_rec;
211762306a36Sopenharmony_ci
211862306a36Sopenharmony_ci	left_leaf_bh = path_leaf_bh(left_path);
211962306a36Sopenharmony_ci	left_el = path_leaf_el(left_path);
212062306a36Sopenharmony_ci
212162306a36Sopenharmony_ci	if (left_el->l_next_free_rec != left_el->l_count) {
212262306a36Sopenharmony_ci		ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
212362306a36Sopenharmony_ci			    "Inode %llu has non-full interior leaf node %llu (next free = %u)\n",
212462306a36Sopenharmony_ci			    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
212562306a36Sopenharmony_ci			    (unsigned long long)left_leaf_bh->b_blocknr,
212662306a36Sopenharmony_ci			    le16_to_cpu(left_el->l_next_free_rec));
212762306a36Sopenharmony_ci		return -EROFS;
212862306a36Sopenharmony_ci	}
212962306a36Sopenharmony_ci
213062306a36Sopenharmony_ci	/*
213162306a36Sopenharmony_ci	 * This extent block may already have an empty record, so we
213262306a36Sopenharmony_ci	 * return early if so.
213362306a36Sopenharmony_ci	 */
213462306a36Sopenharmony_ci	if (ocfs2_is_empty_extent(&left_el->l_recs[0]))
213562306a36Sopenharmony_ci		return 0;
213662306a36Sopenharmony_ci
213762306a36Sopenharmony_ci	root_bh = left_path->p_node[subtree_index].bh;
213862306a36Sopenharmony_ci	BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
213962306a36Sopenharmony_ci
214062306a36Sopenharmony_ci	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
214162306a36Sopenharmony_ci					   subtree_index);
214262306a36Sopenharmony_ci	if (ret) {
214362306a36Sopenharmony_ci		mlog_errno(ret);
214462306a36Sopenharmony_ci		goto out;
214562306a36Sopenharmony_ci	}
214662306a36Sopenharmony_ci
214762306a36Sopenharmony_ci	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
214862306a36Sopenharmony_ci		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
214962306a36Sopenharmony_ci						   right_path, i);
215062306a36Sopenharmony_ci		if (ret) {
215162306a36Sopenharmony_ci			mlog_errno(ret);
215262306a36Sopenharmony_ci			goto out;
215362306a36Sopenharmony_ci		}
215462306a36Sopenharmony_ci
215562306a36Sopenharmony_ci		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
215662306a36Sopenharmony_ci						   left_path, i);
215762306a36Sopenharmony_ci		if (ret) {
215862306a36Sopenharmony_ci			mlog_errno(ret);
215962306a36Sopenharmony_ci			goto out;
216062306a36Sopenharmony_ci		}
216162306a36Sopenharmony_ci	}
216262306a36Sopenharmony_ci
216362306a36Sopenharmony_ci	right_leaf_bh = path_leaf_bh(right_path);
216462306a36Sopenharmony_ci	right_el = path_leaf_el(right_path);
216562306a36Sopenharmony_ci
216662306a36Sopenharmony_ci	/* This is a code error, not a disk corruption. */
216762306a36Sopenharmony_ci	mlog_bug_on_msg(!right_el->l_next_free_rec, "Inode %llu: Rotate fails "
216862306a36Sopenharmony_ci			"because rightmost leaf block %llu is empty\n",
216962306a36Sopenharmony_ci			(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
217062306a36Sopenharmony_ci			(unsigned long long)right_leaf_bh->b_blocknr);
217162306a36Sopenharmony_ci
217262306a36Sopenharmony_ci	ocfs2_create_empty_extent(right_el);
217362306a36Sopenharmony_ci
217462306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, right_leaf_bh);
217562306a36Sopenharmony_ci
217662306a36Sopenharmony_ci	/* Do the copy now. */
217762306a36Sopenharmony_ci	i = le16_to_cpu(left_el->l_next_free_rec) - 1;
217862306a36Sopenharmony_ci	move_rec = left_el->l_recs[i];
217962306a36Sopenharmony_ci	right_el->l_recs[0] = move_rec;
218062306a36Sopenharmony_ci
218162306a36Sopenharmony_ci	/*
218262306a36Sopenharmony_ci	 * Clear out the record we just copied and shift everything
218362306a36Sopenharmony_ci	 * over, leaving an empty extent in the left leaf.
218462306a36Sopenharmony_ci	 *
218562306a36Sopenharmony_ci	 * We temporarily subtract from next_free_rec so that the
218662306a36Sopenharmony_ci	 * shift will lose the tail record (which is now defunct).
218762306a36Sopenharmony_ci	 */
218862306a36Sopenharmony_ci	le16_add_cpu(&left_el->l_next_free_rec, -1);
218962306a36Sopenharmony_ci	ocfs2_shift_records_right(left_el);
219062306a36Sopenharmony_ci	memset(&left_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
219162306a36Sopenharmony_ci	le16_add_cpu(&left_el->l_next_free_rec, 1);
219262306a36Sopenharmony_ci
219362306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, left_leaf_bh);
219462306a36Sopenharmony_ci
219562306a36Sopenharmony_ci	ocfs2_complete_edge_insert(handle, left_path, right_path,
219662306a36Sopenharmony_ci				   subtree_index);
219762306a36Sopenharmony_ci
219862306a36Sopenharmony_ciout:
219962306a36Sopenharmony_ci	return ret;
220062306a36Sopenharmony_ci}
220162306a36Sopenharmony_ci
220262306a36Sopenharmony_ci/*
220362306a36Sopenharmony_ci * Given a full path, determine what cpos value would return us a path
220462306a36Sopenharmony_ci * containing the leaf immediately to the left of the current one.
220562306a36Sopenharmony_ci *
220662306a36Sopenharmony_ci * Will return zero if the path passed in is already the leftmost path.
220762306a36Sopenharmony_ci */
220862306a36Sopenharmony_ciint ocfs2_find_cpos_for_left_leaf(struct super_block *sb,
220962306a36Sopenharmony_ci				  struct ocfs2_path *path, u32 *cpos)
221062306a36Sopenharmony_ci{
221162306a36Sopenharmony_ci	int i, j, ret = 0;
221262306a36Sopenharmony_ci	u64 blkno;
221362306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
221462306a36Sopenharmony_ci
221562306a36Sopenharmony_ci	BUG_ON(path->p_tree_depth == 0);
221662306a36Sopenharmony_ci
221762306a36Sopenharmony_ci	*cpos = 0;
221862306a36Sopenharmony_ci
221962306a36Sopenharmony_ci	blkno = path_leaf_bh(path)->b_blocknr;
222062306a36Sopenharmony_ci
222162306a36Sopenharmony_ci	/* Start at the tree node just above the leaf and work our way up. */
222262306a36Sopenharmony_ci	i = path->p_tree_depth - 1;
222362306a36Sopenharmony_ci	while (i >= 0) {
222462306a36Sopenharmony_ci		el = path->p_node[i].el;
222562306a36Sopenharmony_ci
222662306a36Sopenharmony_ci		/*
222762306a36Sopenharmony_ci		 * Find the extent record just before the one in our
222862306a36Sopenharmony_ci		 * path.
222962306a36Sopenharmony_ci		 */
223062306a36Sopenharmony_ci		for(j = 0; j < le16_to_cpu(el->l_next_free_rec); j++) {
223162306a36Sopenharmony_ci			if (le64_to_cpu(el->l_recs[j].e_blkno) == blkno) {
223262306a36Sopenharmony_ci				if (j == 0) {
223362306a36Sopenharmony_ci					if (i == 0) {
223462306a36Sopenharmony_ci						/*
223562306a36Sopenharmony_ci						 * We've determined that the
223662306a36Sopenharmony_ci						 * path specified is already
223762306a36Sopenharmony_ci						 * the leftmost one - return a
223862306a36Sopenharmony_ci						 * cpos of zero.
223962306a36Sopenharmony_ci						 */
224062306a36Sopenharmony_ci						goto out;
224162306a36Sopenharmony_ci					}
224262306a36Sopenharmony_ci					/*
224362306a36Sopenharmony_ci					 * The leftmost record points to our
224462306a36Sopenharmony_ci					 * leaf - we need to travel up the
224562306a36Sopenharmony_ci					 * tree one level.
224662306a36Sopenharmony_ci					 */
224762306a36Sopenharmony_ci					goto next_node;
224862306a36Sopenharmony_ci				}
224962306a36Sopenharmony_ci
225062306a36Sopenharmony_ci				*cpos = le32_to_cpu(el->l_recs[j - 1].e_cpos);
225162306a36Sopenharmony_ci				*cpos = *cpos + ocfs2_rec_clusters(el,
225262306a36Sopenharmony_ci							   &el->l_recs[j - 1]);
225362306a36Sopenharmony_ci				*cpos = *cpos - 1;
225462306a36Sopenharmony_ci				goto out;
225562306a36Sopenharmony_ci			}
225662306a36Sopenharmony_ci		}
225762306a36Sopenharmony_ci
225862306a36Sopenharmony_ci		/*
225962306a36Sopenharmony_ci		 * If we got here, we never found a valid node where
226062306a36Sopenharmony_ci		 * the tree indicated one should be.
226162306a36Sopenharmony_ci		 */
226262306a36Sopenharmony_ci		ocfs2_error(sb, "Invalid extent tree at extent block %llu\n",
226362306a36Sopenharmony_ci			    (unsigned long long)blkno);
226462306a36Sopenharmony_ci		ret = -EROFS;
226562306a36Sopenharmony_ci		goto out;
226662306a36Sopenharmony_ci
226762306a36Sopenharmony_cinext_node:
226862306a36Sopenharmony_ci		blkno = path->p_node[i].bh->b_blocknr;
226962306a36Sopenharmony_ci		i--;
227062306a36Sopenharmony_ci	}
227162306a36Sopenharmony_ci
227262306a36Sopenharmony_ciout:
227362306a36Sopenharmony_ci	return ret;
227462306a36Sopenharmony_ci}
227562306a36Sopenharmony_ci
227662306a36Sopenharmony_ci/*
227762306a36Sopenharmony_ci * Extend the transaction by enough credits to complete the rotation,
227862306a36Sopenharmony_ci * and still leave at least the original number of credits allocated
227962306a36Sopenharmony_ci * to this transaction.
228062306a36Sopenharmony_ci */
228162306a36Sopenharmony_cistatic int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth,
228262306a36Sopenharmony_ci					   int op_credits,
228362306a36Sopenharmony_ci					   struct ocfs2_path *path)
228462306a36Sopenharmony_ci{
228562306a36Sopenharmony_ci	int ret = 0;
228662306a36Sopenharmony_ci	int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
228762306a36Sopenharmony_ci
228862306a36Sopenharmony_ci	if (jbd2_handle_buffer_credits(handle) < credits)
228962306a36Sopenharmony_ci		ret = ocfs2_extend_trans(handle,
229062306a36Sopenharmony_ci				credits - jbd2_handle_buffer_credits(handle));
229162306a36Sopenharmony_ci
229262306a36Sopenharmony_ci	return ret;
229362306a36Sopenharmony_ci}
229462306a36Sopenharmony_ci
229562306a36Sopenharmony_ci/*
229662306a36Sopenharmony_ci * Trap the case where we're inserting into the theoretical range past
229762306a36Sopenharmony_ci * the _actual_ left leaf range. Otherwise, we'll rotate a record
229862306a36Sopenharmony_ci * whose cpos is less than ours into the right leaf.
229962306a36Sopenharmony_ci *
230062306a36Sopenharmony_ci * It's only necessary to look at the rightmost record of the left
230162306a36Sopenharmony_ci * leaf because the logic that calls us should ensure that the
230262306a36Sopenharmony_ci * theoretical ranges in the path components above the leaves are
230362306a36Sopenharmony_ci * correct.
230462306a36Sopenharmony_ci */
230562306a36Sopenharmony_cistatic int ocfs2_rotate_requires_path_adjustment(struct ocfs2_path *left_path,
230662306a36Sopenharmony_ci						 u32 insert_cpos)
230762306a36Sopenharmony_ci{
230862306a36Sopenharmony_ci	struct ocfs2_extent_list *left_el;
230962306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
231062306a36Sopenharmony_ci	int next_free;
231162306a36Sopenharmony_ci
231262306a36Sopenharmony_ci	left_el = path_leaf_el(left_path);
231362306a36Sopenharmony_ci	next_free = le16_to_cpu(left_el->l_next_free_rec);
231462306a36Sopenharmony_ci	rec = &left_el->l_recs[next_free - 1];
231562306a36Sopenharmony_ci
231662306a36Sopenharmony_ci	if (insert_cpos > le32_to_cpu(rec->e_cpos))
231762306a36Sopenharmony_ci		return 1;
231862306a36Sopenharmony_ci	return 0;
231962306a36Sopenharmony_ci}
232062306a36Sopenharmony_ci
232162306a36Sopenharmony_cistatic int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos)
232262306a36Sopenharmony_ci{
232362306a36Sopenharmony_ci	int next_free = le16_to_cpu(el->l_next_free_rec);
232462306a36Sopenharmony_ci	unsigned int range;
232562306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
232662306a36Sopenharmony_ci
232762306a36Sopenharmony_ci	if (next_free == 0)
232862306a36Sopenharmony_ci		return 0;
232962306a36Sopenharmony_ci
233062306a36Sopenharmony_ci	rec = &el->l_recs[0];
233162306a36Sopenharmony_ci	if (ocfs2_is_empty_extent(rec)) {
233262306a36Sopenharmony_ci		/* Empty list. */
233362306a36Sopenharmony_ci		if (next_free == 1)
233462306a36Sopenharmony_ci			return 0;
233562306a36Sopenharmony_ci		rec = &el->l_recs[1];
233662306a36Sopenharmony_ci	}
233762306a36Sopenharmony_ci
233862306a36Sopenharmony_ci	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
233962306a36Sopenharmony_ci	if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range)
234062306a36Sopenharmony_ci		return 1;
234162306a36Sopenharmony_ci	return 0;
234262306a36Sopenharmony_ci}
234362306a36Sopenharmony_ci
234462306a36Sopenharmony_ci/*
234562306a36Sopenharmony_ci * Rotate all the records in a btree right one record, starting at insert_cpos.
234662306a36Sopenharmony_ci *
234762306a36Sopenharmony_ci * The path to the rightmost leaf should be passed in.
234862306a36Sopenharmony_ci *
234962306a36Sopenharmony_ci * The array is assumed to be large enough to hold an entire path (tree depth).
235062306a36Sopenharmony_ci *
235162306a36Sopenharmony_ci * Upon successful return from this function:
235262306a36Sopenharmony_ci *
235362306a36Sopenharmony_ci * - The 'right_path' array will contain a path to the leaf block
235462306a36Sopenharmony_ci *   whose range contains e_cpos.
235562306a36Sopenharmony_ci * - That leaf block will have a single empty extent in list index 0.
235662306a36Sopenharmony_ci * - In the case that the rotation requires a post-insert update,
235762306a36Sopenharmony_ci *   *ret_left_path will contain a valid path which can be passed to
235862306a36Sopenharmony_ci *   ocfs2_insert_path().
235962306a36Sopenharmony_ci */
236062306a36Sopenharmony_cistatic int ocfs2_rotate_tree_right(handle_t *handle,
236162306a36Sopenharmony_ci				   struct ocfs2_extent_tree *et,
236262306a36Sopenharmony_ci				   enum ocfs2_split_type split,
236362306a36Sopenharmony_ci				   u32 insert_cpos,
236462306a36Sopenharmony_ci				   struct ocfs2_path *right_path,
236562306a36Sopenharmony_ci				   struct ocfs2_path **ret_left_path)
236662306a36Sopenharmony_ci{
236762306a36Sopenharmony_ci	int ret, start, orig_credits = jbd2_handle_buffer_credits(handle);
236862306a36Sopenharmony_ci	u32 cpos;
236962306a36Sopenharmony_ci	struct ocfs2_path *left_path = NULL;
237062306a36Sopenharmony_ci	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
237162306a36Sopenharmony_ci
237262306a36Sopenharmony_ci	*ret_left_path = NULL;
237362306a36Sopenharmony_ci
237462306a36Sopenharmony_ci	left_path = ocfs2_new_path_from_path(right_path);
237562306a36Sopenharmony_ci	if (!left_path) {
237662306a36Sopenharmony_ci		ret = -ENOMEM;
237762306a36Sopenharmony_ci		mlog_errno(ret);
237862306a36Sopenharmony_ci		goto out;
237962306a36Sopenharmony_ci	}
238062306a36Sopenharmony_ci
238162306a36Sopenharmony_ci	ret = ocfs2_find_cpos_for_left_leaf(sb, right_path, &cpos);
238262306a36Sopenharmony_ci	if (ret) {
238362306a36Sopenharmony_ci		mlog_errno(ret);
238462306a36Sopenharmony_ci		goto out;
238562306a36Sopenharmony_ci	}
238662306a36Sopenharmony_ci
238762306a36Sopenharmony_ci	trace_ocfs2_rotate_tree_right(
238862306a36Sopenharmony_ci		(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
238962306a36Sopenharmony_ci		insert_cpos, cpos);
239062306a36Sopenharmony_ci
239162306a36Sopenharmony_ci	/*
239262306a36Sopenharmony_ci	 * What we want to do here is:
239362306a36Sopenharmony_ci	 *
239462306a36Sopenharmony_ci	 * 1) Start with the rightmost path.
239562306a36Sopenharmony_ci	 *
239662306a36Sopenharmony_ci	 * 2) Determine a path to the leaf block directly to the left
239762306a36Sopenharmony_ci	 *    of that leaf.
239862306a36Sopenharmony_ci	 *
239962306a36Sopenharmony_ci	 * 3) Determine the 'subtree root' - the lowest level tree node
240062306a36Sopenharmony_ci	 *    which contains a path to both leaves.
240162306a36Sopenharmony_ci	 *
240262306a36Sopenharmony_ci	 * 4) Rotate the subtree.
240362306a36Sopenharmony_ci	 *
240462306a36Sopenharmony_ci	 * 5) Find the next subtree by considering the left path to be
240562306a36Sopenharmony_ci	 *    the new right path.
240662306a36Sopenharmony_ci	 *
240762306a36Sopenharmony_ci	 * The check at the top of this while loop also accepts
240862306a36Sopenharmony_ci	 * insert_cpos == cpos because cpos is only a _theoretical_
240962306a36Sopenharmony_ci	 * value to get us the left path - insert_cpos might very well
241062306a36Sopenharmony_ci	 * be filling that hole.
241162306a36Sopenharmony_ci	 *
241262306a36Sopenharmony_ci	 * Stop at a cpos of '0' because we either started at the
241362306a36Sopenharmony_ci	 * leftmost branch (i.e., a tree with one branch and a
241462306a36Sopenharmony_ci	 * rotation inside of it), or we've gone as far as we can in
241562306a36Sopenharmony_ci	 * rotating subtrees.
241662306a36Sopenharmony_ci	 */
241762306a36Sopenharmony_ci	while (cpos && insert_cpos <= cpos) {
241862306a36Sopenharmony_ci		trace_ocfs2_rotate_tree_right(
241962306a36Sopenharmony_ci			(unsigned long long)
242062306a36Sopenharmony_ci			ocfs2_metadata_cache_owner(et->et_ci),
242162306a36Sopenharmony_ci			insert_cpos, cpos);
242262306a36Sopenharmony_ci
242362306a36Sopenharmony_ci		ret = ocfs2_find_path(et->et_ci, left_path, cpos);
242462306a36Sopenharmony_ci		if (ret) {
242562306a36Sopenharmony_ci			mlog_errno(ret);
242662306a36Sopenharmony_ci			goto out;
242762306a36Sopenharmony_ci		}
242862306a36Sopenharmony_ci
242962306a36Sopenharmony_ci		mlog_bug_on_msg(path_leaf_bh(left_path) ==
243062306a36Sopenharmony_ci				path_leaf_bh(right_path),
243162306a36Sopenharmony_ci				"Owner %llu: error during insert of %u "
243262306a36Sopenharmony_ci				"(left path cpos %u) results in two identical "
243362306a36Sopenharmony_ci				"paths ending at %llu\n",
243462306a36Sopenharmony_ci				(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
243562306a36Sopenharmony_ci				insert_cpos, cpos,
243662306a36Sopenharmony_ci				(unsigned long long)
243762306a36Sopenharmony_ci				path_leaf_bh(left_path)->b_blocknr);
243862306a36Sopenharmony_ci
243962306a36Sopenharmony_ci		if (split == SPLIT_NONE &&
244062306a36Sopenharmony_ci		    ocfs2_rotate_requires_path_adjustment(left_path,
244162306a36Sopenharmony_ci							  insert_cpos)) {
244262306a36Sopenharmony_ci
244362306a36Sopenharmony_ci			/*
244462306a36Sopenharmony_ci			 * We've rotated the tree as much as we
244562306a36Sopenharmony_ci			 * should. The rest is up to
244662306a36Sopenharmony_ci			 * ocfs2_insert_path() to complete, after the
244762306a36Sopenharmony_ci			 * record insertion. We indicate this
244862306a36Sopenharmony_ci			 * situation by returning the left path.
244962306a36Sopenharmony_ci			 *
245062306a36Sopenharmony_ci			 * The reason we don't adjust the records here
245162306a36Sopenharmony_ci			 * before the record insert is that an error
245262306a36Sopenharmony_ci			 * later might break the rule where a parent
245362306a36Sopenharmony_ci			 * record e_cpos will reflect the actual
245462306a36Sopenharmony_ci			 * e_cpos of the 1st nonempty record of the
245562306a36Sopenharmony_ci			 * child list.
245662306a36Sopenharmony_ci			 */
245762306a36Sopenharmony_ci			*ret_left_path = left_path;
245862306a36Sopenharmony_ci			goto out_ret_path;
245962306a36Sopenharmony_ci		}
246062306a36Sopenharmony_ci
246162306a36Sopenharmony_ci		start = ocfs2_find_subtree_root(et, left_path, right_path);
246262306a36Sopenharmony_ci
246362306a36Sopenharmony_ci		trace_ocfs2_rotate_subtree(start,
246462306a36Sopenharmony_ci			(unsigned long long)
246562306a36Sopenharmony_ci			right_path->p_node[start].bh->b_blocknr,
246662306a36Sopenharmony_ci			right_path->p_tree_depth);
246762306a36Sopenharmony_ci
246862306a36Sopenharmony_ci		ret = ocfs2_extend_rotate_transaction(handle, start,
246962306a36Sopenharmony_ci						      orig_credits, right_path);
247062306a36Sopenharmony_ci		if (ret) {
247162306a36Sopenharmony_ci			mlog_errno(ret);
247262306a36Sopenharmony_ci			goto out;
247362306a36Sopenharmony_ci		}
247462306a36Sopenharmony_ci
247562306a36Sopenharmony_ci		ret = ocfs2_rotate_subtree_right(handle, et, left_path,
247662306a36Sopenharmony_ci						 right_path, start);
247762306a36Sopenharmony_ci		if (ret) {
247862306a36Sopenharmony_ci			mlog_errno(ret);
247962306a36Sopenharmony_ci			goto out;
248062306a36Sopenharmony_ci		}
248162306a36Sopenharmony_ci
248262306a36Sopenharmony_ci		if (split != SPLIT_NONE &&
248362306a36Sopenharmony_ci		    ocfs2_leftmost_rec_contains(path_leaf_el(right_path),
248462306a36Sopenharmony_ci						insert_cpos)) {
248562306a36Sopenharmony_ci			/*
248662306a36Sopenharmony_ci			 * A rotate moves the rightmost left leaf
248762306a36Sopenharmony_ci			 * record over to the leftmost right leaf
248862306a36Sopenharmony_ci			 * slot. If we're doing an extent split
248962306a36Sopenharmony_ci			 * instead of a real insert, then we have to
249062306a36Sopenharmony_ci			 * check that the extent to be split wasn't
249162306a36Sopenharmony_ci			 * just moved over. If it was, then we can
249262306a36Sopenharmony_ci			 * exit here, passing left_path back -
249362306a36Sopenharmony_ci			 * ocfs2_split_extent() is smart enough to
249462306a36Sopenharmony_ci			 * search both leaves.
249562306a36Sopenharmony_ci			 */
249662306a36Sopenharmony_ci			*ret_left_path = left_path;
249762306a36Sopenharmony_ci			goto out_ret_path;
249862306a36Sopenharmony_ci		}
249962306a36Sopenharmony_ci
250062306a36Sopenharmony_ci		/*
250162306a36Sopenharmony_ci		 * There is no need to re-read the next right path
250262306a36Sopenharmony_ci		 * as we know that it'll be our current left
250362306a36Sopenharmony_ci		 * path. Optimize by copying values instead.
250462306a36Sopenharmony_ci		 */
250562306a36Sopenharmony_ci		ocfs2_mv_path(right_path, left_path);
250662306a36Sopenharmony_ci
250762306a36Sopenharmony_ci		ret = ocfs2_find_cpos_for_left_leaf(sb, right_path, &cpos);
250862306a36Sopenharmony_ci		if (ret) {
250962306a36Sopenharmony_ci			mlog_errno(ret);
251062306a36Sopenharmony_ci			goto out;
251162306a36Sopenharmony_ci		}
251262306a36Sopenharmony_ci	}
251362306a36Sopenharmony_ci
251462306a36Sopenharmony_ciout:
251562306a36Sopenharmony_ci	ocfs2_free_path(left_path);
251662306a36Sopenharmony_ci
251762306a36Sopenharmony_ciout_ret_path:
251862306a36Sopenharmony_ci	return ret;
251962306a36Sopenharmony_ci}
252062306a36Sopenharmony_ci
252162306a36Sopenharmony_cistatic int ocfs2_update_edge_lengths(handle_t *handle,
252262306a36Sopenharmony_ci				     struct ocfs2_extent_tree *et,
252362306a36Sopenharmony_ci				     struct ocfs2_path *path)
252462306a36Sopenharmony_ci{
252562306a36Sopenharmony_ci	int i, idx, ret;
252662306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
252762306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
252862306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
252962306a36Sopenharmony_ci	u32 range;
253062306a36Sopenharmony_ci
253162306a36Sopenharmony_ci	ret = ocfs2_journal_access_path(et->et_ci, handle, path);
253262306a36Sopenharmony_ci	if (ret) {
253362306a36Sopenharmony_ci		mlog_errno(ret);
253462306a36Sopenharmony_ci		goto out;
253562306a36Sopenharmony_ci	}
253662306a36Sopenharmony_ci
253762306a36Sopenharmony_ci	/* Path should always be rightmost. */
253862306a36Sopenharmony_ci	eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
253962306a36Sopenharmony_ci	BUG_ON(eb->h_next_leaf_blk != 0ULL);
254062306a36Sopenharmony_ci
254162306a36Sopenharmony_ci	el = &eb->h_list;
254262306a36Sopenharmony_ci	BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
254362306a36Sopenharmony_ci	idx = le16_to_cpu(el->l_next_free_rec) - 1;
254462306a36Sopenharmony_ci	rec = &el->l_recs[idx];
254562306a36Sopenharmony_ci	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
254662306a36Sopenharmony_ci
254762306a36Sopenharmony_ci	for (i = 0; i < path->p_tree_depth; i++) {
254862306a36Sopenharmony_ci		el = path->p_node[i].el;
254962306a36Sopenharmony_ci		idx = le16_to_cpu(el->l_next_free_rec) - 1;
255062306a36Sopenharmony_ci		rec = &el->l_recs[idx];
255162306a36Sopenharmony_ci
255262306a36Sopenharmony_ci		rec->e_int_clusters = cpu_to_le32(range);
255362306a36Sopenharmony_ci		le32_add_cpu(&rec->e_int_clusters, -le32_to_cpu(rec->e_cpos));
255462306a36Sopenharmony_ci
255562306a36Sopenharmony_ci		ocfs2_journal_dirty(handle, path->p_node[i].bh);
255662306a36Sopenharmony_ci	}
255762306a36Sopenharmony_ciout:
255862306a36Sopenharmony_ci	return ret;
255962306a36Sopenharmony_ci}
256062306a36Sopenharmony_ci
256162306a36Sopenharmony_cistatic void ocfs2_unlink_path(handle_t *handle,
256262306a36Sopenharmony_ci			      struct ocfs2_extent_tree *et,
256362306a36Sopenharmony_ci			      struct ocfs2_cached_dealloc_ctxt *dealloc,
256462306a36Sopenharmony_ci			      struct ocfs2_path *path, int unlink_start)
256562306a36Sopenharmony_ci{
256662306a36Sopenharmony_ci	int ret, i;
256762306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
256862306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
256962306a36Sopenharmony_ci	struct buffer_head *bh;
257062306a36Sopenharmony_ci
257162306a36Sopenharmony_ci	for(i = unlink_start; i < path_num_items(path); i++) {
257262306a36Sopenharmony_ci		bh = path->p_node[i].bh;
257362306a36Sopenharmony_ci
257462306a36Sopenharmony_ci		eb = (struct ocfs2_extent_block *)bh->b_data;
257562306a36Sopenharmony_ci		/*
257662306a36Sopenharmony_ci		 * Not all nodes might have had their final count
257762306a36Sopenharmony_ci		 * decremented by the caller - handle this here.
257862306a36Sopenharmony_ci		 */
257962306a36Sopenharmony_ci		el = &eb->h_list;
258062306a36Sopenharmony_ci		if (le16_to_cpu(el->l_next_free_rec) > 1) {
258162306a36Sopenharmony_ci			mlog(ML_ERROR,
258262306a36Sopenharmony_ci			     "Inode %llu, attempted to remove extent block "
258362306a36Sopenharmony_ci			     "%llu with %u records\n",
258462306a36Sopenharmony_ci			     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
258562306a36Sopenharmony_ci			     (unsigned long long)le64_to_cpu(eb->h_blkno),
258662306a36Sopenharmony_ci			     le16_to_cpu(el->l_next_free_rec));
258762306a36Sopenharmony_ci
258862306a36Sopenharmony_ci			ocfs2_journal_dirty(handle, bh);
258962306a36Sopenharmony_ci			ocfs2_remove_from_cache(et->et_ci, bh);
259062306a36Sopenharmony_ci			continue;
259162306a36Sopenharmony_ci		}
259262306a36Sopenharmony_ci
259362306a36Sopenharmony_ci		el->l_next_free_rec = 0;
259462306a36Sopenharmony_ci		memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
259562306a36Sopenharmony_ci
259662306a36Sopenharmony_ci		ocfs2_journal_dirty(handle, bh);
259762306a36Sopenharmony_ci
259862306a36Sopenharmony_ci		ret = ocfs2_cache_extent_block_free(dealloc, eb);
259962306a36Sopenharmony_ci		if (ret)
260062306a36Sopenharmony_ci			mlog_errno(ret);
260162306a36Sopenharmony_ci
260262306a36Sopenharmony_ci		ocfs2_remove_from_cache(et->et_ci, bh);
260362306a36Sopenharmony_ci	}
260462306a36Sopenharmony_ci}
260562306a36Sopenharmony_ci
260662306a36Sopenharmony_cistatic void ocfs2_unlink_subtree(handle_t *handle,
260762306a36Sopenharmony_ci				 struct ocfs2_extent_tree *et,
260862306a36Sopenharmony_ci				 struct ocfs2_path *left_path,
260962306a36Sopenharmony_ci				 struct ocfs2_path *right_path,
261062306a36Sopenharmony_ci				 int subtree_index,
261162306a36Sopenharmony_ci				 struct ocfs2_cached_dealloc_ctxt *dealloc)
261262306a36Sopenharmony_ci{
261362306a36Sopenharmony_ci	int i;
261462306a36Sopenharmony_ci	struct buffer_head *root_bh = left_path->p_node[subtree_index].bh;
261562306a36Sopenharmony_ci	struct ocfs2_extent_list *root_el = left_path->p_node[subtree_index].el;
261662306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
261762306a36Sopenharmony_ci
261862306a36Sopenharmony_ci	eb = (struct ocfs2_extent_block *)right_path->p_node[subtree_index + 1].bh->b_data;
261962306a36Sopenharmony_ci
262062306a36Sopenharmony_ci	for(i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++)
262162306a36Sopenharmony_ci		if (root_el->l_recs[i].e_blkno == eb->h_blkno)
262262306a36Sopenharmony_ci			break;
262362306a36Sopenharmony_ci
262462306a36Sopenharmony_ci	BUG_ON(i >= le16_to_cpu(root_el->l_next_free_rec));
262562306a36Sopenharmony_ci
262662306a36Sopenharmony_ci	memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
262762306a36Sopenharmony_ci	le16_add_cpu(&root_el->l_next_free_rec, -1);
262862306a36Sopenharmony_ci
262962306a36Sopenharmony_ci	eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
263062306a36Sopenharmony_ci	eb->h_next_leaf_blk = 0;
263162306a36Sopenharmony_ci
263262306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, root_bh);
263362306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
263462306a36Sopenharmony_ci
263562306a36Sopenharmony_ci	ocfs2_unlink_path(handle, et, dealloc, right_path,
263662306a36Sopenharmony_ci			  subtree_index + 1);
263762306a36Sopenharmony_ci}
263862306a36Sopenharmony_ci
263962306a36Sopenharmony_cistatic int ocfs2_rotate_subtree_left(handle_t *handle,
264062306a36Sopenharmony_ci				     struct ocfs2_extent_tree *et,
264162306a36Sopenharmony_ci				     struct ocfs2_path *left_path,
264262306a36Sopenharmony_ci				     struct ocfs2_path *right_path,
264362306a36Sopenharmony_ci				     int subtree_index,
264462306a36Sopenharmony_ci				     struct ocfs2_cached_dealloc_ctxt *dealloc,
264562306a36Sopenharmony_ci				     int *deleted)
264662306a36Sopenharmony_ci{
264762306a36Sopenharmony_ci	int ret, i, del_right_subtree = 0, right_has_empty = 0;
264862306a36Sopenharmony_ci	struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path);
264962306a36Sopenharmony_ci	struct ocfs2_extent_list *right_leaf_el, *left_leaf_el;
265062306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
265162306a36Sopenharmony_ci
265262306a36Sopenharmony_ci	*deleted = 0;
265362306a36Sopenharmony_ci
265462306a36Sopenharmony_ci	right_leaf_el = path_leaf_el(right_path);
265562306a36Sopenharmony_ci	left_leaf_el = path_leaf_el(left_path);
265662306a36Sopenharmony_ci	root_bh = left_path->p_node[subtree_index].bh;
265762306a36Sopenharmony_ci	BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
265862306a36Sopenharmony_ci
265962306a36Sopenharmony_ci	if (!ocfs2_is_empty_extent(&left_leaf_el->l_recs[0]))
266062306a36Sopenharmony_ci		return 0;
266162306a36Sopenharmony_ci
266262306a36Sopenharmony_ci	eb = (struct ocfs2_extent_block *)path_leaf_bh(right_path)->b_data;
266362306a36Sopenharmony_ci	if (ocfs2_is_empty_extent(&right_leaf_el->l_recs[0])) {
266462306a36Sopenharmony_ci		/*
266562306a36Sopenharmony_ci		 * It's legal for us to proceed if the right leaf is
266662306a36Sopenharmony_ci		 * the rightmost one and it has an empty extent. There
266762306a36Sopenharmony_ci		 * are two cases to handle - whether the leaf will be
266862306a36Sopenharmony_ci		 * empty after removal or not. If the leaf isn't empty
266962306a36Sopenharmony_ci		 * then just remove the empty extent up front. The
267062306a36Sopenharmony_ci		 * next block will handle empty leaves by flagging
267162306a36Sopenharmony_ci		 * them for unlink.
267262306a36Sopenharmony_ci		 *
267362306a36Sopenharmony_ci		 * Non rightmost leaves will throw -EAGAIN and the
267462306a36Sopenharmony_ci		 * caller can manually move the subtree and retry.
267562306a36Sopenharmony_ci		 */
267662306a36Sopenharmony_ci
267762306a36Sopenharmony_ci		if (eb->h_next_leaf_blk != 0ULL)
267862306a36Sopenharmony_ci			return -EAGAIN;
267962306a36Sopenharmony_ci
268062306a36Sopenharmony_ci		if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) {
268162306a36Sopenharmony_ci			ret = ocfs2_journal_access_eb(handle, et->et_ci,
268262306a36Sopenharmony_ci						      path_leaf_bh(right_path),
268362306a36Sopenharmony_ci						      OCFS2_JOURNAL_ACCESS_WRITE);
268462306a36Sopenharmony_ci			if (ret) {
268562306a36Sopenharmony_ci				mlog_errno(ret);
268662306a36Sopenharmony_ci				goto out;
268762306a36Sopenharmony_ci			}
268862306a36Sopenharmony_ci
268962306a36Sopenharmony_ci			ocfs2_remove_empty_extent(right_leaf_el);
269062306a36Sopenharmony_ci		} else
269162306a36Sopenharmony_ci			right_has_empty = 1;
269262306a36Sopenharmony_ci	}
269362306a36Sopenharmony_ci
269462306a36Sopenharmony_ci	if (eb->h_next_leaf_blk == 0ULL &&
269562306a36Sopenharmony_ci	    le16_to_cpu(right_leaf_el->l_next_free_rec) == 1) {
269662306a36Sopenharmony_ci		/*
269762306a36Sopenharmony_ci		 * We have to update i_last_eb_blk during the meta
269862306a36Sopenharmony_ci		 * data delete.
269962306a36Sopenharmony_ci		 */
270062306a36Sopenharmony_ci		ret = ocfs2_et_root_journal_access(handle, et,
270162306a36Sopenharmony_ci						   OCFS2_JOURNAL_ACCESS_WRITE);
270262306a36Sopenharmony_ci		if (ret) {
270362306a36Sopenharmony_ci			mlog_errno(ret);
270462306a36Sopenharmony_ci			goto out;
270562306a36Sopenharmony_ci		}
270662306a36Sopenharmony_ci
270762306a36Sopenharmony_ci		del_right_subtree = 1;
270862306a36Sopenharmony_ci	}
270962306a36Sopenharmony_ci
271062306a36Sopenharmony_ci	/*
271162306a36Sopenharmony_ci	 * Getting here with an empty extent in the right path implies
271262306a36Sopenharmony_ci	 * that it's the rightmost path and will be deleted.
271362306a36Sopenharmony_ci	 */
271462306a36Sopenharmony_ci	BUG_ON(right_has_empty && !del_right_subtree);
271562306a36Sopenharmony_ci
271662306a36Sopenharmony_ci	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
271762306a36Sopenharmony_ci					   subtree_index);
271862306a36Sopenharmony_ci	if (ret) {
271962306a36Sopenharmony_ci		mlog_errno(ret);
272062306a36Sopenharmony_ci		goto out;
272162306a36Sopenharmony_ci	}
272262306a36Sopenharmony_ci
272362306a36Sopenharmony_ci	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
272462306a36Sopenharmony_ci		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
272562306a36Sopenharmony_ci						   right_path, i);
272662306a36Sopenharmony_ci		if (ret) {
272762306a36Sopenharmony_ci			mlog_errno(ret);
272862306a36Sopenharmony_ci			goto out;
272962306a36Sopenharmony_ci		}
273062306a36Sopenharmony_ci
273162306a36Sopenharmony_ci		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
273262306a36Sopenharmony_ci						   left_path, i);
273362306a36Sopenharmony_ci		if (ret) {
273462306a36Sopenharmony_ci			mlog_errno(ret);
273562306a36Sopenharmony_ci			goto out;
273662306a36Sopenharmony_ci		}
273762306a36Sopenharmony_ci	}
273862306a36Sopenharmony_ci
273962306a36Sopenharmony_ci	if (!right_has_empty) {
274062306a36Sopenharmony_ci		/*
274162306a36Sopenharmony_ci		 * Only do this if we're moving a real
274262306a36Sopenharmony_ci		 * record. Otherwise, the action is delayed until
274362306a36Sopenharmony_ci		 * after removal of the right path in which case we
274462306a36Sopenharmony_ci		 * can do a simple shift to remove the empty extent.
274562306a36Sopenharmony_ci		 */
274662306a36Sopenharmony_ci		ocfs2_rotate_leaf(left_leaf_el, &right_leaf_el->l_recs[0]);
274762306a36Sopenharmony_ci		memset(&right_leaf_el->l_recs[0], 0,
274862306a36Sopenharmony_ci		       sizeof(struct ocfs2_extent_rec));
274962306a36Sopenharmony_ci	}
275062306a36Sopenharmony_ci	if (eb->h_next_leaf_blk == 0ULL) {
275162306a36Sopenharmony_ci		/*
275262306a36Sopenharmony_ci		 * Move recs over to get rid of empty extent, decrease
275362306a36Sopenharmony_ci		 * next_free. This is allowed to remove the last
275462306a36Sopenharmony_ci		 * extent in our leaf (setting l_next_free_rec to
275562306a36Sopenharmony_ci		 * zero) - the delete code below won't care.
275662306a36Sopenharmony_ci		 */
275762306a36Sopenharmony_ci		ocfs2_remove_empty_extent(right_leaf_el);
275862306a36Sopenharmony_ci	}
275962306a36Sopenharmony_ci
276062306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
276162306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
276262306a36Sopenharmony_ci
276362306a36Sopenharmony_ci	if (del_right_subtree) {
276462306a36Sopenharmony_ci		ocfs2_unlink_subtree(handle, et, left_path, right_path,
276562306a36Sopenharmony_ci				     subtree_index, dealloc);
276662306a36Sopenharmony_ci		ret = ocfs2_update_edge_lengths(handle, et, left_path);
276762306a36Sopenharmony_ci		if (ret) {
276862306a36Sopenharmony_ci			mlog_errno(ret);
276962306a36Sopenharmony_ci			goto out;
277062306a36Sopenharmony_ci		}
277162306a36Sopenharmony_ci
277262306a36Sopenharmony_ci		eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
277362306a36Sopenharmony_ci		ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
277462306a36Sopenharmony_ci
277562306a36Sopenharmony_ci		/*
277662306a36Sopenharmony_ci		 * Removal of the extent in the left leaf was skipped
277762306a36Sopenharmony_ci		 * above so we could delete the right path
277862306a36Sopenharmony_ci		 * 1st.
277962306a36Sopenharmony_ci		 */
278062306a36Sopenharmony_ci		if (right_has_empty)
278162306a36Sopenharmony_ci			ocfs2_remove_empty_extent(left_leaf_el);
278262306a36Sopenharmony_ci
278362306a36Sopenharmony_ci		ocfs2_journal_dirty(handle, et_root_bh);
278462306a36Sopenharmony_ci
278562306a36Sopenharmony_ci		*deleted = 1;
278662306a36Sopenharmony_ci	} else
278762306a36Sopenharmony_ci		ocfs2_complete_edge_insert(handle, left_path, right_path,
278862306a36Sopenharmony_ci					   subtree_index);
278962306a36Sopenharmony_ci
279062306a36Sopenharmony_ciout:
279162306a36Sopenharmony_ci	return ret;
279262306a36Sopenharmony_ci}
279362306a36Sopenharmony_ci
279462306a36Sopenharmony_ci/*
279562306a36Sopenharmony_ci * Given a full path, determine what cpos value would return us a path
279662306a36Sopenharmony_ci * containing the leaf immediately to the right of the current one.
279762306a36Sopenharmony_ci *
279862306a36Sopenharmony_ci * Will return zero if the path passed in is already the rightmost path.
279962306a36Sopenharmony_ci *
280062306a36Sopenharmony_ci * This looks similar, but is subtly different to
280162306a36Sopenharmony_ci * ocfs2_find_cpos_for_left_leaf().
280262306a36Sopenharmony_ci */
280362306a36Sopenharmony_ciint ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
280462306a36Sopenharmony_ci				   struct ocfs2_path *path, u32 *cpos)
280562306a36Sopenharmony_ci{
280662306a36Sopenharmony_ci	int i, j, ret = 0;
280762306a36Sopenharmony_ci	u64 blkno;
280862306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
280962306a36Sopenharmony_ci
281062306a36Sopenharmony_ci	*cpos = 0;
281162306a36Sopenharmony_ci
281262306a36Sopenharmony_ci	if (path->p_tree_depth == 0)
281362306a36Sopenharmony_ci		return 0;
281462306a36Sopenharmony_ci
281562306a36Sopenharmony_ci	blkno = path_leaf_bh(path)->b_blocknr;
281662306a36Sopenharmony_ci
281762306a36Sopenharmony_ci	/* Start at the tree node just above the leaf and work our way up. */
281862306a36Sopenharmony_ci	i = path->p_tree_depth - 1;
281962306a36Sopenharmony_ci	while (i >= 0) {
282062306a36Sopenharmony_ci		int next_free;
282162306a36Sopenharmony_ci
282262306a36Sopenharmony_ci		el = path->p_node[i].el;
282362306a36Sopenharmony_ci
282462306a36Sopenharmony_ci		/*
282562306a36Sopenharmony_ci		 * Find the extent record just after the one in our
282662306a36Sopenharmony_ci		 * path.
282762306a36Sopenharmony_ci		 */
282862306a36Sopenharmony_ci		next_free = le16_to_cpu(el->l_next_free_rec);
282962306a36Sopenharmony_ci		for(j = 0; j < le16_to_cpu(el->l_next_free_rec); j++) {
283062306a36Sopenharmony_ci			if (le64_to_cpu(el->l_recs[j].e_blkno) == blkno) {
283162306a36Sopenharmony_ci				if (j == (next_free - 1)) {
283262306a36Sopenharmony_ci					if (i == 0) {
283362306a36Sopenharmony_ci						/*
283462306a36Sopenharmony_ci						 * We've determined that the
283562306a36Sopenharmony_ci						 * path specified is already
283662306a36Sopenharmony_ci						 * the rightmost one - return a
283762306a36Sopenharmony_ci						 * cpos of zero.
283862306a36Sopenharmony_ci						 */
283962306a36Sopenharmony_ci						goto out;
284062306a36Sopenharmony_ci					}
284162306a36Sopenharmony_ci					/*
284262306a36Sopenharmony_ci					 * The rightmost record points to our
284362306a36Sopenharmony_ci					 * leaf - we need to travel up the
284462306a36Sopenharmony_ci					 * tree one level.
284562306a36Sopenharmony_ci					 */
284662306a36Sopenharmony_ci					goto next_node;
284762306a36Sopenharmony_ci				}
284862306a36Sopenharmony_ci
284962306a36Sopenharmony_ci				*cpos = le32_to_cpu(el->l_recs[j + 1].e_cpos);
285062306a36Sopenharmony_ci				goto out;
285162306a36Sopenharmony_ci			}
285262306a36Sopenharmony_ci		}
285362306a36Sopenharmony_ci
285462306a36Sopenharmony_ci		/*
285562306a36Sopenharmony_ci		 * If we got here, we never found a valid node where
285662306a36Sopenharmony_ci		 * the tree indicated one should be.
285762306a36Sopenharmony_ci		 */
285862306a36Sopenharmony_ci		ocfs2_error(sb, "Invalid extent tree at extent block %llu\n",
285962306a36Sopenharmony_ci			    (unsigned long long)blkno);
286062306a36Sopenharmony_ci		ret = -EROFS;
286162306a36Sopenharmony_ci		goto out;
286262306a36Sopenharmony_ci
286362306a36Sopenharmony_cinext_node:
286462306a36Sopenharmony_ci		blkno = path->p_node[i].bh->b_blocknr;
286562306a36Sopenharmony_ci		i--;
286662306a36Sopenharmony_ci	}
286762306a36Sopenharmony_ci
286862306a36Sopenharmony_ciout:
286962306a36Sopenharmony_ci	return ret;
287062306a36Sopenharmony_ci}
287162306a36Sopenharmony_ci
287262306a36Sopenharmony_cistatic int ocfs2_rotate_rightmost_leaf_left(handle_t *handle,
287362306a36Sopenharmony_ci					    struct ocfs2_extent_tree *et,
287462306a36Sopenharmony_ci					    struct ocfs2_path *path)
287562306a36Sopenharmony_ci{
287662306a36Sopenharmony_ci	int ret;
287762306a36Sopenharmony_ci	struct buffer_head *bh = path_leaf_bh(path);
287862306a36Sopenharmony_ci	struct ocfs2_extent_list *el = path_leaf_el(path);
287962306a36Sopenharmony_ci
288062306a36Sopenharmony_ci	if (!ocfs2_is_empty_extent(&el->l_recs[0]))
288162306a36Sopenharmony_ci		return 0;
288262306a36Sopenharmony_ci
288362306a36Sopenharmony_ci	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, path,
288462306a36Sopenharmony_ci					   path_num_items(path) - 1);
288562306a36Sopenharmony_ci	if (ret) {
288662306a36Sopenharmony_ci		mlog_errno(ret);
288762306a36Sopenharmony_ci		goto out;
288862306a36Sopenharmony_ci	}
288962306a36Sopenharmony_ci
289062306a36Sopenharmony_ci	ocfs2_remove_empty_extent(el);
289162306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, bh);
289262306a36Sopenharmony_ci
289362306a36Sopenharmony_ciout:
289462306a36Sopenharmony_ci	return ret;
289562306a36Sopenharmony_ci}
289662306a36Sopenharmony_ci
289762306a36Sopenharmony_cistatic int __ocfs2_rotate_tree_left(handle_t *handle,
289862306a36Sopenharmony_ci				    struct ocfs2_extent_tree *et,
289962306a36Sopenharmony_ci				    int orig_credits,
290062306a36Sopenharmony_ci				    struct ocfs2_path *path,
290162306a36Sopenharmony_ci				    struct ocfs2_cached_dealloc_ctxt *dealloc,
290262306a36Sopenharmony_ci				    struct ocfs2_path **empty_extent_path)
290362306a36Sopenharmony_ci{
290462306a36Sopenharmony_ci	int ret, subtree_root, deleted;
290562306a36Sopenharmony_ci	u32 right_cpos;
290662306a36Sopenharmony_ci	struct ocfs2_path *left_path = NULL;
290762306a36Sopenharmony_ci	struct ocfs2_path *right_path = NULL;
290862306a36Sopenharmony_ci	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
290962306a36Sopenharmony_ci
291062306a36Sopenharmony_ci	if (!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0])))
291162306a36Sopenharmony_ci		return 0;
291262306a36Sopenharmony_ci
291362306a36Sopenharmony_ci	*empty_extent_path = NULL;
291462306a36Sopenharmony_ci
291562306a36Sopenharmony_ci	ret = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
291662306a36Sopenharmony_ci	if (ret) {
291762306a36Sopenharmony_ci		mlog_errno(ret);
291862306a36Sopenharmony_ci		goto out;
291962306a36Sopenharmony_ci	}
292062306a36Sopenharmony_ci
292162306a36Sopenharmony_ci	left_path = ocfs2_new_path_from_path(path);
292262306a36Sopenharmony_ci	if (!left_path) {
292362306a36Sopenharmony_ci		ret = -ENOMEM;
292462306a36Sopenharmony_ci		mlog_errno(ret);
292562306a36Sopenharmony_ci		goto out;
292662306a36Sopenharmony_ci	}
292762306a36Sopenharmony_ci
292862306a36Sopenharmony_ci	ocfs2_cp_path(left_path, path);
292962306a36Sopenharmony_ci
293062306a36Sopenharmony_ci	right_path = ocfs2_new_path_from_path(path);
293162306a36Sopenharmony_ci	if (!right_path) {
293262306a36Sopenharmony_ci		ret = -ENOMEM;
293362306a36Sopenharmony_ci		mlog_errno(ret);
293462306a36Sopenharmony_ci		goto out;
293562306a36Sopenharmony_ci	}
293662306a36Sopenharmony_ci
293762306a36Sopenharmony_ci	while (right_cpos) {
293862306a36Sopenharmony_ci		ret = ocfs2_find_path(et->et_ci, right_path, right_cpos);
293962306a36Sopenharmony_ci		if (ret) {
294062306a36Sopenharmony_ci			mlog_errno(ret);
294162306a36Sopenharmony_ci			goto out;
294262306a36Sopenharmony_ci		}
294362306a36Sopenharmony_ci
294462306a36Sopenharmony_ci		subtree_root = ocfs2_find_subtree_root(et, left_path,
294562306a36Sopenharmony_ci						       right_path);
294662306a36Sopenharmony_ci
294762306a36Sopenharmony_ci		trace_ocfs2_rotate_subtree(subtree_root,
294862306a36Sopenharmony_ci		     (unsigned long long)
294962306a36Sopenharmony_ci		     right_path->p_node[subtree_root].bh->b_blocknr,
295062306a36Sopenharmony_ci		     right_path->p_tree_depth);
295162306a36Sopenharmony_ci
295262306a36Sopenharmony_ci		ret = ocfs2_extend_rotate_transaction(handle, 0,
295362306a36Sopenharmony_ci						      orig_credits, left_path);
295462306a36Sopenharmony_ci		if (ret) {
295562306a36Sopenharmony_ci			mlog_errno(ret);
295662306a36Sopenharmony_ci			goto out;
295762306a36Sopenharmony_ci		}
295862306a36Sopenharmony_ci
295962306a36Sopenharmony_ci		/*
296062306a36Sopenharmony_ci		 * Caller might still want to make changes to the
296162306a36Sopenharmony_ci		 * tree root, so re-add it to the journal here.
296262306a36Sopenharmony_ci		 */
296362306a36Sopenharmony_ci		ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
296462306a36Sopenharmony_ci						   left_path, 0);
296562306a36Sopenharmony_ci		if (ret) {
296662306a36Sopenharmony_ci			mlog_errno(ret);
296762306a36Sopenharmony_ci			goto out;
296862306a36Sopenharmony_ci		}
296962306a36Sopenharmony_ci
297062306a36Sopenharmony_ci		ret = ocfs2_rotate_subtree_left(handle, et, left_path,
297162306a36Sopenharmony_ci						right_path, subtree_root,
297262306a36Sopenharmony_ci						dealloc, &deleted);
297362306a36Sopenharmony_ci		if (ret == -EAGAIN) {
297462306a36Sopenharmony_ci			/*
297562306a36Sopenharmony_ci			 * The rotation has to temporarily stop due to
297662306a36Sopenharmony_ci			 * the right subtree having an empty
297762306a36Sopenharmony_ci			 * extent. Pass it back to the caller for a
297862306a36Sopenharmony_ci			 * fixup.
297962306a36Sopenharmony_ci			 */
298062306a36Sopenharmony_ci			*empty_extent_path = right_path;
298162306a36Sopenharmony_ci			right_path = NULL;
298262306a36Sopenharmony_ci			goto out;
298362306a36Sopenharmony_ci		}
298462306a36Sopenharmony_ci		if (ret) {
298562306a36Sopenharmony_ci			mlog_errno(ret);
298662306a36Sopenharmony_ci			goto out;
298762306a36Sopenharmony_ci		}
298862306a36Sopenharmony_ci
298962306a36Sopenharmony_ci		/*
299062306a36Sopenharmony_ci		 * The subtree rotate might have removed records on
299162306a36Sopenharmony_ci		 * the rightmost edge. If so, then rotation is
299262306a36Sopenharmony_ci		 * complete.
299362306a36Sopenharmony_ci		 */
299462306a36Sopenharmony_ci		if (deleted)
299562306a36Sopenharmony_ci			break;
299662306a36Sopenharmony_ci
299762306a36Sopenharmony_ci		ocfs2_mv_path(left_path, right_path);
299862306a36Sopenharmony_ci
299962306a36Sopenharmony_ci		ret = ocfs2_find_cpos_for_right_leaf(sb, left_path,
300062306a36Sopenharmony_ci						     &right_cpos);
300162306a36Sopenharmony_ci		if (ret) {
300262306a36Sopenharmony_ci			mlog_errno(ret);
300362306a36Sopenharmony_ci			goto out;
300462306a36Sopenharmony_ci		}
300562306a36Sopenharmony_ci	}
300662306a36Sopenharmony_ci
300762306a36Sopenharmony_ciout:
300862306a36Sopenharmony_ci	ocfs2_free_path(right_path);
300962306a36Sopenharmony_ci	ocfs2_free_path(left_path);
301062306a36Sopenharmony_ci
301162306a36Sopenharmony_ci	return ret;
301262306a36Sopenharmony_ci}
301362306a36Sopenharmony_ci
301462306a36Sopenharmony_cistatic int ocfs2_remove_rightmost_path(handle_t *handle,
301562306a36Sopenharmony_ci				struct ocfs2_extent_tree *et,
301662306a36Sopenharmony_ci				struct ocfs2_path *path,
301762306a36Sopenharmony_ci				struct ocfs2_cached_dealloc_ctxt *dealloc)
301862306a36Sopenharmony_ci{
301962306a36Sopenharmony_ci	int ret, subtree_index;
302062306a36Sopenharmony_ci	u32 cpos;
302162306a36Sopenharmony_ci	struct ocfs2_path *left_path = NULL;
302262306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
302362306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
302462306a36Sopenharmony_ci
302562306a36Sopenharmony_ci	ret = ocfs2_et_sanity_check(et);
302662306a36Sopenharmony_ci	if (ret)
302762306a36Sopenharmony_ci		goto out;
302862306a36Sopenharmony_ci
302962306a36Sopenharmony_ci	ret = ocfs2_journal_access_path(et->et_ci, handle, path);
303062306a36Sopenharmony_ci	if (ret) {
303162306a36Sopenharmony_ci		mlog_errno(ret);
303262306a36Sopenharmony_ci		goto out;
303362306a36Sopenharmony_ci	}
303462306a36Sopenharmony_ci
303562306a36Sopenharmony_ci	ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
303662306a36Sopenharmony_ci					    path, &cpos);
303762306a36Sopenharmony_ci	if (ret) {
303862306a36Sopenharmony_ci		mlog_errno(ret);
303962306a36Sopenharmony_ci		goto out;
304062306a36Sopenharmony_ci	}
304162306a36Sopenharmony_ci
304262306a36Sopenharmony_ci	if (cpos) {
304362306a36Sopenharmony_ci		/*
304462306a36Sopenharmony_ci		 * We have a path to the left of this one - it needs
304562306a36Sopenharmony_ci		 * an update too.
304662306a36Sopenharmony_ci		 */
304762306a36Sopenharmony_ci		left_path = ocfs2_new_path_from_path(path);
304862306a36Sopenharmony_ci		if (!left_path) {
304962306a36Sopenharmony_ci			ret = -ENOMEM;
305062306a36Sopenharmony_ci			mlog_errno(ret);
305162306a36Sopenharmony_ci			goto out;
305262306a36Sopenharmony_ci		}
305362306a36Sopenharmony_ci
305462306a36Sopenharmony_ci		ret = ocfs2_find_path(et->et_ci, left_path, cpos);
305562306a36Sopenharmony_ci		if (ret) {
305662306a36Sopenharmony_ci			mlog_errno(ret);
305762306a36Sopenharmony_ci			goto out;
305862306a36Sopenharmony_ci		}
305962306a36Sopenharmony_ci
306062306a36Sopenharmony_ci		ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
306162306a36Sopenharmony_ci		if (ret) {
306262306a36Sopenharmony_ci			mlog_errno(ret);
306362306a36Sopenharmony_ci			goto out;
306462306a36Sopenharmony_ci		}
306562306a36Sopenharmony_ci
306662306a36Sopenharmony_ci		subtree_index = ocfs2_find_subtree_root(et, left_path, path);
306762306a36Sopenharmony_ci
306862306a36Sopenharmony_ci		ocfs2_unlink_subtree(handle, et, left_path, path,
306962306a36Sopenharmony_ci				     subtree_index, dealloc);
307062306a36Sopenharmony_ci		ret = ocfs2_update_edge_lengths(handle, et, left_path);
307162306a36Sopenharmony_ci		if (ret) {
307262306a36Sopenharmony_ci			mlog_errno(ret);
307362306a36Sopenharmony_ci			goto out;
307462306a36Sopenharmony_ci		}
307562306a36Sopenharmony_ci
307662306a36Sopenharmony_ci		eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
307762306a36Sopenharmony_ci		ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
307862306a36Sopenharmony_ci	} else {
307962306a36Sopenharmony_ci		/*
308062306a36Sopenharmony_ci		 * 'path' is also the leftmost path which
308162306a36Sopenharmony_ci		 * means it must be the only one. This gets
308262306a36Sopenharmony_ci		 * handled differently because we want to
308362306a36Sopenharmony_ci		 * revert the root back to having extents
308462306a36Sopenharmony_ci		 * in-line.
308562306a36Sopenharmony_ci		 */
308662306a36Sopenharmony_ci		ocfs2_unlink_path(handle, et, dealloc, path, 1);
308762306a36Sopenharmony_ci
308862306a36Sopenharmony_ci		el = et->et_root_el;
308962306a36Sopenharmony_ci		el->l_tree_depth = 0;
309062306a36Sopenharmony_ci		el->l_next_free_rec = 0;
309162306a36Sopenharmony_ci		memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
309262306a36Sopenharmony_ci
309362306a36Sopenharmony_ci		ocfs2_et_set_last_eb_blk(et, 0);
309462306a36Sopenharmony_ci	}
309562306a36Sopenharmony_ci
309662306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, path_root_bh(path));
309762306a36Sopenharmony_ci
309862306a36Sopenharmony_ciout:
309962306a36Sopenharmony_ci	ocfs2_free_path(left_path);
310062306a36Sopenharmony_ci	return ret;
310162306a36Sopenharmony_ci}
310262306a36Sopenharmony_ci
310362306a36Sopenharmony_cistatic int ocfs2_remove_rightmost_empty_extent(struct ocfs2_super *osb,
310462306a36Sopenharmony_ci				struct ocfs2_extent_tree *et,
310562306a36Sopenharmony_ci				struct ocfs2_path *path,
310662306a36Sopenharmony_ci				struct ocfs2_cached_dealloc_ctxt *dealloc)
310762306a36Sopenharmony_ci{
310862306a36Sopenharmony_ci	handle_t *handle;
310962306a36Sopenharmony_ci	int ret;
311062306a36Sopenharmony_ci	int credits = path->p_tree_depth * 2 + 1;
311162306a36Sopenharmony_ci
311262306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, credits);
311362306a36Sopenharmony_ci	if (IS_ERR(handle)) {
311462306a36Sopenharmony_ci		ret = PTR_ERR(handle);
311562306a36Sopenharmony_ci		mlog_errno(ret);
311662306a36Sopenharmony_ci		return ret;
311762306a36Sopenharmony_ci	}
311862306a36Sopenharmony_ci
311962306a36Sopenharmony_ci	ret = ocfs2_remove_rightmost_path(handle, et, path, dealloc);
312062306a36Sopenharmony_ci	if (ret)
312162306a36Sopenharmony_ci		mlog_errno(ret);
312262306a36Sopenharmony_ci
312362306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
312462306a36Sopenharmony_ci	return ret;
312562306a36Sopenharmony_ci}
312662306a36Sopenharmony_ci
312762306a36Sopenharmony_ci/*
312862306a36Sopenharmony_ci * Left rotation of btree records.
312962306a36Sopenharmony_ci *
313062306a36Sopenharmony_ci * In many ways, this is (unsurprisingly) the opposite of right
313162306a36Sopenharmony_ci * rotation. We start at some non-rightmost path containing an empty
313262306a36Sopenharmony_ci * extent in the leaf block. The code works its way to the rightmost
313362306a36Sopenharmony_ci * path by rotating records to the left in every subtree.
313462306a36Sopenharmony_ci *
313562306a36Sopenharmony_ci * This is used by any code which reduces the number of extent records
313662306a36Sopenharmony_ci * in a leaf. After removal, an empty record should be placed in the
313762306a36Sopenharmony_ci * leftmost list position.
313862306a36Sopenharmony_ci *
313962306a36Sopenharmony_ci * This won't handle a length update of the rightmost path records if
314062306a36Sopenharmony_ci * the rightmost tree leaf record is removed so the caller is
314162306a36Sopenharmony_ci * responsible for detecting and correcting that.
314262306a36Sopenharmony_ci */
314362306a36Sopenharmony_cistatic int ocfs2_rotate_tree_left(handle_t *handle,
314462306a36Sopenharmony_ci				  struct ocfs2_extent_tree *et,
314562306a36Sopenharmony_ci				  struct ocfs2_path *path,
314662306a36Sopenharmony_ci				  struct ocfs2_cached_dealloc_ctxt *dealloc)
314762306a36Sopenharmony_ci{
314862306a36Sopenharmony_ci	int ret, orig_credits = jbd2_handle_buffer_credits(handle);
314962306a36Sopenharmony_ci	struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
315062306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
315162306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
315262306a36Sopenharmony_ci
315362306a36Sopenharmony_ci	el = path_leaf_el(path);
315462306a36Sopenharmony_ci	if (!ocfs2_is_empty_extent(&el->l_recs[0]))
315562306a36Sopenharmony_ci		return 0;
315662306a36Sopenharmony_ci
315762306a36Sopenharmony_ci	if (path->p_tree_depth == 0) {
315862306a36Sopenharmony_cirightmost_no_delete:
315962306a36Sopenharmony_ci		/*
316062306a36Sopenharmony_ci		 * Inline extents. This is trivially handled, so do
316162306a36Sopenharmony_ci		 * it up front.
316262306a36Sopenharmony_ci		 */
316362306a36Sopenharmony_ci		ret = ocfs2_rotate_rightmost_leaf_left(handle, et, path);
316462306a36Sopenharmony_ci		if (ret)
316562306a36Sopenharmony_ci			mlog_errno(ret);
316662306a36Sopenharmony_ci		goto out;
316762306a36Sopenharmony_ci	}
316862306a36Sopenharmony_ci
316962306a36Sopenharmony_ci	/*
317062306a36Sopenharmony_ci	 * Handle rightmost branch now. There's several cases:
317162306a36Sopenharmony_ci	 *  1) simple rotation leaving records in there. That's trivial.
317262306a36Sopenharmony_ci	 *  2) rotation requiring a branch delete - there's no more
317362306a36Sopenharmony_ci	 *     records left. Two cases of this:
317462306a36Sopenharmony_ci	 *     a) There are branches to the left.
317562306a36Sopenharmony_ci	 *     b) This is also the leftmost (the only) branch.
317662306a36Sopenharmony_ci	 *
317762306a36Sopenharmony_ci	 *  1) is handled via ocfs2_rotate_rightmost_leaf_left()
317862306a36Sopenharmony_ci	 *  2a) we need the left branch so that we can update it with the unlink
317962306a36Sopenharmony_ci	 *  2b) we need to bring the root back to inline extents.
318062306a36Sopenharmony_ci	 */
318162306a36Sopenharmony_ci
318262306a36Sopenharmony_ci	eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
318362306a36Sopenharmony_ci	el = &eb->h_list;
318462306a36Sopenharmony_ci	if (eb->h_next_leaf_blk == 0) {
318562306a36Sopenharmony_ci		/*
318662306a36Sopenharmony_ci		 * This gets a bit tricky if we're going to delete the
318762306a36Sopenharmony_ci		 * rightmost path. Get the other cases out of the way
318862306a36Sopenharmony_ci		 * 1st.
318962306a36Sopenharmony_ci		 */
319062306a36Sopenharmony_ci		if (le16_to_cpu(el->l_next_free_rec) > 1)
319162306a36Sopenharmony_ci			goto rightmost_no_delete;
319262306a36Sopenharmony_ci
319362306a36Sopenharmony_ci		if (le16_to_cpu(el->l_next_free_rec) == 0) {
319462306a36Sopenharmony_ci			ret = ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
319562306a36Sopenharmony_ci					"Owner %llu has empty extent block at %llu\n",
319662306a36Sopenharmony_ci					(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
319762306a36Sopenharmony_ci					(unsigned long long)le64_to_cpu(eb->h_blkno));
319862306a36Sopenharmony_ci			goto out;
319962306a36Sopenharmony_ci		}
320062306a36Sopenharmony_ci
320162306a36Sopenharmony_ci		/*
320262306a36Sopenharmony_ci		 * XXX: The caller can not trust "path" any more after
320362306a36Sopenharmony_ci		 * this as it will have been deleted. What do we do?
320462306a36Sopenharmony_ci		 *
320562306a36Sopenharmony_ci		 * In theory the rotate-for-merge code will never get
320662306a36Sopenharmony_ci		 * here because it'll always ask for a rotate in a
320762306a36Sopenharmony_ci		 * nonempty list.
320862306a36Sopenharmony_ci		 */
320962306a36Sopenharmony_ci
321062306a36Sopenharmony_ci		ret = ocfs2_remove_rightmost_path(handle, et, path,
321162306a36Sopenharmony_ci						  dealloc);
321262306a36Sopenharmony_ci		if (ret)
321362306a36Sopenharmony_ci			mlog_errno(ret);
321462306a36Sopenharmony_ci		goto out;
321562306a36Sopenharmony_ci	}
321662306a36Sopenharmony_ci
321762306a36Sopenharmony_ci	/*
321862306a36Sopenharmony_ci	 * Now we can loop, remembering the path we get from -EAGAIN
321962306a36Sopenharmony_ci	 * and restarting from there.
322062306a36Sopenharmony_ci	 */
322162306a36Sopenharmony_citry_rotate:
322262306a36Sopenharmony_ci	ret = __ocfs2_rotate_tree_left(handle, et, orig_credits, path,
322362306a36Sopenharmony_ci				       dealloc, &restart_path);
322462306a36Sopenharmony_ci	if (ret && ret != -EAGAIN) {
322562306a36Sopenharmony_ci		mlog_errno(ret);
322662306a36Sopenharmony_ci		goto out;
322762306a36Sopenharmony_ci	}
322862306a36Sopenharmony_ci
322962306a36Sopenharmony_ci	while (ret == -EAGAIN) {
323062306a36Sopenharmony_ci		tmp_path = restart_path;
323162306a36Sopenharmony_ci		restart_path = NULL;
323262306a36Sopenharmony_ci
323362306a36Sopenharmony_ci		ret = __ocfs2_rotate_tree_left(handle, et, orig_credits,
323462306a36Sopenharmony_ci					       tmp_path, dealloc,
323562306a36Sopenharmony_ci					       &restart_path);
323662306a36Sopenharmony_ci		if (ret && ret != -EAGAIN) {
323762306a36Sopenharmony_ci			mlog_errno(ret);
323862306a36Sopenharmony_ci			goto out;
323962306a36Sopenharmony_ci		}
324062306a36Sopenharmony_ci
324162306a36Sopenharmony_ci		ocfs2_free_path(tmp_path);
324262306a36Sopenharmony_ci		tmp_path = NULL;
324362306a36Sopenharmony_ci
324462306a36Sopenharmony_ci		if (ret == 0)
324562306a36Sopenharmony_ci			goto try_rotate;
324662306a36Sopenharmony_ci	}
324762306a36Sopenharmony_ci
324862306a36Sopenharmony_ciout:
324962306a36Sopenharmony_ci	ocfs2_free_path(tmp_path);
325062306a36Sopenharmony_ci	ocfs2_free_path(restart_path);
325162306a36Sopenharmony_ci	return ret;
325262306a36Sopenharmony_ci}
325362306a36Sopenharmony_ci
325462306a36Sopenharmony_cistatic void ocfs2_cleanup_merge(struct ocfs2_extent_list *el,
325562306a36Sopenharmony_ci				int index)
325662306a36Sopenharmony_ci{
325762306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec = &el->l_recs[index];
325862306a36Sopenharmony_ci	unsigned int size;
325962306a36Sopenharmony_ci
326062306a36Sopenharmony_ci	if (rec->e_leaf_clusters == 0) {
326162306a36Sopenharmony_ci		/*
326262306a36Sopenharmony_ci		 * We consumed all of the merged-from record. An empty
326362306a36Sopenharmony_ci		 * extent cannot exist anywhere but the 1st array
326462306a36Sopenharmony_ci		 * position, so move things over if the merged-from
326562306a36Sopenharmony_ci		 * record doesn't occupy that position.
326662306a36Sopenharmony_ci		 *
326762306a36Sopenharmony_ci		 * This creates a new empty extent so the caller
326862306a36Sopenharmony_ci		 * should be smart enough to have removed any existing
326962306a36Sopenharmony_ci		 * ones.
327062306a36Sopenharmony_ci		 */
327162306a36Sopenharmony_ci		if (index > 0) {
327262306a36Sopenharmony_ci			BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0]));
327362306a36Sopenharmony_ci			size = index * sizeof(struct ocfs2_extent_rec);
327462306a36Sopenharmony_ci			memmove(&el->l_recs[1], &el->l_recs[0], size);
327562306a36Sopenharmony_ci		}
327662306a36Sopenharmony_ci
327762306a36Sopenharmony_ci		/*
327862306a36Sopenharmony_ci		 * Always memset - the caller doesn't check whether it
327962306a36Sopenharmony_ci		 * created an empty extent, so there could be junk in
328062306a36Sopenharmony_ci		 * the other fields.
328162306a36Sopenharmony_ci		 */
328262306a36Sopenharmony_ci		memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
328362306a36Sopenharmony_ci	}
328462306a36Sopenharmony_ci}
328562306a36Sopenharmony_ci
328662306a36Sopenharmony_cistatic int ocfs2_get_right_path(struct ocfs2_extent_tree *et,
328762306a36Sopenharmony_ci				struct ocfs2_path *left_path,
328862306a36Sopenharmony_ci				struct ocfs2_path **ret_right_path)
328962306a36Sopenharmony_ci{
329062306a36Sopenharmony_ci	int ret;
329162306a36Sopenharmony_ci	u32 right_cpos;
329262306a36Sopenharmony_ci	struct ocfs2_path *right_path = NULL;
329362306a36Sopenharmony_ci	struct ocfs2_extent_list *left_el;
329462306a36Sopenharmony_ci
329562306a36Sopenharmony_ci	*ret_right_path = NULL;
329662306a36Sopenharmony_ci
329762306a36Sopenharmony_ci	/* This function shouldn't be called for non-trees. */
329862306a36Sopenharmony_ci	BUG_ON(left_path->p_tree_depth == 0);
329962306a36Sopenharmony_ci
330062306a36Sopenharmony_ci	left_el = path_leaf_el(left_path);
330162306a36Sopenharmony_ci	BUG_ON(left_el->l_next_free_rec != left_el->l_count);
330262306a36Sopenharmony_ci
330362306a36Sopenharmony_ci	ret = ocfs2_find_cpos_for_right_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
330462306a36Sopenharmony_ci					     left_path, &right_cpos);
330562306a36Sopenharmony_ci	if (ret) {
330662306a36Sopenharmony_ci		mlog_errno(ret);
330762306a36Sopenharmony_ci		goto out;
330862306a36Sopenharmony_ci	}
330962306a36Sopenharmony_ci
331062306a36Sopenharmony_ci	/* This function shouldn't be called for the rightmost leaf. */
331162306a36Sopenharmony_ci	BUG_ON(right_cpos == 0);
331262306a36Sopenharmony_ci
331362306a36Sopenharmony_ci	right_path = ocfs2_new_path_from_path(left_path);
331462306a36Sopenharmony_ci	if (!right_path) {
331562306a36Sopenharmony_ci		ret = -ENOMEM;
331662306a36Sopenharmony_ci		mlog_errno(ret);
331762306a36Sopenharmony_ci		goto out;
331862306a36Sopenharmony_ci	}
331962306a36Sopenharmony_ci
332062306a36Sopenharmony_ci	ret = ocfs2_find_path(et->et_ci, right_path, right_cpos);
332162306a36Sopenharmony_ci	if (ret) {
332262306a36Sopenharmony_ci		mlog_errno(ret);
332362306a36Sopenharmony_ci		goto out;
332462306a36Sopenharmony_ci	}
332562306a36Sopenharmony_ci
332662306a36Sopenharmony_ci	*ret_right_path = right_path;
332762306a36Sopenharmony_ciout:
332862306a36Sopenharmony_ci	if (ret)
332962306a36Sopenharmony_ci		ocfs2_free_path(right_path);
333062306a36Sopenharmony_ci	return ret;
333162306a36Sopenharmony_ci}
333262306a36Sopenharmony_ci
333362306a36Sopenharmony_ci/*
333462306a36Sopenharmony_ci * Remove split_rec clusters from the record at index and merge them
333562306a36Sopenharmony_ci * onto the beginning of the record "next" to it.
333662306a36Sopenharmony_ci * For index < l_count - 1, the next means the extent rec at index + 1.
333762306a36Sopenharmony_ci * For index == l_count - 1, the "next" means the 1st extent rec of the
333862306a36Sopenharmony_ci * next extent block.
333962306a36Sopenharmony_ci */
334062306a36Sopenharmony_cistatic int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
334162306a36Sopenharmony_ci				 handle_t *handle,
334262306a36Sopenharmony_ci				 struct ocfs2_extent_tree *et,
334362306a36Sopenharmony_ci				 struct ocfs2_extent_rec *split_rec,
334462306a36Sopenharmony_ci				 int index)
334562306a36Sopenharmony_ci{
334662306a36Sopenharmony_ci	int ret, next_free, i;
334762306a36Sopenharmony_ci	unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
334862306a36Sopenharmony_ci	struct ocfs2_extent_rec *left_rec;
334962306a36Sopenharmony_ci	struct ocfs2_extent_rec *right_rec;
335062306a36Sopenharmony_ci	struct ocfs2_extent_list *right_el;
335162306a36Sopenharmony_ci	struct ocfs2_path *right_path = NULL;
335262306a36Sopenharmony_ci	int subtree_index = 0;
335362306a36Sopenharmony_ci	struct ocfs2_extent_list *el = path_leaf_el(left_path);
335462306a36Sopenharmony_ci	struct buffer_head *bh = path_leaf_bh(left_path);
335562306a36Sopenharmony_ci	struct buffer_head *root_bh = NULL;
335662306a36Sopenharmony_ci
335762306a36Sopenharmony_ci	BUG_ON(index >= le16_to_cpu(el->l_next_free_rec));
335862306a36Sopenharmony_ci	left_rec = &el->l_recs[index];
335962306a36Sopenharmony_ci
336062306a36Sopenharmony_ci	if (index == le16_to_cpu(el->l_next_free_rec) - 1 &&
336162306a36Sopenharmony_ci	    le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) {
336262306a36Sopenharmony_ci		/* we meet with a cross extent block merge. */
336362306a36Sopenharmony_ci		ret = ocfs2_get_right_path(et, left_path, &right_path);
336462306a36Sopenharmony_ci		if (ret) {
336562306a36Sopenharmony_ci			mlog_errno(ret);
336662306a36Sopenharmony_ci			return ret;
336762306a36Sopenharmony_ci		}
336862306a36Sopenharmony_ci
336962306a36Sopenharmony_ci		right_el = path_leaf_el(right_path);
337062306a36Sopenharmony_ci		next_free = le16_to_cpu(right_el->l_next_free_rec);
337162306a36Sopenharmony_ci		BUG_ON(next_free <= 0);
337262306a36Sopenharmony_ci		right_rec = &right_el->l_recs[0];
337362306a36Sopenharmony_ci		if (ocfs2_is_empty_extent(right_rec)) {
337462306a36Sopenharmony_ci			BUG_ON(next_free <= 1);
337562306a36Sopenharmony_ci			right_rec = &right_el->l_recs[1];
337662306a36Sopenharmony_ci		}
337762306a36Sopenharmony_ci
337862306a36Sopenharmony_ci		BUG_ON(le32_to_cpu(left_rec->e_cpos) +
337962306a36Sopenharmony_ci		       le16_to_cpu(left_rec->e_leaf_clusters) !=
338062306a36Sopenharmony_ci		       le32_to_cpu(right_rec->e_cpos));
338162306a36Sopenharmony_ci
338262306a36Sopenharmony_ci		subtree_index = ocfs2_find_subtree_root(et, left_path,
338362306a36Sopenharmony_ci							right_path);
338462306a36Sopenharmony_ci
338562306a36Sopenharmony_ci		ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
338662306a36Sopenharmony_ci					jbd2_handle_buffer_credits(handle),
338762306a36Sopenharmony_ci					right_path);
338862306a36Sopenharmony_ci		if (ret) {
338962306a36Sopenharmony_ci			mlog_errno(ret);
339062306a36Sopenharmony_ci			goto out;
339162306a36Sopenharmony_ci		}
339262306a36Sopenharmony_ci
339362306a36Sopenharmony_ci		root_bh = left_path->p_node[subtree_index].bh;
339462306a36Sopenharmony_ci		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
339562306a36Sopenharmony_ci
339662306a36Sopenharmony_ci		ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
339762306a36Sopenharmony_ci						   subtree_index);
339862306a36Sopenharmony_ci		if (ret) {
339962306a36Sopenharmony_ci			mlog_errno(ret);
340062306a36Sopenharmony_ci			goto out;
340162306a36Sopenharmony_ci		}
340262306a36Sopenharmony_ci
340362306a36Sopenharmony_ci		for (i = subtree_index + 1;
340462306a36Sopenharmony_ci		     i < path_num_items(right_path); i++) {
340562306a36Sopenharmony_ci			ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
340662306a36Sopenharmony_ci							   right_path, i);
340762306a36Sopenharmony_ci			if (ret) {
340862306a36Sopenharmony_ci				mlog_errno(ret);
340962306a36Sopenharmony_ci				goto out;
341062306a36Sopenharmony_ci			}
341162306a36Sopenharmony_ci
341262306a36Sopenharmony_ci			ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
341362306a36Sopenharmony_ci							   left_path, i);
341462306a36Sopenharmony_ci			if (ret) {
341562306a36Sopenharmony_ci				mlog_errno(ret);
341662306a36Sopenharmony_ci				goto out;
341762306a36Sopenharmony_ci			}
341862306a36Sopenharmony_ci		}
341962306a36Sopenharmony_ci
342062306a36Sopenharmony_ci	} else {
342162306a36Sopenharmony_ci		BUG_ON(index == le16_to_cpu(el->l_next_free_rec) - 1);
342262306a36Sopenharmony_ci		right_rec = &el->l_recs[index + 1];
342362306a36Sopenharmony_ci	}
342462306a36Sopenharmony_ci
342562306a36Sopenharmony_ci	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, left_path,
342662306a36Sopenharmony_ci					   path_num_items(left_path) - 1);
342762306a36Sopenharmony_ci	if (ret) {
342862306a36Sopenharmony_ci		mlog_errno(ret);
342962306a36Sopenharmony_ci		goto out;
343062306a36Sopenharmony_ci	}
343162306a36Sopenharmony_ci
343262306a36Sopenharmony_ci	le16_add_cpu(&left_rec->e_leaf_clusters, -split_clusters);
343362306a36Sopenharmony_ci
343462306a36Sopenharmony_ci	le32_add_cpu(&right_rec->e_cpos, -split_clusters);
343562306a36Sopenharmony_ci	le64_add_cpu(&right_rec->e_blkno,
343662306a36Sopenharmony_ci		     -ocfs2_clusters_to_blocks(ocfs2_metadata_cache_get_super(et->et_ci),
343762306a36Sopenharmony_ci					       split_clusters));
343862306a36Sopenharmony_ci	le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters);
343962306a36Sopenharmony_ci
344062306a36Sopenharmony_ci	ocfs2_cleanup_merge(el, index);
344162306a36Sopenharmony_ci
344262306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, bh);
344362306a36Sopenharmony_ci	if (right_path) {
344462306a36Sopenharmony_ci		ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
344562306a36Sopenharmony_ci		ocfs2_complete_edge_insert(handle, left_path, right_path,
344662306a36Sopenharmony_ci					   subtree_index);
344762306a36Sopenharmony_ci	}
344862306a36Sopenharmony_ciout:
344962306a36Sopenharmony_ci	ocfs2_free_path(right_path);
345062306a36Sopenharmony_ci	return ret;
345162306a36Sopenharmony_ci}
345262306a36Sopenharmony_ci
345362306a36Sopenharmony_cistatic int ocfs2_get_left_path(struct ocfs2_extent_tree *et,
345462306a36Sopenharmony_ci			       struct ocfs2_path *right_path,
345562306a36Sopenharmony_ci			       struct ocfs2_path **ret_left_path)
345662306a36Sopenharmony_ci{
345762306a36Sopenharmony_ci	int ret;
345862306a36Sopenharmony_ci	u32 left_cpos;
345962306a36Sopenharmony_ci	struct ocfs2_path *left_path = NULL;
346062306a36Sopenharmony_ci
346162306a36Sopenharmony_ci	*ret_left_path = NULL;
346262306a36Sopenharmony_ci
346362306a36Sopenharmony_ci	/* This function shouldn't be called for non-trees. */
346462306a36Sopenharmony_ci	BUG_ON(right_path->p_tree_depth == 0);
346562306a36Sopenharmony_ci
346662306a36Sopenharmony_ci	ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
346762306a36Sopenharmony_ci					    right_path, &left_cpos);
346862306a36Sopenharmony_ci	if (ret) {
346962306a36Sopenharmony_ci		mlog_errno(ret);
347062306a36Sopenharmony_ci		goto out;
347162306a36Sopenharmony_ci	}
347262306a36Sopenharmony_ci
347362306a36Sopenharmony_ci	/* This function shouldn't be called for the leftmost leaf. */
347462306a36Sopenharmony_ci	BUG_ON(left_cpos == 0);
347562306a36Sopenharmony_ci
347662306a36Sopenharmony_ci	left_path = ocfs2_new_path_from_path(right_path);
347762306a36Sopenharmony_ci	if (!left_path) {
347862306a36Sopenharmony_ci		ret = -ENOMEM;
347962306a36Sopenharmony_ci		mlog_errno(ret);
348062306a36Sopenharmony_ci		goto out;
348162306a36Sopenharmony_ci	}
348262306a36Sopenharmony_ci
348362306a36Sopenharmony_ci	ret = ocfs2_find_path(et->et_ci, left_path, left_cpos);
348462306a36Sopenharmony_ci	if (ret) {
348562306a36Sopenharmony_ci		mlog_errno(ret);
348662306a36Sopenharmony_ci		goto out;
348762306a36Sopenharmony_ci	}
348862306a36Sopenharmony_ci
348962306a36Sopenharmony_ci	*ret_left_path = left_path;
349062306a36Sopenharmony_ciout:
349162306a36Sopenharmony_ci	if (ret)
349262306a36Sopenharmony_ci		ocfs2_free_path(left_path);
349362306a36Sopenharmony_ci	return ret;
349462306a36Sopenharmony_ci}
349562306a36Sopenharmony_ci
349662306a36Sopenharmony_ci/*
349762306a36Sopenharmony_ci * Remove split_rec clusters from the record at index and merge them
349862306a36Sopenharmony_ci * onto the tail of the record "before" it.
349962306a36Sopenharmony_ci * For index > 0, the "before" means the extent rec at index - 1.
350062306a36Sopenharmony_ci *
350162306a36Sopenharmony_ci * For index == 0, the "before" means the last record of the previous
350262306a36Sopenharmony_ci * extent block. And there is also a situation that we may need to
350362306a36Sopenharmony_ci * remove the rightmost leaf extent block in the right_path and change
350462306a36Sopenharmony_ci * the right path to indicate the new rightmost path.
350562306a36Sopenharmony_ci */
350662306a36Sopenharmony_cistatic int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
350762306a36Sopenharmony_ci				handle_t *handle,
350862306a36Sopenharmony_ci				struct ocfs2_extent_tree *et,
350962306a36Sopenharmony_ci				struct ocfs2_extent_rec *split_rec,
351062306a36Sopenharmony_ci				struct ocfs2_cached_dealloc_ctxt *dealloc,
351162306a36Sopenharmony_ci				int index)
351262306a36Sopenharmony_ci{
351362306a36Sopenharmony_ci	int ret, i, subtree_index = 0, has_empty_extent = 0;
351462306a36Sopenharmony_ci	unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
351562306a36Sopenharmony_ci	struct ocfs2_extent_rec *left_rec;
351662306a36Sopenharmony_ci	struct ocfs2_extent_rec *right_rec;
351762306a36Sopenharmony_ci	struct ocfs2_extent_list *el = path_leaf_el(right_path);
351862306a36Sopenharmony_ci	struct buffer_head *bh = path_leaf_bh(right_path);
351962306a36Sopenharmony_ci	struct buffer_head *root_bh = NULL;
352062306a36Sopenharmony_ci	struct ocfs2_path *left_path = NULL;
352162306a36Sopenharmony_ci	struct ocfs2_extent_list *left_el;
352262306a36Sopenharmony_ci
352362306a36Sopenharmony_ci	BUG_ON(index < 0);
352462306a36Sopenharmony_ci
352562306a36Sopenharmony_ci	right_rec = &el->l_recs[index];
352662306a36Sopenharmony_ci	if (index == 0) {
352762306a36Sopenharmony_ci		/* we meet with a cross extent block merge. */
352862306a36Sopenharmony_ci		ret = ocfs2_get_left_path(et, right_path, &left_path);
352962306a36Sopenharmony_ci		if (ret) {
353062306a36Sopenharmony_ci			mlog_errno(ret);
353162306a36Sopenharmony_ci			return ret;
353262306a36Sopenharmony_ci		}
353362306a36Sopenharmony_ci
353462306a36Sopenharmony_ci		left_el = path_leaf_el(left_path);
353562306a36Sopenharmony_ci		BUG_ON(le16_to_cpu(left_el->l_next_free_rec) !=
353662306a36Sopenharmony_ci		       le16_to_cpu(left_el->l_count));
353762306a36Sopenharmony_ci
353862306a36Sopenharmony_ci		left_rec = &left_el->l_recs[
353962306a36Sopenharmony_ci				le16_to_cpu(left_el->l_next_free_rec) - 1];
354062306a36Sopenharmony_ci		BUG_ON(le32_to_cpu(left_rec->e_cpos) +
354162306a36Sopenharmony_ci		       le16_to_cpu(left_rec->e_leaf_clusters) !=
354262306a36Sopenharmony_ci		       le32_to_cpu(split_rec->e_cpos));
354362306a36Sopenharmony_ci
354462306a36Sopenharmony_ci		subtree_index = ocfs2_find_subtree_root(et, left_path,
354562306a36Sopenharmony_ci							right_path);
354662306a36Sopenharmony_ci
354762306a36Sopenharmony_ci		ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
354862306a36Sopenharmony_ci					jbd2_handle_buffer_credits(handle),
354962306a36Sopenharmony_ci					left_path);
355062306a36Sopenharmony_ci		if (ret) {
355162306a36Sopenharmony_ci			mlog_errno(ret);
355262306a36Sopenharmony_ci			goto out;
355362306a36Sopenharmony_ci		}
355462306a36Sopenharmony_ci
355562306a36Sopenharmony_ci		root_bh = left_path->p_node[subtree_index].bh;
355662306a36Sopenharmony_ci		BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
355762306a36Sopenharmony_ci
355862306a36Sopenharmony_ci		ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
355962306a36Sopenharmony_ci						   subtree_index);
356062306a36Sopenharmony_ci		if (ret) {
356162306a36Sopenharmony_ci			mlog_errno(ret);
356262306a36Sopenharmony_ci			goto out;
356362306a36Sopenharmony_ci		}
356462306a36Sopenharmony_ci
356562306a36Sopenharmony_ci		for (i = subtree_index + 1;
356662306a36Sopenharmony_ci		     i < path_num_items(right_path); i++) {
356762306a36Sopenharmony_ci			ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
356862306a36Sopenharmony_ci							   right_path, i);
356962306a36Sopenharmony_ci			if (ret) {
357062306a36Sopenharmony_ci				mlog_errno(ret);
357162306a36Sopenharmony_ci				goto out;
357262306a36Sopenharmony_ci			}
357362306a36Sopenharmony_ci
357462306a36Sopenharmony_ci			ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
357562306a36Sopenharmony_ci							   left_path, i);
357662306a36Sopenharmony_ci			if (ret) {
357762306a36Sopenharmony_ci				mlog_errno(ret);
357862306a36Sopenharmony_ci				goto out;
357962306a36Sopenharmony_ci			}
358062306a36Sopenharmony_ci		}
358162306a36Sopenharmony_ci	} else {
358262306a36Sopenharmony_ci		left_rec = &el->l_recs[index - 1];
358362306a36Sopenharmony_ci		if (ocfs2_is_empty_extent(&el->l_recs[0]))
358462306a36Sopenharmony_ci			has_empty_extent = 1;
358562306a36Sopenharmony_ci	}
358662306a36Sopenharmony_ci
358762306a36Sopenharmony_ci	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
358862306a36Sopenharmony_ci					   path_num_items(right_path) - 1);
358962306a36Sopenharmony_ci	if (ret) {
359062306a36Sopenharmony_ci		mlog_errno(ret);
359162306a36Sopenharmony_ci		goto out;
359262306a36Sopenharmony_ci	}
359362306a36Sopenharmony_ci
359462306a36Sopenharmony_ci	if (has_empty_extent && index == 1) {
359562306a36Sopenharmony_ci		/*
359662306a36Sopenharmony_ci		 * The easy case - we can just plop the record right in.
359762306a36Sopenharmony_ci		 */
359862306a36Sopenharmony_ci		*left_rec = *split_rec;
359962306a36Sopenharmony_ci	} else
360062306a36Sopenharmony_ci		le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters);
360162306a36Sopenharmony_ci
360262306a36Sopenharmony_ci	le32_add_cpu(&right_rec->e_cpos, split_clusters);
360362306a36Sopenharmony_ci	le64_add_cpu(&right_rec->e_blkno,
360462306a36Sopenharmony_ci		     ocfs2_clusters_to_blocks(ocfs2_metadata_cache_get_super(et->et_ci),
360562306a36Sopenharmony_ci					      split_clusters));
360662306a36Sopenharmony_ci	le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters);
360762306a36Sopenharmony_ci
360862306a36Sopenharmony_ci	ocfs2_cleanup_merge(el, index);
360962306a36Sopenharmony_ci
361062306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, bh);
361162306a36Sopenharmony_ci	if (left_path) {
361262306a36Sopenharmony_ci		ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
361362306a36Sopenharmony_ci
361462306a36Sopenharmony_ci		/*
361562306a36Sopenharmony_ci		 * In the situation that the right_rec is empty and the extent
361662306a36Sopenharmony_ci		 * block is empty also,  ocfs2_complete_edge_insert can't handle
361762306a36Sopenharmony_ci		 * it and we need to delete the right extent block.
361862306a36Sopenharmony_ci		 */
361962306a36Sopenharmony_ci		if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 &&
362062306a36Sopenharmony_ci		    le16_to_cpu(el->l_next_free_rec) == 1) {
362162306a36Sopenharmony_ci			/* extend credit for ocfs2_remove_rightmost_path */
362262306a36Sopenharmony_ci			ret = ocfs2_extend_rotate_transaction(handle, 0,
362362306a36Sopenharmony_ci					jbd2_handle_buffer_credits(handle),
362462306a36Sopenharmony_ci					right_path);
362562306a36Sopenharmony_ci			if (ret) {
362662306a36Sopenharmony_ci				mlog_errno(ret);
362762306a36Sopenharmony_ci				goto out;
362862306a36Sopenharmony_ci			}
362962306a36Sopenharmony_ci
363062306a36Sopenharmony_ci			ret = ocfs2_remove_rightmost_path(handle, et,
363162306a36Sopenharmony_ci							  right_path,
363262306a36Sopenharmony_ci							  dealloc);
363362306a36Sopenharmony_ci			if (ret) {
363462306a36Sopenharmony_ci				mlog_errno(ret);
363562306a36Sopenharmony_ci				goto out;
363662306a36Sopenharmony_ci			}
363762306a36Sopenharmony_ci
363862306a36Sopenharmony_ci			/* Now the rightmost extent block has been deleted.
363962306a36Sopenharmony_ci			 * So we use the new rightmost path.
364062306a36Sopenharmony_ci			 */
364162306a36Sopenharmony_ci			ocfs2_mv_path(right_path, left_path);
364262306a36Sopenharmony_ci			left_path = NULL;
364362306a36Sopenharmony_ci		} else
364462306a36Sopenharmony_ci			ocfs2_complete_edge_insert(handle, left_path,
364562306a36Sopenharmony_ci						   right_path, subtree_index);
364662306a36Sopenharmony_ci	}
364762306a36Sopenharmony_ciout:
364862306a36Sopenharmony_ci	ocfs2_free_path(left_path);
364962306a36Sopenharmony_ci	return ret;
365062306a36Sopenharmony_ci}
365162306a36Sopenharmony_ci
365262306a36Sopenharmony_cistatic int ocfs2_try_to_merge_extent(handle_t *handle,
365362306a36Sopenharmony_ci				     struct ocfs2_extent_tree *et,
365462306a36Sopenharmony_ci				     struct ocfs2_path *path,
365562306a36Sopenharmony_ci				     int split_index,
365662306a36Sopenharmony_ci				     struct ocfs2_extent_rec *split_rec,
365762306a36Sopenharmony_ci				     struct ocfs2_cached_dealloc_ctxt *dealloc,
365862306a36Sopenharmony_ci				     struct ocfs2_merge_ctxt *ctxt)
365962306a36Sopenharmony_ci{
366062306a36Sopenharmony_ci	int ret = 0;
366162306a36Sopenharmony_ci	struct ocfs2_extent_list *el = path_leaf_el(path);
366262306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
366362306a36Sopenharmony_ci
366462306a36Sopenharmony_ci	BUG_ON(ctxt->c_contig_type == CONTIG_NONE);
366562306a36Sopenharmony_ci
366662306a36Sopenharmony_ci	if (ctxt->c_split_covers_rec && ctxt->c_has_empty_extent) {
366762306a36Sopenharmony_ci		/* extend credit for ocfs2_remove_rightmost_path */
366862306a36Sopenharmony_ci		ret = ocfs2_extend_rotate_transaction(handle, 0,
366962306a36Sopenharmony_ci				jbd2_handle_buffer_credits(handle),
367062306a36Sopenharmony_ci				path);
367162306a36Sopenharmony_ci		if (ret) {
367262306a36Sopenharmony_ci			mlog_errno(ret);
367362306a36Sopenharmony_ci			goto out;
367462306a36Sopenharmony_ci		}
367562306a36Sopenharmony_ci		/*
367662306a36Sopenharmony_ci		 * The merge code will need to create an empty
367762306a36Sopenharmony_ci		 * extent to take the place of the newly
367862306a36Sopenharmony_ci		 * emptied slot. Remove any pre-existing empty
367962306a36Sopenharmony_ci		 * extents - having more than one in a leaf is
368062306a36Sopenharmony_ci		 * illegal.
368162306a36Sopenharmony_ci		 */
368262306a36Sopenharmony_ci		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
368362306a36Sopenharmony_ci		if (ret) {
368462306a36Sopenharmony_ci			mlog_errno(ret);
368562306a36Sopenharmony_ci			goto out;
368662306a36Sopenharmony_ci		}
368762306a36Sopenharmony_ci		split_index--;
368862306a36Sopenharmony_ci		rec = &el->l_recs[split_index];
368962306a36Sopenharmony_ci	}
369062306a36Sopenharmony_ci
369162306a36Sopenharmony_ci	if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) {
369262306a36Sopenharmony_ci		/*
369362306a36Sopenharmony_ci		 * Left-right contig implies this.
369462306a36Sopenharmony_ci		 */
369562306a36Sopenharmony_ci		BUG_ON(!ctxt->c_split_covers_rec);
369662306a36Sopenharmony_ci
369762306a36Sopenharmony_ci		/*
369862306a36Sopenharmony_ci		 * Since the leftright insert always covers the entire
369962306a36Sopenharmony_ci		 * extent, this call will delete the insert record
370062306a36Sopenharmony_ci		 * entirely, resulting in an empty extent record added to
370162306a36Sopenharmony_ci		 * the extent block.
370262306a36Sopenharmony_ci		 *
370362306a36Sopenharmony_ci		 * Since the adding of an empty extent shifts
370462306a36Sopenharmony_ci		 * everything back to the right, there's no need to
370562306a36Sopenharmony_ci		 * update split_index here.
370662306a36Sopenharmony_ci		 *
370762306a36Sopenharmony_ci		 * When the split_index is zero, we need to merge it to the
370862306a36Sopenharmony_ci		 * prevoius extent block. It is more efficient and easier
370962306a36Sopenharmony_ci		 * if we do merge_right first and merge_left later.
371062306a36Sopenharmony_ci		 */
371162306a36Sopenharmony_ci		ret = ocfs2_merge_rec_right(path, handle, et, split_rec,
371262306a36Sopenharmony_ci					    split_index);
371362306a36Sopenharmony_ci		if (ret) {
371462306a36Sopenharmony_ci			mlog_errno(ret);
371562306a36Sopenharmony_ci			goto out;
371662306a36Sopenharmony_ci		}
371762306a36Sopenharmony_ci
371862306a36Sopenharmony_ci		/*
371962306a36Sopenharmony_ci		 * We can only get this from logic error above.
372062306a36Sopenharmony_ci		 */
372162306a36Sopenharmony_ci		BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
372262306a36Sopenharmony_ci
372362306a36Sopenharmony_ci		/* extend credit for ocfs2_remove_rightmost_path */
372462306a36Sopenharmony_ci		ret = ocfs2_extend_rotate_transaction(handle, 0,
372562306a36Sopenharmony_ci					jbd2_handle_buffer_credits(handle),
372662306a36Sopenharmony_ci					path);
372762306a36Sopenharmony_ci		if (ret) {
372862306a36Sopenharmony_ci			mlog_errno(ret);
372962306a36Sopenharmony_ci			goto out;
373062306a36Sopenharmony_ci		}
373162306a36Sopenharmony_ci
373262306a36Sopenharmony_ci		/* The merge left us with an empty extent, remove it. */
373362306a36Sopenharmony_ci		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
373462306a36Sopenharmony_ci		if (ret) {
373562306a36Sopenharmony_ci			mlog_errno(ret);
373662306a36Sopenharmony_ci			goto out;
373762306a36Sopenharmony_ci		}
373862306a36Sopenharmony_ci
373962306a36Sopenharmony_ci		rec = &el->l_recs[split_index];
374062306a36Sopenharmony_ci
374162306a36Sopenharmony_ci		/*
374262306a36Sopenharmony_ci		 * Note that we don't pass split_rec here on purpose -
374362306a36Sopenharmony_ci		 * we've merged it into the rec already.
374462306a36Sopenharmony_ci		 */
374562306a36Sopenharmony_ci		ret = ocfs2_merge_rec_left(path, handle, et, rec,
374662306a36Sopenharmony_ci					   dealloc, split_index);
374762306a36Sopenharmony_ci
374862306a36Sopenharmony_ci		if (ret) {
374962306a36Sopenharmony_ci			mlog_errno(ret);
375062306a36Sopenharmony_ci			goto out;
375162306a36Sopenharmony_ci		}
375262306a36Sopenharmony_ci
375362306a36Sopenharmony_ci		/* extend credit for ocfs2_remove_rightmost_path */
375462306a36Sopenharmony_ci		ret = ocfs2_extend_rotate_transaction(handle, 0,
375562306a36Sopenharmony_ci				jbd2_handle_buffer_credits(handle),
375662306a36Sopenharmony_ci				path);
375762306a36Sopenharmony_ci		if (ret) {
375862306a36Sopenharmony_ci			mlog_errno(ret);
375962306a36Sopenharmony_ci			goto out;
376062306a36Sopenharmony_ci		}
376162306a36Sopenharmony_ci
376262306a36Sopenharmony_ci		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
376362306a36Sopenharmony_ci		/*
376462306a36Sopenharmony_ci		 * Error from this last rotate is not critical, so
376562306a36Sopenharmony_ci		 * print but don't bubble it up.
376662306a36Sopenharmony_ci		 */
376762306a36Sopenharmony_ci		if (ret)
376862306a36Sopenharmony_ci			mlog_errno(ret);
376962306a36Sopenharmony_ci		ret = 0;
377062306a36Sopenharmony_ci	} else {
377162306a36Sopenharmony_ci		/*
377262306a36Sopenharmony_ci		 * Merge a record to the left or right.
377362306a36Sopenharmony_ci		 *
377462306a36Sopenharmony_ci		 * 'contig_type' is relative to the existing record,
377562306a36Sopenharmony_ci		 * so for example, if we're "right contig", it's to
377662306a36Sopenharmony_ci		 * the record on the left (hence the left merge).
377762306a36Sopenharmony_ci		 */
377862306a36Sopenharmony_ci		if (ctxt->c_contig_type == CONTIG_RIGHT) {
377962306a36Sopenharmony_ci			ret = ocfs2_merge_rec_left(path, handle, et,
378062306a36Sopenharmony_ci						   split_rec, dealloc,
378162306a36Sopenharmony_ci						   split_index);
378262306a36Sopenharmony_ci			if (ret) {
378362306a36Sopenharmony_ci				mlog_errno(ret);
378462306a36Sopenharmony_ci				goto out;
378562306a36Sopenharmony_ci			}
378662306a36Sopenharmony_ci		} else {
378762306a36Sopenharmony_ci			ret = ocfs2_merge_rec_right(path, handle,
378862306a36Sopenharmony_ci						    et, split_rec,
378962306a36Sopenharmony_ci						    split_index);
379062306a36Sopenharmony_ci			if (ret) {
379162306a36Sopenharmony_ci				mlog_errno(ret);
379262306a36Sopenharmony_ci				goto out;
379362306a36Sopenharmony_ci			}
379462306a36Sopenharmony_ci		}
379562306a36Sopenharmony_ci
379662306a36Sopenharmony_ci		if (ctxt->c_split_covers_rec) {
379762306a36Sopenharmony_ci			/* extend credit for ocfs2_remove_rightmost_path */
379862306a36Sopenharmony_ci			ret = ocfs2_extend_rotate_transaction(handle, 0,
379962306a36Sopenharmony_ci					jbd2_handle_buffer_credits(handle),
380062306a36Sopenharmony_ci					path);
380162306a36Sopenharmony_ci			if (ret) {
380262306a36Sopenharmony_ci				mlog_errno(ret);
380362306a36Sopenharmony_ci				ret = 0;
380462306a36Sopenharmony_ci				goto out;
380562306a36Sopenharmony_ci			}
380662306a36Sopenharmony_ci
380762306a36Sopenharmony_ci			/*
380862306a36Sopenharmony_ci			 * The merge may have left an empty extent in
380962306a36Sopenharmony_ci			 * our leaf. Try to rotate it away.
381062306a36Sopenharmony_ci			 */
381162306a36Sopenharmony_ci			ret = ocfs2_rotate_tree_left(handle, et, path,
381262306a36Sopenharmony_ci						     dealloc);
381362306a36Sopenharmony_ci			if (ret)
381462306a36Sopenharmony_ci				mlog_errno(ret);
381562306a36Sopenharmony_ci			ret = 0;
381662306a36Sopenharmony_ci		}
381762306a36Sopenharmony_ci	}
381862306a36Sopenharmony_ci
381962306a36Sopenharmony_ciout:
382062306a36Sopenharmony_ci	return ret;
382162306a36Sopenharmony_ci}
382262306a36Sopenharmony_ci
382362306a36Sopenharmony_cistatic void ocfs2_subtract_from_rec(struct super_block *sb,
382462306a36Sopenharmony_ci				    enum ocfs2_split_type split,
382562306a36Sopenharmony_ci				    struct ocfs2_extent_rec *rec,
382662306a36Sopenharmony_ci				    struct ocfs2_extent_rec *split_rec)
382762306a36Sopenharmony_ci{
382862306a36Sopenharmony_ci	u64 len_blocks;
382962306a36Sopenharmony_ci
383062306a36Sopenharmony_ci	len_blocks = ocfs2_clusters_to_blocks(sb,
383162306a36Sopenharmony_ci				le16_to_cpu(split_rec->e_leaf_clusters));
383262306a36Sopenharmony_ci
383362306a36Sopenharmony_ci	if (split == SPLIT_LEFT) {
383462306a36Sopenharmony_ci		/*
383562306a36Sopenharmony_ci		 * Region is on the left edge of the existing
383662306a36Sopenharmony_ci		 * record.
383762306a36Sopenharmony_ci		 */
383862306a36Sopenharmony_ci		le32_add_cpu(&rec->e_cpos,
383962306a36Sopenharmony_ci			     le16_to_cpu(split_rec->e_leaf_clusters));
384062306a36Sopenharmony_ci		le64_add_cpu(&rec->e_blkno, len_blocks);
384162306a36Sopenharmony_ci		le16_add_cpu(&rec->e_leaf_clusters,
384262306a36Sopenharmony_ci			     -le16_to_cpu(split_rec->e_leaf_clusters));
384362306a36Sopenharmony_ci	} else {
384462306a36Sopenharmony_ci		/*
384562306a36Sopenharmony_ci		 * Region is on the right edge of the existing
384662306a36Sopenharmony_ci		 * record.
384762306a36Sopenharmony_ci		 */
384862306a36Sopenharmony_ci		le16_add_cpu(&rec->e_leaf_clusters,
384962306a36Sopenharmony_ci			     -le16_to_cpu(split_rec->e_leaf_clusters));
385062306a36Sopenharmony_ci	}
385162306a36Sopenharmony_ci}
385262306a36Sopenharmony_ci
385362306a36Sopenharmony_ci/*
385462306a36Sopenharmony_ci * Do the final bits of extent record insertion at the target leaf
385562306a36Sopenharmony_ci * list. If this leaf is part of an allocation tree, it is assumed
385662306a36Sopenharmony_ci * that the tree above has been prepared.
385762306a36Sopenharmony_ci */
385862306a36Sopenharmony_cistatic void ocfs2_insert_at_leaf(struct ocfs2_extent_tree *et,
385962306a36Sopenharmony_ci				 struct ocfs2_extent_rec *insert_rec,
386062306a36Sopenharmony_ci				 struct ocfs2_extent_list *el,
386162306a36Sopenharmony_ci				 struct ocfs2_insert_type *insert)
386262306a36Sopenharmony_ci{
386362306a36Sopenharmony_ci	int i = insert->ins_contig_index;
386462306a36Sopenharmony_ci	unsigned int range;
386562306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
386662306a36Sopenharmony_ci
386762306a36Sopenharmony_ci	BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
386862306a36Sopenharmony_ci
386962306a36Sopenharmony_ci	if (insert->ins_split != SPLIT_NONE) {
387062306a36Sopenharmony_ci		i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos));
387162306a36Sopenharmony_ci		BUG_ON(i == -1);
387262306a36Sopenharmony_ci		rec = &el->l_recs[i];
387362306a36Sopenharmony_ci		ocfs2_subtract_from_rec(ocfs2_metadata_cache_get_super(et->et_ci),
387462306a36Sopenharmony_ci					insert->ins_split, rec,
387562306a36Sopenharmony_ci					insert_rec);
387662306a36Sopenharmony_ci		goto rotate;
387762306a36Sopenharmony_ci	}
387862306a36Sopenharmony_ci
387962306a36Sopenharmony_ci	/*
388062306a36Sopenharmony_ci	 * Contiguous insert - either left or right.
388162306a36Sopenharmony_ci	 */
388262306a36Sopenharmony_ci	if (insert->ins_contig != CONTIG_NONE) {
388362306a36Sopenharmony_ci		rec = &el->l_recs[i];
388462306a36Sopenharmony_ci		if (insert->ins_contig == CONTIG_LEFT) {
388562306a36Sopenharmony_ci			rec->e_blkno = insert_rec->e_blkno;
388662306a36Sopenharmony_ci			rec->e_cpos = insert_rec->e_cpos;
388762306a36Sopenharmony_ci		}
388862306a36Sopenharmony_ci		le16_add_cpu(&rec->e_leaf_clusters,
388962306a36Sopenharmony_ci			     le16_to_cpu(insert_rec->e_leaf_clusters));
389062306a36Sopenharmony_ci		return;
389162306a36Sopenharmony_ci	}
389262306a36Sopenharmony_ci
389362306a36Sopenharmony_ci	/*
389462306a36Sopenharmony_ci	 * Handle insert into an empty leaf.
389562306a36Sopenharmony_ci	 */
389662306a36Sopenharmony_ci	if (le16_to_cpu(el->l_next_free_rec) == 0 ||
389762306a36Sopenharmony_ci	    ((le16_to_cpu(el->l_next_free_rec) == 1) &&
389862306a36Sopenharmony_ci	     ocfs2_is_empty_extent(&el->l_recs[0]))) {
389962306a36Sopenharmony_ci		el->l_recs[0] = *insert_rec;
390062306a36Sopenharmony_ci		el->l_next_free_rec = cpu_to_le16(1);
390162306a36Sopenharmony_ci		return;
390262306a36Sopenharmony_ci	}
390362306a36Sopenharmony_ci
390462306a36Sopenharmony_ci	/*
390562306a36Sopenharmony_ci	 * Appending insert.
390662306a36Sopenharmony_ci	 */
390762306a36Sopenharmony_ci	if (insert->ins_appending == APPEND_TAIL) {
390862306a36Sopenharmony_ci		i = le16_to_cpu(el->l_next_free_rec) - 1;
390962306a36Sopenharmony_ci		rec = &el->l_recs[i];
391062306a36Sopenharmony_ci		range = le32_to_cpu(rec->e_cpos)
391162306a36Sopenharmony_ci			+ le16_to_cpu(rec->e_leaf_clusters);
391262306a36Sopenharmony_ci		BUG_ON(le32_to_cpu(insert_rec->e_cpos) < range);
391362306a36Sopenharmony_ci
391462306a36Sopenharmony_ci		mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >=
391562306a36Sopenharmony_ci				le16_to_cpu(el->l_count),
391662306a36Sopenharmony_ci				"owner %llu, depth %u, count %u, next free %u, "
391762306a36Sopenharmony_ci				"rec.cpos %u, rec.clusters %u, "
391862306a36Sopenharmony_ci				"insert.cpos %u, insert.clusters %u\n",
391962306a36Sopenharmony_ci				ocfs2_metadata_cache_owner(et->et_ci),
392062306a36Sopenharmony_ci				le16_to_cpu(el->l_tree_depth),
392162306a36Sopenharmony_ci				le16_to_cpu(el->l_count),
392262306a36Sopenharmony_ci				le16_to_cpu(el->l_next_free_rec),
392362306a36Sopenharmony_ci				le32_to_cpu(el->l_recs[i].e_cpos),
392462306a36Sopenharmony_ci				le16_to_cpu(el->l_recs[i].e_leaf_clusters),
392562306a36Sopenharmony_ci				le32_to_cpu(insert_rec->e_cpos),
392662306a36Sopenharmony_ci				le16_to_cpu(insert_rec->e_leaf_clusters));
392762306a36Sopenharmony_ci		i++;
392862306a36Sopenharmony_ci		el->l_recs[i] = *insert_rec;
392962306a36Sopenharmony_ci		le16_add_cpu(&el->l_next_free_rec, 1);
393062306a36Sopenharmony_ci		return;
393162306a36Sopenharmony_ci	}
393262306a36Sopenharmony_ci
393362306a36Sopenharmony_cirotate:
393462306a36Sopenharmony_ci	/*
393562306a36Sopenharmony_ci	 * Ok, we have to rotate.
393662306a36Sopenharmony_ci	 *
393762306a36Sopenharmony_ci	 * At this point, it is safe to assume that inserting into an
393862306a36Sopenharmony_ci	 * empty leaf and appending to a leaf have both been handled
393962306a36Sopenharmony_ci	 * above.
394062306a36Sopenharmony_ci	 *
394162306a36Sopenharmony_ci	 * This leaf needs to have space, either by the empty 1st
394262306a36Sopenharmony_ci	 * extent record, or by virtue of an l_next_free_rec < l_count.
394362306a36Sopenharmony_ci	 */
394462306a36Sopenharmony_ci	ocfs2_rotate_leaf(el, insert_rec);
394562306a36Sopenharmony_ci}
394662306a36Sopenharmony_ci
394762306a36Sopenharmony_cistatic void ocfs2_adjust_rightmost_records(handle_t *handle,
394862306a36Sopenharmony_ci					   struct ocfs2_extent_tree *et,
394962306a36Sopenharmony_ci					   struct ocfs2_path *path,
395062306a36Sopenharmony_ci					   struct ocfs2_extent_rec *insert_rec)
395162306a36Sopenharmony_ci{
395262306a36Sopenharmony_ci	int i, next_free;
395362306a36Sopenharmony_ci	struct buffer_head *bh;
395462306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
395562306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
395662306a36Sopenharmony_ci
395762306a36Sopenharmony_ci	/*
395862306a36Sopenharmony_ci	 * Update everything except the leaf block.
395962306a36Sopenharmony_ci	 */
396062306a36Sopenharmony_ci	for (i = 0; i < path->p_tree_depth; i++) {
396162306a36Sopenharmony_ci		bh = path->p_node[i].bh;
396262306a36Sopenharmony_ci		el = path->p_node[i].el;
396362306a36Sopenharmony_ci
396462306a36Sopenharmony_ci		next_free = le16_to_cpu(el->l_next_free_rec);
396562306a36Sopenharmony_ci		if (next_free == 0) {
396662306a36Sopenharmony_ci			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
396762306a36Sopenharmony_ci				    "Owner %llu has a bad extent list\n",
396862306a36Sopenharmony_ci				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
396962306a36Sopenharmony_ci			return;
397062306a36Sopenharmony_ci		}
397162306a36Sopenharmony_ci
397262306a36Sopenharmony_ci		rec = &el->l_recs[next_free - 1];
397362306a36Sopenharmony_ci
397462306a36Sopenharmony_ci		rec->e_int_clusters = insert_rec->e_cpos;
397562306a36Sopenharmony_ci		le32_add_cpu(&rec->e_int_clusters,
397662306a36Sopenharmony_ci			     le16_to_cpu(insert_rec->e_leaf_clusters));
397762306a36Sopenharmony_ci		le32_add_cpu(&rec->e_int_clusters,
397862306a36Sopenharmony_ci			     -le32_to_cpu(rec->e_cpos));
397962306a36Sopenharmony_ci
398062306a36Sopenharmony_ci		ocfs2_journal_dirty(handle, bh);
398162306a36Sopenharmony_ci	}
398262306a36Sopenharmony_ci}
398362306a36Sopenharmony_ci
398462306a36Sopenharmony_cistatic int ocfs2_append_rec_to_path(handle_t *handle,
398562306a36Sopenharmony_ci				    struct ocfs2_extent_tree *et,
398662306a36Sopenharmony_ci				    struct ocfs2_extent_rec *insert_rec,
398762306a36Sopenharmony_ci				    struct ocfs2_path *right_path,
398862306a36Sopenharmony_ci				    struct ocfs2_path **ret_left_path)
398962306a36Sopenharmony_ci{
399062306a36Sopenharmony_ci	int ret, next_free;
399162306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
399262306a36Sopenharmony_ci	struct ocfs2_path *left_path = NULL;
399362306a36Sopenharmony_ci
399462306a36Sopenharmony_ci	*ret_left_path = NULL;
399562306a36Sopenharmony_ci
399662306a36Sopenharmony_ci	/*
399762306a36Sopenharmony_ci	 * This shouldn't happen for non-trees. The extent rec cluster
399862306a36Sopenharmony_ci	 * count manipulation below only works for interior nodes.
399962306a36Sopenharmony_ci	 */
400062306a36Sopenharmony_ci	BUG_ON(right_path->p_tree_depth == 0);
400162306a36Sopenharmony_ci
400262306a36Sopenharmony_ci	/*
400362306a36Sopenharmony_ci	 * If our appending insert is at the leftmost edge of a leaf,
400462306a36Sopenharmony_ci	 * then we might need to update the rightmost records of the
400562306a36Sopenharmony_ci	 * neighboring path.
400662306a36Sopenharmony_ci	 */
400762306a36Sopenharmony_ci	el = path_leaf_el(right_path);
400862306a36Sopenharmony_ci	next_free = le16_to_cpu(el->l_next_free_rec);
400962306a36Sopenharmony_ci	if (next_free == 0 ||
401062306a36Sopenharmony_ci	    (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) {
401162306a36Sopenharmony_ci		u32 left_cpos;
401262306a36Sopenharmony_ci
401362306a36Sopenharmony_ci		ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
401462306a36Sopenharmony_ci						    right_path, &left_cpos);
401562306a36Sopenharmony_ci		if (ret) {
401662306a36Sopenharmony_ci			mlog_errno(ret);
401762306a36Sopenharmony_ci			goto out;
401862306a36Sopenharmony_ci		}
401962306a36Sopenharmony_ci
402062306a36Sopenharmony_ci		trace_ocfs2_append_rec_to_path(
402162306a36Sopenharmony_ci			(unsigned long long)
402262306a36Sopenharmony_ci			ocfs2_metadata_cache_owner(et->et_ci),
402362306a36Sopenharmony_ci			le32_to_cpu(insert_rec->e_cpos),
402462306a36Sopenharmony_ci			left_cpos);
402562306a36Sopenharmony_ci
402662306a36Sopenharmony_ci		/*
402762306a36Sopenharmony_ci		 * No need to worry if the append is already in the
402862306a36Sopenharmony_ci		 * leftmost leaf.
402962306a36Sopenharmony_ci		 */
403062306a36Sopenharmony_ci		if (left_cpos) {
403162306a36Sopenharmony_ci			left_path = ocfs2_new_path_from_path(right_path);
403262306a36Sopenharmony_ci			if (!left_path) {
403362306a36Sopenharmony_ci				ret = -ENOMEM;
403462306a36Sopenharmony_ci				mlog_errno(ret);
403562306a36Sopenharmony_ci				goto out;
403662306a36Sopenharmony_ci			}
403762306a36Sopenharmony_ci
403862306a36Sopenharmony_ci			ret = ocfs2_find_path(et->et_ci, left_path,
403962306a36Sopenharmony_ci					      left_cpos);
404062306a36Sopenharmony_ci			if (ret) {
404162306a36Sopenharmony_ci				mlog_errno(ret);
404262306a36Sopenharmony_ci				goto out;
404362306a36Sopenharmony_ci			}
404462306a36Sopenharmony_ci
404562306a36Sopenharmony_ci			/*
404662306a36Sopenharmony_ci			 * ocfs2_insert_path() will pass the left_path to the
404762306a36Sopenharmony_ci			 * journal for us.
404862306a36Sopenharmony_ci			 */
404962306a36Sopenharmony_ci		}
405062306a36Sopenharmony_ci	}
405162306a36Sopenharmony_ci
405262306a36Sopenharmony_ci	ret = ocfs2_journal_access_path(et->et_ci, handle, right_path);
405362306a36Sopenharmony_ci	if (ret) {
405462306a36Sopenharmony_ci		mlog_errno(ret);
405562306a36Sopenharmony_ci		goto out;
405662306a36Sopenharmony_ci	}
405762306a36Sopenharmony_ci
405862306a36Sopenharmony_ci	ocfs2_adjust_rightmost_records(handle, et, right_path, insert_rec);
405962306a36Sopenharmony_ci
406062306a36Sopenharmony_ci	*ret_left_path = left_path;
406162306a36Sopenharmony_ci	ret = 0;
406262306a36Sopenharmony_ciout:
406362306a36Sopenharmony_ci	if (ret != 0)
406462306a36Sopenharmony_ci		ocfs2_free_path(left_path);
406562306a36Sopenharmony_ci
406662306a36Sopenharmony_ci	return ret;
406762306a36Sopenharmony_ci}
406862306a36Sopenharmony_ci
406962306a36Sopenharmony_cistatic void ocfs2_split_record(struct ocfs2_extent_tree *et,
407062306a36Sopenharmony_ci			       struct ocfs2_path *left_path,
407162306a36Sopenharmony_ci			       struct ocfs2_path *right_path,
407262306a36Sopenharmony_ci			       struct ocfs2_extent_rec *split_rec,
407362306a36Sopenharmony_ci			       enum ocfs2_split_type split)
407462306a36Sopenharmony_ci{
407562306a36Sopenharmony_ci	int index;
407662306a36Sopenharmony_ci	u32 cpos = le32_to_cpu(split_rec->e_cpos);
407762306a36Sopenharmony_ci	struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el;
407862306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec, *tmprec;
407962306a36Sopenharmony_ci
408062306a36Sopenharmony_ci	right_el = path_leaf_el(right_path);
408162306a36Sopenharmony_ci	if (left_path)
408262306a36Sopenharmony_ci		left_el = path_leaf_el(left_path);
408362306a36Sopenharmony_ci
408462306a36Sopenharmony_ci	el = right_el;
408562306a36Sopenharmony_ci	insert_el = right_el;
408662306a36Sopenharmony_ci	index = ocfs2_search_extent_list(el, cpos);
408762306a36Sopenharmony_ci	if (index != -1) {
408862306a36Sopenharmony_ci		if (index == 0 && left_path) {
408962306a36Sopenharmony_ci			BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0]));
409062306a36Sopenharmony_ci
409162306a36Sopenharmony_ci			/*
409262306a36Sopenharmony_ci			 * This typically means that the record
409362306a36Sopenharmony_ci			 * started in the left path but moved to the
409462306a36Sopenharmony_ci			 * right as a result of rotation. We either
409562306a36Sopenharmony_ci			 * move the existing record to the left, or we
409662306a36Sopenharmony_ci			 * do the later insert there.
409762306a36Sopenharmony_ci			 *
409862306a36Sopenharmony_ci			 * In this case, the left path should always
409962306a36Sopenharmony_ci			 * exist as the rotate code will have passed
410062306a36Sopenharmony_ci			 * it back for a post-insert update.
410162306a36Sopenharmony_ci			 */
410262306a36Sopenharmony_ci
410362306a36Sopenharmony_ci			if (split == SPLIT_LEFT) {
410462306a36Sopenharmony_ci				/*
410562306a36Sopenharmony_ci				 * It's a left split. Since we know
410662306a36Sopenharmony_ci				 * that the rotate code gave us an
410762306a36Sopenharmony_ci				 * empty extent in the left path, we
410862306a36Sopenharmony_ci				 * can just do the insert there.
410962306a36Sopenharmony_ci				 */
411062306a36Sopenharmony_ci				insert_el = left_el;
411162306a36Sopenharmony_ci			} else {
411262306a36Sopenharmony_ci				/*
411362306a36Sopenharmony_ci				 * Right split - we have to move the
411462306a36Sopenharmony_ci				 * existing record over to the left
411562306a36Sopenharmony_ci				 * leaf. The insert will be into the
411662306a36Sopenharmony_ci				 * newly created empty extent in the
411762306a36Sopenharmony_ci				 * right leaf.
411862306a36Sopenharmony_ci				 */
411962306a36Sopenharmony_ci				tmprec = &right_el->l_recs[index];
412062306a36Sopenharmony_ci				ocfs2_rotate_leaf(left_el, tmprec);
412162306a36Sopenharmony_ci				el = left_el;
412262306a36Sopenharmony_ci
412362306a36Sopenharmony_ci				memset(tmprec, 0, sizeof(*tmprec));
412462306a36Sopenharmony_ci				index = ocfs2_search_extent_list(left_el, cpos);
412562306a36Sopenharmony_ci				BUG_ON(index == -1);
412662306a36Sopenharmony_ci			}
412762306a36Sopenharmony_ci		}
412862306a36Sopenharmony_ci	} else {
412962306a36Sopenharmony_ci		BUG_ON(!left_path);
413062306a36Sopenharmony_ci		BUG_ON(!ocfs2_is_empty_extent(&left_el->l_recs[0]));
413162306a36Sopenharmony_ci		/*
413262306a36Sopenharmony_ci		 * Left path is easy - we can just allow the insert to
413362306a36Sopenharmony_ci		 * happen.
413462306a36Sopenharmony_ci		 */
413562306a36Sopenharmony_ci		el = left_el;
413662306a36Sopenharmony_ci		insert_el = left_el;
413762306a36Sopenharmony_ci		index = ocfs2_search_extent_list(el, cpos);
413862306a36Sopenharmony_ci		BUG_ON(index == -1);
413962306a36Sopenharmony_ci	}
414062306a36Sopenharmony_ci
414162306a36Sopenharmony_ci	rec = &el->l_recs[index];
414262306a36Sopenharmony_ci	ocfs2_subtract_from_rec(ocfs2_metadata_cache_get_super(et->et_ci),
414362306a36Sopenharmony_ci				split, rec, split_rec);
414462306a36Sopenharmony_ci	ocfs2_rotate_leaf(insert_el, split_rec);
414562306a36Sopenharmony_ci}
414662306a36Sopenharmony_ci
414762306a36Sopenharmony_ci/*
414862306a36Sopenharmony_ci * This function only does inserts on an allocation b-tree. For tree
414962306a36Sopenharmony_ci * depth = 0, ocfs2_insert_at_leaf() is called directly.
415062306a36Sopenharmony_ci *
415162306a36Sopenharmony_ci * right_path is the path we want to do the actual insert
415262306a36Sopenharmony_ci * in. left_path should only be passed in if we need to update that
415362306a36Sopenharmony_ci * portion of the tree after an edge insert.
415462306a36Sopenharmony_ci */
415562306a36Sopenharmony_cistatic int ocfs2_insert_path(handle_t *handle,
415662306a36Sopenharmony_ci			     struct ocfs2_extent_tree *et,
415762306a36Sopenharmony_ci			     struct ocfs2_path *left_path,
415862306a36Sopenharmony_ci			     struct ocfs2_path *right_path,
415962306a36Sopenharmony_ci			     struct ocfs2_extent_rec *insert_rec,
416062306a36Sopenharmony_ci			     struct ocfs2_insert_type *insert)
416162306a36Sopenharmony_ci{
416262306a36Sopenharmony_ci	int ret, subtree_index;
416362306a36Sopenharmony_ci	struct buffer_head *leaf_bh = path_leaf_bh(right_path);
416462306a36Sopenharmony_ci
416562306a36Sopenharmony_ci	if (left_path) {
416662306a36Sopenharmony_ci		/*
416762306a36Sopenharmony_ci		 * There's a chance that left_path got passed back to
416862306a36Sopenharmony_ci		 * us without being accounted for in the
416962306a36Sopenharmony_ci		 * journal. Extend our transaction here to be sure we
417062306a36Sopenharmony_ci		 * can change those blocks.
417162306a36Sopenharmony_ci		 */
417262306a36Sopenharmony_ci		ret = ocfs2_extend_trans(handle, left_path->p_tree_depth);
417362306a36Sopenharmony_ci		if (ret < 0) {
417462306a36Sopenharmony_ci			mlog_errno(ret);
417562306a36Sopenharmony_ci			goto out;
417662306a36Sopenharmony_ci		}
417762306a36Sopenharmony_ci
417862306a36Sopenharmony_ci		ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
417962306a36Sopenharmony_ci		if (ret < 0) {
418062306a36Sopenharmony_ci			mlog_errno(ret);
418162306a36Sopenharmony_ci			goto out;
418262306a36Sopenharmony_ci		}
418362306a36Sopenharmony_ci	}
418462306a36Sopenharmony_ci
418562306a36Sopenharmony_ci	/*
418662306a36Sopenharmony_ci	 * Pass both paths to the journal. The majority of inserts
418762306a36Sopenharmony_ci	 * will be touching all components anyway.
418862306a36Sopenharmony_ci	 */
418962306a36Sopenharmony_ci	ret = ocfs2_journal_access_path(et->et_ci, handle, right_path);
419062306a36Sopenharmony_ci	if (ret < 0) {
419162306a36Sopenharmony_ci		mlog_errno(ret);
419262306a36Sopenharmony_ci		goto out;
419362306a36Sopenharmony_ci	}
419462306a36Sopenharmony_ci
419562306a36Sopenharmony_ci	if (insert->ins_split != SPLIT_NONE) {
419662306a36Sopenharmony_ci		/*
419762306a36Sopenharmony_ci		 * We could call ocfs2_insert_at_leaf() for some types
419862306a36Sopenharmony_ci		 * of splits, but it's easier to just let one separate
419962306a36Sopenharmony_ci		 * function sort it all out.
420062306a36Sopenharmony_ci		 */
420162306a36Sopenharmony_ci		ocfs2_split_record(et, left_path, right_path,
420262306a36Sopenharmony_ci				   insert_rec, insert->ins_split);
420362306a36Sopenharmony_ci
420462306a36Sopenharmony_ci		/*
420562306a36Sopenharmony_ci		 * Split might have modified either leaf and we don't
420662306a36Sopenharmony_ci		 * have a guarantee that the later edge insert will
420762306a36Sopenharmony_ci		 * dirty this for us.
420862306a36Sopenharmony_ci		 */
420962306a36Sopenharmony_ci		if (left_path)
421062306a36Sopenharmony_ci			ocfs2_journal_dirty(handle,
421162306a36Sopenharmony_ci					    path_leaf_bh(left_path));
421262306a36Sopenharmony_ci	} else
421362306a36Sopenharmony_ci		ocfs2_insert_at_leaf(et, insert_rec, path_leaf_el(right_path),
421462306a36Sopenharmony_ci				     insert);
421562306a36Sopenharmony_ci
421662306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, leaf_bh);
421762306a36Sopenharmony_ci
421862306a36Sopenharmony_ci	if (left_path) {
421962306a36Sopenharmony_ci		/*
422062306a36Sopenharmony_ci		 * The rotate code has indicated that we need to fix
422162306a36Sopenharmony_ci		 * up portions of the tree after the insert.
422262306a36Sopenharmony_ci		 *
422362306a36Sopenharmony_ci		 * XXX: Should we extend the transaction here?
422462306a36Sopenharmony_ci		 */
422562306a36Sopenharmony_ci		subtree_index = ocfs2_find_subtree_root(et, left_path,
422662306a36Sopenharmony_ci							right_path);
422762306a36Sopenharmony_ci		ocfs2_complete_edge_insert(handle, left_path, right_path,
422862306a36Sopenharmony_ci					   subtree_index);
422962306a36Sopenharmony_ci	}
423062306a36Sopenharmony_ci
423162306a36Sopenharmony_ci	ret = 0;
423262306a36Sopenharmony_ciout:
423362306a36Sopenharmony_ci	return ret;
423462306a36Sopenharmony_ci}
423562306a36Sopenharmony_ci
423662306a36Sopenharmony_cistatic int ocfs2_do_insert_extent(handle_t *handle,
423762306a36Sopenharmony_ci				  struct ocfs2_extent_tree *et,
423862306a36Sopenharmony_ci				  struct ocfs2_extent_rec *insert_rec,
423962306a36Sopenharmony_ci				  struct ocfs2_insert_type *type)
424062306a36Sopenharmony_ci{
424162306a36Sopenharmony_ci	int ret, rotate = 0;
424262306a36Sopenharmony_ci	u32 cpos;
424362306a36Sopenharmony_ci	struct ocfs2_path *right_path = NULL;
424462306a36Sopenharmony_ci	struct ocfs2_path *left_path = NULL;
424562306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
424662306a36Sopenharmony_ci
424762306a36Sopenharmony_ci	el = et->et_root_el;
424862306a36Sopenharmony_ci
424962306a36Sopenharmony_ci	ret = ocfs2_et_root_journal_access(handle, et,
425062306a36Sopenharmony_ci					   OCFS2_JOURNAL_ACCESS_WRITE);
425162306a36Sopenharmony_ci	if (ret) {
425262306a36Sopenharmony_ci		mlog_errno(ret);
425362306a36Sopenharmony_ci		goto out;
425462306a36Sopenharmony_ci	}
425562306a36Sopenharmony_ci
425662306a36Sopenharmony_ci	if (le16_to_cpu(el->l_tree_depth) == 0) {
425762306a36Sopenharmony_ci		ocfs2_insert_at_leaf(et, insert_rec, el, type);
425862306a36Sopenharmony_ci		goto out_update_clusters;
425962306a36Sopenharmony_ci	}
426062306a36Sopenharmony_ci
426162306a36Sopenharmony_ci	right_path = ocfs2_new_path_from_et(et);
426262306a36Sopenharmony_ci	if (!right_path) {
426362306a36Sopenharmony_ci		ret = -ENOMEM;
426462306a36Sopenharmony_ci		mlog_errno(ret);
426562306a36Sopenharmony_ci		goto out;
426662306a36Sopenharmony_ci	}
426762306a36Sopenharmony_ci
426862306a36Sopenharmony_ci	/*
426962306a36Sopenharmony_ci	 * Determine the path to start with. Rotations need the
427062306a36Sopenharmony_ci	 * rightmost path, everything else can go directly to the
427162306a36Sopenharmony_ci	 * target leaf.
427262306a36Sopenharmony_ci	 */
427362306a36Sopenharmony_ci	cpos = le32_to_cpu(insert_rec->e_cpos);
427462306a36Sopenharmony_ci	if (type->ins_appending == APPEND_NONE &&
427562306a36Sopenharmony_ci	    type->ins_contig == CONTIG_NONE) {
427662306a36Sopenharmony_ci		rotate = 1;
427762306a36Sopenharmony_ci		cpos = UINT_MAX;
427862306a36Sopenharmony_ci	}
427962306a36Sopenharmony_ci
428062306a36Sopenharmony_ci	ret = ocfs2_find_path(et->et_ci, right_path, cpos);
428162306a36Sopenharmony_ci	if (ret) {
428262306a36Sopenharmony_ci		mlog_errno(ret);
428362306a36Sopenharmony_ci		goto out;
428462306a36Sopenharmony_ci	}
428562306a36Sopenharmony_ci
428662306a36Sopenharmony_ci	/*
428762306a36Sopenharmony_ci	 * Rotations and appends need special treatment - they modify
428862306a36Sopenharmony_ci	 * parts of the tree's above them.
428962306a36Sopenharmony_ci	 *
429062306a36Sopenharmony_ci	 * Both might pass back a path immediate to the left of the
429162306a36Sopenharmony_ci	 * one being inserted to. This will be cause
429262306a36Sopenharmony_ci	 * ocfs2_insert_path() to modify the rightmost records of
429362306a36Sopenharmony_ci	 * left_path to account for an edge insert.
429462306a36Sopenharmony_ci	 *
429562306a36Sopenharmony_ci	 * XXX: When modifying this code, keep in mind that an insert
429662306a36Sopenharmony_ci	 * can wind up skipping both of these two special cases...
429762306a36Sopenharmony_ci	 */
429862306a36Sopenharmony_ci	if (rotate) {
429962306a36Sopenharmony_ci		ret = ocfs2_rotate_tree_right(handle, et, type->ins_split,
430062306a36Sopenharmony_ci					      le32_to_cpu(insert_rec->e_cpos),
430162306a36Sopenharmony_ci					      right_path, &left_path);
430262306a36Sopenharmony_ci		if (ret) {
430362306a36Sopenharmony_ci			mlog_errno(ret);
430462306a36Sopenharmony_ci			goto out;
430562306a36Sopenharmony_ci		}
430662306a36Sopenharmony_ci
430762306a36Sopenharmony_ci		/*
430862306a36Sopenharmony_ci		 * ocfs2_rotate_tree_right() might have extended the
430962306a36Sopenharmony_ci		 * transaction without re-journaling our tree root.
431062306a36Sopenharmony_ci		 */
431162306a36Sopenharmony_ci		ret = ocfs2_et_root_journal_access(handle, et,
431262306a36Sopenharmony_ci						   OCFS2_JOURNAL_ACCESS_WRITE);
431362306a36Sopenharmony_ci		if (ret) {
431462306a36Sopenharmony_ci			mlog_errno(ret);
431562306a36Sopenharmony_ci			goto out;
431662306a36Sopenharmony_ci		}
431762306a36Sopenharmony_ci	} else if (type->ins_appending == APPEND_TAIL
431862306a36Sopenharmony_ci		   && type->ins_contig != CONTIG_LEFT) {
431962306a36Sopenharmony_ci		ret = ocfs2_append_rec_to_path(handle, et, insert_rec,
432062306a36Sopenharmony_ci					       right_path, &left_path);
432162306a36Sopenharmony_ci		if (ret) {
432262306a36Sopenharmony_ci			mlog_errno(ret);
432362306a36Sopenharmony_ci			goto out;
432462306a36Sopenharmony_ci		}
432562306a36Sopenharmony_ci	}
432662306a36Sopenharmony_ci
432762306a36Sopenharmony_ci	ret = ocfs2_insert_path(handle, et, left_path, right_path,
432862306a36Sopenharmony_ci				insert_rec, type);
432962306a36Sopenharmony_ci	if (ret) {
433062306a36Sopenharmony_ci		mlog_errno(ret);
433162306a36Sopenharmony_ci		goto out;
433262306a36Sopenharmony_ci	}
433362306a36Sopenharmony_ci
433462306a36Sopenharmony_ciout_update_clusters:
433562306a36Sopenharmony_ci	if (type->ins_split == SPLIT_NONE)
433662306a36Sopenharmony_ci		ocfs2_et_update_clusters(et,
433762306a36Sopenharmony_ci					 le16_to_cpu(insert_rec->e_leaf_clusters));
433862306a36Sopenharmony_ci
433962306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, et->et_root_bh);
434062306a36Sopenharmony_ci
434162306a36Sopenharmony_ciout:
434262306a36Sopenharmony_ci	ocfs2_free_path(left_path);
434362306a36Sopenharmony_ci	ocfs2_free_path(right_path);
434462306a36Sopenharmony_ci
434562306a36Sopenharmony_ci	return ret;
434662306a36Sopenharmony_ci}
434762306a36Sopenharmony_ci
434862306a36Sopenharmony_cistatic int ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
434962306a36Sopenharmony_ci			       struct ocfs2_path *path,
435062306a36Sopenharmony_ci			       struct ocfs2_extent_list *el, int index,
435162306a36Sopenharmony_ci			       struct ocfs2_extent_rec *split_rec,
435262306a36Sopenharmony_ci			       struct ocfs2_merge_ctxt *ctxt)
435362306a36Sopenharmony_ci{
435462306a36Sopenharmony_ci	int status = 0;
435562306a36Sopenharmony_ci	enum ocfs2_contig_type ret = CONTIG_NONE;
435662306a36Sopenharmony_ci	u32 left_cpos, right_cpos;
435762306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec = NULL;
435862306a36Sopenharmony_ci	struct ocfs2_extent_list *new_el;
435962306a36Sopenharmony_ci	struct ocfs2_path *left_path = NULL, *right_path = NULL;
436062306a36Sopenharmony_ci	struct buffer_head *bh;
436162306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
436262306a36Sopenharmony_ci	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
436362306a36Sopenharmony_ci
436462306a36Sopenharmony_ci	if (index > 0) {
436562306a36Sopenharmony_ci		rec = &el->l_recs[index - 1];
436662306a36Sopenharmony_ci	} else if (path->p_tree_depth > 0) {
436762306a36Sopenharmony_ci		status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
436862306a36Sopenharmony_ci		if (status)
436962306a36Sopenharmony_ci			goto exit;
437062306a36Sopenharmony_ci
437162306a36Sopenharmony_ci		if (left_cpos != 0) {
437262306a36Sopenharmony_ci			left_path = ocfs2_new_path_from_path(path);
437362306a36Sopenharmony_ci			if (!left_path) {
437462306a36Sopenharmony_ci				status = -ENOMEM;
437562306a36Sopenharmony_ci				mlog_errno(status);
437662306a36Sopenharmony_ci				goto exit;
437762306a36Sopenharmony_ci			}
437862306a36Sopenharmony_ci
437962306a36Sopenharmony_ci			status = ocfs2_find_path(et->et_ci, left_path,
438062306a36Sopenharmony_ci						 left_cpos);
438162306a36Sopenharmony_ci			if (status)
438262306a36Sopenharmony_ci				goto free_left_path;
438362306a36Sopenharmony_ci
438462306a36Sopenharmony_ci			new_el = path_leaf_el(left_path);
438562306a36Sopenharmony_ci
438662306a36Sopenharmony_ci			if (le16_to_cpu(new_el->l_next_free_rec) !=
438762306a36Sopenharmony_ci			    le16_to_cpu(new_el->l_count)) {
438862306a36Sopenharmony_ci				bh = path_leaf_bh(left_path);
438962306a36Sopenharmony_ci				eb = (struct ocfs2_extent_block *)bh->b_data;
439062306a36Sopenharmony_ci				status = ocfs2_error(sb,
439162306a36Sopenharmony_ci						"Extent block #%llu has an invalid l_next_free_rec of %d.  It should have matched the l_count of %d\n",
439262306a36Sopenharmony_ci						(unsigned long long)le64_to_cpu(eb->h_blkno),
439362306a36Sopenharmony_ci						le16_to_cpu(new_el->l_next_free_rec),
439462306a36Sopenharmony_ci						le16_to_cpu(new_el->l_count));
439562306a36Sopenharmony_ci				goto free_left_path;
439662306a36Sopenharmony_ci			}
439762306a36Sopenharmony_ci			rec = &new_el->l_recs[
439862306a36Sopenharmony_ci				le16_to_cpu(new_el->l_next_free_rec) - 1];
439962306a36Sopenharmony_ci		}
440062306a36Sopenharmony_ci	}
440162306a36Sopenharmony_ci
440262306a36Sopenharmony_ci	/*
440362306a36Sopenharmony_ci	 * We're careful to check for an empty extent record here -
440462306a36Sopenharmony_ci	 * the merge code will know what to do if it sees one.
440562306a36Sopenharmony_ci	 */
440662306a36Sopenharmony_ci	if (rec) {
440762306a36Sopenharmony_ci		if (index == 1 && ocfs2_is_empty_extent(rec)) {
440862306a36Sopenharmony_ci			if (split_rec->e_cpos == el->l_recs[index].e_cpos)
440962306a36Sopenharmony_ci				ret = CONTIG_RIGHT;
441062306a36Sopenharmony_ci		} else {
441162306a36Sopenharmony_ci			ret = ocfs2_et_extent_contig(et, rec, split_rec);
441262306a36Sopenharmony_ci		}
441362306a36Sopenharmony_ci	}
441462306a36Sopenharmony_ci
441562306a36Sopenharmony_ci	rec = NULL;
441662306a36Sopenharmony_ci	if (index < (le16_to_cpu(el->l_next_free_rec) - 1))
441762306a36Sopenharmony_ci		rec = &el->l_recs[index + 1];
441862306a36Sopenharmony_ci	else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) &&
441962306a36Sopenharmony_ci		 path->p_tree_depth > 0) {
442062306a36Sopenharmony_ci		status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
442162306a36Sopenharmony_ci		if (status)
442262306a36Sopenharmony_ci			goto free_left_path;
442362306a36Sopenharmony_ci
442462306a36Sopenharmony_ci		if (right_cpos == 0)
442562306a36Sopenharmony_ci			goto free_left_path;
442662306a36Sopenharmony_ci
442762306a36Sopenharmony_ci		right_path = ocfs2_new_path_from_path(path);
442862306a36Sopenharmony_ci		if (!right_path) {
442962306a36Sopenharmony_ci			status = -ENOMEM;
443062306a36Sopenharmony_ci			mlog_errno(status);
443162306a36Sopenharmony_ci			goto free_left_path;
443262306a36Sopenharmony_ci		}
443362306a36Sopenharmony_ci
443462306a36Sopenharmony_ci		status = ocfs2_find_path(et->et_ci, right_path, right_cpos);
443562306a36Sopenharmony_ci		if (status)
443662306a36Sopenharmony_ci			goto free_right_path;
443762306a36Sopenharmony_ci
443862306a36Sopenharmony_ci		new_el = path_leaf_el(right_path);
443962306a36Sopenharmony_ci		rec = &new_el->l_recs[0];
444062306a36Sopenharmony_ci		if (ocfs2_is_empty_extent(rec)) {
444162306a36Sopenharmony_ci			if (le16_to_cpu(new_el->l_next_free_rec) <= 1) {
444262306a36Sopenharmony_ci				bh = path_leaf_bh(right_path);
444362306a36Sopenharmony_ci				eb = (struct ocfs2_extent_block *)bh->b_data;
444462306a36Sopenharmony_ci				status = ocfs2_error(sb,
444562306a36Sopenharmony_ci						"Extent block #%llu has an invalid l_next_free_rec of %d\n",
444662306a36Sopenharmony_ci						(unsigned long long)le64_to_cpu(eb->h_blkno),
444762306a36Sopenharmony_ci						le16_to_cpu(new_el->l_next_free_rec));
444862306a36Sopenharmony_ci				goto free_right_path;
444962306a36Sopenharmony_ci			}
445062306a36Sopenharmony_ci			rec = &new_el->l_recs[1];
445162306a36Sopenharmony_ci		}
445262306a36Sopenharmony_ci	}
445362306a36Sopenharmony_ci
445462306a36Sopenharmony_ci	if (rec) {
445562306a36Sopenharmony_ci		enum ocfs2_contig_type contig_type;
445662306a36Sopenharmony_ci
445762306a36Sopenharmony_ci		contig_type = ocfs2_et_extent_contig(et, rec, split_rec);
445862306a36Sopenharmony_ci
445962306a36Sopenharmony_ci		if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
446062306a36Sopenharmony_ci			ret = CONTIG_LEFTRIGHT;
446162306a36Sopenharmony_ci		else if (ret == CONTIG_NONE)
446262306a36Sopenharmony_ci			ret = contig_type;
446362306a36Sopenharmony_ci	}
446462306a36Sopenharmony_ci
446562306a36Sopenharmony_cifree_right_path:
446662306a36Sopenharmony_ci	ocfs2_free_path(right_path);
446762306a36Sopenharmony_cifree_left_path:
446862306a36Sopenharmony_ci	ocfs2_free_path(left_path);
446962306a36Sopenharmony_ciexit:
447062306a36Sopenharmony_ci	if (status == 0)
447162306a36Sopenharmony_ci		ctxt->c_contig_type = ret;
447262306a36Sopenharmony_ci
447362306a36Sopenharmony_ci	return status;
447462306a36Sopenharmony_ci}
447562306a36Sopenharmony_ci
447662306a36Sopenharmony_cistatic void ocfs2_figure_contig_type(struct ocfs2_extent_tree *et,
447762306a36Sopenharmony_ci				     struct ocfs2_insert_type *insert,
447862306a36Sopenharmony_ci				     struct ocfs2_extent_list *el,
447962306a36Sopenharmony_ci				     struct ocfs2_extent_rec *insert_rec)
448062306a36Sopenharmony_ci{
448162306a36Sopenharmony_ci	int i;
448262306a36Sopenharmony_ci	enum ocfs2_contig_type contig_type = CONTIG_NONE;
448362306a36Sopenharmony_ci
448462306a36Sopenharmony_ci	BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
448562306a36Sopenharmony_ci
448662306a36Sopenharmony_ci	for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
448762306a36Sopenharmony_ci		contig_type = ocfs2_et_extent_contig(et, &el->l_recs[i],
448862306a36Sopenharmony_ci						     insert_rec);
448962306a36Sopenharmony_ci		if (contig_type != CONTIG_NONE) {
449062306a36Sopenharmony_ci			insert->ins_contig_index = i;
449162306a36Sopenharmony_ci			break;
449262306a36Sopenharmony_ci		}
449362306a36Sopenharmony_ci	}
449462306a36Sopenharmony_ci	insert->ins_contig = contig_type;
449562306a36Sopenharmony_ci
449662306a36Sopenharmony_ci	if (insert->ins_contig != CONTIG_NONE) {
449762306a36Sopenharmony_ci		struct ocfs2_extent_rec *rec =
449862306a36Sopenharmony_ci				&el->l_recs[insert->ins_contig_index];
449962306a36Sopenharmony_ci		unsigned int len = le16_to_cpu(rec->e_leaf_clusters) +
450062306a36Sopenharmony_ci				   le16_to_cpu(insert_rec->e_leaf_clusters);
450162306a36Sopenharmony_ci
450262306a36Sopenharmony_ci		/*
450362306a36Sopenharmony_ci		 * Caller might want us to limit the size of extents, don't
450462306a36Sopenharmony_ci		 * calculate contiguousness if we might exceed that limit.
450562306a36Sopenharmony_ci		 */
450662306a36Sopenharmony_ci		if (et->et_max_leaf_clusters &&
450762306a36Sopenharmony_ci		    (len > et->et_max_leaf_clusters))
450862306a36Sopenharmony_ci			insert->ins_contig = CONTIG_NONE;
450962306a36Sopenharmony_ci	}
451062306a36Sopenharmony_ci}
451162306a36Sopenharmony_ci
451262306a36Sopenharmony_ci/*
451362306a36Sopenharmony_ci * This should only be called against the righmost leaf extent list.
451462306a36Sopenharmony_ci *
451562306a36Sopenharmony_ci * ocfs2_figure_appending_type() will figure out whether we'll have to
451662306a36Sopenharmony_ci * insert at the tail of the rightmost leaf.
451762306a36Sopenharmony_ci *
451862306a36Sopenharmony_ci * This should also work against the root extent list for tree's with 0
451962306a36Sopenharmony_ci * depth. If we consider the root extent list to be the rightmost leaf node
452062306a36Sopenharmony_ci * then the logic here makes sense.
452162306a36Sopenharmony_ci */
452262306a36Sopenharmony_cistatic void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert,
452362306a36Sopenharmony_ci					struct ocfs2_extent_list *el,
452462306a36Sopenharmony_ci					struct ocfs2_extent_rec *insert_rec)
452562306a36Sopenharmony_ci{
452662306a36Sopenharmony_ci	int i;
452762306a36Sopenharmony_ci	u32 cpos = le32_to_cpu(insert_rec->e_cpos);
452862306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
452962306a36Sopenharmony_ci
453062306a36Sopenharmony_ci	insert->ins_appending = APPEND_NONE;
453162306a36Sopenharmony_ci
453262306a36Sopenharmony_ci	BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
453362306a36Sopenharmony_ci
453462306a36Sopenharmony_ci	if (!el->l_next_free_rec)
453562306a36Sopenharmony_ci		goto set_tail_append;
453662306a36Sopenharmony_ci
453762306a36Sopenharmony_ci	if (ocfs2_is_empty_extent(&el->l_recs[0])) {
453862306a36Sopenharmony_ci		/* Were all records empty? */
453962306a36Sopenharmony_ci		if (le16_to_cpu(el->l_next_free_rec) == 1)
454062306a36Sopenharmony_ci			goto set_tail_append;
454162306a36Sopenharmony_ci	}
454262306a36Sopenharmony_ci
454362306a36Sopenharmony_ci	i = le16_to_cpu(el->l_next_free_rec) - 1;
454462306a36Sopenharmony_ci	rec = &el->l_recs[i];
454562306a36Sopenharmony_ci
454662306a36Sopenharmony_ci	if (cpos >=
454762306a36Sopenharmony_ci	    (le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)))
454862306a36Sopenharmony_ci		goto set_tail_append;
454962306a36Sopenharmony_ci
455062306a36Sopenharmony_ci	return;
455162306a36Sopenharmony_ci
455262306a36Sopenharmony_ciset_tail_append:
455362306a36Sopenharmony_ci	insert->ins_appending = APPEND_TAIL;
455462306a36Sopenharmony_ci}
455562306a36Sopenharmony_ci
455662306a36Sopenharmony_ci/*
455762306a36Sopenharmony_ci * Helper function called at the beginning of an insert.
455862306a36Sopenharmony_ci *
455962306a36Sopenharmony_ci * This computes a few things that are commonly used in the process of
456062306a36Sopenharmony_ci * inserting into the btree:
456162306a36Sopenharmony_ci *   - Whether the new extent is contiguous with an existing one.
456262306a36Sopenharmony_ci *   - The current tree depth.
456362306a36Sopenharmony_ci *   - Whether the insert is an appending one.
456462306a36Sopenharmony_ci *   - The total # of free records in the tree.
456562306a36Sopenharmony_ci *
456662306a36Sopenharmony_ci * All of the information is stored on the ocfs2_insert_type
456762306a36Sopenharmony_ci * structure.
456862306a36Sopenharmony_ci */
456962306a36Sopenharmony_cistatic int ocfs2_figure_insert_type(struct ocfs2_extent_tree *et,
457062306a36Sopenharmony_ci				    struct buffer_head **last_eb_bh,
457162306a36Sopenharmony_ci				    struct ocfs2_extent_rec *insert_rec,
457262306a36Sopenharmony_ci				    int *free_records,
457362306a36Sopenharmony_ci				    struct ocfs2_insert_type *insert)
457462306a36Sopenharmony_ci{
457562306a36Sopenharmony_ci	int ret;
457662306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
457762306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
457862306a36Sopenharmony_ci	struct ocfs2_path *path = NULL;
457962306a36Sopenharmony_ci	struct buffer_head *bh = NULL;
458062306a36Sopenharmony_ci
458162306a36Sopenharmony_ci	insert->ins_split = SPLIT_NONE;
458262306a36Sopenharmony_ci
458362306a36Sopenharmony_ci	el = et->et_root_el;
458462306a36Sopenharmony_ci	insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth);
458562306a36Sopenharmony_ci
458662306a36Sopenharmony_ci	if (el->l_tree_depth) {
458762306a36Sopenharmony_ci		/*
458862306a36Sopenharmony_ci		 * If we have tree depth, we read in the
458962306a36Sopenharmony_ci		 * rightmost extent block ahead of time as
459062306a36Sopenharmony_ci		 * ocfs2_figure_insert_type() and ocfs2_add_branch()
459162306a36Sopenharmony_ci		 * may want it later.
459262306a36Sopenharmony_ci		 */
459362306a36Sopenharmony_ci		ret = ocfs2_read_extent_block(et->et_ci,
459462306a36Sopenharmony_ci					      ocfs2_et_get_last_eb_blk(et),
459562306a36Sopenharmony_ci					      &bh);
459662306a36Sopenharmony_ci		if (ret) {
459762306a36Sopenharmony_ci			mlog_errno(ret);
459862306a36Sopenharmony_ci			goto out;
459962306a36Sopenharmony_ci		}
460062306a36Sopenharmony_ci		eb = (struct ocfs2_extent_block *) bh->b_data;
460162306a36Sopenharmony_ci		el = &eb->h_list;
460262306a36Sopenharmony_ci	}
460362306a36Sopenharmony_ci
460462306a36Sopenharmony_ci	/*
460562306a36Sopenharmony_ci	 * Unless we have a contiguous insert, we'll need to know if
460662306a36Sopenharmony_ci	 * there is room left in our allocation tree for another
460762306a36Sopenharmony_ci	 * extent record.
460862306a36Sopenharmony_ci	 *
460962306a36Sopenharmony_ci	 * XXX: This test is simplistic, we can search for empty
461062306a36Sopenharmony_ci	 * extent records too.
461162306a36Sopenharmony_ci	 */
461262306a36Sopenharmony_ci	*free_records = le16_to_cpu(el->l_count) -
461362306a36Sopenharmony_ci		le16_to_cpu(el->l_next_free_rec);
461462306a36Sopenharmony_ci
461562306a36Sopenharmony_ci	if (!insert->ins_tree_depth) {
461662306a36Sopenharmony_ci		ocfs2_figure_contig_type(et, insert, el, insert_rec);
461762306a36Sopenharmony_ci		ocfs2_figure_appending_type(insert, el, insert_rec);
461862306a36Sopenharmony_ci		return 0;
461962306a36Sopenharmony_ci	}
462062306a36Sopenharmony_ci
462162306a36Sopenharmony_ci	path = ocfs2_new_path_from_et(et);
462262306a36Sopenharmony_ci	if (!path) {
462362306a36Sopenharmony_ci		ret = -ENOMEM;
462462306a36Sopenharmony_ci		mlog_errno(ret);
462562306a36Sopenharmony_ci		goto out;
462662306a36Sopenharmony_ci	}
462762306a36Sopenharmony_ci
462862306a36Sopenharmony_ci	/*
462962306a36Sopenharmony_ci	 * In the case that we're inserting past what the tree
463062306a36Sopenharmony_ci	 * currently accounts for, ocfs2_find_path() will return for
463162306a36Sopenharmony_ci	 * us the rightmost tree path. This is accounted for below in
463262306a36Sopenharmony_ci	 * the appending code.
463362306a36Sopenharmony_ci	 */
463462306a36Sopenharmony_ci	ret = ocfs2_find_path(et->et_ci, path, le32_to_cpu(insert_rec->e_cpos));
463562306a36Sopenharmony_ci	if (ret) {
463662306a36Sopenharmony_ci		mlog_errno(ret);
463762306a36Sopenharmony_ci		goto out;
463862306a36Sopenharmony_ci	}
463962306a36Sopenharmony_ci
464062306a36Sopenharmony_ci	el = path_leaf_el(path);
464162306a36Sopenharmony_ci
464262306a36Sopenharmony_ci	/*
464362306a36Sopenharmony_ci	 * Now that we have the path, there's two things we want to determine:
464462306a36Sopenharmony_ci	 * 1) Contiguousness (also set contig_index if this is so)
464562306a36Sopenharmony_ci	 *
464662306a36Sopenharmony_ci	 * 2) Are we doing an append? We can trivially break this up
464762306a36Sopenharmony_ci         *     into two types of appends: simple record append, or a
464862306a36Sopenharmony_ci         *     rotate inside the tail leaf.
464962306a36Sopenharmony_ci	 */
465062306a36Sopenharmony_ci	ocfs2_figure_contig_type(et, insert, el, insert_rec);
465162306a36Sopenharmony_ci
465262306a36Sopenharmony_ci	/*
465362306a36Sopenharmony_ci	 * The insert code isn't quite ready to deal with all cases of
465462306a36Sopenharmony_ci	 * left contiguousness. Specifically, if it's an insert into
465562306a36Sopenharmony_ci	 * the 1st record in a leaf, it will require the adjustment of
465662306a36Sopenharmony_ci	 * cluster count on the last record of the path directly to it's
465762306a36Sopenharmony_ci	 * left. For now, just catch that case and fool the layers
465862306a36Sopenharmony_ci	 * above us. This works just fine for tree_depth == 0, which
465962306a36Sopenharmony_ci	 * is why we allow that above.
466062306a36Sopenharmony_ci	 */
466162306a36Sopenharmony_ci	if (insert->ins_contig == CONTIG_LEFT &&
466262306a36Sopenharmony_ci	    insert->ins_contig_index == 0)
466362306a36Sopenharmony_ci		insert->ins_contig = CONTIG_NONE;
466462306a36Sopenharmony_ci
466562306a36Sopenharmony_ci	/*
466662306a36Sopenharmony_ci	 * Ok, so we can simply compare against last_eb to figure out
466762306a36Sopenharmony_ci	 * whether the path doesn't exist. This will only happen in
466862306a36Sopenharmony_ci	 * the case that we're doing a tail append, so maybe we can
466962306a36Sopenharmony_ci	 * take advantage of that information somehow.
467062306a36Sopenharmony_ci	 */
467162306a36Sopenharmony_ci	if (ocfs2_et_get_last_eb_blk(et) ==
467262306a36Sopenharmony_ci	    path_leaf_bh(path)->b_blocknr) {
467362306a36Sopenharmony_ci		/*
467462306a36Sopenharmony_ci		 * Ok, ocfs2_find_path() returned us the rightmost
467562306a36Sopenharmony_ci		 * tree path. This might be an appending insert. There are
467662306a36Sopenharmony_ci		 * two cases:
467762306a36Sopenharmony_ci		 *    1) We're doing a true append at the tail:
467862306a36Sopenharmony_ci		 *	-This might even be off the end of the leaf
467962306a36Sopenharmony_ci		 *    2) We're "appending" by rotating in the tail
468062306a36Sopenharmony_ci		 */
468162306a36Sopenharmony_ci		ocfs2_figure_appending_type(insert, el, insert_rec);
468262306a36Sopenharmony_ci	}
468362306a36Sopenharmony_ci
468462306a36Sopenharmony_ciout:
468562306a36Sopenharmony_ci	ocfs2_free_path(path);
468662306a36Sopenharmony_ci
468762306a36Sopenharmony_ci	if (ret == 0)
468862306a36Sopenharmony_ci		*last_eb_bh = bh;
468962306a36Sopenharmony_ci	else
469062306a36Sopenharmony_ci		brelse(bh);
469162306a36Sopenharmony_ci	return ret;
469262306a36Sopenharmony_ci}
469362306a36Sopenharmony_ci
469462306a36Sopenharmony_ci/*
469562306a36Sopenharmony_ci * Insert an extent into a btree.
469662306a36Sopenharmony_ci *
469762306a36Sopenharmony_ci * The caller needs to update the owning btree's cluster count.
469862306a36Sopenharmony_ci */
469962306a36Sopenharmony_ciint ocfs2_insert_extent(handle_t *handle,
470062306a36Sopenharmony_ci			struct ocfs2_extent_tree *et,
470162306a36Sopenharmony_ci			u32 cpos,
470262306a36Sopenharmony_ci			u64 start_blk,
470362306a36Sopenharmony_ci			u32 new_clusters,
470462306a36Sopenharmony_ci			u8 flags,
470562306a36Sopenharmony_ci			struct ocfs2_alloc_context *meta_ac)
470662306a36Sopenharmony_ci{
470762306a36Sopenharmony_ci	int status;
470862306a36Sopenharmony_ci	int free_records;
470962306a36Sopenharmony_ci	struct buffer_head *last_eb_bh = NULL;
471062306a36Sopenharmony_ci	struct ocfs2_insert_type insert = {0, };
471162306a36Sopenharmony_ci	struct ocfs2_extent_rec rec;
471262306a36Sopenharmony_ci
471362306a36Sopenharmony_ci	trace_ocfs2_insert_extent_start(
471462306a36Sopenharmony_ci		(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
471562306a36Sopenharmony_ci		cpos, new_clusters);
471662306a36Sopenharmony_ci
471762306a36Sopenharmony_ci	memset(&rec, 0, sizeof(rec));
471862306a36Sopenharmony_ci	rec.e_cpos = cpu_to_le32(cpos);
471962306a36Sopenharmony_ci	rec.e_blkno = cpu_to_le64(start_blk);
472062306a36Sopenharmony_ci	rec.e_leaf_clusters = cpu_to_le16(new_clusters);
472162306a36Sopenharmony_ci	rec.e_flags = flags;
472262306a36Sopenharmony_ci	status = ocfs2_et_insert_check(et, &rec);
472362306a36Sopenharmony_ci	if (status) {
472462306a36Sopenharmony_ci		mlog_errno(status);
472562306a36Sopenharmony_ci		goto bail;
472662306a36Sopenharmony_ci	}
472762306a36Sopenharmony_ci
472862306a36Sopenharmony_ci	status = ocfs2_figure_insert_type(et, &last_eb_bh, &rec,
472962306a36Sopenharmony_ci					  &free_records, &insert);
473062306a36Sopenharmony_ci	if (status < 0) {
473162306a36Sopenharmony_ci		mlog_errno(status);
473262306a36Sopenharmony_ci		goto bail;
473362306a36Sopenharmony_ci	}
473462306a36Sopenharmony_ci
473562306a36Sopenharmony_ci	trace_ocfs2_insert_extent(insert.ins_appending, insert.ins_contig,
473662306a36Sopenharmony_ci				  insert.ins_contig_index, free_records,
473762306a36Sopenharmony_ci				  insert.ins_tree_depth);
473862306a36Sopenharmony_ci
473962306a36Sopenharmony_ci	if (insert.ins_contig == CONTIG_NONE && free_records == 0) {
474062306a36Sopenharmony_ci		status = ocfs2_grow_tree(handle, et,
474162306a36Sopenharmony_ci					 &insert.ins_tree_depth, &last_eb_bh,
474262306a36Sopenharmony_ci					 meta_ac);
474362306a36Sopenharmony_ci		if (status) {
474462306a36Sopenharmony_ci			mlog_errno(status);
474562306a36Sopenharmony_ci			goto bail;
474662306a36Sopenharmony_ci		}
474762306a36Sopenharmony_ci	}
474862306a36Sopenharmony_ci
474962306a36Sopenharmony_ci	/* Finally, we can add clusters. This might rotate the tree for us. */
475062306a36Sopenharmony_ci	status = ocfs2_do_insert_extent(handle, et, &rec, &insert);
475162306a36Sopenharmony_ci	if (status < 0)
475262306a36Sopenharmony_ci		mlog_errno(status);
475362306a36Sopenharmony_ci	else
475462306a36Sopenharmony_ci		ocfs2_et_extent_map_insert(et, &rec);
475562306a36Sopenharmony_ci
475662306a36Sopenharmony_cibail:
475762306a36Sopenharmony_ci	brelse(last_eb_bh);
475862306a36Sopenharmony_ci
475962306a36Sopenharmony_ci	return status;
476062306a36Sopenharmony_ci}
476162306a36Sopenharmony_ci
/*
 * Allocate and add clusters into the extent b-tree.
 * The new clusters (clusters_to_add) will be inserted at logical_offset.
 * The extent b-tree's root is specified by et, and
 * it is not limited to the file storage. Any extent tree can use this
 * function if it implements the proper ocfs2_extent_tree.
 */
476962306a36Sopenharmony_ciint ocfs2_add_clusters_in_btree(handle_t *handle,
477062306a36Sopenharmony_ci				struct ocfs2_extent_tree *et,
477162306a36Sopenharmony_ci				u32 *logical_offset,
477262306a36Sopenharmony_ci				u32 clusters_to_add,
477362306a36Sopenharmony_ci				int mark_unwritten,
477462306a36Sopenharmony_ci				struct ocfs2_alloc_context *data_ac,
477562306a36Sopenharmony_ci				struct ocfs2_alloc_context *meta_ac,
477662306a36Sopenharmony_ci				enum ocfs2_alloc_restarted *reason_ret)
477762306a36Sopenharmony_ci{
477862306a36Sopenharmony_ci	int status = 0, err = 0;
477962306a36Sopenharmony_ci	int need_free = 0;
478062306a36Sopenharmony_ci	int free_extents;
478162306a36Sopenharmony_ci	enum ocfs2_alloc_restarted reason = RESTART_NONE;
478262306a36Sopenharmony_ci	u32 bit_off, num_bits;
478362306a36Sopenharmony_ci	u64 block;
478462306a36Sopenharmony_ci	u8 flags = 0;
478562306a36Sopenharmony_ci	struct ocfs2_super *osb =
478662306a36Sopenharmony_ci		OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
478762306a36Sopenharmony_ci
478862306a36Sopenharmony_ci	BUG_ON(!clusters_to_add);
478962306a36Sopenharmony_ci
479062306a36Sopenharmony_ci	if (mark_unwritten)
479162306a36Sopenharmony_ci		flags = OCFS2_EXT_UNWRITTEN;
479262306a36Sopenharmony_ci
479362306a36Sopenharmony_ci	free_extents = ocfs2_num_free_extents(et);
479462306a36Sopenharmony_ci	if (free_extents < 0) {
479562306a36Sopenharmony_ci		status = free_extents;
479662306a36Sopenharmony_ci		mlog_errno(status);
479762306a36Sopenharmony_ci		goto leave;
479862306a36Sopenharmony_ci	}
479962306a36Sopenharmony_ci
480062306a36Sopenharmony_ci	/* there are two cases which could cause us to EAGAIN in the
480162306a36Sopenharmony_ci	 * we-need-more-metadata case:
480262306a36Sopenharmony_ci	 * 1) we haven't reserved *any*
480362306a36Sopenharmony_ci	 * 2) we are so fragmented, we've needed to add metadata too
480462306a36Sopenharmony_ci	 *    many times. */
480562306a36Sopenharmony_ci	if (!free_extents && !meta_ac) {
480662306a36Sopenharmony_ci		err = -1;
480762306a36Sopenharmony_ci		status = -EAGAIN;
480862306a36Sopenharmony_ci		reason = RESTART_META;
480962306a36Sopenharmony_ci		goto leave;
481062306a36Sopenharmony_ci	} else if ((!free_extents)
481162306a36Sopenharmony_ci		   && (ocfs2_alloc_context_bits_left(meta_ac)
481262306a36Sopenharmony_ci		       < ocfs2_extend_meta_needed(et->et_root_el))) {
481362306a36Sopenharmony_ci		err = -2;
481462306a36Sopenharmony_ci		status = -EAGAIN;
481562306a36Sopenharmony_ci		reason = RESTART_META;
481662306a36Sopenharmony_ci		goto leave;
481762306a36Sopenharmony_ci	}
481862306a36Sopenharmony_ci
481962306a36Sopenharmony_ci	status = __ocfs2_claim_clusters(handle, data_ac, 1,
482062306a36Sopenharmony_ci					clusters_to_add, &bit_off, &num_bits);
482162306a36Sopenharmony_ci	if (status < 0) {
482262306a36Sopenharmony_ci		if (status != -ENOSPC)
482362306a36Sopenharmony_ci			mlog_errno(status);
482462306a36Sopenharmony_ci		goto leave;
482562306a36Sopenharmony_ci	}
482662306a36Sopenharmony_ci
482762306a36Sopenharmony_ci	BUG_ON(num_bits > clusters_to_add);
482862306a36Sopenharmony_ci
482962306a36Sopenharmony_ci	/* reserve our write early -- insert_extent may update the tree root */
483062306a36Sopenharmony_ci	status = ocfs2_et_root_journal_access(handle, et,
483162306a36Sopenharmony_ci					      OCFS2_JOURNAL_ACCESS_WRITE);
483262306a36Sopenharmony_ci	if (status < 0) {
483362306a36Sopenharmony_ci		mlog_errno(status);
483462306a36Sopenharmony_ci		need_free = 1;
483562306a36Sopenharmony_ci		goto bail;
483662306a36Sopenharmony_ci	}
483762306a36Sopenharmony_ci
483862306a36Sopenharmony_ci	block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
483962306a36Sopenharmony_ci	trace_ocfs2_add_clusters_in_btree(
484062306a36Sopenharmony_ci	     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
484162306a36Sopenharmony_ci	     bit_off, num_bits);
484262306a36Sopenharmony_ci	status = ocfs2_insert_extent(handle, et, *logical_offset, block,
484362306a36Sopenharmony_ci				     num_bits, flags, meta_ac);
484462306a36Sopenharmony_ci	if (status < 0) {
484562306a36Sopenharmony_ci		mlog_errno(status);
484662306a36Sopenharmony_ci		need_free = 1;
484762306a36Sopenharmony_ci		goto bail;
484862306a36Sopenharmony_ci	}
484962306a36Sopenharmony_ci
485062306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, et->et_root_bh);
485162306a36Sopenharmony_ci
485262306a36Sopenharmony_ci	clusters_to_add -= num_bits;
485362306a36Sopenharmony_ci	*logical_offset += num_bits;
485462306a36Sopenharmony_ci
485562306a36Sopenharmony_ci	if (clusters_to_add) {
485662306a36Sopenharmony_ci		err = clusters_to_add;
485762306a36Sopenharmony_ci		status = -EAGAIN;
485862306a36Sopenharmony_ci		reason = RESTART_TRANS;
485962306a36Sopenharmony_ci	}
486062306a36Sopenharmony_ci
486162306a36Sopenharmony_cibail:
486262306a36Sopenharmony_ci	if (need_free) {
486362306a36Sopenharmony_ci		if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
486462306a36Sopenharmony_ci			ocfs2_free_local_alloc_bits(osb, handle, data_ac,
486562306a36Sopenharmony_ci					bit_off, num_bits);
486662306a36Sopenharmony_ci		else
486762306a36Sopenharmony_ci			ocfs2_free_clusters(handle,
486862306a36Sopenharmony_ci					data_ac->ac_inode,
486962306a36Sopenharmony_ci					data_ac->ac_bh,
487062306a36Sopenharmony_ci					ocfs2_clusters_to_blocks(osb->sb, bit_off),
487162306a36Sopenharmony_ci					num_bits);
487262306a36Sopenharmony_ci	}
487362306a36Sopenharmony_ci
487462306a36Sopenharmony_cileave:
487562306a36Sopenharmony_ci	if (reason_ret)
487662306a36Sopenharmony_ci		*reason_ret = reason;
487762306a36Sopenharmony_ci	trace_ocfs2_add_clusters_in_btree_ret(status, reason, err);
487862306a36Sopenharmony_ci	return status;
487962306a36Sopenharmony_ci}
488062306a36Sopenharmony_ci
488162306a36Sopenharmony_cistatic void ocfs2_make_right_split_rec(struct super_block *sb,
488262306a36Sopenharmony_ci				       struct ocfs2_extent_rec *split_rec,
488362306a36Sopenharmony_ci				       u32 cpos,
488462306a36Sopenharmony_ci				       struct ocfs2_extent_rec *rec)
488562306a36Sopenharmony_ci{
488662306a36Sopenharmony_ci	u32 rec_cpos = le32_to_cpu(rec->e_cpos);
488762306a36Sopenharmony_ci	u32 rec_range = rec_cpos + le16_to_cpu(rec->e_leaf_clusters);
488862306a36Sopenharmony_ci
488962306a36Sopenharmony_ci	memset(split_rec, 0, sizeof(struct ocfs2_extent_rec));
489062306a36Sopenharmony_ci
489162306a36Sopenharmony_ci	split_rec->e_cpos = cpu_to_le32(cpos);
489262306a36Sopenharmony_ci	split_rec->e_leaf_clusters = cpu_to_le16(rec_range - cpos);
489362306a36Sopenharmony_ci
489462306a36Sopenharmony_ci	split_rec->e_blkno = rec->e_blkno;
489562306a36Sopenharmony_ci	le64_add_cpu(&split_rec->e_blkno,
489662306a36Sopenharmony_ci		     ocfs2_clusters_to_blocks(sb, cpos - rec_cpos));
489762306a36Sopenharmony_ci
489862306a36Sopenharmony_ci	split_rec->e_flags = rec->e_flags;
489962306a36Sopenharmony_ci}
490062306a36Sopenharmony_ci
490162306a36Sopenharmony_cistatic int ocfs2_split_and_insert(handle_t *handle,
490262306a36Sopenharmony_ci				  struct ocfs2_extent_tree *et,
490362306a36Sopenharmony_ci				  struct ocfs2_path *path,
490462306a36Sopenharmony_ci				  struct buffer_head **last_eb_bh,
490562306a36Sopenharmony_ci				  int split_index,
490662306a36Sopenharmony_ci				  struct ocfs2_extent_rec *orig_split_rec,
490762306a36Sopenharmony_ci				  struct ocfs2_alloc_context *meta_ac)
490862306a36Sopenharmony_ci{
490962306a36Sopenharmony_ci	int ret = 0, depth;
491062306a36Sopenharmony_ci	unsigned int insert_range, rec_range, do_leftright = 0;
491162306a36Sopenharmony_ci	struct ocfs2_extent_rec tmprec;
491262306a36Sopenharmony_ci	struct ocfs2_extent_list *rightmost_el;
491362306a36Sopenharmony_ci	struct ocfs2_extent_rec rec;
491462306a36Sopenharmony_ci	struct ocfs2_extent_rec split_rec = *orig_split_rec;
491562306a36Sopenharmony_ci	struct ocfs2_insert_type insert;
491662306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
491762306a36Sopenharmony_ci
491862306a36Sopenharmony_cileftright:
491962306a36Sopenharmony_ci	/*
492062306a36Sopenharmony_ci	 * Store a copy of the record on the stack - it might move
492162306a36Sopenharmony_ci	 * around as the tree is manipulated below.
492262306a36Sopenharmony_ci	 */
492362306a36Sopenharmony_ci	rec = path_leaf_el(path)->l_recs[split_index];
492462306a36Sopenharmony_ci
492562306a36Sopenharmony_ci	rightmost_el = et->et_root_el;
492662306a36Sopenharmony_ci
492762306a36Sopenharmony_ci	depth = le16_to_cpu(rightmost_el->l_tree_depth);
492862306a36Sopenharmony_ci	if (depth) {
492962306a36Sopenharmony_ci		BUG_ON(!(*last_eb_bh));
493062306a36Sopenharmony_ci		eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
493162306a36Sopenharmony_ci		rightmost_el = &eb->h_list;
493262306a36Sopenharmony_ci	}
493362306a36Sopenharmony_ci
493462306a36Sopenharmony_ci	if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
493562306a36Sopenharmony_ci	    le16_to_cpu(rightmost_el->l_count)) {
493662306a36Sopenharmony_ci		ret = ocfs2_grow_tree(handle, et,
493762306a36Sopenharmony_ci				      &depth, last_eb_bh, meta_ac);
493862306a36Sopenharmony_ci		if (ret) {
493962306a36Sopenharmony_ci			mlog_errno(ret);
494062306a36Sopenharmony_ci			goto out;
494162306a36Sopenharmony_ci		}
494262306a36Sopenharmony_ci	}
494362306a36Sopenharmony_ci
494462306a36Sopenharmony_ci	memset(&insert, 0, sizeof(struct ocfs2_insert_type));
494562306a36Sopenharmony_ci	insert.ins_appending = APPEND_NONE;
494662306a36Sopenharmony_ci	insert.ins_contig = CONTIG_NONE;
494762306a36Sopenharmony_ci	insert.ins_tree_depth = depth;
494862306a36Sopenharmony_ci
494962306a36Sopenharmony_ci	insert_range = le32_to_cpu(split_rec.e_cpos) +
495062306a36Sopenharmony_ci		le16_to_cpu(split_rec.e_leaf_clusters);
495162306a36Sopenharmony_ci	rec_range = le32_to_cpu(rec.e_cpos) +
495262306a36Sopenharmony_ci		le16_to_cpu(rec.e_leaf_clusters);
495362306a36Sopenharmony_ci
495462306a36Sopenharmony_ci	if (split_rec.e_cpos == rec.e_cpos) {
495562306a36Sopenharmony_ci		insert.ins_split = SPLIT_LEFT;
495662306a36Sopenharmony_ci	} else if (insert_range == rec_range) {
495762306a36Sopenharmony_ci		insert.ins_split = SPLIT_RIGHT;
495862306a36Sopenharmony_ci	} else {
495962306a36Sopenharmony_ci		/*
496062306a36Sopenharmony_ci		 * Left/right split. We fake this as a right split
496162306a36Sopenharmony_ci		 * first and then make a second pass as a left split.
496262306a36Sopenharmony_ci		 */
496362306a36Sopenharmony_ci		insert.ins_split = SPLIT_RIGHT;
496462306a36Sopenharmony_ci
496562306a36Sopenharmony_ci		ocfs2_make_right_split_rec(ocfs2_metadata_cache_get_super(et->et_ci),
496662306a36Sopenharmony_ci					   &tmprec, insert_range, &rec);
496762306a36Sopenharmony_ci
496862306a36Sopenharmony_ci		split_rec = tmprec;
496962306a36Sopenharmony_ci
497062306a36Sopenharmony_ci		BUG_ON(do_leftright);
497162306a36Sopenharmony_ci		do_leftright = 1;
497262306a36Sopenharmony_ci	}
497362306a36Sopenharmony_ci
497462306a36Sopenharmony_ci	ret = ocfs2_do_insert_extent(handle, et, &split_rec, &insert);
497562306a36Sopenharmony_ci	if (ret) {
497662306a36Sopenharmony_ci		mlog_errno(ret);
497762306a36Sopenharmony_ci		goto out;
497862306a36Sopenharmony_ci	}
497962306a36Sopenharmony_ci
498062306a36Sopenharmony_ci	if (do_leftright == 1) {
498162306a36Sopenharmony_ci		u32 cpos;
498262306a36Sopenharmony_ci		struct ocfs2_extent_list *el;
498362306a36Sopenharmony_ci
498462306a36Sopenharmony_ci		do_leftright++;
498562306a36Sopenharmony_ci		split_rec = *orig_split_rec;
498662306a36Sopenharmony_ci
498762306a36Sopenharmony_ci		ocfs2_reinit_path(path, 1);
498862306a36Sopenharmony_ci
498962306a36Sopenharmony_ci		cpos = le32_to_cpu(split_rec.e_cpos);
499062306a36Sopenharmony_ci		ret = ocfs2_find_path(et->et_ci, path, cpos);
499162306a36Sopenharmony_ci		if (ret) {
499262306a36Sopenharmony_ci			mlog_errno(ret);
499362306a36Sopenharmony_ci			goto out;
499462306a36Sopenharmony_ci		}
499562306a36Sopenharmony_ci
499662306a36Sopenharmony_ci		el = path_leaf_el(path);
499762306a36Sopenharmony_ci		split_index = ocfs2_search_extent_list(el, cpos);
499862306a36Sopenharmony_ci		if (split_index == -1) {
499962306a36Sopenharmony_ci			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
500062306a36Sopenharmony_ci				    "Owner %llu has an extent at cpos %u which can no longer be found\n",
500162306a36Sopenharmony_ci				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
500262306a36Sopenharmony_ci				    cpos);
500362306a36Sopenharmony_ci			ret = -EROFS;
500462306a36Sopenharmony_ci			goto out;
500562306a36Sopenharmony_ci		}
500662306a36Sopenharmony_ci		goto leftright;
500762306a36Sopenharmony_ci	}
500862306a36Sopenharmony_ciout:
500962306a36Sopenharmony_ci
501062306a36Sopenharmony_ci	return ret;
501162306a36Sopenharmony_ci}
501262306a36Sopenharmony_ci
501362306a36Sopenharmony_cistatic int ocfs2_replace_extent_rec(handle_t *handle,
501462306a36Sopenharmony_ci				    struct ocfs2_extent_tree *et,
501562306a36Sopenharmony_ci				    struct ocfs2_path *path,
501662306a36Sopenharmony_ci				    struct ocfs2_extent_list *el,
501762306a36Sopenharmony_ci				    int split_index,
501862306a36Sopenharmony_ci				    struct ocfs2_extent_rec *split_rec)
501962306a36Sopenharmony_ci{
502062306a36Sopenharmony_ci	int ret;
502162306a36Sopenharmony_ci
502262306a36Sopenharmony_ci	ret = ocfs2_path_bh_journal_access(handle, et->et_ci, path,
502362306a36Sopenharmony_ci					   path_num_items(path) - 1);
502462306a36Sopenharmony_ci	if (ret) {
502562306a36Sopenharmony_ci		mlog_errno(ret);
502662306a36Sopenharmony_ci		goto out;
502762306a36Sopenharmony_ci	}
502862306a36Sopenharmony_ci
502962306a36Sopenharmony_ci	el->l_recs[split_index] = *split_rec;
503062306a36Sopenharmony_ci
503162306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, path_leaf_bh(path));
503262306a36Sopenharmony_ciout:
503362306a36Sopenharmony_ci	return ret;
503462306a36Sopenharmony_ci}
503562306a36Sopenharmony_ci
503662306a36Sopenharmony_ci/*
503762306a36Sopenharmony_ci * Split part or all of the extent record at split_index in the leaf
503862306a36Sopenharmony_ci * pointed to by path. Merge with the contiguous extent record if needed.
503962306a36Sopenharmony_ci *
504062306a36Sopenharmony_ci * Care is taken to handle contiguousness so as to not grow the tree.
504162306a36Sopenharmony_ci *
504262306a36Sopenharmony_ci * meta_ac is not strictly necessary - we only truly need it if growth
504362306a36Sopenharmony_ci * of the tree is required. All other cases will degrade into a less
504462306a36Sopenharmony_ci * optimal tree layout.
504562306a36Sopenharmony_ci *
504662306a36Sopenharmony_ci * last_eb_bh should be the rightmost leaf block for any extent
504762306a36Sopenharmony_ci * btree. Since a split may grow the tree or a merge might shrink it,
504862306a36Sopenharmony_ci * the caller cannot trust the contents of that buffer after this call.
504962306a36Sopenharmony_ci *
505062306a36Sopenharmony_ci * This code is optimized for readability - several passes might be
505162306a36Sopenharmony_ci * made over certain portions of the tree. All of those blocks will
505262306a36Sopenharmony_ci * have been brought into cache (and pinned via the journal), so the
505362306a36Sopenharmony_ci * extra overhead is not expressed in terms of disk reads.
505462306a36Sopenharmony_ci */
505562306a36Sopenharmony_ciint ocfs2_split_extent(handle_t *handle,
505662306a36Sopenharmony_ci		       struct ocfs2_extent_tree *et,
505762306a36Sopenharmony_ci		       struct ocfs2_path *path,
505862306a36Sopenharmony_ci		       int split_index,
505962306a36Sopenharmony_ci		       struct ocfs2_extent_rec *split_rec,
506062306a36Sopenharmony_ci		       struct ocfs2_alloc_context *meta_ac,
506162306a36Sopenharmony_ci		       struct ocfs2_cached_dealloc_ctxt *dealloc)
506262306a36Sopenharmony_ci{
506362306a36Sopenharmony_ci	int ret = 0;
506462306a36Sopenharmony_ci	struct ocfs2_extent_list *el = path_leaf_el(path);
506562306a36Sopenharmony_ci	struct buffer_head *last_eb_bh = NULL;
506662306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
506762306a36Sopenharmony_ci	struct ocfs2_merge_ctxt ctxt;
506862306a36Sopenharmony_ci
506962306a36Sopenharmony_ci	if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) ||
507062306a36Sopenharmony_ci	    ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) <
507162306a36Sopenharmony_ci	     (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) {
507262306a36Sopenharmony_ci		ret = -EIO;
507362306a36Sopenharmony_ci		mlog_errno(ret);
507462306a36Sopenharmony_ci		goto out;
507562306a36Sopenharmony_ci	}
507662306a36Sopenharmony_ci
507762306a36Sopenharmony_ci	ret = ocfs2_figure_merge_contig_type(et, path, el,
507862306a36Sopenharmony_ci					     split_index,
507962306a36Sopenharmony_ci					     split_rec,
508062306a36Sopenharmony_ci					     &ctxt);
508162306a36Sopenharmony_ci	if (ret) {
508262306a36Sopenharmony_ci		mlog_errno(ret);
508362306a36Sopenharmony_ci		goto out;
508462306a36Sopenharmony_ci	}
508562306a36Sopenharmony_ci
508662306a36Sopenharmony_ci	/*
508762306a36Sopenharmony_ci	 * The core merge / split code wants to know how much room is
508862306a36Sopenharmony_ci	 * left in this allocation tree, so we pass the
508962306a36Sopenharmony_ci	 * rightmost extent list.
509062306a36Sopenharmony_ci	 */
509162306a36Sopenharmony_ci	if (path->p_tree_depth) {
509262306a36Sopenharmony_ci		ret = ocfs2_read_extent_block(et->et_ci,
509362306a36Sopenharmony_ci					      ocfs2_et_get_last_eb_blk(et),
509462306a36Sopenharmony_ci					      &last_eb_bh);
509562306a36Sopenharmony_ci		if (ret) {
509662306a36Sopenharmony_ci			mlog_errno(ret);
509762306a36Sopenharmony_ci			goto out;
509862306a36Sopenharmony_ci		}
509962306a36Sopenharmony_ci	}
510062306a36Sopenharmony_ci
510162306a36Sopenharmony_ci	if (rec->e_cpos == split_rec->e_cpos &&
510262306a36Sopenharmony_ci	    rec->e_leaf_clusters == split_rec->e_leaf_clusters)
510362306a36Sopenharmony_ci		ctxt.c_split_covers_rec = 1;
510462306a36Sopenharmony_ci	else
510562306a36Sopenharmony_ci		ctxt.c_split_covers_rec = 0;
510662306a36Sopenharmony_ci
510762306a36Sopenharmony_ci	ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]);
510862306a36Sopenharmony_ci
510962306a36Sopenharmony_ci	trace_ocfs2_split_extent(split_index, ctxt.c_contig_type,
511062306a36Sopenharmony_ci				 ctxt.c_has_empty_extent,
511162306a36Sopenharmony_ci				 ctxt.c_split_covers_rec);
511262306a36Sopenharmony_ci
511362306a36Sopenharmony_ci	if (ctxt.c_contig_type == CONTIG_NONE) {
511462306a36Sopenharmony_ci		if (ctxt.c_split_covers_rec)
511562306a36Sopenharmony_ci			ret = ocfs2_replace_extent_rec(handle, et, path, el,
511662306a36Sopenharmony_ci						       split_index, split_rec);
511762306a36Sopenharmony_ci		else
511862306a36Sopenharmony_ci			ret = ocfs2_split_and_insert(handle, et, path,
511962306a36Sopenharmony_ci						     &last_eb_bh, split_index,
512062306a36Sopenharmony_ci						     split_rec, meta_ac);
512162306a36Sopenharmony_ci		if (ret)
512262306a36Sopenharmony_ci			mlog_errno(ret);
512362306a36Sopenharmony_ci	} else {
512462306a36Sopenharmony_ci		ret = ocfs2_try_to_merge_extent(handle, et, path,
512562306a36Sopenharmony_ci						split_index, split_rec,
512662306a36Sopenharmony_ci						dealloc, &ctxt);
512762306a36Sopenharmony_ci		if (ret)
512862306a36Sopenharmony_ci			mlog_errno(ret);
512962306a36Sopenharmony_ci	}
513062306a36Sopenharmony_ci
513162306a36Sopenharmony_ciout:
513262306a36Sopenharmony_ci	brelse(last_eb_bh);
513362306a36Sopenharmony_ci	return ret;
513462306a36Sopenharmony_ci}
513562306a36Sopenharmony_ci
513662306a36Sopenharmony_ci/*
513762306a36Sopenharmony_ci * Change the flags of the already-existing extent at cpos for len clusters.
513862306a36Sopenharmony_ci *
513962306a36Sopenharmony_ci * new_flags: the flags we want to set.
514062306a36Sopenharmony_ci * clear_flags: the flags we want to clear.
514162306a36Sopenharmony_ci * phys: the new physical offset we want this new extent starts from.
514262306a36Sopenharmony_ci *
514362306a36Sopenharmony_ci * If the existing extent is larger than the request, initiate a
514462306a36Sopenharmony_ci * split. An attempt will be made at merging with adjacent extents.
514562306a36Sopenharmony_ci *
514662306a36Sopenharmony_ci * The caller is responsible for passing down meta_ac if we'll need it.
514762306a36Sopenharmony_ci */
514862306a36Sopenharmony_ciint ocfs2_change_extent_flag(handle_t *handle,
514962306a36Sopenharmony_ci			     struct ocfs2_extent_tree *et,
515062306a36Sopenharmony_ci			     u32 cpos, u32 len, u32 phys,
515162306a36Sopenharmony_ci			     struct ocfs2_alloc_context *meta_ac,
515262306a36Sopenharmony_ci			     struct ocfs2_cached_dealloc_ctxt *dealloc,
515362306a36Sopenharmony_ci			     int new_flags, int clear_flags)
515462306a36Sopenharmony_ci{
515562306a36Sopenharmony_ci	int ret, index;
515662306a36Sopenharmony_ci	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
515762306a36Sopenharmony_ci	u64 start_blkno = ocfs2_clusters_to_blocks(sb, phys);
515862306a36Sopenharmony_ci	struct ocfs2_extent_rec split_rec;
515962306a36Sopenharmony_ci	struct ocfs2_path *left_path = NULL;
516062306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
516162306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
516262306a36Sopenharmony_ci
516362306a36Sopenharmony_ci	left_path = ocfs2_new_path_from_et(et);
516462306a36Sopenharmony_ci	if (!left_path) {
516562306a36Sopenharmony_ci		ret = -ENOMEM;
516662306a36Sopenharmony_ci		mlog_errno(ret);
516762306a36Sopenharmony_ci		goto out;
516862306a36Sopenharmony_ci	}
516962306a36Sopenharmony_ci
517062306a36Sopenharmony_ci	ret = ocfs2_find_path(et->et_ci, left_path, cpos);
517162306a36Sopenharmony_ci	if (ret) {
517262306a36Sopenharmony_ci		mlog_errno(ret);
517362306a36Sopenharmony_ci		goto out;
517462306a36Sopenharmony_ci	}
517562306a36Sopenharmony_ci	el = path_leaf_el(left_path);
517662306a36Sopenharmony_ci
517762306a36Sopenharmony_ci	index = ocfs2_search_extent_list(el, cpos);
517862306a36Sopenharmony_ci	if (index == -1) {
517962306a36Sopenharmony_ci		ocfs2_error(sb,
518062306a36Sopenharmony_ci			    "Owner %llu has an extent at cpos %u which can no longer be found\n",
518162306a36Sopenharmony_ci			    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
518262306a36Sopenharmony_ci			    cpos);
518362306a36Sopenharmony_ci		ret = -EROFS;
518462306a36Sopenharmony_ci		goto out;
518562306a36Sopenharmony_ci	}
518662306a36Sopenharmony_ci
518762306a36Sopenharmony_ci	ret = -EIO;
518862306a36Sopenharmony_ci	rec = &el->l_recs[index];
518962306a36Sopenharmony_ci	if (new_flags && (rec->e_flags & new_flags)) {
519062306a36Sopenharmony_ci		mlog(ML_ERROR, "Owner %llu tried to set %d flags on an "
519162306a36Sopenharmony_ci		     "extent that already had them\n",
519262306a36Sopenharmony_ci		     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
519362306a36Sopenharmony_ci		     new_flags);
519462306a36Sopenharmony_ci		goto out;
519562306a36Sopenharmony_ci	}
519662306a36Sopenharmony_ci
519762306a36Sopenharmony_ci	if (clear_flags && !(rec->e_flags & clear_flags)) {
519862306a36Sopenharmony_ci		mlog(ML_ERROR, "Owner %llu tried to clear %d flags on an "
519962306a36Sopenharmony_ci		     "extent that didn't have them\n",
520062306a36Sopenharmony_ci		     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
520162306a36Sopenharmony_ci		     clear_flags);
520262306a36Sopenharmony_ci		goto out;
520362306a36Sopenharmony_ci	}
520462306a36Sopenharmony_ci
520562306a36Sopenharmony_ci	memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec));
520662306a36Sopenharmony_ci	split_rec.e_cpos = cpu_to_le32(cpos);
520762306a36Sopenharmony_ci	split_rec.e_leaf_clusters = cpu_to_le16(len);
520862306a36Sopenharmony_ci	split_rec.e_blkno = cpu_to_le64(start_blkno);
520962306a36Sopenharmony_ci	split_rec.e_flags = rec->e_flags;
521062306a36Sopenharmony_ci	if (new_flags)
521162306a36Sopenharmony_ci		split_rec.e_flags |= new_flags;
521262306a36Sopenharmony_ci	if (clear_flags)
521362306a36Sopenharmony_ci		split_rec.e_flags &= ~clear_flags;
521462306a36Sopenharmony_ci
521562306a36Sopenharmony_ci	ret = ocfs2_split_extent(handle, et, left_path,
521662306a36Sopenharmony_ci				 index, &split_rec, meta_ac,
521762306a36Sopenharmony_ci				 dealloc);
521862306a36Sopenharmony_ci	if (ret)
521962306a36Sopenharmony_ci		mlog_errno(ret);
522062306a36Sopenharmony_ci
522162306a36Sopenharmony_ciout:
522262306a36Sopenharmony_ci	ocfs2_free_path(left_path);
522362306a36Sopenharmony_ci	return ret;
522462306a36Sopenharmony_ci
522562306a36Sopenharmony_ci}
522662306a36Sopenharmony_ci
522762306a36Sopenharmony_ci/*
522862306a36Sopenharmony_ci * Mark the already-existing extent at cpos as written for len clusters.
522962306a36Sopenharmony_ci * This removes the unwritten extent flag.
523062306a36Sopenharmony_ci *
523162306a36Sopenharmony_ci * If the existing extent is larger than the request, initiate a
523262306a36Sopenharmony_ci * split. An attempt will be made at merging with adjacent extents.
523362306a36Sopenharmony_ci *
523462306a36Sopenharmony_ci * The caller is responsible for passing down meta_ac if we'll need it.
523562306a36Sopenharmony_ci */
523662306a36Sopenharmony_ciint ocfs2_mark_extent_written(struct inode *inode,
523762306a36Sopenharmony_ci			      struct ocfs2_extent_tree *et,
523862306a36Sopenharmony_ci			      handle_t *handle, u32 cpos, u32 len, u32 phys,
523962306a36Sopenharmony_ci			      struct ocfs2_alloc_context *meta_ac,
524062306a36Sopenharmony_ci			      struct ocfs2_cached_dealloc_ctxt *dealloc)
524162306a36Sopenharmony_ci{
524262306a36Sopenharmony_ci	int ret;
524362306a36Sopenharmony_ci
524462306a36Sopenharmony_ci	trace_ocfs2_mark_extent_written(
524562306a36Sopenharmony_ci		(unsigned long long)OCFS2_I(inode)->ip_blkno,
524662306a36Sopenharmony_ci		cpos, len, phys);
524762306a36Sopenharmony_ci
524862306a36Sopenharmony_ci	if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
524962306a36Sopenharmony_ci		ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents that are being written to, but the feature bit is not set in the super block\n",
525062306a36Sopenharmony_ci			    (unsigned long long)OCFS2_I(inode)->ip_blkno);
525162306a36Sopenharmony_ci		ret = -EROFS;
525262306a36Sopenharmony_ci		goto out;
525362306a36Sopenharmony_ci	}
525462306a36Sopenharmony_ci
525562306a36Sopenharmony_ci	/*
525662306a36Sopenharmony_ci	 * XXX: This should be fixed up so that we just re-insert the
525762306a36Sopenharmony_ci	 * next extent records.
525862306a36Sopenharmony_ci	 */
525962306a36Sopenharmony_ci	ocfs2_et_extent_map_truncate(et, 0);
526062306a36Sopenharmony_ci
526162306a36Sopenharmony_ci	ret = ocfs2_change_extent_flag(handle, et, cpos,
526262306a36Sopenharmony_ci				       len, phys, meta_ac, dealloc,
526362306a36Sopenharmony_ci				       0, OCFS2_EXT_UNWRITTEN);
526462306a36Sopenharmony_ci	if (ret)
526562306a36Sopenharmony_ci		mlog_errno(ret);
526662306a36Sopenharmony_ci
526762306a36Sopenharmony_ciout:
526862306a36Sopenharmony_ci	return ret;
526962306a36Sopenharmony_ci}
527062306a36Sopenharmony_ci
527162306a36Sopenharmony_cistatic int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et,
527262306a36Sopenharmony_ci			    struct ocfs2_path *path,
527362306a36Sopenharmony_ci			    int index, u32 new_range,
527462306a36Sopenharmony_ci			    struct ocfs2_alloc_context *meta_ac)
527562306a36Sopenharmony_ci{
527662306a36Sopenharmony_ci	int ret, depth, credits;
527762306a36Sopenharmony_ci	struct buffer_head *last_eb_bh = NULL;
527862306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
527962306a36Sopenharmony_ci	struct ocfs2_extent_list *rightmost_el, *el;
528062306a36Sopenharmony_ci	struct ocfs2_extent_rec split_rec;
528162306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
528262306a36Sopenharmony_ci	struct ocfs2_insert_type insert;
528362306a36Sopenharmony_ci
528462306a36Sopenharmony_ci	/*
528562306a36Sopenharmony_ci	 * Setup the record to split before we grow the tree.
528662306a36Sopenharmony_ci	 */
528762306a36Sopenharmony_ci	el = path_leaf_el(path);
528862306a36Sopenharmony_ci	rec = &el->l_recs[index];
528962306a36Sopenharmony_ci	ocfs2_make_right_split_rec(ocfs2_metadata_cache_get_super(et->et_ci),
529062306a36Sopenharmony_ci				   &split_rec, new_range, rec);
529162306a36Sopenharmony_ci
529262306a36Sopenharmony_ci	depth = path->p_tree_depth;
529362306a36Sopenharmony_ci	if (depth > 0) {
529462306a36Sopenharmony_ci		ret = ocfs2_read_extent_block(et->et_ci,
529562306a36Sopenharmony_ci					      ocfs2_et_get_last_eb_blk(et),
529662306a36Sopenharmony_ci					      &last_eb_bh);
529762306a36Sopenharmony_ci		if (ret < 0) {
529862306a36Sopenharmony_ci			mlog_errno(ret);
529962306a36Sopenharmony_ci			goto out;
530062306a36Sopenharmony_ci		}
530162306a36Sopenharmony_ci
530262306a36Sopenharmony_ci		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
530362306a36Sopenharmony_ci		rightmost_el = &eb->h_list;
530462306a36Sopenharmony_ci	} else
530562306a36Sopenharmony_ci		rightmost_el = path_leaf_el(path);
530662306a36Sopenharmony_ci
530762306a36Sopenharmony_ci	credits = path->p_tree_depth +
530862306a36Sopenharmony_ci		  ocfs2_extend_meta_needed(et->et_root_el);
530962306a36Sopenharmony_ci	ret = ocfs2_extend_trans(handle, credits);
531062306a36Sopenharmony_ci	if (ret) {
531162306a36Sopenharmony_ci		mlog_errno(ret);
531262306a36Sopenharmony_ci		goto out;
531362306a36Sopenharmony_ci	}
531462306a36Sopenharmony_ci
531562306a36Sopenharmony_ci	if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
531662306a36Sopenharmony_ci	    le16_to_cpu(rightmost_el->l_count)) {
531762306a36Sopenharmony_ci		ret = ocfs2_grow_tree(handle, et, &depth, &last_eb_bh,
531862306a36Sopenharmony_ci				      meta_ac);
531962306a36Sopenharmony_ci		if (ret) {
532062306a36Sopenharmony_ci			mlog_errno(ret);
532162306a36Sopenharmony_ci			goto out;
532262306a36Sopenharmony_ci		}
532362306a36Sopenharmony_ci	}
532462306a36Sopenharmony_ci
532562306a36Sopenharmony_ci	memset(&insert, 0, sizeof(struct ocfs2_insert_type));
532662306a36Sopenharmony_ci	insert.ins_appending = APPEND_NONE;
532762306a36Sopenharmony_ci	insert.ins_contig = CONTIG_NONE;
532862306a36Sopenharmony_ci	insert.ins_split = SPLIT_RIGHT;
532962306a36Sopenharmony_ci	insert.ins_tree_depth = depth;
533062306a36Sopenharmony_ci
533162306a36Sopenharmony_ci	ret = ocfs2_do_insert_extent(handle, et, &split_rec, &insert);
533262306a36Sopenharmony_ci	if (ret)
533362306a36Sopenharmony_ci		mlog_errno(ret);
533462306a36Sopenharmony_ci
533562306a36Sopenharmony_ciout:
533662306a36Sopenharmony_ci	brelse(last_eb_bh);
533762306a36Sopenharmony_ci	return ret;
533862306a36Sopenharmony_ci}
533962306a36Sopenharmony_ci
534062306a36Sopenharmony_cistatic int ocfs2_truncate_rec(handle_t *handle,
534162306a36Sopenharmony_ci			      struct ocfs2_extent_tree *et,
534262306a36Sopenharmony_ci			      struct ocfs2_path *path, int index,
534362306a36Sopenharmony_ci			      struct ocfs2_cached_dealloc_ctxt *dealloc,
534462306a36Sopenharmony_ci			      u32 cpos, u32 len)
534562306a36Sopenharmony_ci{
534662306a36Sopenharmony_ci	int ret;
534762306a36Sopenharmony_ci	u32 left_cpos, rec_range, trunc_range;
534862306a36Sopenharmony_ci	int is_rightmost_tree_rec = 0;
534962306a36Sopenharmony_ci	struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
535062306a36Sopenharmony_ci	struct ocfs2_path *left_path = NULL;
535162306a36Sopenharmony_ci	struct ocfs2_extent_list *el = path_leaf_el(path);
535262306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
535362306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
535462306a36Sopenharmony_ci
535562306a36Sopenharmony_ci	if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
535662306a36Sopenharmony_ci		/* extend credit for ocfs2_remove_rightmost_path */
535762306a36Sopenharmony_ci		ret = ocfs2_extend_rotate_transaction(handle, 0,
535862306a36Sopenharmony_ci				jbd2_handle_buffer_credits(handle),
535962306a36Sopenharmony_ci				path);
536062306a36Sopenharmony_ci		if (ret) {
536162306a36Sopenharmony_ci			mlog_errno(ret);
536262306a36Sopenharmony_ci			goto out;
536362306a36Sopenharmony_ci		}
536462306a36Sopenharmony_ci
536562306a36Sopenharmony_ci		ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
536662306a36Sopenharmony_ci		if (ret) {
536762306a36Sopenharmony_ci			mlog_errno(ret);
536862306a36Sopenharmony_ci			goto out;
536962306a36Sopenharmony_ci		}
537062306a36Sopenharmony_ci
537162306a36Sopenharmony_ci		index--;
537262306a36Sopenharmony_ci	}
537362306a36Sopenharmony_ci
537462306a36Sopenharmony_ci	if (index == (le16_to_cpu(el->l_next_free_rec) - 1) &&
537562306a36Sopenharmony_ci	    path->p_tree_depth) {
537662306a36Sopenharmony_ci		/*
537762306a36Sopenharmony_ci		 * Check whether this is the rightmost tree record. If
537862306a36Sopenharmony_ci		 * we remove all of this record or part of its right
537962306a36Sopenharmony_ci		 * edge then an update of the record lengths above it
538062306a36Sopenharmony_ci		 * will be required.
538162306a36Sopenharmony_ci		 */
538262306a36Sopenharmony_ci		eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
538362306a36Sopenharmony_ci		if (eb->h_next_leaf_blk == 0)
538462306a36Sopenharmony_ci			is_rightmost_tree_rec = 1;
538562306a36Sopenharmony_ci	}
538662306a36Sopenharmony_ci
538762306a36Sopenharmony_ci	rec = &el->l_recs[index];
538862306a36Sopenharmony_ci	if (index == 0 && path->p_tree_depth &&
538962306a36Sopenharmony_ci	    le32_to_cpu(rec->e_cpos) == cpos) {
539062306a36Sopenharmony_ci		/*
539162306a36Sopenharmony_ci		 * Changing the leftmost offset (via partial or whole
539262306a36Sopenharmony_ci		 * record truncate) of an interior (or rightmost) path
539362306a36Sopenharmony_ci		 * means we have to update the subtree that is formed
539462306a36Sopenharmony_ci		 * by this leaf and the one to it's left.
539562306a36Sopenharmony_ci		 *
539662306a36Sopenharmony_ci		 * There are two cases we can skip:
539762306a36Sopenharmony_ci		 *   1) Path is the leftmost one in our btree.
539862306a36Sopenharmony_ci		 *   2) The leaf is rightmost and will be empty after
539962306a36Sopenharmony_ci		 *      we remove the extent record - the rotate code
540062306a36Sopenharmony_ci		 *      knows how to update the newly formed edge.
540162306a36Sopenharmony_ci		 */
540262306a36Sopenharmony_ci
540362306a36Sopenharmony_ci		ret = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
540462306a36Sopenharmony_ci		if (ret) {
540562306a36Sopenharmony_ci			mlog_errno(ret);
540662306a36Sopenharmony_ci			goto out;
540762306a36Sopenharmony_ci		}
540862306a36Sopenharmony_ci
540962306a36Sopenharmony_ci		if (left_cpos && le16_to_cpu(el->l_next_free_rec) > 1) {
541062306a36Sopenharmony_ci			left_path = ocfs2_new_path_from_path(path);
541162306a36Sopenharmony_ci			if (!left_path) {
541262306a36Sopenharmony_ci				ret = -ENOMEM;
541362306a36Sopenharmony_ci				mlog_errno(ret);
541462306a36Sopenharmony_ci				goto out;
541562306a36Sopenharmony_ci			}
541662306a36Sopenharmony_ci
541762306a36Sopenharmony_ci			ret = ocfs2_find_path(et->et_ci, left_path,
541862306a36Sopenharmony_ci					      left_cpos);
541962306a36Sopenharmony_ci			if (ret) {
542062306a36Sopenharmony_ci				mlog_errno(ret);
542162306a36Sopenharmony_ci				goto out;
542262306a36Sopenharmony_ci			}
542362306a36Sopenharmony_ci		}
542462306a36Sopenharmony_ci	}
542562306a36Sopenharmony_ci
542662306a36Sopenharmony_ci	ret = ocfs2_extend_rotate_transaction(handle, 0,
542762306a36Sopenharmony_ci					jbd2_handle_buffer_credits(handle),
542862306a36Sopenharmony_ci					path);
542962306a36Sopenharmony_ci	if (ret) {
543062306a36Sopenharmony_ci		mlog_errno(ret);
543162306a36Sopenharmony_ci		goto out;
543262306a36Sopenharmony_ci	}
543362306a36Sopenharmony_ci
543462306a36Sopenharmony_ci	ret = ocfs2_journal_access_path(et->et_ci, handle, path);
543562306a36Sopenharmony_ci	if (ret) {
543662306a36Sopenharmony_ci		mlog_errno(ret);
543762306a36Sopenharmony_ci		goto out;
543862306a36Sopenharmony_ci	}
543962306a36Sopenharmony_ci
544062306a36Sopenharmony_ci	ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
544162306a36Sopenharmony_ci	if (ret) {
544262306a36Sopenharmony_ci		mlog_errno(ret);
544362306a36Sopenharmony_ci		goto out;
544462306a36Sopenharmony_ci	}
544562306a36Sopenharmony_ci
544662306a36Sopenharmony_ci	rec_range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
544762306a36Sopenharmony_ci	trunc_range = cpos + len;
544862306a36Sopenharmony_ci
544962306a36Sopenharmony_ci	if (le32_to_cpu(rec->e_cpos) == cpos && rec_range == trunc_range) {
545062306a36Sopenharmony_ci		int next_free;
545162306a36Sopenharmony_ci
545262306a36Sopenharmony_ci		memset(rec, 0, sizeof(*rec));
545362306a36Sopenharmony_ci		ocfs2_cleanup_merge(el, index);
545462306a36Sopenharmony_ci
545562306a36Sopenharmony_ci		next_free = le16_to_cpu(el->l_next_free_rec);
545662306a36Sopenharmony_ci		if (is_rightmost_tree_rec && next_free > 1) {
545762306a36Sopenharmony_ci			/*
545862306a36Sopenharmony_ci			 * We skip the edge update if this path will
545962306a36Sopenharmony_ci			 * be deleted by the rotate code.
546062306a36Sopenharmony_ci			 */
546162306a36Sopenharmony_ci			rec = &el->l_recs[next_free - 1];
546262306a36Sopenharmony_ci			ocfs2_adjust_rightmost_records(handle, et, path,
546362306a36Sopenharmony_ci						       rec);
546462306a36Sopenharmony_ci		}
546562306a36Sopenharmony_ci	} else if (le32_to_cpu(rec->e_cpos) == cpos) {
546662306a36Sopenharmony_ci		/* Remove leftmost portion of the record. */
546762306a36Sopenharmony_ci		le32_add_cpu(&rec->e_cpos, len);
546862306a36Sopenharmony_ci		le64_add_cpu(&rec->e_blkno, ocfs2_clusters_to_blocks(sb, len));
546962306a36Sopenharmony_ci		le16_add_cpu(&rec->e_leaf_clusters, -len);
547062306a36Sopenharmony_ci	} else if (rec_range == trunc_range) {
547162306a36Sopenharmony_ci		/* Remove rightmost portion of the record */
547262306a36Sopenharmony_ci		le16_add_cpu(&rec->e_leaf_clusters, -len);
547362306a36Sopenharmony_ci		if (is_rightmost_tree_rec)
547462306a36Sopenharmony_ci			ocfs2_adjust_rightmost_records(handle, et, path, rec);
547562306a36Sopenharmony_ci	} else {
547662306a36Sopenharmony_ci		/* Caller should have trapped this. */
547762306a36Sopenharmony_ci		mlog(ML_ERROR, "Owner %llu: Invalid record truncate: (%u, %u) "
547862306a36Sopenharmony_ci		     "(%u, %u)\n",
547962306a36Sopenharmony_ci		     (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
548062306a36Sopenharmony_ci		     le32_to_cpu(rec->e_cpos),
548162306a36Sopenharmony_ci		     le16_to_cpu(rec->e_leaf_clusters), cpos, len);
548262306a36Sopenharmony_ci		BUG();
548362306a36Sopenharmony_ci	}
548462306a36Sopenharmony_ci
548562306a36Sopenharmony_ci	if (left_path) {
548662306a36Sopenharmony_ci		int subtree_index;
548762306a36Sopenharmony_ci
548862306a36Sopenharmony_ci		subtree_index = ocfs2_find_subtree_root(et, left_path, path);
548962306a36Sopenharmony_ci		ocfs2_complete_edge_insert(handle, left_path, path,
549062306a36Sopenharmony_ci					   subtree_index);
549162306a36Sopenharmony_ci	}
549262306a36Sopenharmony_ci
549362306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, path_leaf_bh(path));
549462306a36Sopenharmony_ci
549562306a36Sopenharmony_ci	ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
549662306a36Sopenharmony_ci	if (ret)
549762306a36Sopenharmony_ci		mlog_errno(ret);
549862306a36Sopenharmony_ci
549962306a36Sopenharmony_ciout:
550062306a36Sopenharmony_ci	ocfs2_free_path(left_path);
550162306a36Sopenharmony_ci	return ret;
550262306a36Sopenharmony_ci}
550362306a36Sopenharmony_ci
550462306a36Sopenharmony_ciint ocfs2_remove_extent(handle_t *handle,
550562306a36Sopenharmony_ci			struct ocfs2_extent_tree *et,
550662306a36Sopenharmony_ci			u32 cpos, u32 len,
550762306a36Sopenharmony_ci			struct ocfs2_alloc_context *meta_ac,
550862306a36Sopenharmony_ci			struct ocfs2_cached_dealloc_ctxt *dealloc)
550962306a36Sopenharmony_ci{
551062306a36Sopenharmony_ci	int ret, index;
551162306a36Sopenharmony_ci	u32 rec_range, trunc_range;
551262306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
551362306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
551462306a36Sopenharmony_ci	struct ocfs2_path *path = NULL;
551562306a36Sopenharmony_ci
551662306a36Sopenharmony_ci	/*
551762306a36Sopenharmony_ci	 * XXX: Why are we truncating to 0 instead of wherever this
551862306a36Sopenharmony_ci	 * affects us?
551962306a36Sopenharmony_ci	 */
552062306a36Sopenharmony_ci	ocfs2_et_extent_map_truncate(et, 0);
552162306a36Sopenharmony_ci
552262306a36Sopenharmony_ci	path = ocfs2_new_path_from_et(et);
552362306a36Sopenharmony_ci	if (!path) {
552462306a36Sopenharmony_ci		ret = -ENOMEM;
552562306a36Sopenharmony_ci		mlog_errno(ret);
552662306a36Sopenharmony_ci		goto out;
552762306a36Sopenharmony_ci	}
552862306a36Sopenharmony_ci
552962306a36Sopenharmony_ci	ret = ocfs2_find_path(et->et_ci, path, cpos);
553062306a36Sopenharmony_ci	if (ret) {
553162306a36Sopenharmony_ci		mlog_errno(ret);
553262306a36Sopenharmony_ci		goto out;
553362306a36Sopenharmony_ci	}
553462306a36Sopenharmony_ci
553562306a36Sopenharmony_ci	el = path_leaf_el(path);
553662306a36Sopenharmony_ci	index = ocfs2_search_extent_list(el, cpos);
553762306a36Sopenharmony_ci	if (index == -1) {
553862306a36Sopenharmony_ci		ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
553962306a36Sopenharmony_ci			    "Owner %llu has an extent at cpos %u which can no longer be found\n",
554062306a36Sopenharmony_ci			    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
554162306a36Sopenharmony_ci			    cpos);
554262306a36Sopenharmony_ci		ret = -EROFS;
554362306a36Sopenharmony_ci		goto out;
554462306a36Sopenharmony_ci	}
554562306a36Sopenharmony_ci
554662306a36Sopenharmony_ci	/*
554762306a36Sopenharmony_ci	 * We have 3 cases of extent removal:
554862306a36Sopenharmony_ci	 *   1) Range covers the entire extent rec
554962306a36Sopenharmony_ci	 *   2) Range begins or ends on one edge of the extent rec
555062306a36Sopenharmony_ci	 *   3) Range is in the middle of the extent rec (no shared edges)
555162306a36Sopenharmony_ci	 *
555262306a36Sopenharmony_ci	 * For case 1 we remove the extent rec and left rotate to
555362306a36Sopenharmony_ci	 * fill the hole.
555462306a36Sopenharmony_ci	 *
555562306a36Sopenharmony_ci	 * For case 2 we just shrink the existing extent rec, with a
555662306a36Sopenharmony_ci	 * tree update if the shrinking edge is also the edge of an
555762306a36Sopenharmony_ci	 * extent block.
555862306a36Sopenharmony_ci	 *
555962306a36Sopenharmony_ci	 * For case 3 we do a right split to turn the extent rec into
556062306a36Sopenharmony_ci	 * something case 2 can handle.
556162306a36Sopenharmony_ci	 */
556262306a36Sopenharmony_ci	rec = &el->l_recs[index];
556362306a36Sopenharmony_ci	rec_range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
556462306a36Sopenharmony_ci	trunc_range = cpos + len;
556562306a36Sopenharmony_ci
556662306a36Sopenharmony_ci	BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range);
556762306a36Sopenharmony_ci
556862306a36Sopenharmony_ci	trace_ocfs2_remove_extent(
556962306a36Sopenharmony_ci		(unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
557062306a36Sopenharmony_ci		cpos, len, index, le32_to_cpu(rec->e_cpos),
557162306a36Sopenharmony_ci		ocfs2_rec_clusters(el, rec));
557262306a36Sopenharmony_ci
557362306a36Sopenharmony_ci	if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) {
557462306a36Sopenharmony_ci		ret = ocfs2_truncate_rec(handle, et, path, index, dealloc,
557562306a36Sopenharmony_ci					 cpos, len);
557662306a36Sopenharmony_ci		if (ret) {
557762306a36Sopenharmony_ci			mlog_errno(ret);
557862306a36Sopenharmony_ci			goto out;
557962306a36Sopenharmony_ci		}
558062306a36Sopenharmony_ci	} else {
558162306a36Sopenharmony_ci		ret = ocfs2_split_tree(handle, et, path, index,
558262306a36Sopenharmony_ci				       trunc_range, meta_ac);
558362306a36Sopenharmony_ci		if (ret) {
558462306a36Sopenharmony_ci			mlog_errno(ret);
558562306a36Sopenharmony_ci			goto out;
558662306a36Sopenharmony_ci		}
558762306a36Sopenharmony_ci
558862306a36Sopenharmony_ci		/*
558962306a36Sopenharmony_ci		 * The split could have manipulated the tree enough to
559062306a36Sopenharmony_ci		 * move the record location, so we have to look for it again.
559162306a36Sopenharmony_ci		 */
559262306a36Sopenharmony_ci		ocfs2_reinit_path(path, 1);
559362306a36Sopenharmony_ci
559462306a36Sopenharmony_ci		ret = ocfs2_find_path(et->et_ci, path, cpos);
559562306a36Sopenharmony_ci		if (ret) {
559662306a36Sopenharmony_ci			mlog_errno(ret);
559762306a36Sopenharmony_ci			goto out;
559862306a36Sopenharmony_ci		}
559962306a36Sopenharmony_ci
560062306a36Sopenharmony_ci		el = path_leaf_el(path);
560162306a36Sopenharmony_ci		index = ocfs2_search_extent_list(el, cpos);
560262306a36Sopenharmony_ci		if (index == -1) {
560362306a36Sopenharmony_ci			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
560462306a36Sopenharmony_ci				    "Owner %llu: split at cpos %u lost record\n",
560562306a36Sopenharmony_ci				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
560662306a36Sopenharmony_ci				    cpos);
560762306a36Sopenharmony_ci			ret = -EROFS;
560862306a36Sopenharmony_ci			goto out;
560962306a36Sopenharmony_ci		}
561062306a36Sopenharmony_ci
561162306a36Sopenharmony_ci		/*
561262306a36Sopenharmony_ci		 * Double check our values here. If anything is fishy,
561362306a36Sopenharmony_ci		 * it's easier to catch it at the top level.
561462306a36Sopenharmony_ci		 */
561562306a36Sopenharmony_ci		rec = &el->l_recs[index];
561662306a36Sopenharmony_ci		rec_range = le32_to_cpu(rec->e_cpos) +
561762306a36Sopenharmony_ci			ocfs2_rec_clusters(el, rec);
561862306a36Sopenharmony_ci		if (rec_range != trunc_range) {
561962306a36Sopenharmony_ci			ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
562062306a36Sopenharmony_ci				    "Owner %llu: error after split at cpos %u trunc len %u, existing record is (%u,%u)\n",
562162306a36Sopenharmony_ci				    (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
562262306a36Sopenharmony_ci				    cpos, len, le32_to_cpu(rec->e_cpos),
562362306a36Sopenharmony_ci				    ocfs2_rec_clusters(el, rec));
562462306a36Sopenharmony_ci			ret = -EROFS;
562562306a36Sopenharmony_ci			goto out;
562662306a36Sopenharmony_ci		}
562762306a36Sopenharmony_ci
562862306a36Sopenharmony_ci		ret = ocfs2_truncate_rec(handle, et, path, index, dealloc,
562962306a36Sopenharmony_ci					 cpos, len);
563062306a36Sopenharmony_ci		if (ret)
563162306a36Sopenharmony_ci			mlog_errno(ret);
563262306a36Sopenharmony_ci	}
563362306a36Sopenharmony_ci
563462306a36Sopenharmony_ciout:
563562306a36Sopenharmony_ci	ocfs2_free_path(path);
563662306a36Sopenharmony_ci	return ret;
563762306a36Sopenharmony_ci}
563862306a36Sopenharmony_ci
563962306a36Sopenharmony_ci/*
564062306a36Sopenharmony_ci * ocfs2_reserve_blocks_for_rec_trunc() would look basically the
564162306a36Sopenharmony_ci * same as ocfs2_lock_alloctors(), except for it accepts a blocks
564262306a36Sopenharmony_ci * number to reserve some extra blocks, and it only handles meta
564362306a36Sopenharmony_ci * data allocations.
564462306a36Sopenharmony_ci *
564562306a36Sopenharmony_ci * Currently, only ocfs2_remove_btree_range() uses it for truncating
564662306a36Sopenharmony_ci * and punching holes.
564762306a36Sopenharmony_ci */
564862306a36Sopenharmony_cistatic int ocfs2_reserve_blocks_for_rec_trunc(struct inode *inode,
564962306a36Sopenharmony_ci					      struct ocfs2_extent_tree *et,
565062306a36Sopenharmony_ci					      u32 extents_to_split,
565162306a36Sopenharmony_ci					      struct ocfs2_alloc_context **ac,
565262306a36Sopenharmony_ci					      int extra_blocks)
565362306a36Sopenharmony_ci{
565462306a36Sopenharmony_ci	int ret = 0, num_free_extents;
565562306a36Sopenharmony_ci	unsigned int max_recs_needed = 2 * extents_to_split;
565662306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
565762306a36Sopenharmony_ci
565862306a36Sopenharmony_ci	*ac = NULL;
565962306a36Sopenharmony_ci
566062306a36Sopenharmony_ci	num_free_extents = ocfs2_num_free_extents(et);
566162306a36Sopenharmony_ci	if (num_free_extents < 0) {
566262306a36Sopenharmony_ci		ret = num_free_extents;
566362306a36Sopenharmony_ci		mlog_errno(ret);
566462306a36Sopenharmony_ci		goto out;
566562306a36Sopenharmony_ci	}
566662306a36Sopenharmony_ci
566762306a36Sopenharmony_ci	if (!num_free_extents ||
566862306a36Sopenharmony_ci	    (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed))
566962306a36Sopenharmony_ci		extra_blocks += ocfs2_extend_meta_needed(et->et_root_el);
567062306a36Sopenharmony_ci
567162306a36Sopenharmony_ci	if (extra_blocks) {
567262306a36Sopenharmony_ci		ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, ac);
567362306a36Sopenharmony_ci		if (ret < 0) {
567462306a36Sopenharmony_ci			if (ret != -ENOSPC)
567562306a36Sopenharmony_ci				mlog_errno(ret);
567662306a36Sopenharmony_ci		}
567762306a36Sopenharmony_ci	}
567862306a36Sopenharmony_ci
567962306a36Sopenharmony_ciout:
568062306a36Sopenharmony_ci	if (ret) {
568162306a36Sopenharmony_ci		if (*ac) {
568262306a36Sopenharmony_ci			ocfs2_free_alloc_context(*ac);
568362306a36Sopenharmony_ci			*ac = NULL;
568462306a36Sopenharmony_ci		}
568562306a36Sopenharmony_ci	}
568662306a36Sopenharmony_ci
568762306a36Sopenharmony_ci	return ret;
568862306a36Sopenharmony_ci}
568962306a36Sopenharmony_ci
569062306a36Sopenharmony_ciint ocfs2_remove_btree_range(struct inode *inode,
569162306a36Sopenharmony_ci			     struct ocfs2_extent_tree *et,
569262306a36Sopenharmony_ci			     u32 cpos, u32 phys_cpos, u32 len, int flags,
569362306a36Sopenharmony_ci			     struct ocfs2_cached_dealloc_ctxt *dealloc,
569462306a36Sopenharmony_ci			     u64 refcount_loc, bool refcount_tree_locked)
569562306a36Sopenharmony_ci{
569662306a36Sopenharmony_ci	int ret, credits = 0, extra_blocks = 0;
569762306a36Sopenharmony_ci	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
569862306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
569962306a36Sopenharmony_ci	struct inode *tl_inode = osb->osb_tl_inode;
570062306a36Sopenharmony_ci	handle_t *handle;
570162306a36Sopenharmony_ci	struct ocfs2_alloc_context *meta_ac = NULL;
570262306a36Sopenharmony_ci	struct ocfs2_refcount_tree *ref_tree = NULL;
570362306a36Sopenharmony_ci
570462306a36Sopenharmony_ci	if ((flags & OCFS2_EXT_REFCOUNTED) && len) {
570562306a36Sopenharmony_ci		BUG_ON(!ocfs2_is_refcount_inode(inode));
570662306a36Sopenharmony_ci
570762306a36Sopenharmony_ci		if (!refcount_tree_locked) {
570862306a36Sopenharmony_ci			ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
570962306a36Sopenharmony_ci						       &ref_tree, NULL);
571062306a36Sopenharmony_ci			if (ret) {
571162306a36Sopenharmony_ci				mlog_errno(ret);
571262306a36Sopenharmony_ci				goto bail;
571362306a36Sopenharmony_ci			}
571462306a36Sopenharmony_ci		}
571562306a36Sopenharmony_ci
571662306a36Sopenharmony_ci		ret = ocfs2_prepare_refcount_change_for_del(inode,
571762306a36Sopenharmony_ci							    refcount_loc,
571862306a36Sopenharmony_ci							    phys_blkno,
571962306a36Sopenharmony_ci							    len,
572062306a36Sopenharmony_ci							    &credits,
572162306a36Sopenharmony_ci							    &extra_blocks);
572262306a36Sopenharmony_ci		if (ret < 0) {
572362306a36Sopenharmony_ci			mlog_errno(ret);
572462306a36Sopenharmony_ci			goto bail;
572562306a36Sopenharmony_ci		}
572662306a36Sopenharmony_ci	}
572762306a36Sopenharmony_ci
572862306a36Sopenharmony_ci	ret = ocfs2_reserve_blocks_for_rec_trunc(inode, et, 1, &meta_ac,
572962306a36Sopenharmony_ci						 extra_blocks);
573062306a36Sopenharmony_ci	if (ret) {
573162306a36Sopenharmony_ci		mlog_errno(ret);
573262306a36Sopenharmony_ci		goto bail;
573362306a36Sopenharmony_ci	}
573462306a36Sopenharmony_ci
573562306a36Sopenharmony_ci	inode_lock(tl_inode);
573662306a36Sopenharmony_ci
573762306a36Sopenharmony_ci	if (ocfs2_truncate_log_needs_flush(osb)) {
573862306a36Sopenharmony_ci		ret = __ocfs2_flush_truncate_log(osb);
573962306a36Sopenharmony_ci		if (ret < 0) {
574062306a36Sopenharmony_ci			mlog_errno(ret);
574162306a36Sopenharmony_ci			goto out;
574262306a36Sopenharmony_ci		}
574362306a36Sopenharmony_ci	}
574462306a36Sopenharmony_ci
574562306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb,
574662306a36Sopenharmony_ci			ocfs2_remove_extent_credits(osb->sb) + credits);
574762306a36Sopenharmony_ci	if (IS_ERR(handle)) {
574862306a36Sopenharmony_ci		ret = PTR_ERR(handle);
574962306a36Sopenharmony_ci		mlog_errno(ret);
575062306a36Sopenharmony_ci		goto out;
575162306a36Sopenharmony_ci	}
575262306a36Sopenharmony_ci
575362306a36Sopenharmony_ci	ret = ocfs2_et_root_journal_access(handle, et,
575462306a36Sopenharmony_ci					   OCFS2_JOURNAL_ACCESS_WRITE);
575562306a36Sopenharmony_ci	if (ret) {
575662306a36Sopenharmony_ci		mlog_errno(ret);
575762306a36Sopenharmony_ci		goto out_commit;
575862306a36Sopenharmony_ci	}
575962306a36Sopenharmony_ci
576062306a36Sopenharmony_ci	dquot_free_space_nodirty(inode,
576162306a36Sopenharmony_ci				  ocfs2_clusters_to_bytes(inode->i_sb, len));
576262306a36Sopenharmony_ci
576362306a36Sopenharmony_ci	ret = ocfs2_remove_extent(handle, et, cpos, len, meta_ac, dealloc);
576462306a36Sopenharmony_ci	if (ret) {
576562306a36Sopenharmony_ci		mlog_errno(ret);
576662306a36Sopenharmony_ci		goto out_commit;
576762306a36Sopenharmony_ci	}
576862306a36Sopenharmony_ci
576962306a36Sopenharmony_ci	ocfs2_et_update_clusters(et, -len);
577062306a36Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 1);
577162306a36Sopenharmony_ci
577262306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, et->et_root_bh);
577362306a36Sopenharmony_ci
577462306a36Sopenharmony_ci	if (phys_blkno) {
577562306a36Sopenharmony_ci		if (flags & OCFS2_EXT_REFCOUNTED)
577662306a36Sopenharmony_ci			ret = ocfs2_decrease_refcount(inode, handle,
577762306a36Sopenharmony_ci					ocfs2_blocks_to_clusters(osb->sb,
577862306a36Sopenharmony_ci								 phys_blkno),
577962306a36Sopenharmony_ci					len, meta_ac,
578062306a36Sopenharmony_ci					dealloc, 1);
578162306a36Sopenharmony_ci		else
578262306a36Sopenharmony_ci			ret = ocfs2_truncate_log_append(osb, handle,
578362306a36Sopenharmony_ci							phys_blkno, len);
578462306a36Sopenharmony_ci		if (ret)
578562306a36Sopenharmony_ci			mlog_errno(ret);
578662306a36Sopenharmony_ci
578762306a36Sopenharmony_ci	}
578862306a36Sopenharmony_ci
578962306a36Sopenharmony_ciout_commit:
579062306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
579162306a36Sopenharmony_ciout:
579262306a36Sopenharmony_ci	inode_unlock(tl_inode);
579362306a36Sopenharmony_cibail:
579462306a36Sopenharmony_ci	if (meta_ac)
579562306a36Sopenharmony_ci		ocfs2_free_alloc_context(meta_ac);
579662306a36Sopenharmony_ci
579762306a36Sopenharmony_ci	if (ref_tree)
579862306a36Sopenharmony_ci		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
579962306a36Sopenharmony_ci
580062306a36Sopenharmony_ci	return ret;
580162306a36Sopenharmony_ci}
580262306a36Sopenharmony_ci
580362306a36Sopenharmony_ciint ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
580462306a36Sopenharmony_ci{
580562306a36Sopenharmony_ci	struct buffer_head *tl_bh = osb->osb_tl_bh;
580662306a36Sopenharmony_ci	struct ocfs2_dinode *di;
580762306a36Sopenharmony_ci	struct ocfs2_truncate_log *tl;
580862306a36Sopenharmony_ci
580962306a36Sopenharmony_ci	di = (struct ocfs2_dinode *) tl_bh->b_data;
581062306a36Sopenharmony_ci	tl = &di->id2.i_dealloc;
581162306a36Sopenharmony_ci
581262306a36Sopenharmony_ci	mlog_bug_on_msg(le16_to_cpu(tl->tl_used) > le16_to_cpu(tl->tl_count),
581362306a36Sopenharmony_ci			"slot %d, invalid truncate log parameters: used = "
581462306a36Sopenharmony_ci			"%u, count = %u\n", osb->slot_num,
581562306a36Sopenharmony_ci			le16_to_cpu(tl->tl_used), le16_to_cpu(tl->tl_count));
581662306a36Sopenharmony_ci	return le16_to_cpu(tl->tl_used) == le16_to_cpu(tl->tl_count);
581762306a36Sopenharmony_ci}
581862306a36Sopenharmony_ci
581962306a36Sopenharmony_cistatic int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl,
582062306a36Sopenharmony_ci					   unsigned int new_start)
582162306a36Sopenharmony_ci{
582262306a36Sopenharmony_ci	unsigned int tail_index;
582362306a36Sopenharmony_ci	unsigned int current_tail;
582462306a36Sopenharmony_ci
582562306a36Sopenharmony_ci	/* No records, nothing to coalesce */
582662306a36Sopenharmony_ci	if (!le16_to_cpu(tl->tl_used))
582762306a36Sopenharmony_ci		return 0;
582862306a36Sopenharmony_ci
582962306a36Sopenharmony_ci	tail_index = le16_to_cpu(tl->tl_used) - 1;
583062306a36Sopenharmony_ci	current_tail = le32_to_cpu(tl->tl_recs[tail_index].t_start);
583162306a36Sopenharmony_ci	current_tail += le32_to_cpu(tl->tl_recs[tail_index].t_clusters);
583262306a36Sopenharmony_ci
583362306a36Sopenharmony_ci	return current_tail == new_start;
583462306a36Sopenharmony_ci}
583562306a36Sopenharmony_ci
583662306a36Sopenharmony_ciint ocfs2_truncate_log_append(struct ocfs2_super *osb,
583762306a36Sopenharmony_ci			      handle_t *handle,
583862306a36Sopenharmony_ci			      u64 start_blk,
583962306a36Sopenharmony_ci			      unsigned int num_clusters)
584062306a36Sopenharmony_ci{
584162306a36Sopenharmony_ci	int status, index;
584262306a36Sopenharmony_ci	unsigned int start_cluster, tl_count;
584362306a36Sopenharmony_ci	struct inode *tl_inode = osb->osb_tl_inode;
584462306a36Sopenharmony_ci	struct buffer_head *tl_bh = osb->osb_tl_bh;
584562306a36Sopenharmony_ci	struct ocfs2_dinode *di;
584662306a36Sopenharmony_ci	struct ocfs2_truncate_log *tl;
584762306a36Sopenharmony_ci
584862306a36Sopenharmony_ci	BUG_ON(inode_trylock(tl_inode));
584962306a36Sopenharmony_ci
585062306a36Sopenharmony_ci	start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
585162306a36Sopenharmony_ci
585262306a36Sopenharmony_ci	di = (struct ocfs2_dinode *) tl_bh->b_data;
585362306a36Sopenharmony_ci
585462306a36Sopenharmony_ci	/* tl_bh is loaded from ocfs2_truncate_log_init().  It's validated
585562306a36Sopenharmony_ci	 * by the underlying call to ocfs2_read_inode_block(), so any
585662306a36Sopenharmony_ci	 * corruption is a code bug */
585762306a36Sopenharmony_ci	BUG_ON(!OCFS2_IS_VALID_DINODE(di));
585862306a36Sopenharmony_ci
585962306a36Sopenharmony_ci	tl = &di->id2.i_dealloc;
586062306a36Sopenharmony_ci	tl_count = le16_to_cpu(tl->tl_count);
586162306a36Sopenharmony_ci	mlog_bug_on_msg(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
586262306a36Sopenharmony_ci			tl_count == 0,
586362306a36Sopenharmony_ci			"Truncate record count on #%llu invalid "
586462306a36Sopenharmony_ci			"wanted %u, actual %u\n",
586562306a36Sopenharmony_ci			(unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
586662306a36Sopenharmony_ci			ocfs2_truncate_recs_per_inode(osb->sb),
586762306a36Sopenharmony_ci			le16_to_cpu(tl->tl_count));
586862306a36Sopenharmony_ci
586962306a36Sopenharmony_ci	/* Caller should have known to flush before calling us. */
587062306a36Sopenharmony_ci	index = le16_to_cpu(tl->tl_used);
587162306a36Sopenharmony_ci	if (index >= tl_count) {
587262306a36Sopenharmony_ci		status = -ENOSPC;
587362306a36Sopenharmony_ci		mlog_errno(status);
587462306a36Sopenharmony_ci		goto bail;
587562306a36Sopenharmony_ci	}
587662306a36Sopenharmony_ci
587762306a36Sopenharmony_ci	status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
587862306a36Sopenharmony_ci					 OCFS2_JOURNAL_ACCESS_WRITE);
587962306a36Sopenharmony_ci	if (status < 0) {
588062306a36Sopenharmony_ci		mlog_errno(status);
588162306a36Sopenharmony_ci		goto bail;
588262306a36Sopenharmony_ci	}
588362306a36Sopenharmony_ci
588462306a36Sopenharmony_ci	trace_ocfs2_truncate_log_append(
588562306a36Sopenharmony_ci		(unsigned long long)OCFS2_I(tl_inode)->ip_blkno, index,
588662306a36Sopenharmony_ci		start_cluster, num_clusters);
588762306a36Sopenharmony_ci	if (ocfs2_truncate_log_can_coalesce(tl, start_cluster)) {
588862306a36Sopenharmony_ci		/*
588962306a36Sopenharmony_ci		 * Move index back to the record we are coalescing with.
589062306a36Sopenharmony_ci		 * ocfs2_truncate_log_can_coalesce() guarantees nonzero
589162306a36Sopenharmony_ci		 */
589262306a36Sopenharmony_ci		index--;
589362306a36Sopenharmony_ci
589462306a36Sopenharmony_ci		num_clusters += le32_to_cpu(tl->tl_recs[index].t_clusters);
589562306a36Sopenharmony_ci		trace_ocfs2_truncate_log_append(
589662306a36Sopenharmony_ci			(unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
589762306a36Sopenharmony_ci			index, le32_to_cpu(tl->tl_recs[index].t_start),
589862306a36Sopenharmony_ci			num_clusters);
589962306a36Sopenharmony_ci	} else {
590062306a36Sopenharmony_ci		tl->tl_recs[index].t_start = cpu_to_le32(start_cluster);
590162306a36Sopenharmony_ci		tl->tl_used = cpu_to_le16(index + 1);
590262306a36Sopenharmony_ci	}
590362306a36Sopenharmony_ci	tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters);
590462306a36Sopenharmony_ci
590562306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, tl_bh);
590662306a36Sopenharmony_ci
590762306a36Sopenharmony_ci	osb->truncated_clusters += num_clusters;
590862306a36Sopenharmony_cibail:
590962306a36Sopenharmony_ci	return status;
591062306a36Sopenharmony_ci}
591162306a36Sopenharmony_ci
591262306a36Sopenharmony_cistatic int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
591362306a36Sopenharmony_ci					 struct inode *data_alloc_inode,
591462306a36Sopenharmony_ci					 struct buffer_head *data_alloc_bh)
591562306a36Sopenharmony_ci{
591662306a36Sopenharmony_ci	int status = 0;
591762306a36Sopenharmony_ci	int i;
591862306a36Sopenharmony_ci	unsigned int num_clusters;
591962306a36Sopenharmony_ci	u64 start_blk;
592062306a36Sopenharmony_ci	struct ocfs2_truncate_rec rec;
592162306a36Sopenharmony_ci	struct ocfs2_dinode *di;
592262306a36Sopenharmony_ci	struct ocfs2_truncate_log *tl;
592362306a36Sopenharmony_ci	struct inode *tl_inode = osb->osb_tl_inode;
592462306a36Sopenharmony_ci	struct buffer_head *tl_bh = osb->osb_tl_bh;
592562306a36Sopenharmony_ci	handle_t *handle;
592662306a36Sopenharmony_ci
592762306a36Sopenharmony_ci	di = (struct ocfs2_dinode *) tl_bh->b_data;
592862306a36Sopenharmony_ci	tl = &di->id2.i_dealloc;
592962306a36Sopenharmony_ci	i = le16_to_cpu(tl->tl_used) - 1;
593062306a36Sopenharmony_ci	while (i >= 0) {
593162306a36Sopenharmony_ci		handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
593262306a36Sopenharmony_ci		if (IS_ERR(handle)) {
593362306a36Sopenharmony_ci			status = PTR_ERR(handle);
593462306a36Sopenharmony_ci			mlog_errno(status);
593562306a36Sopenharmony_ci			goto bail;
593662306a36Sopenharmony_ci		}
593762306a36Sopenharmony_ci
593862306a36Sopenharmony_ci		/* Caller has given us at least enough credits to
593962306a36Sopenharmony_ci		 * update the truncate log dinode */
594062306a36Sopenharmony_ci		status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
594162306a36Sopenharmony_ci						 OCFS2_JOURNAL_ACCESS_WRITE);
594262306a36Sopenharmony_ci		if (status < 0) {
594362306a36Sopenharmony_ci			ocfs2_commit_trans(osb, handle);
594462306a36Sopenharmony_ci			mlog_errno(status);
594562306a36Sopenharmony_ci			goto bail;
594662306a36Sopenharmony_ci		}
594762306a36Sopenharmony_ci
594862306a36Sopenharmony_ci		tl->tl_used = cpu_to_le16(i);
594962306a36Sopenharmony_ci
595062306a36Sopenharmony_ci		ocfs2_journal_dirty(handle, tl_bh);
595162306a36Sopenharmony_ci
595262306a36Sopenharmony_ci		rec = tl->tl_recs[i];
595362306a36Sopenharmony_ci		start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb,
595462306a36Sopenharmony_ci						    le32_to_cpu(rec.t_start));
595562306a36Sopenharmony_ci		num_clusters = le32_to_cpu(rec.t_clusters);
595662306a36Sopenharmony_ci
595762306a36Sopenharmony_ci		/* if start_blk is not set, we ignore the record as
595862306a36Sopenharmony_ci		 * invalid. */
595962306a36Sopenharmony_ci		if (start_blk) {
596062306a36Sopenharmony_ci			trace_ocfs2_replay_truncate_records(
596162306a36Sopenharmony_ci				(unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
596262306a36Sopenharmony_ci				i, le32_to_cpu(rec.t_start), num_clusters);
596362306a36Sopenharmony_ci
596462306a36Sopenharmony_ci			status = ocfs2_free_clusters(handle, data_alloc_inode,
596562306a36Sopenharmony_ci						     data_alloc_bh, start_blk,
596662306a36Sopenharmony_ci						     num_clusters);
596762306a36Sopenharmony_ci			if (status < 0) {
596862306a36Sopenharmony_ci				ocfs2_commit_trans(osb, handle);
596962306a36Sopenharmony_ci				mlog_errno(status);
597062306a36Sopenharmony_ci				goto bail;
597162306a36Sopenharmony_ci			}
597262306a36Sopenharmony_ci		}
597362306a36Sopenharmony_ci
597462306a36Sopenharmony_ci		ocfs2_commit_trans(osb, handle);
597562306a36Sopenharmony_ci		i--;
597662306a36Sopenharmony_ci	}
597762306a36Sopenharmony_ci
597862306a36Sopenharmony_ci	osb->truncated_clusters = 0;
597962306a36Sopenharmony_ci
598062306a36Sopenharmony_cibail:
598162306a36Sopenharmony_ci	return status;
598262306a36Sopenharmony_ci}
598362306a36Sopenharmony_ci
598462306a36Sopenharmony_ci/* Expects you to already be holding tl_inode->i_rwsem */
598562306a36Sopenharmony_ciint __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
598662306a36Sopenharmony_ci{
598762306a36Sopenharmony_ci	int status;
598862306a36Sopenharmony_ci	unsigned int num_to_flush;
598962306a36Sopenharmony_ci	struct inode *tl_inode = osb->osb_tl_inode;
599062306a36Sopenharmony_ci	struct inode *data_alloc_inode = NULL;
599162306a36Sopenharmony_ci	struct buffer_head *tl_bh = osb->osb_tl_bh;
599262306a36Sopenharmony_ci	struct buffer_head *data_alloc_bh = NULL;
599362306a36Sopenharmony_ci	struct ocfs2_dinode *di;
599462306a36Sopenharmony_ci	struct ocfs2_truncate_log *tl;
599562306a36Sopenharmony_ci	struct ocfs2_journal *journal = osb->journal;
599662306a36Sopenharmony_ci
599762306a36Sopenharmony_ci	BUG_ON(inode_trylock(tl_inode));
599862306a36Sopenharmony_ci
599962306a36Sopenharmony_ci	di = (struct ocfs2_dinode *) tl_bh->b_data;
600062306a36Sopenharmony_ci
600162306a36Sopenharmony_ci	/* tl_bh is loaded from ocfs2_truncate_log_init().  It's validated
600262306a36Sopenharmony_ci	 * by the underlying call to ocfs2_read_inode_block(), so any
600362306a36Sopenharmony_ci	 * corruption is a code bug */
600462306a36Sopenharmony_ci	BUG_ON(!OCFS2_IS_VALID_DINODE(di));
600562306a36Sopenharmony_ci
600662306a36Sopenharmony_ci	tl = &di->id2.i_dealloc;
600762306a36Sopenharmony_ci	num_to_flush = le16_to_cpu(tl->tl_used);
600862306a36Sopenharmony_ci	trace_ocfs2_flush_truncate_log(
600962306a36Sopenharmony_ci		(unsigned long long)OCFS2_I(tl_inode)->ip_blkno,
601062306a36Sopenharmony_ci		num_to_flush);
601162306a36Sopenharmony_ci	if (!num_to_flush) {
601262306a36Sopenharmony_ci		status = 0;
601362306a36Sopenharmony_ci		goto out;
601462306a36Sopenharmony_ci	}
601562306a36Sopenharmony_ci
601662306a36Sopenharmony_ci	/* Appending truncate log(TA) and flushing truncate log(TF) are
601762306a36Sopenharmony_ci	 * two separated transactions. They can be both committed but not
601862306a36Sopenharmony_ci	 * checkpointed. If crash occurs then, both two transaction will be
601962306a36Sopenharmony_ci	 * replayed with several already released to global bitmap clusters.
602062306a36Sopenharmony_ci	 * Then truncate log will be replayed resulting in cluster double free.
602162306a36Sopenharmony_ci	 */
602262306a36Sopenharmony_ci	jbd2_journal_lock_updates(journal->j_journal);
602362306a36Sopenharmony_ci	status = jbd2_journal_flush(journal->j_journal, 0);
602462306a36Sopenharmony_ci	jbd2_journal_unlock_updates(journal->j_journal);
602562306a36Sopenharmony_ci	if (status < 0) {
602662306a36Sopenharmony_ci		mlog_errno(status);
602762306a36Sopenharmony_ci		goto out;
602862306a36Sopenharmony_ci	}
602962306a36Sopenharmony_ci
603062306a36Sopenharmony_ci	data_alloc_inode = ocfs2_get_system_file_inode(osb,
603162306a36Sopenharmony_ci						       GLOBAL_BITMAP_SYSTEM_INODE,
603262306a36Sopenharmony_ci						       OCFS2_INVALID_SLOT);
603362306a36Sopenharmony_ci	if (!data_alloc_inode) {
603462306a36Sopenharmony_ci		status = -EINVAL;
603562306a36Sopenharmony_ci		mlog(ML_ERROR, "Could not get bitmap inode!\n");
603662306a36Sopenharmony_ci		goto out;
603762306a36Sopenharmony_ci	}
603862306a36Sopenharmony_ci
603962306a36Sopenharmony_ci	inode_lock(data_alloc_inode);
604062306a36Sopenharmony_ci
604162306a36Sopenharmony_ci	status = ocfs2_inode_lock(data_alloc_inode, &data_alloc_bh, 1);
604262306a36Sopenharmony_ci	if (status < 0) {
604362306a36Sopenharmony_ci		mlog_errno(status);
604462306a36Sopenharmony_ci		goto out_mutex;
604562306a36Sopenharmony_ci	}
604662306a36Sopenharmony_ci
604762306a36Sopenharmony_ci	status = ocfs2_replay_truncate_records(osb, data_alloc_inode,
604862306a36Sopenharmony_ci					       data_alloc_bh);
604962306a36Sopenharmony_ci	if (status < 0)
605062306a36Sopenharmony_ci		mlog_errno(status);
605162306a36Sopenharmony_ci
605262306a36Sopenharmony_ci	brelse(data_alloc_bh);
605362306a36Sopenharmony_ci	ocfs2_inode_unlock(data_alloc_inode, 1);
605462306a36Sopenharmony_ci
605562306a36Sopenharmony_ciout_mutex:
605662306a36Sopenharmony_ci	inode_unlock(data_alloc_inode);
605762306a36Sopenharmony_ci	iput(data_alloc_inode);
605862306a36Sopenharmony_ci
605962306a36Sopenharmony_ciout:
606062306a36Sopenharmony_ci	return status;
606162306a36Sopenharmony_ci}
606262306a36Sopenharmony_ci
606362306a36Sopenharmony_ciint ocfs2_flush_truncate_log(struct ocfs2_super *osb)
606462306a36Sopenharmony_ci{
606562306a36Sopenharmony_ci	int status;
606662306a36Sopenharmony_ci	struct inode *tl_inode = osb->osb_tl_inode;
606762306a36Sopenharmony_ci
606862306a36Sopenharmony_ci	inode_lock(tl_inode);
606962306a36Sopenharmony_ci	status = __ocfs2_flush_truncate_log(osb);
607062306a36Sopenharmony_ci	inode_unlock(tl_inode);
607162306a36Sopenharmony_ci
607262306a36Sopenharmony_ci	return status;
607362306a36Sopenharmony_ci}
607462306a36Sopenharmony_ci
607562306a36Sopenharmony_cistatic void ocfs2_truncate_log_worker(struct work_struct *work)
607662306a36Sopenharmony_ci{
607762306a36Sopenharmony_ci	int status;
607862306a36Sopenharmony_ci	struct ocfs2_super *osb =
607962306a36Sopenharmony_ci		container_of(work, struct ocfs2_super,
608062306a36Sopenharmony_ci			     osb_truncate_log_wq.work);
608162306a36Sopenharmony_ci
608262306a36Sopenharmony_ci	status = ocfs2_flush_truncate_log(osb);
608362306a36Sopenharmony_ci	if (status < 0)
608462306a36Sopenharmony_ci		mlog_errno(status);
608562306a36Sopenharmony_ci	else
608662306a36Sopenharmony_ci		ocfs2_init_steal_slots(osb);
608762306a36Sopenharmony_ci}
608862306a36Sopenharmony_ci
608962306a36Sopenharmony_ci#define OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL (2 * HZ)
609062306a36Sopenharmony_civoid ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
609162306a36Sopenharmony_ci				       int cancel)
609262306a36Sopenharmony_ci{
609362306a36Sopenharmony_ci	if (osb->osb_tl_inode &&
609462306a36Sopenharmony_ci			atomic_read(&osb->osb_tl_disable) == 0) {
609562306a36Sopenharmony_ci		/* We want to push off log flushes while truncates are
609662306a36Sopenharmony_ci		 * still running. */
609762306a36Sopenharmony_ci		if (cancel)
609862306a36Sopenharmony_ci			cancel_delayed_work(&osb->osb_truncate_log_wq);
609962306a36Sopenharmony_ci
610062306a36Sopenharmony_ci		queue_delayed_work(osb->ocfs2_wq, &osb->osb_truncate_log_wq,
610162306a36Sopenharmony_ci				   OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL);
610262306a36Sopenharmony_ci	}
610362306a36Sopenharmony_ci}
610462306a36Sopenharmony_ci
610562306a36Sopenharmony_ci/*
610662306a36Sopenharmony_ci * Try to flush truncate logs if we can free enough clusters from it.
610762306a36Sopenharmony_ci * As for return value, "< 0" means error, "0" no space and "1" means
610862306a36Sopenharmony_ci * we have freed enough spaces and let the caller try to allocate again.
610962306a36Sopenharmony_ci */
611062306a36Sopenharmony_ciint ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
611162306a36Sopenharmony_ci					unsigned int needed)
611262306a36Sopenharmony_ci{
611362306a36Sopenharmony_ci	tid_t target;
611462306a36Sopenharmony_ci	int ret = 0;
611562306a36Sopenharmony_ci	unsigned int truncated_clusters;
611662306a36Sopenharmony_ci
611762306a36Sopenharmony_ci	inode_lock(osb->osb_tl_inode);
611862306a36Sopenharmony_ci	truncated_clusters = osb->truncated_clusters;
611962306a36Sopenharmony_ci	inode_unlock(osb->osb_tl_inode);
612062306a36Sopenharmony_ci
612162306a36Sopenharmony_ci	/*
612262306a36Sopenharmony_ci	 * Check whether we can succeed in allocating if we free
612362306a36Sopenharmony_ci	 * the truncate log.
612462306a36Sopenharmony_ci	 */
612562306a36Sopenharmony_ci	if (truncated_clusters < needed)
612662306a36Sopenharmony_ci		goto out;
612762306a36Sopenharmony_ci
612862306a36Sopenharmony_ci	ret = ocfs2_flush_truncate_log(osb);
612962306a36Sopenharmony_ci	if (ret) {
613062306a36Sopenharmony_ci		mlog_errno(ret);
613162306a36Sopenharmony_ci		goto out;
613262306a36Sopenharmony_ci	}
613362306a36Sopenharmony_ci
613462306a36Sopenharmony_ci	if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
613562306a36Sopenharmony_ci		jbd2_log_wait_commit(osb->journal->j_journal, target);
613662306a36Sopenharmony_ci		ret = 1;
613762306a36Sopenharmony_ci	}
613862306a36Sopenharmony_ciout:
613962306a36Sopenharmony_ci	return ret;
614062306a36Sopenharmony_ci}
614162306a36Sopenharmony_ci
614262306a36Sopenharmony_cistatic int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
614362306a36Sopenharmony_ci				       int slot_num,
614462306a36Sopenharmony_ci				       struct inode **tl_inode,
614562306a36Sopenharmony_ci				       struct buffer_head **tl_bh)
614662306a36Sopenharmony_ci{
614762306a36Sopenharmony_ci	int status;
614862306a36Sopenharmony_ci	struct inode *inode = NULL;
614962306a36Sopenharmony_ci	struct buffer_head *bh = NULL;
615062306a36Sopenharmony_ci
615162306a36Sopenharmony_ci	inode = ocfs2_get_system_file_inode(osb,
615262306a36Sopenharmony_ci					   TRUNCATE_LOG_SYSTEM_INODE,
615362306a36Sopenharmony_ci					   slot_num);
615462306a36Sopenharmony_ci	if (!inode) {
615562306a36Sopenharmony_ci		status = -EINVAL;
615662306a36Sopenharmony_ci		mlog(ML_ERROR, "Could not get load truncate log inode!\n");
615762306a36Sopenharmony_ci		goto bail;
615862306a36Sopenharmony_ci	}
615962306a36Sopenharmony_ci
616062306a36Sopenharmony_ci	status = ocfs2_read_inode_block(inode, &bh);
616162306a36Sopenharmony_ci	if (status < 0) {
616262306a36Sopenharmony_ci		iput(inode);
616362306a36Sopenharmony_ci		mlog_errno(status);
616462306a36Sopenharmony_ci		goto bail;
616562306a36Sopenharmony_ci	}
616662306a36Sopenharmony_ci
616762306a36Sopenharmony_ci	*tl_inode = inode;
616862306a36Sopenharmony_ci	*tl_bh    = bh;
616962306a36Sopenharmony_cibail:
617062306a36Sopenharmony_ci	return status;
617162306a36Sopenharmony_ci}
617262306a36Sopenharmony_ci
617362306a36Sopenharmony_ci/* called during the 1st stage of node recovery. we stamp a clean
617462306a36Sopenharmony_ci * truncate log and pass back a copy for processing later. if the
617562306a36Sopenharmony_ci * truncate log does not require processing, a *tl_copy is set to
617662306a36Sopenharmony_ci * NULL. */
617762306a36Sopenharmony_ciint ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
617862306a36Sopenharmony_ci				      int slot_num,
617962306a36Sopenharmony_ci				      struct ocfs2_dinode **tl_copy)
618062306a36Sopenharmony_ci{
618162306a36Sopenharmony_ci	int status;
618262306a36Sopenharmony_ci	struct inode *tl_inode = NULL;
618362306a36Sopenharmony_ci	struct buffer_head *tl_bh = NULL;
618462306a36Sopenharmony_ci	struct ocfs2_dinode *di;
618562306a36Sopenharmony_ci	struct ocfs2_truncate_log *tl;
618662306a36Sopenharmony_ci
618762306a36Sopenharmony_ci	*tl_copy = NULL;
618862306a36Sopenharmony_ci
618962306a36Sopenharmony_ci	trace_ocfs2_begin_truncate_log_recovery(slot_num);
619062306a36Sopenharmony_ci
619162306a36Sopenharmony_ci	status = ocfs2_get_truncate_log_info(osb, slot_num, &tl_inode, &tl_bh);
619262306a36Sopenharmony_ci	if (status < 0) {
619362306a36Sopenharmony_ci		mlog_errno(status);
619462306a36Sopenharmony_ci		goto bail;
619562306a36Sopenharmony_ci	}
619662306a36Sopenharmony_ci
619762306a36Sopenharmony_ci	di = (struct ocfs2_dinode *) tl_bh->b_data;
619862306a36Sopenharmony_ci
619962306a36Sopenharmony_ci	/* tl_bh is loaded from ocfs2_get_truncate_log_info().  It's
620062306a36Sopenharmony_ci	 * validated by the underlying call to ocfs2_read_inode_block(),
620162306a36Sopenharmony_ci	 * so any corruption is a code bug */
620262306a36Sopenharmony_ci	BUG_ON(!OCFS2_IS_VALID_DINODE(di));
620362306a36Sopenharmony_ci
620462306a36Sopenharmony_ci	tl = &di->id2.i_dealloc;
620562306a36Sopenharmony_ci	if (le16_to_cpu(tl->tl_used)) {
620662306a36Sopenharmony_ci		trace_ocfs2_truncate_log_recovery_num(le16_to_cpu(tl->tl_used));
620762306a36Sopenharmony_ci
620862306a36Sopenharmony_ci		/*
620962306a36Sopenharmony_ci		 * Assuming the write-out below goes well, this copy will be
621062306a36Sopenharmony_ci		 * passed back to recovery for processing.
621162306a36Sopenharmony_ci		 */
621262306a36Sopenharmony_ci		*tl_copy = kmemdup(tl_bh->b_data, tl_bh->b_size, GFP_KERNEL);
621362306a36Sopenharmony_ci		if (!(*tl_copy)) {
621462306a36Sopenharmony_ci			status = -ENOMEM;
621562306a36Sopenharmony_ci			mlog_errno(status);
621662306a36Sopenharmony_ci			goto bail;
621762306a36Sopenharmony_ci		}
621862306a36Sopenharmony_ci
621962306a36Sopenharmony_ci		/* All we need to do to clear the truncate log is set
622062306a36Sopenharmony_ci		 * tl_used. */
622162306a36Sopenharmony_ci		tl->tl_used = 0;
622262306a36Sopenharmony_ci
622362306a36Sopenharmony_ci		ocfs2_compute_meta_ecc(osb->sb, tl_bh->b_data, &di->i_check);
622462306a36Sopenharmony_ci		status = ocfs2_write_block(osb, tl_bh, INODE_CACHE(tl_inode));
622562306a36Sopenharmony_ci		if (status < 0) {
622662306a36Sopenharmony_ci			mlog_errno(status);
622762306a36Sopenharmony_ci			goto bail;
622862306a36Sopenharmony_ci		}
622962306a36Sopenharmony_ci	}
623062306a36Sopenharmony_ci
623162306a36Sopenharmony_cibail:
623262306a36Sopenharmony_ci	iput(tl_inode);
623362306a36Sopenharmony_ci	brelse(tl_bh);
623462306a36Sopenharmony_ci
623562306a36Sopenharmony_ci	if (status < 0) {
623662306a36Sopenharmony_ci		kfree(*tl_copy);
623762306a36Sopenharmony_ci		*tl_copy = NULL;
623862306a36Sopenharmony_ci		mlog_errno(status);
623962306a36Sopenharmony_ci	}
624062306a36Sopenharmony_ci
624162306a36Sopenharmony_ci	return status;
624262306a36Sopenharmony_ci}
624362306a36Sopenharmony_ci
624462306a36Sopenharmony_ciint ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
624562306a36Sopenharmony_ci					 struct ocfs2_dinode *tl_copy)
624662306a36Sopenharmony_ci{
624762306a36Sopenharmony_ci	int status = 0;
624862306a36Sopenharmony_ci	int i;
624962306a36Sopenharmony_ci	unsigned int clusters, num_recs, start_cluster;
625062306a36Sopenharmony_ci	u64 start_blk;
625162306a36Sopenharmony_ci	handle_t *handle;
625262306a36Sopenharmony_ci	struct inode *tl_inode = osb->osb_tl_inode;
625362306a36Sopenharmony_ci	struct ocfs2_truncate_log *tl;
625462306a36Sopenharmony_ci
625562306a36Sopenharmony_ci	if (OCFS2_I(tl_inode)->ip_blkno == le64_to_cpu(tl_copy->i_blkno)) {
625662306a36Sopenharmony_ci		mlog(ML_ERROR, "Asked to recover my own truncate log!\n");
625762306a36Sopenharmony_ci		return -EINVAL;
625862306a36Sopenharmony_ci	}
625962306a36Sopenharmony_ci
626062306a36Sopenharmony_ci	tl = &tl_copy->id2.i_dealloc;
626162306a36Sopenharmony_ci	num_recs = le16_to_cpu(tl->tl_used);
626262306a36Sopenharmony_ci	trace_ocfs2_complete_truncate_log_recovery(
626362306a36Sopenharmony_ci		(unsigned long long)le64_to_cpu(tl_copy->i_blkno),
626462306a36Sopenharmony_ci		num_recs);
626562306a36Sopenharmony_ci
626662306a36Sopenharmony_ci	inode_lock(tl_inode);
626762306a36Sopenharmony_ci	for(i = 0; i < num_recs; i++) {
626862306a36Sopenharmony_ci		if (ocfs2_truncate_log_needs_flush(osb)) {
626962306a36Sopenharmony_ci			status = __ocfs2_flush_truncate_log(osb);
627062306a36Sopenharmony_ci			if (status < 0) {
627162306a36Sopenharmony_ci				mlog_errno(status);
627262306a36Sopenharmony_ci				goto bail_up;
627362306a36Sopenharmony_ci			}
627462306a36Sopenharmony_ci		}
627562306a36Sopenharmony_ci
627662306a36Sopenharmony_ci		handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
627762306a36Sopenharmony_ci		if (IS_ERR(handle)) {
627862306a36Sopenharmony_ci			status = PTR_ERR(handle);
627962306a36Sopenharmony_ci			mlog_errno(status);
628062306a36Sopenharmony_ci			goto bail_up;
628162306a36Sopenharmony_ci		}
628262306a36Sopenharmony_ci
628362306a36Sopenharmony_ci		clusters = le32_to_cpu(tl->tl_recs[i].t_clusters);
628462306a36Sopenharmony_ci		start_cluster = le32_to_cpu(tl->tl_recs[i].t_start);
628562306a36Sopenharmony_ci		start_blk = ocfs2_clusters_to_blocks(osb->sb, start_cluster);
628662306a36Sopenharmony_ci
628762306a36Sopenharmony_ci		status = ocfs2_truncate_log_append(osb, handle,
628862306a36Sopenharmony_ci						   start_blk, clusters);
628962306a36Sopenharmony_ci		ocfs2_commit_trans(osb, handle);
629062306a36Sopenharmony_ci		if (status < 0) {
629162306a36Sopenharmony_ci			mlog_errno(status);
629262306a36Sopenharmony_ci			goto bail_up;
629362306a36Sopenharmony_ci		}
629462306a36Sopenharmony_ci	}
629562306a36Sopenharmony_ci
629662306a36Sopenharmony_cibail_up:
629762306a36Sopenharmony_ci	inode_unlock(tl_inode);
629862306a36Sopenharmony_ci
629962306a36Sopenharmony_ci	return status;
630062306a36Sopenharmony_ci}
630162306a36Sopenharmony_ci
630262306a36Sopenharmony_civoid ocfs2_truncate_log_shutdown(struct ocfs2_super *osb)
630362306a36Sopenharmony_ci{
630462306a36Sopenharmony_ci	int status;
630562306a36Sopenharmony_ci	struct inode *tl_inode = osb->osb_tl_inode;
630662306a36Sopenharmony_ci
630762306a36Sopenharmony_ci	atomic_set(&osb->osb_tl_disable, 1);
630862306a36Sopenharmony_ci
630962306a36Sopenharmony_ci	if (tl_inode) {
631062306a36Sopenharmony_ci		cancel_delayed_work(&osb->osb_truncate_log_wq);
631162306a36Sopenharmony_ci		flush_workqueue(osb->ocfs2_wq);
631262306a36Sopenharmony_ci
631362306a36Sopenharmony_ci		status = ocfs2_flush_truncate_log(osb);
631462306a36Sopenharmony_ci		if (status < 0)
631562306a36Sopenharmony_ci			mlog_errno(status);
631662306a36Sopenharmony_ci
631762306a36Sopenharmony_ci		brelse(osb->osb_tl_bh);
631862306a36Sopenharmony_ci		iput(osb->osb_tl_inode);
631962306a36Sopenharmony_ci	}
632062306a36Sopenharmony_ci}
632162306a36Sopenharmony_ci
632262306a36Sopenharmony_ciint ocfs2_truncate_log_init(struct ocfs2_super *osb)
632362306a36Sopenharmony_ci{
632462306a36Sopenharmony_ci	int status;
632562306a36Sopenharmony_ci	struct inode *tl_inode = NULL;
632662306a36Sopenharmony_ci	struct buffer_head *tl_bh = NULL;
632762306a36Sopenharmony_ci
632862306a36Sopenharmony_ci	status = ocfs2_get_truncate_log_info(osb,
632962306a36Sopenharmony_ci					     osb->slot_num,
633062306a36Sopenharmony_ci					     &tl_inode,
633162306a36Sopenharmony_ci					     &tl_bh);
633262306a36Sopenharmony_ci	if (status < 0)
633362306a36Sopenharmony_ci		mlog_errno(status);
633462306a36Sopenharmony_ci
633562306a36Sopenharmony_ci	/* ocfs2_truncate_log_shutdown keys on the existence of
633662306a36Sopenharmony_ci	 * osb->osb_tl_inode so we don't set any of the osb variables
633762306a36Sopenharmony_ci	 * until we're sure all is well. */
633862306a36Sopenharmony_ci	INIT_DELAYED_WORK(&osb->osb_truncate_log_wq,
633962306a36Sopenharmony_ci			  ocfs2_truncate_log_worker);
634062306a36Sopenharmony_ci	atomic_set(&osb->osb_tl_disable, 0);
634162306a36Sopenharmony_ci	osb->osb_tl_bh    = tl_bh;
634262306a36Sopenharmony_ci	osb->osb_tl_inode = tl_inode;
634362306a36Sopenharmony_ci
634462306a36Sopenharmony_ci	return status;
634562306a36Sopenharmony_ci}
634662306a36Sopenharmony_ci
634762306a36Sopenharmony_ci/*
634862306a36Sopenharmony_ci * Delayed de-allocation of suballocator blocks.
634962306a36Sopenharmony_ci *
635062306a36Sopenharmony_ci * Some sets of block de-allocations might involve multiple suballocator inodes.
635162306a36Sopenharmony_ci *
635262306a36Sopenharmony_ci * The locking for this can get extremely complicated, especially when
635362306a36Sopenharmony_ci * the suballocator inodes to delete from aren't known until deep
635462306a36Sopenharmony_ci * within an unrelated codepath.
635562306a36Sopenharmony_ci *
635662306a36Sopenharmony_ci * ocfs2_extent_block structures are a good example of this - an inode
635762306a36Sopenharmony_ci * btree could have been grown by any number of nodes each allocating
635862306a36Sopenharmony_ci * out of their own suballoc inode.
635962306a36Sopenharmony_ci *
636062306a36Sopenharmony_ci * These structures allow the delay of block de-allocation until a
636162306a36Sopenharmony_ci * later time, when locking of multiple cluster inodes won't cause
636262306a36Sopenharmony_ci * deadlock.
636362306a36Sopenharmony_ci */
636462306a36Sopenharmony_ci
636562306a36Sopenharmony_ci/*
636662306a36Sopenharmony_ci * Describe a single bit freed from a suballocator.  For the block
636762306a36Sopenharmony_ci * suballocators, it represents one block.  For the global cluster
636862306a36Sopenharmony_ci * allocator, it represents some clusters and free_bit indicates
636962306a36Sopenharmony_ci * clusters number.
637062306a36Sopenharmony_ci */
637162306a36Sopenharmony_cistruct ocfs2_cached_block_free {
637262306a36Sopenharmony_ci	struct ocfs2_cached_block_free		*free_next;
637362306a36Sopenharmony_ci	u64					free_bg;
637462306a36Sopenharmony_ci	u64					free_blk;
637562306a36Sopenharmony_ci	unsigned int				free_bit;
637662306a36Sopenharmony_ci};
637762306a36Sopenharmony_ci
/*
 * Pending frees grouped by the suballocator they belong to, identified
 * by a (system inode type, slot) pair.  The groups themselves are
 * chained off the dealloc context's c_first_suballocator.
 */
struct ocfs2_per_slot_free_list {
	/* Next group in the dealloc context, NULL at the tail. */
	struct ocfs2_per_slot_free_list		*f_next_suballocator;
	/* System inode type used to look up the suballocator inode. */
	int					f_inode_type;
	/* Slot used to look up the suballocator inode. */
	int					f_slot;
	/* Head of this group's list of pending frees, may be NULL. */
	struct ocfs2_cached_block_free		*f_first;
};
638462306a36Sopenharmony_ci
638562306a36Sopenharmony_cistatic int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
638662306a36Sopenharmony_ci				    int sysfile_type,
638762306a36Sopenharmony_ci				    int slot,
638862306a36Sopenharmony_ci				    struct ocfs2_cached_block_free *head)
638962306a36Sopenharmony_ci{
639062306a36Sopenharmony_ci	int ret;
639162306a36Sopenharmony_ci	u64 bg_blkno;
639262306a36Sopenharmony_ci	handle_t *handle;
639362306a36Sopenharmony_ci	struct inode *inode;
639462306a36Sopenharmony_ci	struct buffer_head *di_bh = NULL;
639562306a36Sopenharmony_ci	struct ocfs2_cached_block_free *tmp;
639662306a36Sopenharmony_ci
639762306a36Sopenharmony_ci	inode = ocfs2_get_system_file_inode(osb, sysfile_type, slot);
639862306a36Sopenharmony_ci	if (!inode) {
639962306a36Sopenharmony_ci		ret = -EINVAL;
640062306a36Sopenharmony_ci		mlog_errno(ret);
640162306a36Sopenharmony_ci		goto out;
640262306a36Sopenharmony_ci	}
640362306a36Sopenharmony_ci
640462306a36Sopenharmony_ci	inode_lock(inode);
640562306a36Sopenharmony_ci
640662306a36Sopenharmony_ci	ret = ocfs2_inode_lock(inode, &di_bh, 1);
640762306a36Sopenharmony_ci	if (ret) {
640862306a36Sopenharmony_ci		mlog_errno(ret);
640962306a36Sopenharmony_ci		goto out_mutex;
641062306a36Sopenharmony_ci	}
641162306a36Sopenharmony_ci
641262306a36Sopenharmony_ci	while (head) {
641362306a36Sopenharmony_ci		if (head->free_bg)
641462306a36Sopenharmony_ci			bg_blkno = head->free_bg;
641562306a36Sopenharmony_ci		else
641662306a36Sopenharmony_ci			bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
641762306a36Sopenharmony_ci							      head->free_bit);
641862306a36Sopenharmony_ci		handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
641962306a36Sopenharmony_ci		if (IS_ERR(handle)) {
642062306a36Sopenharmony_ci			ret = PTR_ERR(handle);
642162306a36Sopenharmony_ci			mlog_errno(ret);
642262306a36Sopenharmony_ci			goto out_unlock;
642362306a36Sopenharmony_ci		}
642462306a36Sopenharmony_ci
642562306a36Sopenharmony_ci		trace_ocfs2_free_cached_blocks(
642662306a36Sopenharmony_ci		     (unsigned long long)head->free_blk, head->free_bit);
642762306a36Sopenharmony_ci
642862306a36Sopenharmony_ci		ret = ocfs2_free_suballoc_bits(handle, inode, di_bh,
642962306a36Sopenharmony_ci					       head->free_bit, bg_blkno, 1);
643062306a36Sopenharmony_ci		if (ret)
643162306a36Sopenharmony_ci			mlog_errno(ret);
643262306a36Sopenharmony_ci
643362306a36Sopenharmony_ci		ocfs2_commit_trans(osb, handle);
643462306a36Sopenharmony_ci
643562306a36Sopenharmony_ci		tmp = head;
643662306a36Sopenharmony_ci		head = head->free_next;
643762306a36Sopenharmony_ci		kfree(tmp);
643862306a36Sopenharmony_ci	}
643962306a36Sopenharmony_ci
644062306a36Sopenharmony_ciout_unlock:
644162306a36Sopenharmony_ci	ocfs2_inode_unlock(inode, 1);
644262306a36Sopenharmony_ci	brelse(di_bh);
644362306a36Sopenharmony_ciout_mutex:
644462306a36Sopenharmony_ci	inode_unlock(inode);
644562306a36Sopenharmony_ci	iput(inode);
644662306a36Sopenharmony_ciout:
644762306a36Sopenharmony_ci	while(head) {
644862306a36Sopenharmony_ci		/* Premature exit may have left some dangling items. */
644962306a36Sopenharmony_ci		tmp = head;
645062306a36Sopenharmony_ci		head = head->free_next;
645162306a36Sopenharmony_ci		kfree(tmp);
645262306a36Sopenharmony_ci	}
645362306a36Sopenharmony_ci
645462306a36Sopenharmony_ci	return ret;
645562306a36Sopenharmony_ci}
645662306a36Sopenharmony_ci
645762306a36Sopenharmony_ciint ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
645862306a36Sopenharmony_ci				u64 blkno, unsigned int bit)
645962306a36Sopenharmony_ci{
646062306a36Sopenharmony_ci	int ret = 0;
646162306a36Sopenharmony_ci	struct ocfs2_cached_block_free *item;
646262306a36Sopenharmony_ci
646362306a36Sopenharmony_ci	item = kzalloc(sizeof(*item), GFP_NOFS);
646462306a36Sopenharmony_ci	if (item == NULL) {
646562306a36Sopenharmony_ci		ret = -ENOMEM;
646662306a36Sopenharmony_ci		mlog_errno(ret);
646762306a36Sopenharmony_ci		return ret;
646862306a36Sopenharmony_ci	}
646962306a36Sopenharmony_ci
647062306a36Sopenharmony_ci	trace_ocfs2_cache_cluster_dealloc((unsigned long long)blkno, bit);
647162306a36Sopenharmony_ci
647262306a36Sopenharmony_ci	item->free_blk = blkno;
647362306a36Sopenharmony_ci	item->free_bit = bit;
647462306a36Sopenharmony_ci	item->free_next = ctxt->c_global_allocator;
647562306a36Sopenharmony_ci
647662306a36Sopenharmony_ci	ctxt->c_global_allocator = item;
647762306a36Sopenharmony_ci	return ret;
647862306a36Sopenharmony_ci}
647962306a36Sopenharmony_ci
648062306a36Sopenharmony_cistatic int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
648162306a36Sopenharmony_ci				      struct ocfs2_cached_block_free *head)
648262306a36Sopenharmony_ci{
648362306a36Sopenharmony_ci	struct ocfs2_cached_block_free *tmp;
648462306a36Sopenharmony_ci	struct inode *tl_inode = osb->osb_tl_inode;
648562306a36Sopenharmony_ci	handle_t *handle;
648662306a36Sopenharmony_ci	int ret = 0;
648762306a36Sopenharmony_ci
648862306a36Sopenharmony_ci	inode_lock(tl_inode);
648962306a36Sopenharmony_ci
649062306a36Sopenharmony_ci	while (head) {
649162306a36Sopenharmony_ci		if (ocfs2_truncate_log_needs_flush(osb)) {
649262306a36Sopenharmony_ci			ret = __ocfs2_flush_truncate_log(osb);
649362306a36Sopenharmony_ci			if (ret < 0) {
649462306a36Sopenharmony_ci				mlog_errno(ret);
649562306a36Sopenharmony_ci				break;
649662306a36Sopenharmony_ci			}
649762306a36Sopenharmony_ci		}
649862306a36Sopenharmony_ci
649962306a36Sopenharmony_ci		handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
650062306a36Sopenharmony_ci		if (IS_ERR(handle)) {
650162306a36Sopenharmony_ci			ret = PTR_ERR(handle);
650262306a36Sopenharmony_ci			mlog_errno(ret);
650362306a36Sopenharmony_ci			break;
650462306a36Sopenharmony_ci		}
650562306a36Sopenharmony_ci
650662306a36Sopenharmony_ci		ret = ocfs2_truncate_log_append(osb, handle, head->free_blk,
650762306a36Sopenharmony_ci						head->free_bit);
650862306a36Sopenharmony_ci
650962306a36Sopenharmony_ci		ocfs2_commit_trans(osb, handle);
651062306a36Sopenharmony_ci		tmp = head;
651162306a36Sopenharmony_ci		head = head->free_next;
651262306a36Sopenharmony_ci		kfree(tmp);
651362306a36Sopenharmony_ci
651462306a36Sopenharmony_ci		if (ret < 0) {
651562306a36Sopenharmony_ci			mlog_errno(ret);
651662306a36Sopenharmony_ci			break;
651762306a36Sopenharmony_ci		}
651862306a36Sopenharmony_ci	}
651962306a36Sopenharmony_ci
652062306a36Sopenharmony_ci	inode_unlock(tl_inode);
652162306a36Sopenharmony_ci
652262306a36Sopenharmony_ci	while (head) {
652362306a36Sopenharmony_ci		/* Premature exit may have left some dangling items. */
652462306a36Sopenharmony_ci		tmp = head;
652562306a36Sopenharmony_ci		head = head->free_next;
652662306a36Sopenharmony_ci		kfree(tmp);
652762306a36Sopenharmony_ci	}
652862306a36Sopenharmony_ci
652962306a36Sopenharmony_ci	return ret;
653062306a36Sopenharmony_ci}
653162306a36Sopenharmony_ci
653262306a36Sopenharmony_ciint ocfs2_run_deallocs(struct ocfs2_super *osb,
653362306a36Sopenharmony_ci		       struct ocfs2_cached_dealloc_ctxt *ctxt)
653462306a36Sopenharmony_ci{
653562306a36Sopenharmony_ci	int ret = 0, ret2;
653662306a36Sopenharmony_ci	struct ocfs2_per_slot_free_list *fl;
653762306a36Sopenharmony_ci
653862306a36Sopenharmony_ci	if (!ctxt)
653962306a36Sopenharmony_ci		return 0;
654062306a36Sopenharmony_ci
654162306a36Sopenharmony_ci	while (ctxt->c_first_suballocator) {
654262306a36Sopenharmony_ci		fl = ctxt->c_first_suballocator;
654362306a36Sopenharmony_ci
654462306a36Sopenharmony_ci		if (fl->f_first) {
654562306a36Sopenharmony_ci			trace_ocfs2_run_deallocs(fl->f_inode_type,
654662306a36Sopenharmony_ci						 fl->f_slot);
654762306a36Sopenharmony_ci			ret2 = ocfs2_free_cached_blocks(osb,
654862306a36Sopenharmony_ci							fl->f_inode_type,
654962306a36Sopenharmony_ci							fl->f_slot,
655062306a36Sopenharmony_ci							fl->f_first);
655162306a36Sopenharmony_ci			if (ret2)
655262306a36Sopenharmony_ci				mlog_errno(ret2);
655362306a36Sopenharmony_ci			if (!ret)
655462306a36Sopenharmony_ci				ret = ret2;
655562306a36Sopenharmony_ci		}
655662306a36Sopenharmony_ci
655762306a36Sopenharmony_ci		ctxt->c_first_suballocator = fl->f_next_suballocator;
655862306a36Sopenharmony_ci		kfree(fl);
655962306a36Sopenharmony_ci	}
656062306a36Sopenharmony_ci
656162306a36Sopenharmony_ci	if (ctxt->c_global_allocator) {
656262306a36Sopenharmony_ci		ret2 = ocfs2_free_cached_clusters(osb,
656362306a36Sopenharmony_ci						  ctxt->c_global_allocator);
656462306a36Sopenharmony_ci		if (ret2)
656562306a36Sopenharmony_ci			mlog_errno(ret2);
656662306a36Sopenharmony_ci		if (!ret)
656762306a36Sopenharmony_ci			ret = ret2;
656862306a36Sopenharmony_ci
656962306a36Sopenharmony_ci		ctxt->c_global_allocator = NULL;
657062306a36Sopenharmony_ci	}
657162306a36Sopenharmony_ci
657262306a36Sopenharmony_ci	return ret;
657362306a36Sopenharmony_ci}
657462306a36Sopenharmony_ci
657562306a36Sopenharmony_cistatic struct ocfs2_per_slot_free_list *
657662306a36Sopenharmony_ciocfs2_find_per_slot_free_list(int type,
657762306a36Sopenharmony_ci			      int slot,
657862306a36Sopenharmony_ci			      struct ocfs2_cached_dealloc_ctxt *ctxt)
657962306a36Sopenharmony_ci{
658062306a36Sopenharmony_ci	struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator;
658162306a36Sopenharmony_ci
658262306a36Sopenharmony_ci	while (fl) {
658362306a36Sopenharmony_ci		if (fl->f_inode_type == type && fl->f_slot == slot)
658462306a36Sopenharmony_ci			return fl;
658562306a36Sopenharmony_ci
658662306a36Sopenharmony_ci		fl = fl->f_next_suballocator;
658762306a36Sopenharmony_ci	}
658862306a36Sopenharmony_ci
658962306a36Sopenharmony_ci	fl = kmalloc(sizeof(*fl), GFP_NOFS);
659062306a36Sopenharmony_ci	if (fl) {
659162306a36Sopenharmony_ci		fl->f_inode_type = type;
659262306a36Sopenharmony_ci		fl->f_slot = slot;
659362306a36Sopenharmony_ci		fl->f_first = NULL;
659462306a36Sopenharmony_ci		fl->f_next_suballocator = ctxt->c_first_suballocator;
659562306a36Sopenharmony_ci
659662306a36Sopenharmony_ci		ctxt->c_first_suballocator = fl;
659762306a36Sopenharmony_ci	}
659862306a36Sopenharmony_ci	return fl;
659962306a36Sopenharmony_ci}
660062306a36Sopenharmony_ci
660162306a36Sopenharmony_cistatic struct ocfs2_per_slot_free_list *
660262306a36Sopenharmony_ciocfs2_find_preferred_free_list(int type,
660362306a36Sopenharmony_ci			       int preferred_slot,
660462306a36Sopenharmony_ci			       int *real_slot,
660562306a36Sopenharmony_ci			       struct ocfs2_cached_dealloc_ctxt *ctxt)
660662306a36Sopenharmony_ci{
660762306a36Sopenharmony_ci	struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator;
660862306a36Sopenharmony_ci
660962306a36Sopenharmony_ci	while (fl) {
661062306a36Sopenharmony_ci		if (fl->f_inode_type == type && fl->f_slot == preferred_slot) {
661162306a36Sopenharmony_ci			*real_slot = fl->f_slot;
661262306a36Sopenharmony_ci			return fl;
661362306a36Sopenharmony_ci		}
661462306a36Sopenharmony_ci
661562306a36Sopenharmony_ci		fl = fl->f_next_suballocator;
661662306a36Sopenharmony_ci	}
661762306a36Sopenharmony_ci
661862306a36Sopenharmony_ci	/* If we can't find any free list matching preferred slot, just use
661962306a36Sopenharmony_ci	 * the first one.
662062306a36Sopenharmony_ci	 */
662162306a36Sopenharmony_ci	fl = ctxt->c_first_suballocator;
662262306a36Sopenharmony_ci	*real_slot = fl->f_slot;
662362306a36Sopenharmony_ci
662462306a36Sopenharmony_ci	return fl;
662562306a36Sopenharmony_ci}
662662306a36Sopenharmony_ci
662762306a36Sopenharmony_ci/* Return Value 1 indicates empty */
662862306a36Sopenharmony_cistatic int ocfs2_is_dealloc_empty(struct ocfs2_extent_tree *et)
662962306a36Sopenharmony_ci{
663062306a36Sopenharmony_ci	struct ocfs2_per_slot_free_list *fl = NULL;
663162306a36Sopenharmony_ci
663262306a36Sopenharmony_ci	if (!et->et_dealloc)
663362306a36Sopenharmony_ci		return 1;
663462306a36Sopenharmony_ci
663562306a36Sopenharmony_ci	fl = et->et_dealloc->c_first_suballocator;
663662306a36Sopenharmony_ci	if (!fl)
663762306a36Sopenharmony_ci		return 1;
663862306a36Sopenharmony_ci
663962306a36Sopenharmony_ci	if (!fl->f_first)
664062306a36Sopenharmony_ci		return 1;
664162306a36Sopenharmony_ci
664262306a36Sopenharmony_ci	return 0;
664362306a36Sopenharmony_ci}
664462306a36Sopenharmony_ci
664562306a36Sopenharmony_ci/* If extent was deleted from tree due to extent rotation and merging, and
664662306a36Sopenharmony_ci * no metadata is reserved ahead of time. Try to reuse some extents
664762306a36Sopenharmony_ci * just deleted. This is only used to reuse extent blocks.
664862306a36Sopenharmony_ci * It is supposed to find enough extent blocks in dealloc if our estimation
664962306a36Sopenharmony_ci * on metadata is accurate.
665062306a36Sopenharmony_ci */
665162306a36Sopenharmony_cistatic int ocfs2_reuse_blk_from_dealloc(handle_t *handle,
665262306a36Sopenharmony_ci					struct ocfs2_extent_tree *et,
665362306a36Sopenharmony_ci					struct buffer_head **new_eb_bh,
665462306a36Sopenharmony_ci					int blk_wanted, int *blk_given)
665562306a36Sopenharmony_ci{
665662306a36Sopenharmony_ci	int i, status = 0, real_slot;
665762306a36Sopenharmony_ci	struct ocfs2_cached_dealloc_ctxt *dealloc;
665862306a36Sopenharmony_ci	struct ocfs2_per_slot_free_list *fl;
665962306a36Sopenharmony_ci	struct ocfs2_cached_block_free *bf;
666062306a36Sopenharmony_ci	struct ocfs2_extent_block *eb;
666162306a36Sopenharmony_ci	struct ocfs2_super *osb =
666262306a36Sopenharmony_ci		OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
666362306a36Sopenharmony_ci
666462306a36Sopenharmony_ci	*blk_given = 0;
666562306a36Sopenharmony_ci
666662306a36Sopenharmony_ci	/* If extent tree doesn't have a dealloc, this is not faulty. Just
666762306a36Sopenharmony_ci	 * tell upper caller dealloc can't provide any block and it should
666862306a36Sopenharmony_ci	 * ask for alloc to claim more space.
666962306a36Sopenharmony_ci	 */
667062306a36Sopenharmony_ci	dealloc = et->et_dealloc;
667162306a36Sopenharmony_ci	if (!dealloc)
667262306a36Sopenharmony_ci		goto bail;
667362306a36Sopenharmony_ci
667462306a36Sopenharmony_ci	for (i = 0; i < blk_wanted; i++) {
667562306a36Sopenharmony_ci		/* Prefer to use local slot */
667662306a36Sopenharmony_ci		fl = ocfs2_find_preferred_free_list(EXTENT_ALLOC_SYSTEM_INODE,
667762306a36Sopenharmony_ci						    osb->slot_num, &real_slot,
667862306a36Sopenharmony_ci						    dealloc);
667962306a36Sopenharmony_ci		/* If no more block can be reused, we should claim more
668062306a36Sopenharmony_ci		 * from alloc. Just return here normally.
668162306a36Sopenharmony_ci		 */
668262306a36Sopenharmony_ci		if (!fl) {
668362306a36Sopenharmony_ci			status = 0;
668462306a36Sopenharmony_ci			break;
668562306a36Sopenharmony_ci		}
668662306a36Sopenharmony_ci
668762306a36Sopenharmony_ci		bf = fl->f_first;
668862306a36Sopenharmony_ci		fl->f_first = bf->free_next;
668962306a36Sopenharmony_ci
669062306a36Sopenharmony_ci		new_eb_bh[i] = sb_getblk(osb->sb, bf->free_blk);
669162306a36Sopenharmony_ci		if (new_eb_bh[i] == NULL) {
669262306a36Sopenharmony_ci			status = -ENOMEM;
669362306a36Sopenharmony_ci			mlog_errno(status);
669462306a36Sopenharmony_ci			goto bail;
669562306a36Sopenharmony_ci		}
669662306a36Sopenharmony_ci
669762306a36Sopenharmony_ci		mlog(0, "Reusing block(%llu) from "
669862306a36Sopenharmony_ci		     "dealloc(local slot:%d, real slot:%d)\n",
669962306a36Sopenharmony_ci		     bf->free_blk, osb->slot_num, real_slot);
670062306a36Sopenharmony_ci
670162306a36Sopenharmony_ci		ocfs2_set_new_buffer_uptodate(et->et_ci, new_eb_bh[i]);
670262306a36Sopenharmony_ci
670362306a36Sopenharmony_ci		status = ocfs2_journal_access_eb(handle, et->et_ci,
670462306a36Sopenharmony_ci						 new_eb_bh[i],
670562306a36Sopenharmony_ci						 OCFS2_JOURNAL_ACCESS_CREATE);
670662306a36Sopenharmony_ci		if (status < 0) {
670762306a36Sopenharmony_ci			mlog_errno(status);
670862306a36Sopenharmony_ci			goto bail;
670962306a36Sopenharmony_ci		}
671062306a36Sopenharmony_ci
671162306a36Sopenharmony_ci		memset(new_eb_bh[i]->b_data, 0, osb->sb->s_blocksize);
671262306a36Sopenharmony_ci		eb = (struct ocfs2_extent_block *) new_eb_bh[i]->b_data;
671362306a36Sopenharmony_ci
671462306a36Sopenharmony_ci		/* We can't guarantee that buffer head is still cached, so
671562306a36Sopenharmony_ci		 * polutlate the extent block again.
671662306a36Sopenharmony_ci		 */
671762306a36Sopenharmony_ci		strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
671862306a36Sopenharmony_ci		eb->h_blkno = cpu_to_le64(bf->free_blk);
671962306a36Sopenharmony_ci		eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
672062306a36Sopenharmony_ci		eb->h_suballoc_slot = cpu_to_le16(real_slot);
672162306a36Sopenharmony_ci		eb->h_suballoc_loc = cpu_to_le64(bf->free_bg);
672262306a36Sopenharmony_ci		eb->h_suballoc_bit = cpu_to_le16(bf->free_bit);
672362306a36Sopenharmony_ci		eb->h_list.l_count =
672462306a36Sopenharmony_ci			cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
672562306a36Sopenharmony_ci
672662306a36Sopenharmony_ci		/* We'll also be dirtied by the caller, so
672762306a36Sopenharmony_ci		 * this isn't absolutely necessary.
672862306a36Sopenharmony_ci		 */
672962306a36Sopenharmony_ci		ocfs2_journal_dirty(handle, new_eb_bh[i]);
673062306a36Sopenharmony_ci
673162306a36Sopenharmony_ci		if (!fl->f_first) {
673262306a36Sopenharmony_ci			dealloc->c_first_suballocator = fl->f_next_suballocator;
673362306a36Sopenharmony_ci			kfree(fl);
673462306a36Sopenharmony_ci		}
673562306a36Sopenharmony_ci		kfree(bf);
673662306a36Sopenharmony_ci	}
673762306a36Sopenharmony_ci
673862306a36Sopenharmony_ci	*blk_given = i;
673962306a36Sopenharmony_ci
674062306a36Sopenharmony_cibail:
674162306a36Sopenharmony_ci	if (unlikely(status < 0)) {
674262306a36Sopenharmony_ci		for (i = 0; i < blk_wanted; i++)
674362306a36Sopenharmony_ci			brelse(new_eb_bh[i]);
674462306a36Sopenharmony_ci	}
674562306a36Sopenharmony_ci
674662306a36Sopenharmony_ci	return status;
674762306a36Sopenharmony_ci}
674862306a36Sopenharmony_ci
674962306a36Sopenharmony_ciint ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
675062306a36Sopenharmony_ci			      int type, int slot, u64 suballoc,
675162306a36Sopenharmony_ci			      u64 blkno, unsigned int bit)
675262306a36Sopenharmony_ci{
675362306a36Sopenharmony_ci	int ret;
675462306a36Sopenharmony_ci	struct ocfs2_per_slot_free_list *fl;
675562306a36Sopenharmony_ci	struct ocfs2_cached_block_free *item;
675662306a36Sopenharmony_ci
675762306a36Sopenharmony_ci	fl = ocfs2_find_per_slot_free_list(type, slot, ctxt);
675862306a36Sopenharmony_ci	if (fl == NULL) {
675962306a36Sopenharmony_ci		ret = -ENOMEM;
676062306a36Sopenharmony_ci		mlog_errno(ret);
676162306a36Sopenharmony_ci		goto out;
676262306a36Sopenharmony_ci	}
676362306a36Sopenharmony_ci
676462306a36Sopenharmony_ci	item = kzalloc(sizeof(*item), GFP_NOFS);
676562306a36Sopenharmony_ci	if (item == NULL) {
676662306a36Sopenharmony_ci		ret = -ENOMEM;
676762306a36Sopenharmony_ci		mlog_errno(ret);
676862306a36Sopenharmony_ci		goto out;
676962306a36Sopenharmony_ci	}
677062306a36Sopenharmony_ci
677162306a36Sopenharmony_ci	trace_ocfs2_cache_block_dealloc(type, slot,
677262306a36Sopenharmony_ci					(unsigned long long)suballoc,
677362306a36Sopenharmony_ci					(unsigned long long)blkno, bit);
677462306a36Sopenharmony_ci
677562306a36Sopenharmony_ci	item->free_bg = suballoc;
677662306a36Sopenharmony_ci	item->free_blk = blkno;
677762306a36Sopenharmony_ci	item->free_bit = bit;
677862306a36Sopenharmony_ci	item->free_next = fl->f_first;
677962306a36Sopenharmony_ci
678062306a36Sopenharmony_ci	fl->f_first = item;
678162306a36Sopenharmony_ci
678262306a36Sopenharmony_ci	ret = 0;
678362306a36Sopenharmony_ciout:
678462306a36Sopenharmony_ci	return ret;
678562306a36Sopenharmony_ci}
678662306a36Sopenharmony_ci
678762306a36Sopenharmony_cistatic int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
678862306a36Sopenharmony_ci					 struct ocfs2_extent_block *eb)
678962306a36Sopenharmony_ci{
679062306a36Sopenharmony_ci	return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE,
679162306a36Sopenharmony_ci					 le16_to_cpu(eb->h_suballoc_slot),
679262306a36Sopenharmony_ci					 le64_to_cpu(eb->h_suballoc_loc),
679362306a36Sopenharmony_ci					 le64_to_cpu(eb->h_blkno),
679462306a36Sopenharmony_ci					 le16_to_cpu(eb->h_suballoc_bit));
679562306a36Sopenharmony_ci}
679662306a36Sopenharmony_ci
679762306a36Sopenharmony_cistatic int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
679862306a36Sopenharmony_ci{
679962306a36Sopenharmony_ci	set_buffer_uptodate(bh);
680062306a36Sopenharmony_ci	mark_buffer_dirty(bh);
680162306a36Sopenharmony_ci	return 0;
680262306a36Sopenharmony_ci}
680362306a36Sopenharmony_ci
680462306a36Sopenharmony_civoid ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
680562306a36Sopenharmony_ci			      unsigned int from, unsigned int to,
680662306a36Sopenharmony_ci			      struct page *page, int zero, u64 *phys)
680762306a36Sopenharmony_ci{
680862306a36Sopenharmony_ci	int ret, partial = 0;
680962306a36Sopenharmony_ci	loff_t start_byte = ((loff_t)page->index << PAGE_SHIFT) + from;
681062306a36Sopenharmony_ci	loff_t length = to - from;
681162306a36Sopenharmony_ci
681262306a36Sopenharmony_ci	ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0);
681362306a36Sopenharmony_ci	if (ret)
681462306a36Sopenharmony_ci		mlog_errno(ret);
681562306a36Sopenharmony_ci
681662306a36Sopenharmony_ci	if (zero)
681762306a36Sopenharmony_ci		zero_user_segment(page, from, to);
681862306a36Sopenharmony_ci
681962306a36Sopenharmony_ci	/*
682062306a36Sopenharmony_ci	 * Need to set the buffers we zero'd into uptodate
682162306a36Sopenharmony_ci	 * here if they aren't - ocfs2_map_page_blocks()
682262306a36Sopenharmony_ci	 * might've skipped some
682362306a36Sopenharmony_ci	 */
682462306a36Sopenharmony_ci	ret = walk_page_buffers(handle, page_buffers(page),
682562306a36Sopenharmony_ci				from, to, &partial,
682662306a36Sopenharmony_ci				ocfs2_zero_func);
682762306a36Sopenharmony_ci	if (ret < 0)
682862306a36Sopenharmony_ci		mlog_errno(ret);
682962306a36Sopenharmony_ci	else if (ocfs2_should_order_data(inode)) {
683062306a36Sopenharmony_ci		ret = ocfs2_jbd2_inode_add_write(handle, inode,
683162306a36Sopenharmony_ci						 start_byte, length);
683262306a36Sopenharmony_ci		if (ret < 0)
683362306a36Sopenharmony_ci			mlog_errno(ret);
683462306a36Sopenharmony_ci	}
683562306a36Sopenharmony_ci
683662306a36Sopenharmony_ci	if (!partial)
683762306a36Sopenharmony_ci		SetPageUptodate(page);
683862306a36Sopenharmony_ci
683962306a36Sopenharmony_ci	flush_dcache_page(page);
684062306a36Sopenharmony_ci}
684162306a36Sopenharmony_ci
684262306a36Sopenharmony_cistatic void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start,
684362306a36Sopenharmony_ci				     loff_t end, struct page **pages,
684462306a36Sopenharmony_ci				     int numpages, u64 phys, handle_t *handle)
684562306a36Sopenharmony_ci{
684662306a36Sopenharmony_ci	int i;
684762306a36Sopenharmony_ci	struct page *page;
684862306a36Sopenharmony_ci	unsigned int from, to = PAGE_SIZE;
684962306a36Sopenharmony_ci	struct super_block *sb = inode->i_sb;
685062306a36Sopenharmony_ci
685162306a36Sopenharmony_ci	BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));
685262306a36Sopenharmony_ci
685362306a36Sopenharmony_ci	if (numpages == 0)
685462306a36Sopenharmony_ci		goto out;
685562306a36Sopenharmony_ci
685662306a36Sopenharmony_ci	to = PAGE_SIZE;
685762306a36Sopenharmony_ci	for(i = 0; i < numpages; i++) {
685862306a36Sopenharmony_ci		page = pages[i];
685962306a36Sopenharmony_ci
686062306a36Sopenharmony_ci		from = start & (PAGE_SIZE - 1);
686162306a36Sopenharmony_ci		if ((end >> PAGE_SHIFT) == page->index)
686262306a36Sopenharmony_ci			to = end & (PAGE_SIZE - 1);
686362306a36Sopenharmony_ci
686462306a36Sopenharmony_ci		BUG_ON(from > PAGE_SIZE);
686562306a36Sopenharmony_ci		BUG_ON(to > PAGE_SIZE);
686662306a36Sopenharmony_ci
686762306a36Sopenharmony_ci		ocfs2_map_and_dirty_page(inode, handle, from, to, page, 1,
686862306a36Sopenharmony_ci					 &phys);
686962306a36Sopenharmony_ci
687062306a36Sopenharmony_ci		start = (page->index + 1) << PAGE_SHIFT;
687162306a36Sopenharmony_ci	}
687262306a36Sopenharmony_ciout:
687362306a36Sopenharmony_ci	if (pages)
687462306a36Sopenharmony_ci		ocfs2_unlock_and_free_pages(pages, numpages);
687562306a36Sopenharmony_ci}
687662306a36Sopenharmony_ci
687762306a36Sopenharmony_ciint ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
687862306a36Sopenharmony_ci		     struct page **pages, int *num)
687962306a36Sopenharmony_ci{
688062306a36Sopenharmony_ci	int numpages, ret = 0;
688162306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
688262306a36Sopenharmony_ci	unsigned long index;
688362306a36Sopenharmony_ci	loff_t last_page_bytes;
688462306a36Sopenharmony_ci
688562306a36Sopenharmony_ci	BUG_ON(start > end);
688662306a36Sopenharmony_ci
688762306a36Sopenharmony_ci	numpages = 0;
688862306a36Sopenharmony_ci	last_page_bytes = PAGE_ALIGN(end);
688962306a36Sopenharmony_ci	index = start >> PAGE_SHIFT;
689062306a36Sopenharmony_ci	do {
689162306a36Sopenharmony_ci		pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS);
689262306a36Sopenharmony_ci		if (!pages[numpages]) {
689362306a36Sopenharmony_ci			ret = -ENOMEM;
689462306a36Sopenharmony_ci			mlog_errno(ret);
689562306a36Sopenharmony_ci			goto out;
689662306a36Sopenharmony_ci		}
689762306a36Sopenharmony_ci
689862306a36Sopenharmony_ci		numpages++;
689962306a36Sopenharmony_ci		index++;
690062306a36Sopenharmony_ci	} while (index < (last_page_bytes >> PAGE_SHIFT));
690162306a36Sopenharmony_ci
690262306a36Sopenharmony_ciout:
690362306a36Sopenharmony_ci	if (ret != 0) {
690462306a36Sopenharmony_ci		if (pages)
690562306a36Sopenharmony_ci			ocfs2_unlock_and_free_pages(pages, numpages);
690662306a36Sopenharmony_ci		numpages = 0;
690762306a36Sopenharmony_ci	}
690862306a36Sopenharmony_ci
690962306a36Sopenharmony_ci	*num = numpages;
691062306a36Sopenharmony_ci
691162306a36Sopenharmony_ci	return ret;
691262306a36Sopenharmony_ci}
691362306a36Sopenharmony_ci
691462306a36Sopenharmony_cistatic int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
691562306a36Sopenharmony_ci				struct page **pages, int *num)
691662306a36Sopenharmony_ci{
691762306a36Sopenharmony_ci	struct super_block *sb = inode->i_sb;
691862306a36Sopenharmony_ci
691962306a36Sopenharmony_ci	BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
692062306a36Sopenharmony_ci	       (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
692162306a36Sopenharmony_ci
692262306a36Sopenharmony_ci	return ocfs2_grab_pages(inode, start, end, pages, num);
692362306a36Sopenharmony_ci}
692462306a36Sopenharmony_ci
692562306a36Sopenharmony_ci/*
692662306a36Sopenharmony_ci * Zero partial cluster for a hole punch or truncate. This avoids exposing
692762306a36Sopenharmony_ci * nonzero data on subsequent file extends.
692862306a36Sopenharmony_ci *
692962306a36Sopenharmony_ci * We need to call this before i_size is updated on the inode because
693062306a36Sopenharmony_ci * otherwise block_write_full_page() will skip writeout of pages past
693162306a36Sopenharmony_ci * i_size.
693262306a36Sopenharmony_ci */
693362306a36Sopenharmony_ciint ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
693462306a36Sopenharmony_ci				  u64 range_start, u64 range_end)
693562306a36Sopenharmony_ci{
693662306a36Sopenharmony_ci	int ret = 0, numpages;
693762306a36Sopenharmony_ci	struct page **pages = NULL;
693862306a36Sopenharmony_ci	u64 phys;
693962306a36Sopenharmony_ci	unsigned int ext_flags;
694062306a36Sopenharmony_ci	struct super_block *sb = inode->i_sb;
694162306a36Sopenharmony_ci
694262306a36Sopenharmony_ci	/*
694362306a36Sopenharmony_ci	 * File systems which don't support sparse files zero on every
694462306a36Sopenharmony_ci	 * extend.
694562306a36Sopenharmony_ci	 */
694662306a36Sopenharmony_ci	if (!ocfs2_sparse_alloc(OCFS2_SB(sb)))
694762306a36Sopenharmony_ci		return 0;
694862306a36Sopenharmony_ci
694962306a36Sopenharmony_ci	/*
695062306a36Sopenharmony_ci	 * Avoid zeroing pages fully beyond current i_size. It is pointless as
695162306a36Sopenharmony_ci	 * underlying blocks of those pages should be already zeroed out and
695262306a36Sopenharmony_ci	 * page writeback will skip them anyway.
695362306a36Sopenharmony_ci	 */
695462306a36Sopenharmony_ci	range_end = min_t(u64, range_end, i_size_read(inode));
695562306a36Sopenharmony_ci	if (range_start >= range_end)
695662306a36Sopenharmony_ci		return 0;
695762306a36Sopenharmony_ci
695862306a36Sopenharmony_ci	pages = kcalloc(ocfs2_pages_per_cluster(sb),
695962306a36Sopenharmony_ci			sizeof(struct page *), GFP_NOFS);
696062306a36Sopenharmony_ci	if (pages == NULL) {
696162306a36Sopenharmony_ci		ret = -ENOMEM;
696262306a36Sopenharmony_ci		mlog_errno(ret);
696362306a36Sopenharmony_ci		goto out;
696462306a36Sopenharmony_ci	}
696562306a36Sopenharmony_ci
696662306a36Sopenharmony_ci	ret = ocfs2_extent_map_get_blocks(inode,
696762306a36Sopenharmony_ci					  range_start >> sb->s_blocksize_bits,
696862306a36Sopenharmony_ci					  &phys, NULL, &ext_flags);
696962306a36Sopenharmony_ci	if (ret) {
697062306a36Sopenharmony_ci		mlog_errno(ret);
697162306a36Sopenharmony_ci		goto out;
697262306a36Sopenharmony_ci	}
697362306a36Sopenharmony_ci
697462306a36Sopenharmony_ci	/*
697562306a36Sopenharmony_ci	 * Tail is a hole, or is marked unwritten. In either case, we
697662306a36Sopenharmony_ci	 * can count on read and write to return/push zero's.
697762306a36Sopenharmony_ci	 */
697862306a36Sopenharmony_ci	if (phys == 0 || ext_flags & OCFS2_EXT_UNWRITTEN)
697962306a36Sopenharmony_ci		goto out;
698062306a36Sopenharmony_ci
698162306a36Sopenharmony_ci	ret = ocfs2_grab_eof_pages(inode, range_start, range_end, pages,
698262306a36Sopenharmony_ci				   &numpages);
698362306a36Sopenharmony_ci	if (ret) {
698462306a36Sopenharmony_ci		mlog_errno(ret);
698562306a36Sopenharmony_ci		goto out;
698662306a36Sopenharmony_ci	}
698762306a36Sopenharmony_ci
698862306a36Sopenharmony_ci	ocfs2_zero_cluster_pages(inode, range_start, range_end, pages,
698962306a36Sopenharmony_ci				 numpages, phys, handle);
699062306a36Sopenharmony_ci
699162306a36Sopenharmony_ci	/*
699262306a36Sopenharmony_ci	 * Initiate writeout of the pages we zero'd here. We don't
699362306a36Sopenharmony_ci	 * wait on them - the truncate_inode_pages() call later will
699462306a36Sopenharmony_ci	 * do that for us.
699562306a36Sopenharmony_ci	 */
699662306a36Sopenharmony_ci	ret = filemap_fdatawrite_range(inode->i_mapping, range_start,
699762306a36Sopenharmony_ci				       range_end - 1);
699862306a36Sopenharmony_ci	if (ret)
699962306a36Sopenharmony_ci		mlog_errno(ret);
700062306a36Sopenharmony_ci
700162306a36Sopenharmony_ciout:
700262306a36Sopenharmony_ci	kfree(pages);
700362306a36Sopenharmony_ci
700462306a36Sopenharmony_ci	return ret;
700562306a36Sopenharmony_ci}
700662306a36Sopenharmony_ci
700762306a36Sopenharmony_cistatic void ocfs2_zero_dinode_id2_with_xattr(struct inode *inode,
700862306a36Sopenharmony_ci					     struct ocfs2_dinode *di)
700962306a36Sopenharmony_ci{
701062306a36Sopenharmony_ci	unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits;
701162306a36Sopenharmony_ci	unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size);
701262306a36Sopenharmony_ci
701362306a36Sopenharmony_ci	if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL)
701462306a36Sopenharmony_ci		memset(&di->id2, 0, blocksize -
701562306a36Sopenharmony_ci				    offsetof(struct ocfs2_dinode, id2) -
701662306a36Sopenharmony_ci				    xattrsize);
701762306a36Sopenharmony_ci	else
701862306a36Sopenharmony_ci		memset(&di->id2, 0, blocksize -
701962306a36Sopenharmony_ci				    offsetof(struct ocfs2_dinode, id2));
702062306a36Sopenharmony_ci}
702162306a36Sopenharmony_ci
702262306a36Sopenharmony_civoid ocfs2_dinode_new_extent_list(struct inode *inode,
702362306a36Sopenharmony_ci				  struct ocfs2_dinode *di)
702462306a36Sopenharmony_ci{
702562306a36Sopenharmony_ci	ocfs2_zero_dinode_id2_with_xattr(inode, di);
702662306a36Sopenharmony_ci	di->id2.i_list.l_tree_depth = 0;
702762306a36Sopenharmony_ci	di->id2.i_list.l_next_free_rec = 0;
702862306a36Sopenharmony_ci	di->id2.i_list.l_count = cpu_to_le16(
702962306a36Sopenharmony_ci		ocfs2_extent_recs_per_inode_with_xattr(inode->i_sb, di));
703062306a36Sopenharmony_ci}
703162306a36Sopenharmony_ci
703262306a36Sopenharmony_civoid ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
703362306a36Sopenharmony_ci{
703462306a36Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
703562306a36Sopenharmony_ci	struct ocfs2_inline_data *idata = &di->id2.i_data;
703662306a36Sopenharmony_ci
703762306a36Sopenharmony_ci	spin_lock(&oi->ip_lock);
703862306a36Sopenharmony_ci	oi->ip_dyn_features |= OCFS2_INLINE_DATA_FL;
703962306a36Sopenharmony_ci	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
704062306a36Sopenharmony_ci	spin_unlock(&oi->ip_lock);
704162306a36Sopenharmony_ci
704262306a36Sopenharmony_ci	/*
704362306a36Sopenharmony_ci	 * We clear the entire i_data structure here so that all
704462306a36Sopenharmony_ci	 * fields can be properly initialized.
704562306a36Sopenharmony_ci	 */
704662306a36Sopenharmony_ci	ocfs2_zero_dinode_id2_with_xattr(inode, di);
704762306a36Sopenharmony_ci
704862306a36Sopenharmony_ci	idata->id_count = cpu_to_le16(
704962306a36Sopenharmony_ci			ocfs2_max_inline_data_with_xattr(inode->i_sb, di));
705062306a36Sopenharmony_ci}
705162306a36Sopenharmony_ci
705262306a36Sopenharmony_ciint ocfs2_convert_inline_data_to_extents(struct inode *inode,
705362306a36Sopenharmony_ci					 struct buffer_head *di_bh)
705462306a36Sopenharmony_ci{
705562306a36Sopenharmony_ci	int ret, has_data, num_pages = 0;
705662306a36Sopenharmony_ci	int need_free = 0;
705762306a36Sopenharmony_ci	u32 bit_off, num;
705862306a36Sopenharmony_ci	handle_t *handle;
705962306a36Sopenharmony_ci	u64 block;
706062306a36Sopenharmony_ci	struct ocfs2_inode_info *oi = OCFS2_I(inode);
706162306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
706262306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
706362306a36Sopenharmony_ci	struct ocfs2_alloc_context *data_ac = NULL;
706462306a36Sopenharmony_ci	struct page *page = NULL;
706562306a36Sopenharmony_ci	struct ocfs2_extent_tree et;
706662306a36Sopenharmony_ci	int did_quota = 0;
706762306a36Sopenharmony_ci
706862306a36Sopenharmony_ci	has_data = i_size_read(inode) ? 1 : 0;
706962306a36Sopenharmony_ci
707062306a36Sopenharmony_ci	if (has_data) {
707162306a36Sopenharmony_ci		ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
707262306a36Sopenharmony_ci		if (ret) {
707362306a36Sopenharmony_ci			mlog_errno(ret);
707462306a36Sopenharmony_ci			goto out;
707562306a36Sopenharmony_ci		}
707662306a36Sopenharmony_ci	}
707762306a36Sopenharmony_ci
707862306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb,
707962306a36Sopenharmony_ci				   ocfs2_inline_to_extents_credits(osb->sb));
708062306a36Sopenharmony_ci	if (IS_ERR(handle)) {
708162306a36Sopenharmony_ci		ret = PTR_ERR(handle);
708262306a36Sopenharmony_ci		mlog_errno(ret);
708362306a36Sopenharmony_ci		goto out;
708462306a36Sopenharmony_ci	}
708562306a36Sopenharmony_ci
708662306a36Sopenharmony_ci	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
708762306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
708862306a36Sopenharmony_ci	if (ret) {
708962306a36Sopenharmony_ci		mlog_errno(ret);
709062306a36Sopenharmony_ci		goto out_commit;
709162306a36Sopenharmony_ci	}
709262306a36Sopenharmony_ci
709362306a36Sopenharmony_ci	if (has_data) {
709462306a36Sopenharmony_ci		unsigned int page_end = min_t(unsigned, PAGE_SIZE,
709562306a36Sopenharmony_ci							osb->s_clustersize);
709662306a36Sopenharmony_ci		u64 phys;
709762306a36Sopenharmony_ci
709862306a36Sopenharmony_ci		ret = dquot_alloc_space_nodirty(inode,
709962306a36Sopenharmony_ci				       ocfs2_clusters_to_bytes(osb->sb, 1));
710062306a36Sopenharmony_ci		if (ret)
710162306a36Sopenharmony_ci			goto out_commit;
710262306a36Sopenharmony_ci		did_quota = 1;
710362306a36Sopenharmony_ci
710462306a36Sopenharmony_ci		data_ac->ac_resv = &oi->ip_la_data_resv;
710562306a36Sopenharmony_ci
710662306a36Sopenharmony_ci		ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off,
710762306a36Sopenharmony_ci					   &num);
710862306a36Sopenharmony_ci		if (ret) {
710962306a36Sopenharmony_ci			mlog_errno(ret);
711062306a36Sopenharmony_ci			goto out_commit;
711162306a36Sopenharmony_ci		}
711262306a36Sopenharmony_ci
711362306a36Sopenharmony_ci		/*
711462306a36Sopenharmony_ci		 * Save two copies, one for insert, and one that can
711562306a36Sopenharmony_ci		 * be changed by ocfs2_map_and_dirty_page() below.
711662306a36Sopenharmony_ci		 */
711762306a36Sopenharmony_ci		block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
711862306a36Sopenharmony_ci
711962306a36Sopenharmony_ci		ret = ocfs2_grab_eof_pages(inode, 0, page_end, &page,
712062306a36Sopenharmony_ci					   &num_pages);
712162306a36Sopenharmony_ci		if (ret) {
712262306a36Sopenharmony_ci			mlog_errno(ret);
712362306a36Sopenharmony_ci			need_free = 1;
712462306a36Sopenharmony_ci			goto out_commit;
712562306a36Sopenharmony_ci		}
712662306a36Sopenharmony_ci
712762306a36Sopenharmony_ci		/*
712862306a36Sopenharmony_ci		 * This should populate the 1st page for us and mark
712962306a36Sopenharmony_ci		 * it up to date.
713062306a36Sopenharmony_ci		 */
713162306a36Sopenharmony_ci		ret = ocfs2_read_inline_data(inode, page, di_bh);
713262306a36Sopenharmony_ci		if (ret) {
713362306a36Sopenharmony_ci			mlog_errno(ret);
713462306a36Sopenharmony_ci			need_free = 1;
713562306a36Sopenharmony_ci			goto out_unlock;
713662306a36Sopenharmony_ci		}
713762306a36Sopenharmony_ci
713862306a36Sopenharmony_ci		ocfs2_map_and_dirty_page(inode, handle, 0, page_end, page, 0,
713962306a36Sopenharmony_ci					 &phys);
714062306a36Sopenharmony_ci	}
714162306a36Sopenharmony_ci
714262306a36Sopenharmony_ci	spin_lock(&oi->ip_lock);
714362306a36Sopenharmony_ci	oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
714462306a36Sopenharmony_ci	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
714562306a36Sopenharmony_ci	spin_unlock(&oi->ip_lock);
714662306a36Sopenharmony_ci
714762306a36Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 1);
714862306a36Sopenharmony_ci	ocfs2_dinode_new_extent_list(inode, di);
714962306a36Sopenharmony_ci
715062306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, di_bh);
715162306a36Sopenharmony_ci
715262306a36Sopenharmony_ci	if (has_data) {
715362306a36Sopenharmony_ci		/*
715462306a36Sopenharmony_ci		 * An error at this point should be extremely rare. If
715562306a36Sopenharmony_ci		 * this proves to be false, we could always re-build
715662306a36Sopenharmony_ci		 * the in-inode data from our pages.
715762306a36Sopenharmony_ci		 */
715862306a36Sopenharmony_ci		ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
715962306a36Sopenharmony_ci		ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL);
716062306a36Sopenharmony_ci		if (ret) {
716162306a36Sopenharmony_ci			mlog_errno(ret);
716262306a36Sopenharmony_ci			need_free = 1;
716362306a36Sopenharmony_ci			goto out_unlock;
716462306a36Sopenharmony_ci		}
716562306a36Sopenharmony_ci
716662306a36Sopenharmony_ci		inode->i_blocks = ocfs2_inode_sector_count(inode);
716762306a36Sopenharmony_ci	}
716862306a36Sopenharmony_ci
716962306a36Sopenharmony_ciout_unlock:
717062306a36Sopenharmony_ci	if (page)
717162306a36Sopenharmony_ci		ocfs2_unlock_and_free_pages(&page, num_pages);
717262306a36Sopenharmony_ci
717362306a36Sopenharmony_ciout_commit:
717462306a36Sopenharmony_ci	if (ret < 0 && did_quota)
717562306a36Sopenharmony_ci		dquot_free_space_nodirty(inode,
717662306a36Sopenharmony_ci					  ocfs2_clusters_to_bytes(osb->sb, 1));
717762306a36Sopenharmony_ci
717862306a36Sopenharmony_ci	if (need_free) {
717962306a36Sopenharmony_ci		if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
718062306a36Sopenharmony_ci			ocfs2_free_local_alloc_bits(osb, handle, data_ac,
718162306a36Sopenharmony_ci					bit_off, num);
718262306a36Sopenharmony_ci		else
718362306a36Sopenharmony_ci			ocfs2_free_clusters(handle,
718462306a36Sopenharmony_ci					data_ac->ac_inode,
718562306a36Sopenharmony_ci					data_ac->ac_bh,
718662306a36Sopenharmony_ci					ocfs2_clusters_to_blocks(osb->sb, bit_off),
718762306a36Sopenharmony_ci					num);
718862306a36Sopenharmony_ci	}
718962306a36Sopenharmony_ci
719062306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
719162306a36Sopenharmony_ci
719262306a36Sopenharmony_ciout:
719362306a36Sopenharmony_ci	if (data_ac)
719462306a36Sopenharmony_ci		ocfs2_free_alloc_context(data_ac);
719562306a36Sopenharmony_ci	return ret;
719662306a36Sopenharmony_ci}
719762306a36Sopenharmony_ci
719862306a36Sopenharmony_ci/*
719962306a36Sopenharmony_ci * It is expected, that by the time you call this function,
720062306a36Sopenharmony_ci * inode->i_size and fe->i_size have been adjusted.
720162306a36Sopenharmony_ci *
720262306a36Sopenharmony_ci * WARNING: This will kfree the truncate context
720362306a36Sopenharmony_ci */
720462306a36Sopenharmony_ciint ocfs2_commit_truncate(struct ocfs2_super *osb,
720562306a36Sopenharmony_ci			  struct inode *inode,
720662306a36Sopenharmony_ci			  struct buffer_head *di_bh)
720762306a36Sopenharmony_ci{
720862306a36Sopenharmony_ci	int status = 0, i, flags = 0;
720962306a36Sopenharmony_ci	u32 new_highest_cpos, range, trunc_cpos, trunc_len, phys_cpos, coff;
721062306a36Sopenharmony_ci	u64 blkno = 0;
721162306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
721262306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
721362306a36Sopenharmony_ci	struct ocfs2_path *path = NULL;
721462306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
721562306a36Sopenharmony_ci	struct ocfs2_extent_list *root_el = &(di->id2.i_list);
721662306a36Sopenharmony_ci	u64 refcount_loc = le64_to_cpu(di->i_refcount_loc);
721762306a36Sopenharmony_ci	struct ocfs2_extent_tree et;
721862306a36Sopenharmony_ci	struct ocfs2_cached_dealloc_ctxt dealloc;
721962306a36Sopenharmony_ci	struct ocfs2_refcount_tree *ref_tree = NULL;
722062306a36Sopenharmony_ci
722162306a36Sopenharmony_ci	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
722262306a36Sopenharmony_ci	ocfs2_init_dealloc_ctxt(&dealloc);
722362306a36Sopenharmony_ci
722462306a36Sopenharmony_ci	new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
722562306a36Sopenharmony_ci						     i_size_read(inode));
722662306a36Sopenharmony_ci
722762306a36Sopenharmony_ci	path = ocfs2_new_path(di_bh, &di->id2.i_list,
722862306a36Sopenharmony_ci			      ocfs2_journal_access_di);
722962306a36Sopenharmony_ci	if (!path) {
723062306a36Sopenharmony_ci		status = -ENOMEM;
723162306a36Sopenharmony_ci		mlog_errno(status);
723262306a36Sopenharmony_ci		goto bail;
723362306a36Sopenharmony_ci	}
723462306a36Sopenharmony_ci
723562306a36Sopenharmony_ci	ocfs2_extent_map_trunc(inode, new_highest_cpos);
723662306a36Sopenharmony_ci
723762306a36Sopenharmony_cistart:
723862306a36Sopenharmony_ci	/*
723962306a36Sopenharmony_ci	 * Check that we still have allocation to delete.
724062306a36Sopenharmony_ci	 */
724162306a36Sopenharmony_ci	if (OCFS2_I(inode)->ip_clusters == 0) {
724262306a36Sopenharmony_ci		status = 0;
724362306a36Sopenharmony_ci		goto bail;
724462306a36Sopenharmony_ci	}
724562306a36Sopenharmony_ci
724662306a36Sopenharmony_ci	/*
724762306a36Sopenharmony_ci	 * Truncate always works against the rightmost tree branch.
724862306a36Sopenharmony_ci	 */
724962306a36Sopenharmony_ci	status = ocfs2_find_path(INODE_CACHE(inode), path, UINT_MAX);
725062306a36Sopenharmony_ci	if (status) {
725162306a36Sopenharmony_ci		mlog_errno(status);
725262306a36Sopenharmony_ci		goto bail;
725362306a36Sopenharmony_ci	}
725462306a36Sopenharmony_ci
725562306a36Sopenharmony_ci	trace_ocfs2_commit_truncate(
725662306a36Sopenharmony_ci		(unsigned long long)OCFS2_I(inode)->ip_blkno,
725762306a36Sopenharmony_ci		new_highest_cpos,
725862306a36Sopenharmony_ci		OCFS2_I(inode)->ip_clusters,
725962306a36Sopenharmony_ci		path->p_tree_depth);
726062306a36Sopenharmony_ci
726162306a36Sopenharmony_ci	/*
726262306a36Sopenharmony_ci	 * By now, el will point to the extent list on the bottom most
726362306a36Sopenharmony_ci	 * portion of this tree. Only the tail record is considered in
726462306a36Sopenharmony_ci	 * each pass.
726562306a36Sopenharmony_ci	 *
726662306a36Sopenharmony_ci	 * We handle the following cases, in order:
726762306a36Sopenharmony_ci	 * - empty extent: delete the remaining branch
726862306a36Sopenharmony_ci	 * - remove the entire record
726962306a36Sopenharmony_ci	 * - remove a partial record
727062306a36Sopenharmony_ci	 * - no record needs to be removed (truncate has completed)
727162306a36Sopenharmony_ci	 */
727262306a36Sopenharmony_ci	el = path_leaf_el(path);
727362306a36Sopenharmony_ci	if (le16_to_cpu(el->l_next_free_rec) == 0) {
727462306a36Sopenharmony_ci		ocfs2_error(inode->i_sb,
727562306a36Sopenharmony_ci			    "Inode %llu has empty extent block at %llu\n",
727662306a36Sopenharmony_ci			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
727762306a36Sopenharmony_ci			    (unsigned long long)path_leaf_bh(path)->b_blocknr);
727862306a36Sopenharmony_ci		status = -EROFS;
727962306a36Sopenharmony_ci		goto bail;
728062306a36Sopenharmony_ci	}
728162306a36Sopenharmony_ci
728262306a36Sopenharmony_ci	i = le16_to_cpu(el->l_next_free_rec) - 1;
728362306a36Sopenharmony_ci	rec = &el->l_recs[i];
728462306a36Sopenharmony_ci	flags = rec->e_flags;
728562306a36Sopenharmony_ci	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
728662306a36Sopenharmony_ci
728762306a36Sopenharmony_ci	if (i == 0 && ocfs2_is_empty_extent(rec)) {
728862306a36Sopenharmony_ci		/*
728962306a36Sopenharmony_ci		 * Lower levels depend on this never happening, but it's best
729062306a36Sopenharmony_ci		 * to check it up here before changing the tree.
729162306a36Sopenharmony_ci		*/
729262306a36Sopenharmony_ci		if (root_el->l_tree_depth && rec->e_int_clusters == 0) {
729362306a36Sopenharmony_ci			mlog(ML_ERROR, "Inode %lu has an empty "
729462306a36Sopenharmony_ci				    "extent record, depth %u\n", inode->i_ino,
729562306a36Sopenharmony_ci				    le16_to_cpu(root_el->l_tree_depth));
729662306a36Sopenharmony_ci			status = ocfs2_remove_rightmost_empty_extent(osb,
729762306a36Sopenharmony_ci					&et, path, &dealloc);
729862306a36Sopenharmony_ci			if (status) {
729962306a36Sopenharmony_ci				mlog_errno(status);
730062306a36Sopenharmony_ci				goto bail;
730162306a36Sopenharmony_ci			}
730262306a36Sopenharmony_ci
730362306a36Sopenharmony_ci			ocfs2_reinit_path(path, 1);
730462306a36Sopenharmony_ci			goto start;
730562306a36Sopenharmony_ci		} else {
730662306a36Sopenharmony_ci			trunc_cpos = le32_to_cpu(rec->e_cpos);
730762306a36Sopenharmony_ci			trunc_len = 0;
730862306a36Sopenharmony_ci			blkno = 0;
730962306a36Sopenharmony_ci		}
731062306a36Sopenharmony_ci	} else if (le32_to_cpu(rec->e_cpos) >= new_highest_cpos) {
731162306a36Sopenharmony_ci		/*
731262306a36Sopenharmony_ci		 * Truncate entire record.
731362306a36Sopenharmony_ci		 */
731462306a36Sopenharmony_ci		trunc_cpos = le32_to_cpu(rec->e_cpos);
731562306a36Sopenharmony_ci		trunc_len = ocfs2_rec_clusters(el, rec);
731662306a36Sopenharmony_ci		blkno = le64_to_cpu(rec->e_blkno);
731762306a36Sopenharmony_ci	} else if (range > new_highest_cpos) {
731862306a36Sopenharmony_ci		/*
731962306a36Sopenharmony_ci		 * Partial truncate. it also should be
732062306a36Sopenharmony_ci		 * the last truncate we're doing.
732162306a36Sopenharmony_ci		 */
732262306a36Sopenharmony_ci		trunc_cpos = new_highest_cpos;
732362306a36Sopenharmony_ci		trunc_len = range - new_highest_cpos;
732462306a36Sopenharmony_ci		coff = new_highest_cpos - le32_to_cpu(rec->e_cpos);
732562306a36Sopenharmony_ci		blkno = le64_to_cpu(rec->e_blkno) +
732662306a36Sopenharmony_ci				ocfs2_clusters_to_blocks(inode->i_sb, coff);
732762306a36Sopenharmony_ci	} else {
732862306a36Sopenharmony_ci		/*
732962306a36Sopenharmony_ci		 * Truncate completed, leave happily.
733062306a36Sopenharmony_ci		 */
733162306a36Sopenharmony_ci		status = 0;
733262306a36Sopenharmony_ci		goto bail;
733362306a36Sopenharmony_ci	}
733462306a36Sopenharmony_ci
733562306a36Sopenharmony_ci	phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
733662306a36Sopenharmony_ci
733762306a36Sopenharmony_ci	if ((flags & OCFS2_EXT_REFCOUNTED) && trunc_len && !ref_tree) {
733862306a36Sopenharmony_ci		status = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
733962306a36Sopenharmony_ci				&ref_tree, NULL);
734062306a36Sopenharmony_ci		if (status) {
734162306a36Sopenharmony_ci			mlog_errno(status);
734262306a36Sopenharmony_ci			goto bail;
734362306a36Sopenharmony_ci		}
734462306a36Sopenharmony_ci	}
734562306a36Sopenharmony_ci
734662306a36Sopenharmony_ci	status = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
734762306a36Sopenharmony_ci					  phys_cpos, trunc_len, flags, &dealloc,
734862306a36Sopenharmony_ci					  refcount_loc, true);
734962306a36Sopenharmony_ci	if (status < 0) {
735062306a36Sopenharmony_ci		mlog_errno(status);
735162306a36Sopenharmony_ci		goto bail;
735262306a36Sopenharmony_ci	}
735362306a36Sopenharmony_ci
735462306a36Sopenharmony_ci	ocfs2_reinit_path(path, 1);
735562306a36Sopenharmony_ci
735662306a36Sopenharmony_ci	/*
735762306a36Sopenharmony_ci	 * The check above will catch the case where we've truncated
735862306a36Sopenharmony_ci	 * away all allocation.
735962306a36Sopenharmony_ci	 */
736062306a36Sopenharmony_ci	goto start;
736162306a36Sopenharmony_ci
736262306a36Sopenharmony_cibail:
736362306a36Sopenharmony_ci	if (ref_tree)
736462306a36Sopenharmony_ci		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
736562306a36Sopenharmony_ci
736662306a36Sopenharmony_ci	ocfs2_schedule_truncate_log_flush(osb, 1);
736762306a36Sopenharmony_ci
736862306a36Sopenharmony_ci	ocfs2_run_deallocs(osb, &dealloc);
736962306a36Sopenharmony_ci
737062306a36Sopenharmony_ci	ocfs2_free_path(path);
737162306a36Sopenharmony_ci
737262306a36Sopenharmony_ci	return status;
737362306a36Sopenharmony_ci}
737462306a36Sopenharmony_ci
737562306a36Sopenharmony_ci/*
737662306a36Sopenharmony_ci * 'start' is inclusive, 'end' is not.
737762306a36Sopenharmony_ci */
737862306a36Sopenharmony_ciint ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
737962306a36Sopenharmony_ci			  unsigned int start, unsigned int end, int trunc)
738062306a36Sopenharmony_ci{
738162306a36Sopenharmony_ci	int ret;
738262306a36Sopenharmony_ci	unsigned int numbytes;
738362306a36Sopenharmony_ci	handle_t *handle;
738462306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
738562306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
738662306a36Sopenharmony_ci	struct ocfs2_inline_data *idata = &di->id2.i_data;
738762306a36Sopenharmony_ci
738862306a36Sopenharmony_ci	/* No need to punch hole beyond i_size. */
738962306a36Sopenharmony_ci	if (start >= i_size_read(inode))
739062306a36Sopenharmony_ci		return 0;
739162306a36Sopenharmony_ci
739262306a36Sopenharmony_ci	if (end > i_size_read(inode))
739362306a36Sopenharmony_ci		end = i_size_read(inode);
739462306a36Sopenharmony_ci
739562306a36Sopenharmony_ci	BUG_ON(start > end);
739662306a36Sopenharmony_ci
739762306a36Sopenharmony_ci	if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
739862306a36Sopenharmony_ci	    !(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) ||
739962306a36Sopenharmony_ci	    !ocfs2_supports_inline_data(osb)) {
740062306a36Sopenharmony_ci		ocfs2_error(inode->i_sb,
740162306a36Sopenharmony_ci			    "Inline data flags for inode %llu don't agree! Disk: 0x%x, Memory: 0x%x, Superblock: 0x%x\n",
740262306a36Sopenharmony_ci			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
740362306a36Sopenharmony_ci			    le16_to_cpu(di->i_dyn_features),
740462306a36Sopenharmony_ci			    OCFS2_I(inode)->ip_dyn_features,
740562306a36Sopenharmony_ci			    osb->s_feature_incompat);
740662306a36Sopenharmony_ci		ret = -EROFS;
740762306a36Sopenharmony_ci		goto out;
740862306a36Sopenharmony_ci	}
740962306a36Sopenharmony_ci
741062306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
741162306a36Sopenharmony_ci	if (IS_ERR(handle)) {
741262306a36Sopenharmony_ci		ret = PTR_ERR(handle);
741362306a36Sopenharmony_ci		mlog_errno(ret);
741462306a36Sopenharmony_ci		goto out;
741562306a36Sopenharmony_ci	}
741662306a36Sopenharmony_ci
741762306a36Sopenharmony_ci	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
741862306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
741962306a36Sopenharmony_ci	if (ret) {
742062306a36Sopenharmony_ci		mlog_errno(ret);
742162306a36Sopenharmony_ci		goto out_commit;
742262306a36Sopenharmony_ci	}
742362306a36Sopenharmony_ci
742462306a36Sopenharmony_ci	numbytes = end - start;
742562306a36Sopenharmony_ci	memset(idata->id_data + start, 0, numbytes);
742662306a36Sopenharmony_ci
742762306a36Sopenharmony_ci	/*
742862306a36Sopenharmony_ci	 * No need to worry about the data page here - it's been
742962306a36Sopenharmony_ci	 * truncated already and inline data doesn't need it for
743062306a36Sopenharmony_ci	 * pushing zero's to disk, so we'll let read_folio pick it up
743162306a36Sopenharmony_ci	 * later.
743262306a36Sopenharmony_ci	 */
743362306a36Sopenharmony_ci	if (trunc) {
743462306a36Sopenharmony_ci		i_size_write(inode, start);
743562306a36Sopenharmony_ci		di->i_size = cpu_to_le64(start);
743662306a36Sopenharmony_ci	}
743762306a36Sopenharmony_ci
743862306a36Sopenharmony_ci	inode->i_blocks = ocfs2_inode_sector_count(inode);
743962306a36Sopenharmony_ci	inode->i_mtime = inode_set_ctime_current(inode);
744062306a36Sopenharmony_ci
744162306a36Sopenharmony_ci	di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
744262306a36Sopenharmony_ci	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
744362306a36Sopenharmony_ci
744462306a36Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, inode, 1);
744562306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, di_bh);
744662306a36Sopenharmony_ci
744762306a36Sopenharmony_ciout_commit:
744862306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
744962306a36Sopenharmony_ci
745062306a36Sopenharmony_ciout:
745162306a36Sopenharmony_ci	return ret;
745262306a36Sopenharmony_ci}
745362306a36Sopenharmony_ci
745462306a36Sopenharmony_cistatic int ocfs2_trim_extent(struct super_block *sb,
745562306a36Sopenharmony_ci			     struct ocfs2_group_desc *gd,
745662306a36Sopenharmony_ci			     u64 group, u32 start, u32 count)
745762306a36Sopenharmony_ci{
745862306a36Sopenharmony_ci	u64 discard, bcount;
745962306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(sb);
746062306a36Sopenharmony_ci
746162306a36Sopenharmony_ci	bcount = ocfs2_clusters_to_blocks(sb, count);
746262306a36Sopenharmony_ci	discard = ocfs2_clusters_to_blocks(sb, start);
746362306a36Sopenharmony_ci
746462306a36Sopenharmony_ci	/*
746562306a36Sopenharmony_ci	 * For the first cluster group, the gd->bg_blkno is not at the start
746662306a36Sopenharmony_ci	 * of the group, but at an offset from the start. If we add it while
746762306a36Sopenharmony_ci	 * calculating discard for first group, we will wrongly start fstrim a
746862306a36Sopenharmony_ci	 * few blocks after the desried start block and the range can cross
746962306a36Sopenharmony_ci	 * over into the next cluster group. So, add it only if this is not
747062306a36Sopenharmony_ci	 * the first cluster group.
747162306a36Sopenharmony_ci	 */
747262306a36Sopenharmony_ci	if (group != osb->first_cluster_group_blkno)
747362306a36Sopenharmony_ci		discard += le64_to_cpu(gd->bg_blkno);
747462306a36Sopenharmony_ci
747562306a36Sopenharmony_ci	trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount);
747662306a36Sopenharmony_ci
747762306a36Sopenharmony_ci	return sb_issue_discard(sb, discard, bcount, GFP_NOFS, 0);
747862306a36Sopenharmony_ci}
747962306a36Sopenharmony_ci
748062306a36Sopenharmony_cistatic int ocfs2_trim_group(struct super_block *sb,
748162306a36Sopenharmony_ci			    struct ocfs2_group_desc *gd, u64 group,
748262306a36Sopenharmony_ci			    u32 start, u32 max, u32 minbits)
748362306a36Sopenharmony_ci{
748462306a36Sopenharmony_ci	int ret = 0, count = 0, next;
748562306a36Sopenharmony_ci	void *bitmap = gd->bg_bitmap;
748662306a36Sopenharmony_ci
748762306a36Sopenharmony_ci	if (le16_to_cpu(gd->bg_free_bits_count) < minbits)
748862306a36Sopenharmony_ci		return 0;
748962306a36Sopenharmony_ci
749062306a36Sopenharmony_ci	trace_ocfs2_trim_group((unsigned long long)le64_to_cpu(gd->bg_blkno),
749162306a36Sopenharmony_ci			       start, max, minbits);
749262306a36Sopenharmony_ci
749362306a36Sopenharmony_ci	while (start < max) {
749462306a36Sopenharmony_ci		start = ocfs2_find_next_zero_bit(bitmap, max, start);
749562306a36Sopenharmony_ci		if (start >= max)
749662306a36Sopenharmony_ci			break;
749762306a36Sopenharmony_ci		next = ocfs2_find_next_bit(bitmap, max, start);
749862306a36Sopenharmony_ci
749962306a36Sopenharmony_ci		if ((next - start) >= minbits) {
750062306a36Sopenharmony_ci			ret = ocfs2_trim_extent(sb, gd, group,
750162306a36Sopenharmony_ci						start, next - start);
750262306a36Sopenharmony_ci			if (ret < 0) {
750362306a36Sopenharmony_ci				mlog_errno(ret);
750462306a36Sopenharmony_ci				break;
750562306a36Sopenharmony_ci			}
750662306a36Sopenharmony_ci			count += next - start;
750762306a36Sopenharmony_ci		}
750862306a36Sopenharmony_ci		start = next + 1;
750962306a36Sopenharmony_ci
751062306a36Sopenharmony_ci		if (fatal_signal_pending(current)) {
751162306a36Sopenharmony_ci			count = -ERESTARTSYS;
751262306a36Sopenharmony_ci			break;
751362306a36Sopenharmony_ci		}
751462306a36Sopenharmony_ci
751562306a36Sopenharmony_ci		if ((le16_to_cpu(gd->bg_free_bits_count) - count) < minbits)
751662306a36Sopenharmony_ci			break;
751762306a36Sopenharmony_ci	}
751862306a36Sopenharmony_ci
751962306a36Sopenharmony_ci	if (ret < 0)
752062306a36Sopenharmony_ci		count = ret;
752162306a36Sopenharmony_ci
752262306a36Sopenharmony_ci	return count;
752362306a36Sopenharmony_ci}
752462306a36Sopenharmony_ci
752562306a36Sopenharmony_cistatic
752662306a36Sopenharmony_ciint ocfs2_trim_mainbm(struct super_block *sb, struct fstrim_range *range)
752762306a36Sopenharmony_ci{
752862306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(sb);
752962306a36Sopenharmony_ci	u64 start, len, trimmed = 0, first_group, last_group = 0, group = 0;
753062306a36Sopenharmony_ci	int ret, cnt;
753162306a36Sopenharmony_ci	u32 first_bit, last_bit, minlen;
753262306a36Sopenharmony_ci	struct buffer_head *main_bm_bh = NULL;
753362306a36Sopenharmony_ci	struct inode *main_bm_inode = NULL;
753462306a36Sopenharmony_ci	struct buffer_head *gd_bh = NULL;
753562306a36Sopenharmony_ci	struct ocfs2_dinode *main_bm;
753662306a36Sopenharmony_ci	struct ocfs2_group_desc *gd = NULL;
753762306a36Sopenharmony_ci
753862306a36Sopenharmony_ci	start = range->start >> osb->s_clustersize_bits;
753962306a36Sopenharmony_ci	len = range->len >> osb->s_clustersize_bits;
754062306a36Sopenharmony_ci	minlen = range->minlen >> osb->s_clustersize_bits;
754162306a36Sopenharmony_ci
754262306a36Sopenharmony_ci	if (minlen >= osb->bitmap_cpg || range->len < sb->s_blocksize)
754362306a36Sopenharmony_ci		return -EINVAL;
754462306a36Sopenharmony_ci
754562306a36Sopenharmony_ci	trace_ocfs2_trim_mainbm(start, len, minlen);
754662306a36Sopenharmony_ci
754762306a36Sopenharmony_cinext_group:
754862306a36Sopenharmony_ci	main_bm_inode = ocfs2_get_system_file_inode(osb,
754962306a36Sopenharmony_ci						    GLOBAL_BITMAP_SYSTEM_INODE,
755062306a36Sopenharmony_ci						    OCFS2_INVALID_SLOT);
755162306a36Sopenharmony_ci	if (!main_bm_inode) {
755262306a36Sopenharmony_ci		ret = -EIO;
755362306a36Sopenharmony_ci		mlog_errno(ret);
755462306a36Sopenharmony_ci		goto out;
755562306a36Sopenharmony_ci	}
755662306a36Sopenharmony_ci
755762306a36Sopenharmony_ci	inode_lock(main_bm_inode);
755862306a36Sopenharmony_ci
755962306a36Sopenharmony_ci	ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0);
756062306a36Sopenharmony_ci	if (ret < 0) {
756162306a36Sopenharmony_ci		mlog_errno(ret);
756262306a36Sopenharmony_ci		goto out_mutex;
756362306a36Sopenharmony_ci	}
756462306a36Sopenharmony_ci	main_bm = (struct ocfs2_dinode *)main_bm_bh->b_data;
756562306a36Sopenharmony_ci
756662306a36Sopenharmony_ci	/*
756762306a36Sopenharmony_ci	 * Do some check before trim the first group.
756862306a36Sopenharmony_ci	 */
756962306a36Sopenharmony_ci	if (!group) {
757062306a36Sopenharmony_ci		if (start >= le32_to_cpu(main_bm->i_clusters)) {
757162306a36Sopenharmony_ci			ret = -EINVAL;
757262306a36Sopenharmony_ci			goto out_unlock;
757362306a36Sopenharmony_ci		}
757462306a36Sopenharmony_ci
757562306a36Sopenharmony_ci		if (start + len > le32_to_cpu(main_bm->i_clusters))
757662306a36Sopenharmony_ci			len = le32_to_cpu(main_bm->i_clusters) - start;
757762306a36Sopenharmony_ci
757862306a36Sopenharmony_ci		/*
757962306a36Sopenharmony_ci		 * Determine first and last group to examine based on
758062306a36Sopenharmony_ci		 * start and len
758162306a36Sopenharmony_ci		 */
758262306a36Sopenharmony_ci		first_group = ocfs2_which_cluster_group(main_bm_inode, start);
758362306a36Sopenharmony_ci		if (first_group == osb->first_cluster_group_blkno)
758462306a36Sopenharmony_ci			first_bit = start;
758562306a36Sopenharmony_ci		else
758662306a36Sopenharmony_ci			first_bit = start - ocfs2_blocks_to_clusters(sb,
758762306a36Sopenharmony_ci								first_group);
758862306a36Sopenharmony_ci		last_group = ocfs2_which_cluster_group(main_bm_inode,
758962306a36Sopenharmony_ci						       start + len - 1);
759062306a36Sopenharmony_ci		group = first_group;
759162306a36Sopenharmony_ci	}
759262306a36Sopenharmony_ci
759362306a36Sopenharmony_ci	do {
759462306a36Sopenharmony_ci		if (first_bit + len >= osb->bitmap_cpg)
759562306a36Sopenharmony_ci			last_bit = osb->bitmap_cpg;
759662306a36Sopenharmony_ci		else
759762306a36Sopenharmony_ci			last_bit = first_bit + len;
759862306a36Sopenharmony_ci
759962306a36Sopenharmony_ci		ret = ocfs2_read_group_descriptor(main_bm_inode,
760062306a36Sopenharmony_ci						  main_bm, group,
760162306a36Sopenharmony_ci						  &gd_bh);
760262306a36Sopenharmony_ci		if (ret < 0) {
760362306a36Sopenharmony_ci			mlog_errno(ret);
760462306a36Sopenharmony_ci			break;
760562306a36Sopenharmony_ci		}
760662306a36Sopenharmony_ci
760762306a36Sopenharmony_ci		gd = (struct ocfs2_group_desc *)gd_bh->b_data;
760862306a36Sopenharmony_ci		cnt = ocfs2_trim_group(sb, gd, group,
760962306a36Sopenharmony_ci				       first_bit, last_bit, minlen);
761062306a36Sopenharmony_ci		brelse(gd_bh);
761162306a36Sopenharmony_ci		gd_bh = NULL;
761262306a36Sopenharmony_ci		if (cnt < 0) {
761362306a36Sopenharmony_ci			ret = cnt;
761462306a36Sopenharmony_ci			mlog_errno(ret);
761562306a36Sopenharmony_ci			break;
761662306a36Sopenharmony_ci		}
761762306a36Sopenharmony_ci
761862306a36Sopenharmony_ci		trimmed += cnt;
761962306a36Sopenharmony_ci		len -= osb->bitmap_cpg - first_bit;
762062306a36Sopenharmony_ci		first_bit = 0;
762162306a36Sopenharmony_ci		if (group == osb->first_cluster_group_blkno)
762262306a36Sopenharmony_ci			group = ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
762362306a36Sopenharmony_ci		else
762462306a36Sopenharmony_ci			group += ocfs2_clusters_to_blocks(sb, osb->bitmap_cpg);
762562306a36Sopenharmony_ci	} while (0);
762662306a36Sopenharmony_ci
762762306a36Sopenharmony_ciout_unlock:
762862306a36Sopenharmony_ci	ocfs2_inode_unlock(main_bm_inode, 0);
762962306a36Sopenharmony_ci	brelse(main_bm_bh);
763062306a36Sopenharmony_ci	main_bm_bh = NULL;
763162306a36Sopenharmony_ciout_mutex:
763262306a36Sopenharmony_ci	inode_unlock(main_bm_inode);
763362306a36Sopenharmony_ci	iput(main_bm_inode);
763462306a36Sopenharmony_ci
763562306a36Sopenharmony_ci	/*
763662306a36Sopenharmony_ci	 * If all the groups trim are not done or failed, but we should release
763762306a36Sopenharmony_ci	 * main_bm related locks for avoiding the current IO starve, then go to
763862306a36Sopenharmony_ci	 * trim the next group
763962306a36Sopenharmony_ci	 */
764062306a36Sopenharmony_ci	if (ret >= 0 && group <= last_group) {
764162306a36Sopenharmony_ci		cond_resched();
764262306a36Sopenharmony_ci		goto next_group;
764362306a36Sopenharmony_ci	}
764462306a36Sopenharmony_ciout:
764562306a36Sopenharmony_ci	range->len = trimmed * sb->s_blocksize;
764662306a36Sopenharmony_ci	return ret;
764762306a36Sopenharmony_ci}
764862306a36Sopenharmony_ci
764962306a36Sopenharmony_ciint ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
765062306a36Sopenharmony_ci{
765162306a36Sopenharmony_ci	int ret;
765262306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(sb);
765362306a36Sopenharmony_ci	struct ocfs2_trim_fs_info info, *pinfo = NULL;
765462306a36Sopenharmony_ci
765562306a36Sopenharmony_ci	ocfs2_trim_fs_lock_res_init(osb);
765662306a36Sopenharmony_ci
765762306a36Sopenharmony_ci	trace_ocfs2_trim_fs(range->start, range->len, range->minlen);
765862306a36Sopenharmony_ci
765962306a36Sopenharmony_ci	ret = ocfs2_trim_fs_lock(osb, NULL, 1);
766062306a36Sopenharmony_ci	if (ret < 0) {
766162306a36Sopenharmony_ci		if (ret != -EAGAIN) {
766262306a36Sopenharmony_ci			mlog_errno(ret);
766362306a36Sopenharmony_ci			ocfs2_trim_fs_lock_res_uninit(osb);
766462306a36Sopenharmony_ci			return ret;
766562306a36Sopenharmony_ci		}
766662306a36Sopenharmony_ci
766762306a36Sopenharmony_ci		mlog(ML_NOTICE, "Wait for trim on device (%s) to "
766862306a36Sopenharmony_ci		     "finish, which is running from another node.\n",
766962306a36Sopenharmony_ci		     osb->dev_str);
767062306a36Sopenharmony_ci		ret = ocfs2_trim_fs_lock(osb, &info, 0);
767162306a36Sopenharmony_ci		if (ret < 0) {
767262306a36Sopenharmony_ci			mlog_errno(ret);
767362306a36Sopenharmony_ci			ocfs2_trim_fs_lock_res_uninit(osb);
767462306a36Sopenharmony_ci			return ret;
767562306a36Sopenharmony_ci		}
767662306a36Sopenharmony_ci
767762306a36Sopenharmony_ci		if (info.tf_valid && info.tf_success &&
767862306a36Sopenharmony_ci		    info.tf_start == range->start &&
767962306a36Sopenharmony_ci		    info.tf_len == range->len &&
768062306a36Sopenharmony_ci		    info.tf_minlen == range->minlen) {
768162306a36Sopenharmony_ci			/* Avoid sending duplicated trim to a shared device */
768262306a36Sopenharmony_ci			mlog(ML_NOTICE, "The same trim on device (%s) was "
768362306a36Sopenharmony_ci			     "just done from node (%u), return.\n",
768462306a36Sopenharmony_ci			     osb->dev_str, info.tf_nodenum);
768562306a36Sopenharmony_ci			range->len = info.tf_trimlen;
768662306a36Sopenharmony_ci			goto out;
768762306a36Sopenharmony_ci		}
768862306a36Sopenharmony_ci	}
768962306a36Sopenharmony_ci
769062306a36Sopenharmony_ci	info.tf_nodenum = osb->node_num;
769162306a36Sopenharmony_ci	info.tf_start = range->start;
769262306a36Sopenharmony_ci	info.tf_len = range->len;
769362306a36Sopenharmony_ci	info.tf_minlen = range->minlen;
769462306a36Sopenharmony_ci
769562306a36Sopenharmony_ci	ret = ocfs2_trim_mainbm(sb, range);
769662306a36Sopenharmony_ci
769762306a36Sopenharmony_ci	info.tf_trimlen = range->len;
769862306a36Sopenharmony_ci	info.tf_success = (ret < 0 ? 0 : 1);
769962306a36Sopenharmony_ci	pinfo = &info;
770062306a36Sopenharmony_ciout:
770162306a36Sopenharmony_ci	ocfs2_trim_fs_unlock(osb, pinfo);
770262306a36Sopenharmony_ci	ocfs2_trim_fs_lock_res_uninit(osb);
770362306a36Sopenharmony_ci	return ret;
770462306a36Sopenharmony_ci}
7705