162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * suballoc.c
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * metadata alloc and free
662306a36Sopenharmony_ci * Inspired by ext3 block groups.
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
962306a36Sopenharmony_ci */
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci#include <linux/fs.h>
1262306a36Sopenharmony_ci#include <linux/types.h>
1362306a36Sopenharmony_ci#include <linux/slab.h>
1462306a36Sopenharmony_ci#include <linux/highmem.h>
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_ci#include <cluster/masklog.h>
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ci#include "ocfs2.h"
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_ci#include "alloc.h"
2162306a36Sopenharmony_ci#include "blockcheck.h"
2262306a36Sopenharmony_ci#include "dlmglue.h"
2362306a36Sopenharmony_ci#include "inode.h"
2462306a36Sopenharmony_ci#include "journal.h"
2562306a36Sopenharmony_ci#include "localalloc.h"
2662306a36Sopenharmony_ci#include "suballoc.h"
2762306a36Sopenharmony_ci#include "super.h"
2862306a36Sopenharmony_ci#include "sysfile.h"
2962306a36Sopenharmony_ci#include "uptodate.h"
3062306a36Sopenharmony_ci#include "ocfs2_trace.h"
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_ci#include "buffer_head_io.h"
3362306a36Sopenharmony_ci
/*
 * Group-allocation flag values.  NOT_ALLOC_NEW_GROUP is the empty set;
 * the other two occupy distinct bits.
 */
#define NOT_ALLOC_NEW_GROUP		0
#define ALLOC_NEW_GROUP			0x1
#define ALLOC_GROUPS_FROM_GLOBAL	0x2

/* NOTE(review): looks like a cap on a "steal" counter - users are not visible here. */
#define OCFS2_MAX_TO_STEAL		1024
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_cistruct ocfs2_suballoc_result {
4162306a36Sopenharmony_ci	u64		sr_bg_blkno;	/* The bg we allocated from.  Set
4262306a36Sopenharmony_ci					   to 0 when a block group is
4362306a36Sopenharmony_ci					   contiguous. */
4462306a36Sopenharmony_ci	u64		sr_bg_stable_blkno; /*
4562306a36Sopenharmony_ci					     * Doesn't change, always
4662306a36Sopenharmony_ci					     * set to target block
4762306a36Sopenharmony_ci					     * group descriptor
4862306a36Sopenharmony_ci					     * block.
4962306a36Sopenharmony_ci					     */
5062306a36Sopenharmony_ci	u64		sr_blkno;	/* The first allocated block */
5162306a36Sopenharmony_ci	unsigned int	sr_bit_offset;	/* The bit in the bg */
5262306a36Sopenharmony_ci	unsigned int	sr_bits;	/* How many bits we claimed */
5362306a36Sopenharmony_ci};
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_cistatic u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res)
5662306a36Sopenharmony_ci{
5762306a36Sopenharmony_ci	if (res->sr_blkno == 0)
5862306a36Sopenharmony_ci		return 0;
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci	if (res->sr_bg_blkno)
6162306a36Sopenharmony_ci		return res->sr_bg_blkno;
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_ci	return ocfs2_which_suballoc_group(res->sr_blkno, res->sr_bit_offset);
6462306a36Sopenharmony_ci}
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_cistatic inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
6762306a36Sopenharmony_cistatic int ocfs2_block_group_fill(handle_t *handle,
6862306a36Sopenharmony_ci				  struct inode *alloc_inode,
6962306a36Sopenharmony_ci				  struct buffer_head *bg_bh,
7062306a36Sopenharmony_ci				  u64 group_blkno,
7162306a36Sopenharmony_ci				  unsigned int group_clusters,
7262306a36Sopenharmony_ci				  u16 my_chain,
7362306a36Sopenharmony_ci				  struct ocfs2_chain_list *cl);
7462306a36Sopenharmony_cistatic int ocfs2_block_group_alloc(struct ocfs2_super *osb,
7562306a36Sopenharmony_ci				   struct inode *alloc_inode,
7662306a36Sopenharmony_ci				   struct buffer_head *bh,
7762306a36Sopenharmony_ci				   u64 max_block,
7862306a36Sopenharmony_ci				   u64 *last_alloc_group,
7962306a36Sopenharmony_ci				   int flags);
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_cistatic int ocfs2_cluster_group_search(struct inode *inode,
8262306a36Sopenharmony_ci				      struct buffer_head *group_bh,
8362306a36Sopenharmony_ci				      u32 bits_wanted, u32 min_bits,
8462306a36Sopenharmony_ci				      u64 max_block,
8562306a36Sopenharmony_ci				      struct ocfs2_suballoc_result *res);
8662306a36Sopenharmony_cistatic int ocfs2_block_group_search(struct inode *inode,
8762306a36Sopenharmony_ci				    struct buffer_head *group_bh,
8862306a36Sopenharmony_ci				    u32 bits_wanted, u32 min_bits,
8962306a36Sopenharmony_ci				    u64 max_block,
9062306a36Sopenharmony_ci				    struct ocfs2_suballoc_result *res);
9162306a36Sopenharmony_cistatic int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
9262306a36Sopenharmony_ci				     handle_t *handle,
9362306a36Sopenharmony_ci				     u32 bits_wanted,
9462306a36Sopenharmony_ci				     u32 min_bits,
9562306a36Sopenharmony_ci				     struct ocfs2_suballoc_result *res);
9662306a36Sopenharmony_cistatic int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
9762306a36Sopenharmony_ci					 int nr);
9862306a36Sopenharmony_cistatic int ocfs2_relink_block_group(handle_t *handle,
9962306a36Sopenharmony_ci				    struct inode *alloc_inode,
10062306a36Sopenharmony_ci				    struct buffer_head *fe_bh,
10162306a36Sopenharmony_ci				    struct buffer_head *bg_bh,
10262306a36Sopenharmony_ci				    struct buffer_head *prev_bg_bh,
10362306a36Sopenharmony_ci				    u16 chain);
10462306a36Sopenharmony_cistatic inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
10562306a36Sopenharmony_ci						     u32 wanted);
10662306a36Sopenharmony_cistatic inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
10762306a36Sopenharmony_ci						   u64 bg_blkno,
10862306a36Sopenharmony_ci						   u16 bg_bit_off);
10962306a36Sopenharmony_cistatic inline void ocfs2_block_to_cluster_group(struct inode *inode,
11062306a36Sopenharmony_ci						u64 data_blkno,
11162306a36Sopenharmony_ci						u64 *bg_blkno,
11262306a36Sopenharmony_ci						u16 *bg_bit_off);
11362306a36Sopenharmony_cistatic int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
11462306a36Sopenharmony_ci					     u32 bits_wanted, u64 max_block,
11562306a36Sopenharmony_ci					     int flags,
11662306a36Sopenharmony_ci					     struct ocfs2_alloc_context **ac);
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_civoid ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
11962306a36Sopenharmony_ci{
12062306a36Sopenharmony_ci	struct inode *inode = ac->ac_inode;
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci	if (inode) {
12362306a36Sopenharmony_ci		if (ac->ac_which != OCFS2_AC_USE_LOCAL)
12462306a36Sopenharmony_ci			ocfs2_inode_unlock(inode, 1);
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci		inode_unlock(inode);
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci		iput(inode);
12962306a36Sopenharmony_ci		ac->ac_inode = NULL;
13062306a36Sopenharmony_ci	}
13162306a36Sopenharmony_ci	brelse(ac->ac_bh);
13262306a36Sopenharmony_ci	ac->ac_bh = NULL;
13362306a36Sopenharmony_ci	ac->ac_resv = NULL;
13462306a36Sopenharmony_ci	kfree(ac->ac_find_loc_priv);
13562306a36Sopenharmony_ci	ac->ac_find_loc_priv = NULL;
13662306a36Sopenharmony_ci}
13762306a36Sopenharmony_ci
/*
 * Tear down an allocation context completely: release what it
 * references, then free the context memory.  @ac must not be used
 * afterwards.
 */
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
{
	/* Resources first, while the fields are still readable. */
	ocfs2_free_ac_resource(ac);

	kfree(ac);
}
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_cistatic u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
14562306a36Sopenharmony_ci{
14662306a36Sopenharmony_ci	return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
14762306a36Sopenharmony_ci}
14862306a36Sopenharmony_ci
/*
 * Report a group descriptor validation failure and leave the calling
 * function.  Expects `resize` and `sb` to be in scope at the call site.
 *
 * - resize != 0: log via mlog() only (the filesystem is not marked in
 *   error) and return -EINVAL, so the caller learns that the descriptor
 *   is bad.  Merely logging and falling through would let the validator
 *   return 0 for a corrupt descriptor.
 * - resize == 0: return whatever ocfs2_error() yields.
 */
#define do_error(fmt, ...)						\
do {									\
	if (resize) {							\
		mlog(ML_ERROR, fmt, ##__VA_ARGS__);			\
		return -EINVAL;						\
	}								\
	return ocfs2_error(sb, fmt, ##__VA_ARGS__);			\
} while (0)
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_cistatic int ocfs2_validate_gd_self(struct super_block *sb,
15862306a36Sopenharmony_ci				  struct buffer_head *bh,
15962306a36Sopenharmony_ci				  int resize)
16062306a36Sopenharmony_ci{
16162306a36Sopenharmony_ci	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci	if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
16462306a36Sopenharmony_ci		do_error("Group descriptor #%llu has bad signature %.*s\n",
16562306a36Sopenharmony_ci			 (unsigned long long)bh->b_blocknr, 7,
16662306a36Sopenharmony_ci			 gd->bg_signature);
16762306a36Sopenharmony_ci	}
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci	if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) {
17062306a36Sopenharmony_ci		do_error("Group descriptor #%llu has an invalid bg_blkno of %llu\n",
17162306a36Sopenharmony_ci			 (unsigned long long)bh->b_blocknr,
17262306a36Sopenharmony_ci			 (unsigned long long)le64_to_cpu(gd->bg_blkno));
17362306a36Sopenharmony_ci	}
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci	if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) {
17662306a36Sopenharmony_ci		do_error("Group descriptor #%llu has an invalid fs_generation of #%u\n",
17762306a36Sopenharmony_ci			 (unsigned long long)bh->b_blocknr,
17862306a36Sopenharmony_ci			 le32_to_cpu(gd->bg_generation));
17962306a36Sopenharmony_ci	}
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
18262306a36Sopenharmony_ci		do_error("Group descriptor #%llu has bit count %u but claims that %u are free\n",
18362306a36Sopenharmony_ci			 (unsigned long long)bh->b_blocknr,
18462306a36Sopenharmony_ci			 le16_to_cpu(gd->bg_bits),
18562306a36Sopenharmony_ci			 le16_to_cpu(gd->bg_free_bits_count));
18662306a36Sopenharmony_ci	}
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
18962306a36Sopenharmony_ci		do_error("Group descriptor #%llu has bit count %u but max bitmap bits of %u\n",
19062306a36Sopenharmony_ci			 (unsigned long long)bh->b_blocknr,
19162306a36Sopenharmony_ci			 le16_to_cpu(gd->bg_bits),
19262306a36Sopenharmony_ci			 8 * le16_to_cpu(gd->bg_size));
19362306a36Sopenharmony_ci	}
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	return 0;
19662306a36Sopenharmony_ci}
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_cistatic int ocfs2_validate_gd_parent(struct super_block *sb,
19962306a36Sopenharmony_ci				    struct ocfs2_dinode *di,
20062306a36Sopenharmony_ci				    struct buffer_head *bh,
20162306a36Sopenharmony_ci				    int resize)
20262306a36Sopenharmony_ci{
20362306a36Sopenharmony_ci	unsigned int max_bits;
20462306a36Sopenharmony_ci	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_ci	if (di->i_blkno != gd->bg_parent_dinode) {
20762306a36Sopenharmony_ci		do_error("Group descriptor #%llu has bad parent pointer (%llu, expected %llu)\n",
20862306a36Sopenharmony_ci			 (unsigned long long)bh->b_blocknr,
20962306a36Sopenharmony_ci			 (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
21062306a36Sopenharmony_ci			 (unsigned long long)le64_to_cpu(di->i_blkno));
21162306a36Sopenharmony_ci	}
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci	max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
21462306a36Sopenharmony_ci	if (le16_to_cpu(gd->bg_bits) > max_bits) {
21562306a36Sopenharmony_ci		do_error("Group descriptor #%llu has bit count of %u\n",
21662306a36Sopenharmony_ci			 (unsigned long long)bh->b_blocknr,
21762306a36Sopenharmony_ci			 le16_to_cpu(gd->bg_bits));
21862306a36Sopenharmony_ci	}
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci	/* In resize, we may meet the case bg_chain == cl_next_free_rec. */
22162306a36Sopenharmony_ci	if ((le16_to_cpu(gd->bg_chain) >
22262306a36Sopenharmony_ci	     le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) ||
22362306a36Sopenharmony_ci	    ((le16_to_cpu(gd->bg_chain) ==
22462306a36Sopenharmony_ci	     le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) && !resize)) {
22562306a36Sopenharmony_ci		do_error("Group descriptor #%llu has bad chain %u\n",
22662306a36Sopenharmony_ci			 (unsigned long long)bh->b_blocknr,
22762306a36Sopenharmony_ci			 le16_to_cpu(gd->bg_chain));
22862306a36Sopenharmony_ci	}
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_ci	return 0;
23162306a36Sopenharmony_ci}
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci#undef do_error
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci/*
23662306a36Sopenharmony_ci * This version only prints errors.  It does not fail the filesystem, and
23762306a36Sopenharmony_ci * exists only for resize.
23862306a36Sopenharmony_ci */
23962306a36Sopenharmony_ciint ocfs2_check_group_descriptor(struct super_block *sb,
24062306a36Sopenharmony_ci				 struct ocfs2_dinode *di,
24162306a36Sopenharmony_ci				 struct buffer_head *bh)
24262306a36Sopenharmony_ci{
24362306a36Sopenharmony_ci	int rc;
24462306a36Sopenharmony_ci	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_ci	BUG_ON(!buffer_uptodate(bh));
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_ci	/*
24962306a36Sopenharmony_ci	 * If the ecc fails, we return the error but otherwise
25062306a36Sopenharmony_ci	 * leave the filesystem running.  We know any error is
25162306a36Sopenharmony_ci	 * local to this block.
25262306a36Sopenharmony_ci	 */
25362306a36Sopenharmony_ci	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check);
25462306a36Sopenharmony_ci	if (rc) {
25562306a36Sopenharmony_ci		mlog(ML_ERROR,
25662306a36Sopenharmony_ci		     "Checksum failed for group descriptor %llu\n",
25762306a36Sopenharmony_ci		     (unsigned long long)bh->b_blocknr);
25862306a36Sopenharmony_ci	} else
25962306a36Sopenharmony_ci		rc = ocfs2_validate_gd_self(sb, bh, 1);
26062306a36Sopenharmony_ci	if (!rc)
26162306a36Sopenharmony_ci		rc = ocfs2_validate_gd_parent(sb, di, bh, 1);
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_ci	return rc;
26462306a36Sopenharmony_ci}
26562306a36Sopenharmony_ci
26662306a36Sopenharmony_cistatic int ocfs2_validate_group_descriptor(struct super_block *sb,
26762306a36Sopenharmony_ci					   struct buffer_head *bh)
26862306a36Sopenharmony_ci{
26962306a36Sopenharmony_ci	int rc;
27062306a36Sopenharmony_ci	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ci	trace_ocfs2_validate_group_descriptor(
27362306a36Sopenharmony_ci					(unsigned long long)bh->b_blocknr);
27462306a36Sopenharmony_ci
27562306a36Sopenharmony_ci	BUG_ON(!buffer_uptodate(bh));
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci	/*
27862306a36Sopenharmony_ci	 * If the ecc fails, we return the error but otherwise
27962306a36Sopenharmony_ci	 * leave the filesystem running.  We know any error is
28062306a36Sopenharmony_ci	 * local to this block.
28162306a36Sopenharmony_ci	 */
28262306a36Sopenharmony_ci	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check);
28362306a36Sopenharmony_ci	if (rc)
28462306a36Sopenharmony_ci		return rc;
28562306a36Sopenharmony_ci
28662306a36Sopenharmony_ci	/*
28762306a36Sopenharmony_ci	 * Errors after here are fatal.
28862306a36Sopenharmony_ci	 */
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ci	return ocfs2_validate_gd_self(sb, bh, 0);
29162306a36Sopenharmony_ci}
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_ciint ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
29462306a36Sopenharmony_ci				u64 gd_blkno, struct buffer_head **bh)
29562306a36Sopenharmony_ci{
29662306a36Sopenharmony_ci	int rc;
29762306a36Sopenharmony_ci	struct buffer_head *tmp = *bh;
29862306a36Sopenharmony_ci
29962306a36Sopenharmony_ci	rc = ocfs2_read_block(INODE_CACHE(inode), gd_blkno, &tmp,
30062306a36Sopenharmony_ci			      ocfs2_validate_group_descriptor);
30162306a36Sopenharmony_ci	if (rc)
30262306a36Sopenharmony_ci		goto out;
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_ci	rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0);
30562306a36Sopenharmony_ci	if (rc) {
30662306a36Sopenharmony_ci		brelse(tmp);
30762306a36Sopenharmony_ci		goto out;
30862306a36Sopenharmony_ci	}
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci	/* If ocfs2_read_block() got us a new bh, pass it up. */
31162306a36Sopenharmony_ci	if (!*bh)
31262306a36Sopenharmony_ci		*bh = tmp;
31362306a36Sopenharmony_ci
31462306a36Sopenharmony_ciout:
31562306a36Sopenharmony_ci	return rc;
31662306a36Sopenharmony_ci}
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_cistatic void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
31962306a36Sopenharmony_ci					  struct ocfs2_group_desc *bg,
32062306a36Sopenharmony_ci					  struct ocfs2_chain_list *cl,
32162306a36Sopenharmony_ci					  u64 p_blkno, unsigned int clusters)
32262306a36Sopenharmony_ci{
32362306a36Sopenharmony_ci	struct ocfs2_extent_list *el = &bg->bg_list;
32462306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
32562306a36Sopenharmony_ci
32662306a36Sopenharmony_ci	BUG_ON(!ocfs2_supports_discontig_bg(osb));
32762306a36Sopenharmony_ci	if (!el->l_next_free_rec)
32862306a36Sopenharmony_ci		el->l_count = cpu_to_le16(ocfs2_extent_recs_per_gd(osb->sb));
32962306a36Sopenharmony_ci	rec = &el->l_recs[le16_to_cpu(el->l_next_free_rec)];
33062306a36Sopenharmony_ci	rec->e_blkno = cpu_to_le64(p_blkno);
33162306a36Sopenharmony_ci	rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) /
33262306a36Sopenharmony_ci				  le16_to_cpu(cl->cl_bpc));
33362306a36Sopenharmony_ci	rec->e_leaf_clusters = cpu_to_le16(clusters);
33462306a36Sopenharmony_ci	le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc));
33562306a36Sopenharmony_ci	le16_add_cpu(&bg->bg_free_bits_count,
33662306a36Sopenharmony_ci		     clusters * le16_to_cpu(cl->cl_bpc));
33762306a36Sopenharmony_ci	le16_add_cpu(&el->l_next_free_rec, 1);
33862306a36Sopenharmony_ci}
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_cistatic int ocfs2_block_group_fill(handle_t *handle,
34162306a36Sopenharmony_ci				  struct inode *alloc_inode,
34262306a36Sopenharmony_ci				  struct buffer_head *bg_bh,
34362306a36Sopenharmony_ci				  u64 group_blkno,
34462306a36Sopenharmony_ci				  unsigned int group_clusters,
34562306a36Sopenharmony_ci				  u16 my_chain,
34662306a36Sopenharmony_ci				  struct ocfs2_chain_list *cl)
34762306a36Sopenharmony_ci{
34862306a36Sopenharmony_ci	int status = 0;
34962306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
35062306a36Sopenharmony_ci	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
35162306a36Sopenharmony_ci	struct super_block * sb = alloc_inode->i_sb;
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_ci	if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
35462306a36Sopenharmony_ci		status = ocfs2_error(alloc_inode->i_sb,
35562306a36Sopenharmony_ci				     "group block (%llu) != b_blocknr (%llu)\n",
35662306a36Sopenharmony_ci				     (unsigned long long)group_blkno,
35762306a36Sopenharmony_ci				     (unsigned long long) bg_bh->b_blocknr);
35862306a36Sopenharmony_ci		goto bail;
35962306a36Sopenharmony_ci	}
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_ci	status = ocfs2_journal_access_gd(handle,
36262306a36Sopenharmony_ci					 INODE_CACHE(alloc_inode),
36362306a36Sopenharmony_ci					 bg_bh,
36462306a36Sopenharmony_ci					 OCFS2_JOURNAL_ACCESS_CREATE);
36562306a36Sopenharmony_ci	if (status < 0) {
36662306a36Sopenharmony_ci		mlog_errno(status);
36762306a36Sopenharmony_ci		goto bail;
36862306a36Sopenharmony_ci	}
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci	memset(bg, 0, sb->s_blocksize);
37162306a36Sopenharmony_ci	strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE);
37262306a36Sopenharmony_ci	bg->bg_generation = cpu_to_le32(osb->fs_generation);
37362306a36Sopenharmony_ci	bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb, 1,
37462306a36Sopenharmony_ci						osb->s_feature_incompat));
37562306a36Sopenharmony_ci	bg->bg_chain = cpu_to_le16(my_chain);
37662306a36Sopenharmony_ci	bg->bg_next_group = cl->cl_recs[my_chain].c_blkno;
37762306a36Sopenharmony_ci	bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno);
37862306a36Sopenharmony_ci	bg->bg_blkno = cpu_to_le64(group_blkno);
37962306a36Sopenharmony_ci	if (group_clusters == le16_to_cpu(cl->cl_cpg))
38062306a36Sopenharmony_ci		bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl));
38162306a36Sopenharmony_ci	else
38262306a36Sopenharmony_ci		ocfs2_bg_discontig_add_extent(osb, bg, cl, group_blkno,
38362306a36Sopenharmony_ci					      group_clusters);
38462306a36Sopenharmony_ci
38562306a36Sopenharmony_ci	/* set the 1st bit in the bitmap to account for the descriptor block */
38662306a36Sopenharmony_ci	ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap);
38762306a36Sopenharmony_ci	bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1);
38862306a36Sopenharmony_ci
38962306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, bg_bh);
39062306a36Sopenharmony_ci
39162306a36Sopenharmony_ci	/* There is no need to zero out or otherwise initialize the
39262306a36Sopenharmony_ci	 * other blocks in a group - All valid FS metadata in a block
39362306a36Sopenharmony_ci	 * group stores the superblock fs_generation value at
39462306a36Sopenharmony_ci	 * allocation time. */
39562306a36Sopenharmony_ci
39662306a36Sopenharmony_cibail:
39762306a36Sopenharmony_ci	if (status)
39862306a36Sopenharmony_ci		mlog_errno(status);
39962306a36Sopenharmony_ci	return status;
40062306a36Sopenharmony_ci}
40162306a36Sopenharmony_ci
40262306a36Sopenharmony_cistatic inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
40362306a36Sopenharmony_ci{
40462306a36Sopenharmony_ci	u16 curr, best;
40562306a36Sopenharmony_ci
40662306a36Sopenharmony_ci	best = curr = 0;
40762306a36Sopenharmony_ci	while (curr < le16_to_cpu(cl->cl_count)) {
40862306a36Sopenharmony_ci		if (le32_to_cpu(cl->cl_recs[best].c_total) >
40962306a36Sopenharmony_ci		    le32_to_cpu(cl->cl_recs[curr].c_total))
41062306a36Sopenharmony_ci			best = curr;
41162306a36Sopenharmony_ci		curr++;
41262306a36Sopenharmony_ci	}
41362306a36Sopenharmony_ci	return best;
41462306a36Sopenharmony_ci}
41562306a36Sopenharmony_ci
41662306a36Sopenharmony_cistatic struct buffer_head *
41762306a36Sopenharmony_ciocfs2_block_group_alloc_contig(struct ocfs2_super *osb, handle_t *handle,
41862306a36Sopenharmony_ci			       struct inode *alloc_inode,
41962306a36Sopenharmony_ci			       struct ocfs2_alloc_context *ac,
42062306a36Sopenharmony_ci			       struct ocfs2_chain_list *cl)
42162306a36Sopenharmony_ci{
42262306a36Sopenharmony_ci	int status;
42362306a36Sopenharmony_ci	u32 bit_off, num_bits;
42462306a36Sopenharmony_ci	u64 bg_blkno;
42562306a36Sopenharmony_ci	struct buffer_head *bg_bh;
42662306a36Sopenharmony_ci	unsigned int alloc_rec = ocfs2_find_smallest_chain(cl);
42762306a36Sopenharmony_ci
42862306a36Sopenharmony_ci	status = ocfs2_claim_clusters(handle, ac,
42962306a36Sopenharmony_ci				      le16_to_cpu(cl->cl_cpg), &bit_off,
43062306a36Sopenharmony_ci				      &num_bits);
43162306a36Sopenharmony_ci	if (status < 0) {
43262306a36Sopenharmony_ci		if (status != -ENOSPC)
43362306a36Sopenharmony_ci			mlog_errno(status);
43462306a36Sopenharmony_ci		goto bail;
43562306a36Sopenharmony_ci	}
43662306a36Sopenharmony_ci
43762306a36Sopenharmony_ci	/* setup the group */
43862306a36Sopenharmony_ci	bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
43962306a36Sopenharmony_ci	trace_ocfs2_block_group_alloc_contig(
44062306a36Sopenharmony_ci	     (unsigned long long)bg_blkno, alloc_rec);
44162306a36Sopenharmony_ci
44262306a36Sopenharmony_ci	bg_bh = sb_getblk(osb->sb, bg_blkno);
44362306a36Sopenharmony_ci	if (!bg_bh) {
44462306a36Sopenharmony_ci		status = -ENOMEM;
44562306a36Sopenharmony_ci		mlog_errno(status);
44662306a36Sopenharmony_ci		goto bail;
44762306a36Sopenharmony_ci	}
44862306a36Sopenharmony_ci	ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
44962306a36Sopenharmony_ci
45062306a36Sopenharmony_ci	status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh,
45162306a36Sopenharmony_ci					bg_blkno, num_bits, alloc_rec, cl);
45262306a36Sopenharmony_ci	if (status < 0) {
45362306a36Sopenharmony_ci		brelse(bg_bh);
45462306a36Sopenharmony_ci		mlog_errno(status);
45562306a36Sopenharmony_ci	}
45662306a36Sopenharmony_ci
45762306a36Sopenharmony_cibail:
45862306a36Sopenharmony_ci	return status ? ERR_PTR(status) : bg_bh;
45962306a36Sopenharmony_ci}
46062306a36Sopenharmony_ci
46162306a36Sopenharmony_cistatic int ocfs2_block_group_claim_bits(struct ocfs2_super *osb,
46262306a36Sopenharmony_ci					handle_t *handle,
46362306a36Sopenharmony_ci					struct ocfs2_alloc_context *ac,
46462306a36Sopenharmony_ci					unsigned int min_bits,
46562306a36Sopenharmony_ci					u32 *bit_off, u32 *num_bits)
46662306a36Sopenharmony_ci{
46762306a36Sopenharmony_ci	int status = 0;
46862306a36Sopenharmony_ci
46962306a36Sopenharmony_ci	while (min_bits) {
47062306a36Sopenharmony_ci		status = ocfs2_claim_clusters(handle, ac, min_bits,
47162306a36Sopenharmony_ci					      bit_off, num_bits);
47262306a36Sopenharmony_ci		if (status != -ENOSPC)
47362306a36Sopenharmony_ci			break;
47462306a36Sopenharmony_ci
47562306a36Sopenharmony_ci		min_bits >>= 1;
47662306a36Sopenharmony_ci	}
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_ci	return status;
47962306a36Sopenharmony_ci}
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_cistatic int ocfs2_block_group_grow_discontig(handle_t *handle,
48262306a36Sopenharmony_ci					    struct inode *alloc_inode,
48362306a36Sopenharmony_ci					    struct buffer_head *bg_bh,
48462306a36Sopenharmony_ci					    struct ocfs2_alloc_context *ac,
48562306a36Sopenharmony_ci					    struct ocfs2_chain_list *cl,
48662306a36Sopenharmony_ci					    unsigned int min_bits)
48762306a36Sopenharmony_ci{
48862306a36Sopenharmony_ci	int status;
48962306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
49062306a36Sopenharmony_ci	struct ocfs2_group_desc *bg =
49162306a36Sopenharmony_ci		(struct ocfs2_group_desc *)bg_bh->b_data;
49262306a36Sopenharmony_ci	unsigned int needed = le16_to_cpu(cl->cl_cpg) -
49362306a36Sopenharmony_ci			 le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc);
49462306a36Sopenharmony_ci	u32 p_cpos, clusters;
49562306a36Sopenharmony_ci	u64 p_blkno;
49662306a36Sopenharmony_ci	struct ocfs2_extent_list *el = &bg->bg_list;
49762306a36Sopenharmony_ci
49862306a36Sopenharmony_ci	status = ocfs2_journal_access_gd(handle,
49962306a36Sopenharmony_ci					 INODE_CACHE(alloc_inode),
50062306a36Sopenharmony_ci					 bg_bh,
50162306a36Sopenharmony_ci					 OCFS2_JOURNAL_ACCESS_CREATE);
50262306a36Sopenharmony_ci	if (status < 0) {
50362306a36Sopenharmony_ci		mlog_errno(status);
50462306a36Sopenharmony_ci		goto bail;
50562306a36Sopenharmony_ci	}
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci	while ((needed > 0) && (le16_to_cpu(el->l_next_free_rec) <
50862306a36Sopenharmony_ci				le16_to_cpu(el->l_count))) {
50962306a36Sopenharmony_ci		if (min_bits > needed)
51062306a36Sopenharmony_ci			min_bits = needed;
51162306a36Sopenharmony_ci		status = ocfs2_block_group_claim_bits(osb, handle, ac,
51262306a36Sopenharmony_ci						      min_bits, &p_cpos,
51362306a36Sopenharmony_ci						      &clusters);
51462306a36Sopenharmony_ci		if (status < 0) {
51562306a36Sopenharmony_ci			if (status != -ENOSPC)
51662306a36Sopenharmony_ci				mlog_errno(status);
51762306a36Sopenharmony_ci			goto bail;
51862306a36Sopenharmony_ci		}
51962306a36Sopenharmony_ci		p_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cpos);
52062306a36Sopenharmony_ci		ocfs2_bg_discontig_add_extent(osb, bg, cl, p_blkno,
52162306a36Sopenharmony_ci					      clusters);
52262306a36Sopenharmony_ci
52362306a36Sopenharmony_ci		min_bits = clusters;
52462306a36Sopenharmony_ci		needed = le16_to_cpu(cl->cl_cpg) -
52562306a36Sopenharmony_ci			 le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc);
52662306a36Sopenharmony_ci	}
52762306a36Sopenharmony_ci
52862306a36Sopenharmony_ci	if (needed > 0) {
52962306a36Sopenharmony_ci		/*
53062306a36Sopenharmony_ci		 * We have used up all the extent rec but can't fill up
53162306a36Sopenharmony_ci		 * the cpg. So bail out.
53262306a36Sopenharmony_ci		 */
53362306a36Sopenharmony_ci		status = -ENOSPC;
53462306a36Sopenharmony_ci		goto bail;
53562306a36Sopenharmony_ci	}
53662306a36Sopenharmony_ci
53762306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, bg_bh);
53862306a36Sopenharmony_ci
53962306a36Sopenharmony_cibail:
54062306a36Sopenharmony_ci	return status;
54162306a36Sopenharmony_ci}
54262306a36Sopenharmony_ci
54362306a36Sopenharmony_cistatic void ocfs2_bg_alloc_cleanup(handle_t *handle,
54462306a36Sopenharmony_ci				   struct ocfs2_alloc_context *cluster_ac,
54562306a36Sopenharmony_ci				   struct inode *alloc_inode,
54662306a36Sopenharmony_ci				   struct buffer_head *bg_bh)
54762306a36Sopenharmony_ci{
54862306a36Sopenharmony_ci	int i, ret;
54962306a36Sopenharmony_ci	struct ocfs2_group_desc *bg;
55062306a36Sopenharmony_ci	struct ocfs2_extent_list *el;
55162306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_ci	if (!bg_bh)
55462306a36Sopenharmony_ci		return;
55562306a36Sopenharmony_ci
55662306a36Sopenharmony_ci	bg = (struct ocfs2_group_desc *)bg_bh->b_data;
55762306a36Sopenharmony_ci	el = &bg->bg_list;
55862306a36Sopenharmony_ci	for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
55962306a36Sopenharmony_ci		rec = &el->l_recs[i];
56062306a36Sopenharmony_ci		ret = ocfs2_free_clusters(handle, cluster_ac->ac_inode,
56162306a36Sopenharmony_ci					  cluster_ac->ac_bh,
56262306a36Sopenharmony_ci					  le64_to_cpu(rec->e_blkno),
56362306a36Sopenharmony_ci					  le16_to_cpu(rec->e_leaf_clusters));
56462306a36Sopenharmony_ci		if (ret)
56562306a36Sopenharmony_ci			mlog_errno(ret);
56662306a36Sopenharmony_ci		/* Try all the clusters to free */
56762306a36Sopenharmony_ci	}
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_ci	ocfs2_remove_from_cache(INODE_CACHE(alloc_inode), bg_bh);
57062306a36Sopenharmony_ci	brelse(bg_bh);
57162306a36Sopenharmony_ci}
57262306a36Sopenharmony_ci
57362306a36Sopenharmony_cistatic struct buffer_head *
57462306a36Sopenharmony_ciocfs2_block_group_alloc_discontig(handle_t *handle,
57562306a36Sopenharmony_ci				  struct inode *alloc_inode,
57662306a36Sopenharmony_ci				  struct ocfs2_alloc_context *ac,
57762306a36Sopenharmony_ci				  struct ocfs2_chain_list *cl)
57862306a36Sopenharmony_ci{
57962306a36Sopenharmony_ci	int status;
58062306a36Sopenharmony_ci	u32 bit_off, num_bits;
58162306a36Sopenharmony_ci	u64 bg_blkno;
58262306a36Sopenharmony_ci	unsigned int min_bits = le16_to_cpu(cl->cl_cpg) >> 1;
58362306a36Sopenharmony_ci	struct buffer_head *bg_bh = NULL;
58462306a36Sopenharmony_ci	unsigned int alloc_rec = ocfs2_find_smallest_chain(cl);
58562306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_ci	if (!ocfs2_supports_discontig_bg(osb)) {
58862306a36Sopenharmony_ci		status = -ENOSPC;
58962306a36Sopenharmony_ci		goto bail;
59062306a36Sopenharmony_ci	}
59162306a36Sopenharmony_ci
59262306a36Sopenharmony_ci	status = ocfs2_extend_trans(handle,
59362306a36Sopenharmony_ci				    ocfs2_calc_bg_discontig_credits(osb->sb));
59462306a36Sopenharmony_ci	if (status) {
59562306a36Sopenharmony_ci		mlog_errno(status);
59662306a36Sopenharmony_ci		goto bail;
59762306a36Sopenharmony_ci	}
59862306a36Sopenharmony_ci
59962306a36Sopenharmony_ci	/*
60062306a36Sopenharmony_ci	 * We're going to be grabbing from multiple cluster groups.
60162306a36Sopenharmony_ci	 * We don't have enough credits to relink them all, and the
60262306a36Sopenharmony_ci	 * cluster groups will be staying in cache for the duration of
60362306a36Sopenharmony_ci	 * this operation.
60462306a36Sopenharmony_ci	 */
60562306a36Sopenharmony_ci	ac->ac_disable_chain_relink = 1;
60662306a36Sopenharmony_ci
60762306a36Sopenharmony_ci	/* Claim the first region */
60862306a36Sopenharmony_ci	status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits,
60962306a36Sopenharmony_ci					      &bit_off, &num_bits);
61062306a36Sopenharmony_ci	if (status < 0) {
61162306a36Sopenharmony_ci		if (status != -ENOSPC)
61262306a36Sopenharmony_ci			mlog_errno(status);
61362306a36Sopenharmony_ci		goto bail;
61462306a36Sopenharmony_ci	}
61562306a36Sopenharmony_ci	min_bits = num_bits;
61662306a36Sopenharmony_ci
61762306a36Sopenharmony_ci	/* setup the group */
61862306a36Sopenharmony_ci	bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
61962306a36Sopenharmony_ci	trace_ocfs2_block_group_alloc_discontig(
62062306a36Sopenharmony_ci				(unsigned long long)bg_blkno, alloc_rec);
62162306a36Sopenharmony_ci
62262306a36Sopenharmony_ci	bg_bh = sb_getblk(osb->sb, bg_blkno);
62362306a36Sopenharmony_ci	if (!bg_bh) {
62462306a36Sopenharmony_ci		status = -ENOMEM;
62562306a36Sopenharmony_ci		mlog_errno(status);
62662306a36Sopenharmony_ci		goto bail;
62762306a36Sopenharmony_ci	}
62862306a36Sopenharmony_ci	ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
62962306a36Sopenharmony_ci
63062306a36Sopenharmony_ci	status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh,
63162306a36Sopenharmony_ci					bg_blkno, num_bits, alloc_rec, cl);
63262306a36Sopenharmony_ci	if (status < 0) {
63362306a36Sopenharmony_ci		mlog_errno(status);
63462306a36Sopenharmony_ci		goto bail;
63562306a36Sopenharmony_ci	}
63662306a36Sopenharmony_ci
63762306a36Sopenharmony_ci	status = ocfs2_block_group_grow_discontig(handle, alloc_inode,
63862306a36Sopenharmony_ci						  bg_bh, ac, cl, min_bits);
63962306a36Sopenharmony_ci	if (status)
64062306a36Sopenharmony_ci		mlog_errno(status);
64162306a36Sopenharmony_ci
64262306a36Sopenharmony_cibail:
64362306a36Sopenharmony_ci	if (status)
64462306a36Sopenharmony_ci		ocfs2_bg_alloc_cleanup(handle, ac, alloc_inode, bg_bh);
64562306a36Sopenharmony_ci	return status ? ERR_PTR(status) : bg_bh;
64662306a36Sopenharmony_ci}
64762306a36Sopenharmony_ci
64862306a36Sopenharmony_ci/*
64962306a36Sopenharmony_ci * We expect the block group allocator to already be locked.
65062306a36Sopenharmony_ci */
65162306a36Sopenharmony_cistatic int ocfs2_block_group_alloc(struct ocfs2_super *osb,
65262306a36Sopenharmony_ci				   struct inode *alloc_inode,
65362306a36Sopenharmony_ci				   struct buffer_head *bh,
65462306a36Sopenharmony_ci				   u64 max_block,
65562306a36Sopenharmony_ci				   u64 *last_alloc_group,
65662306a36Sopenharmony_ci				   int flags)
65762306a36Sopenharmony_ci{
65862306a36Sopenharmony_ci	int status, credits;
65962306a36Sopenharmony_ci	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
66062306a36Sopenharmony_ci	struct ocfs2_chain_list *cl;
66162306a36Sopenharmony_ci	struct ocfs2_alloc_context *ac = NULL;
66262306a36Sopenharmony_ci	handle_t *handle = NULL;
66362306a36Sopenharmony_ci	u16 alloc_rec;
66462306a36Sopenharmony_ci	struct buffer_head *bg_bh = NULL;
66562306a36Sopenharmony_ci	struct ocfs2_group_desc *bg;
66662306a36Sopenharmony_ci
66762306a36Sopenharmony_ci	BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode));
66862306a36Sopenharmony_ci
66962306a36Sopenharmony_ci	cl = &fe->id2.i_chain;
67062306a36Sopenharmony_ci	status = ocfs2_reserve_clusters_with_limit(osb,
67162306a36Sopenharmony_ci						   le16_to_cpu(cl->cl_cpg),
67262306a36Sopenharmony_ci						   max_block, flags, &ac);
67362306a36Sopenharmony_ci	if (status < 0) {
67462306a36Sopenharmony_ci		if (status != -ENOSPC)
67562306a36Sopenharmony_ci			mlog_errno(status);
67662306a36Sopenharmony_ci		goto bail;
67762306a36Sopenharmony_ci	}
67862306a36Sopenharmony_ci
67962306a36Sopenharmony_ci	credits = ocfs2_calc_group_alloc_credits(osb->sb,
68062306a36Sopenharmony_ci						 le16_to_cpu(cl->cl_cpg));
68162306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, credits);
68262306a36Sopenharmony_ci	if (IS_ERR(handle)) {
68362306a36Sopenharmony_ci		status = PTR_ERR(handle);
68462306a36Sopenharmony_ci		handle = NULL;
68562306a36Sopenharmony_ci		mlog_errno(status);
68662306a36Sopenharmony_ci		goto bail;
68762306a36Sopenharmony_ci	}
68862306a36Sopenharmony_ci
68962306a36Sopenharmony_ci	if (last_alloc_group && *last_alloc_group != 0) {
69062306a36Sopenharmony_ci		trace_ocfs2_block_group_alloc(
69162306a36Sopenharmony_ci				(unsigned long long)*last_alloc_group);
69262306a36Sopenharmony_ci		ac->ac_last_group = *last_alloc_group;
69362306a36Sopenharmony_ci	}
69462306a36Sopenharmony_ci
69562306a36Sopenharmony_ci	bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
69662306a36Sopenharmony_ci					       ac, cl);
69762306a36Sopenharmony_ci	if (PTR_ERR(bg_bh) == -ENOSPC)
69862306a36Sopenharmony_ci		bg_bh = ocfs2_block_group_alloc_discontig(handle,
69962306a36Sopenharmony_ci							  alloc_inode,
70062306a36Sopenharmony_ci							  ac, cl);
70162306a36Sopenharmony_ci	if (IS_ERR(bg_bh)) {
70262306a36Sopenharmony_ci		status = PTR_ERR(bg_bh);
70362306a36Sopenharmony_ci		bg_bh = NULL;
70462306a36Sopenharmony_ci		if (status != -ENOSPC)
70562306a36Sopenharmony_ci			mlog_errno(status);
70662306a36Sopenharmony_ci		goto bail;
70762306a36Sopenharmony_ci	}
70862306a36Sopenharmony_ci	bg = (struct ocfs2_group_desc *) bg_bh->b_data;
70962306a36Sopenharmony_ci
71062306a36Sopenharmony_ci	status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
71162306a36Sopenharmony_ci					 bh, OCFS2_JOURNAL_ACCESS_WRITE);
71262306a36Sopenharmony_ci	if (status < 0) {
71362306a36Sopenharmony_ci		mlog_errno(status);
71462306a36Sopenharmony_ci		goto bail;
71562306a36Sopenharmony_ci	}
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_ci	alloc_rec = le16_to_cpu(bg->bg_chain);
71862306a36Sopenharmony_ci	le32_add_cpu(&cl->cl_recs[alloc_rec].c_free,
71962306a36Sopenharmony_ci		     le16_to_cpu(bg->bg_free_bits_count));
72062306a36Sopenharmony_ci	le32_add_cpu(&cl->cl_recs[alloc_rec].c_total,
72162306a36Sopenharmony_ci		     le16_to_cpu(bg->bg_bits));
72262306a36Sopenharmony_ci	cl->cl_recs[alloc_rec].c_blkno = bg->bg_blkno;
72362306a36Sopenharmony_ci	if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
72462306a36Sopenharmony_ci		le16_add_cpu(&cl->cl_next_free_rec, 1);
72562306a36Sopenharmony_ci
72662306a36Sopenharmony_ci	le32_add_cpu(&fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) -
72762306a36Sopenharmony_ci					le16_to_cpu(bg->bg_free_bits_count));
72862306a36Sopenharmony_ci	le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits));
72962306a36Sopenharmony_ci	le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg));
73062306a36Sopenharmony_ci
73162306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, bh);
73262306a36Sopenharmony_ci
73362306a36Sopenharmony_ci	spin_lock(&OCFS2_I(alloc_inode)->ip_lock);
73462306a36Sopenharmony_ci	OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
73562306a36Sopenharmony_ci	fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb,
73662306a36Sopenharmony_ci					     le32_to_cpu(fe->i_clusters)));
73762306a36Sopenharmony_ci	spin_unlock(&OCFS2_I(alloc_inode)->ip_lock);
73862306a36Sopenharmony_ci	i_size_write(alloc_inode, le64_to_cpu(fe->i_size));
73962306a36Sopenharmony_ci	alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
74062306a36Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, alloc_inode, 0);
74162306a36Sopenharmony_ci
74262306a36Sopenharmony_ci	status = 0;
74362306a36Sopenharmony_ci
74462306a36Sopenharmony_ci	/* save the new last alloc group so that the caller can cache it. */
74562306a36Sopenharmony_ci	if (last_alloc_group)
74662306a36Sopenharmony_ci		*last_alloc_group = ac->ac_last_group;
74762306a36Sopenharmony_ci
74862306a36Sopenharmony_cibail:
74962306a36Sopenharmony_ci	if (handle)
75062306a36Sopenharmony_ci		ocfs2_commit_trans(osb, handle);
75162306a36Sopenharmony_ci
75262306a36Sopenharmony_ci	if (ac)
75362306a36Sopenharmony_ci		ocfs2_free_alloc_context(ac);
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_ci	brelse(bg_bh);
75662306a36Sopenharmony_ci
75762306a36Sopenharmony_ci	if (status)
75862306a36Sopenharmony_ci		mlog_errno(status);
75962306a36Sopenharmony_ci	return status;
76062306a36Sopenharmony_ci}
76162306a36Sopenharmony_ci
76262306a36Sopenharmony_cistatic int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
76362306a36Sopenharmony_ci				       struct ocfs2_alloc_context *ac,
76462306a36Sopenharmony_ci				       int type,
76562306a36Sopenharmony_ci				       u32 slot,
76662306a36Sopenharmony_ci				       u64 *last_alloc_group,
76762306a36Sopenharmony_ci				       int flags)
76862306a36Sopenharmony_ci{
76962306a36Sopenharmony_ci	int status;
77062306a36Sopenharmony_ci	u32 bits_wanted = ac->ac_bits_wanted;
77162306a36Sopenharmony_ci	struct inode *alloc_inode;
77262306a36Sopenharmony_ci	struct buffer_head *bh = NULL;
77362306a36Sopenharmony_ci	struct ocfs2_dinode *fe;
77462306a36Sopenharmony_ci	u32 free_bits;
77562306a36Sopenharmony_ci
77662306a36Sopenharmony_ci	alloc_inode = ocfs2_get_system_file_inode(osb, type, slot);
77762306a36Sopenharmony_ci	if (!alloc_inode) {
77862306a36Sopenharmony_ci		mlog_errno(-EINVAL);
77962306a36Sopenharmony_ci		return -EINVAL;
78062306a36Sopenharmony_ci	}
78162306a36Sopenharmony_ci
78262306a36Sopenharmony_ci	inode_lock(alloc_inode);
78362306a36Sopenharmony_ci
78462306a36Sopenharmony_ci	status = ocfs2_inode_lock(alloc_inode, &bh, 1);
78562306a36Sopenharmony_ci	if (status < 0) {
78662306a36Sopenharmony_ci		inode_unlock(alloc_inode);
78762306a36Sopenharmony_ci		iput(alloc_inode);
78862306a36Sopenharmony_ci
78962306a36Sopenharmony_ci		mlog_errno(status);
79062306a36Sopenharmony_ci		return status;
79162306a36Sopenharmony_ci	}
79262306a36Sopenharmony_ci
79362306a36Sopenharmony_ci	ac->ac_inode = alloc_inode;
79462306a36Sopenharmony_ci	ac->ac_alloc_slot = slot;
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci	fe = (struct ocfs2_dinode *) bh->b_data;
79762306a36Sopenharmony_ci
79862306a36Sopenharmony_ci	/* The bh was validated by the inode read inside
79962306a36Sopenharmony_ci	 * ocfs2_inode_lock().  Any corruption is a code bug. */
80062306a36Sopenharmony_ci	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
80162306a36Sopenharmony_ci
80262306a36Sopenharmony_ci	if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
80362306a36Sopenharmony_ci		status = ocfs2_error(alloc_inode->i_sb,
80462306a36Sopenharmony_ci				     "Invalid chain allocator %llu\n",
80562306a36Sopenharmony_ci				     (unsigned long long)le64_to_cpu(fe->i_blkno));
80662306a36Sopenharmony_ci		goto bail;
80762306a36Sopenharmony_ci	}
80862306a36Sopenharmony_ci
80962306a36Sopenharmony_ci	free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) -
81062306a36Sopenharmony_ci		le32_to_cpu(fe->id1.bitmap1.i_used);
81162306a36Sopenharmony_ci
81262306a36Sopenharmony_ci	if (bits_wanted > free_bits) {
81362306a36Sopenharmony_ci		/* cluster bitmap never grows */
81462306a36Sopenharmony_ci		if (ocfs2_is_cluster_bitmap(alloc_inode)) {
81562306a36Sopenharmony_ci			trace_ocfs2_reserve_suballoc_bits_nospc(bits_wanted,
81662306a36Sopenharmony_ci								free_bits);
81762306a36Sopenharmony_ci			status = -ENOSPC;
81862306a36Sopenharmony_ci			goto bail;
81962306a36Sopenharmony_ci		}
82062306a36Sopenharmony_ci
82162306a36Sopenharmony_ci		if (!(flags & ALLOC_NEW_GROUP)) {
82262306a36Sopenharmony_ci			trace_ocfs2_reserve_suballoc_bits_no_new_group(
82362306a36Sopenharmony_ci						slot, bits_wanted, free_bits);
82462306a36Sopenharmony_ci			status = -ENOSPC;
82562306a36Sopenharmony_ci			goto bail;
82662306a36Sopenharmony_ci		}
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_ci		status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
82962306a36Sopenharmony_ci						 ac->ac_max_block,
83062306a36Sopenharmony_ci						 last_alloc_group, flags);
83162306a36Sopenharmony_ci		if (status < 0) {
83262306a36Sopenharmony_ci			if (status != -ENOSPC)
83362306a36Sopenharmony_ci				mlog_errno(status);
83462306a36Sopenharmony_ci			goto bail;
83562306a36Sopenharmony_ci		}
83662306a36Sopenharmony_ci		atomic_inc(&osb->alloc_stats.bg_extends);
83762306a36Sopenharmony_ci
83862306a36Sopenharmony_ci		/* You should never ask for this much metadata */
83962306a36Sopenharmony_ci		BUG_ON(bits_wanted >
84062306a36Sopenharmony_ci		       (le32_to_cpu(fe->id1.bitmap1.i_total)
84162306a36Sopenharmony_ci			- le32_to_cpu(fe->id1.bitmap1.i_used)));
84262306a36Sopenharmony_ci	}
84362306a36Sopenharmony_ci
84462306a36Sopenharmony_ci	get_bh(bh);
84562306a36Sopenharmony_ci	ac->ac_bh = bh;
84662306a36Sopenharmony_cibail:
84762306a36Sopenharmony_ci	brelse(bh);
84862306a36Sopenharmony_ci
84962306a36Sopenharmony_ci	if (status)
85062306a36Sopenharmony_ci		mlog_errno(status);
85162306a36Sopenharmony_ci	return status;
85262306a36Sopenharmony_ci}
85362306a36Sopenharmony_ci
85462306a36Sopenharmony_cistatic void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb)
85562306a36Sopenharmony_ci{
85662306a36Sopenharmony_ci	spin_lock(&osb->osb_lock);
85762306a36Sopenharmony_ci	osb->s_inode_steal_slot = OCFS2_INVALID_SLOT;
85862306a36Sopenharmony_ci	spin_unlock(&osb->osb_lock);
85962306a36Sopenharmony_ci	atomic_set(&osb->s_num_inodes_stolen, 0);
86062306a36Sopenharmony_ci}
86162306a36Sopenharmony_ci
86262306a36Sopenharmony_cistatic void ocfs2_init_meta_steal_slot(struct ocfs2_super *osb)
86362306a36Sopenharmony_ci{
86462306a36Sopenharmony_ci	spin_lock(&osb->osb_lock);
86562306a36Sopenharmony_ci	osb->s_meta_steal_slot = OCFS2_INVALID_SLOT;
86662306a36Sopenharmony_ci	spin_unlock(&osb->osb_lock);
86762306a36Sopenharmony_ci	atomic_set(&osb->s_num_meta_stolen, 0);
86862306a36Sopenharmony_ci}
86962306a36Sopenharmony_ci
/* Reset the stealing state for both inodes and metadata. */
void ocfs2_init_steal_slots(struct ocfs2_super *osb)
{
	/* Inode state first, then metadata. */
	ocfs2_init_inode_steal_slot(osb);
	ocfs2_init_meta_steal_slot(osb);
}
87562306a36Sopenharmony_ci
87662306a36Sopenharmony_cistatic void __ocfs2_set_steal_slot(struct ocfs2_super *osb, int slot, int type)
87762306a36Sopenharmony_ci{
87862306a36Sopenharmony_ci	spin_lock(&osb->osb_lock);
87962306a36Sopenharmony_ci	if (type == INODE_ALLOC_SYSTEM_INODE)
88062306a36Sopenharmony_ci		osb->s_inode_steal_slot = (u16)slot;
88162306a36Sopenharmony_ci	else if (type == EXTENT_ALLOC_SYSTEM_INODE)
88262306a36Sopenharmony_ci		osb->s_meta_steal_slot = (u16)slot;
88362306a36Sopenharmony_ci	spin_unlock(&osb->osb_lock);
88462306a36Sopenharmony_ci}
88562306a36Sopenharmony_ci
88662306a36Sopenharmony_cistatic int __ocfs2_get_steal_slot(struct ocfs2_super *osb, int type)
88762306a36Sopenharmony_ci{
88862306a36Sopenharmony_ci	int slot = OCFS2_INVALID_SLOT;
88962306a36Sopenharmony_ci
89062306a36Sopenharmony_ci	spin_lock(&osb->osb_lock);
89162306a36Sopenharmony_ci	if (type == INODE_ALLOC_SYSTEM_INODE)
89262306a36Sopenharmony_ci		slot = osb->s_inode_steal_slot;
89362306a36Sopenharmony_ci	else if (type == EXTENT_ALLOC_SYSTEM_INODE)
89462306a36Sopenharmony_ci		slot = osb->s_meta_steal_slot;
89562306a36Sopenharmony_ci	spin_unlock(&osb->osb_lock);
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_ci	return slot;
89862306a36Sopenharmony_ci}
89962306a36Sopenharmony_ci
90062306a36Sopenharmony_cistatic int ocfs2_get_inode_steal_slot(struct ocfs2_super *osb)
90162306a36Sopenharmony_ci{
90262306a36Sopenharmony_ci	return __ocfs2_get_steal_slot(osb, INODE_ALLOC_SYSTEM_INODE);
90362306a36Sopenharmony_ci}
90462306a36Sopenharmony_ci
90562306a36Sopenharmony_cistatic int ocfs2_get_meta_steal_slot(struct ocfs2_super *osb)
90662306a36Sopenharmony_ci{
90762306a36Sopenharmony_ci	return __ocfs2_get_steal_slot(osb, EXTENT_ALLOC_SYSTEM_INODE);
90862306a36Sopenharmony_ci}
90962306a36Sopenharmony_ci
91062306a36Sopenharmony_cistatic int ocfs2_steal_resource(struct ocfs2_super *osb,
91162306a36Sopenharmony_ci				struct ocfs2_alloc_context *ac,
91262306a36Sopenharmony_ci				int type)
91362306a36Sopenharmony_ci{
91462306a36Sopenharmony_ci	int i, status = -ENOSPC;
91562306a36Sopenharmony_ci	int slot = __ocfs2_get_steal_slot(osb, type);
91662306a36Sopenharmony_ci
91762306a36Sopenharmony_ci	/* Start to steal resource from the first slot after ours. */
91862306a36Sopenharmony_ci	if (slot == OCFS2_INVALID_SLOT)
91962306a36Sopenharmony_ci		slot = osb->slot_num + 1;
92062306a36Sopenharmony_ci
92162306a36Sopenharmony_ci	for (i = 0; i < osb->max_slots; i++, slot++) {
92262306a36Sopenharmony_ci		if (slot == osb->max_slots)
92362306a36Sopenharmony_ci			slot = 0;
92462306a36Sopenharmony_ci
92562306a36Sopenharmony_ci		if (slot == osb->slot_num)
92662306a36Sopenharmony_ci			continue;
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_ci		status = ocfs2_reserve_suballoc_bits(osb, ac,
92962306a36Sopenharmony_ci						     type,
93062306a36Sopenharmony_ci						     (u32)slot, NULL,
93162306a36Sopenharmony_ci						     NOT_ALLOC_NEW_GROUP);
93262306a36Sopenharmony_ci		if (status >= 0) {
93362306a36Sopenharmony_ci			__ocfs2_set_steal_slot(osb, slot, type);
93462306a36Sopenharmony_ci			break;
93562306a36Sopenharmony_ci		}
93662306a36Sopenharmony_ci
93762306a36Sopenharmony_ci		ocfs2_free_ac_resource(ac);
93862306a36Sopenharmony_ci	}
93962306a36Sopenharmony_ci
94062306a36Sopenharmony_ci	return status;
94162306a36Sopenharmony_ci}
94262306a36Sopenharmony_ci
94362306a36Sopenharmony_cistatic int ocfs2_steal_inode(struct ocfs2_super *osb,
94462306a36Sopenharmony_ci			     struct ocfs2_alloc_context *ac)
94562306a36Sopenharmony_ci{
94662306a36Sopenharmony_ci	return ocfs2_steal_resource(osb, ac, INODE_ALLOC_SYSTEM_INODE);
94762306a36Sopenharmony_ci}
94862306a36Sopenharmony_ci
94962306a36Sopenharmony_cistatic int ocfs2_steal_meta(struct ocfs2_super *osb,
95062306a36Sopenharmony_ci			    struct ocfs2_alloc_context *ac)
95162306a36Sopenharmony_ci{
95262306a36Sopenharmony_ci	return ocfs2_steal_resource(osb, ac, EXTENT_ALLOC_SYSTEM_INODE);
95362306a36Sopenharmony_ci}
95462306a36Sopenharmony_ci
95562306a36Sopenharmony_ciint ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
95662306a36Sopenharmony_ci				      int blocks,
95762306a36Sopenharmony_ci				      struct ocfs2_alloc_context **ac)
95862306a36Sopenharmony_ci{
95962306a36Sopenharmony_ci	int status;
96062306a36Sopenharmony_ci	int slot = ocfs2_get_meta_steal_slot(osb);
96162306a36Sopenharmony_ci
96262306a36Sopenharmony_ci	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
96362306a36Sopenharmony_ci	if (!(*ac)) {
96462306a36Sopenharmony_ci		status = -ENOMEM;
96562306a36Sopenharmony_ci		mlog_errno(status);
96662306a36Sopenharmony_ci		goto bail;
96762306a36Sopenharmony_ci	}
96862306a36Sopenharmony_ci
96962306a36Sopenharmony_ci	(*ac)->ac_bits_wanted = blocks;
97062306a36Sopenharmony_ci	(*ac)->ac_which = OCFS2_AC_USE_META;
97162306a36Sopenharmony_ci	(*ac)->ac_group_search = ocfs2_block_group_search;
97262306a36Sopenharmony_ci
97362306a36Sopenharmony_ci	if (slot != OCFS2_INVALID_SLOT &&
97462306a36Sopenharmony_ci		atomic_read(&osb->s_num_meta_stolen) < OCFS2_MAX_TO_STEAL)
97562306a36Sopenharmony_ci		goto extent_steal;
97662306a36Sopenharmony_ci
97762306a36Sopenharmony_ci	atomic_set(&osb->s_num_meta_stolen, 0);
97862306a36Sopenharmony_ci	status = ocfs2_reserve_suballoc_bits(osb, (*ac),
97962306a36Sopenharmony_ci					     EXTENT_ALLOC_SYSTEM_INODE,
98062306a36Sopenharmony_ci					     (u32)osb->slot_num, NULL,
98162306a36Sopenharmony_ci					     ALLOC_GROUPS_FROM_GLOBAL|ALLOC_NEW_GROUP);
98262306a36Sopenharmony_ci
98362306a36Sopenharmony_ci
98462306a36Sopenharmony_ci	if (status >= 0) {
98562306a36Sopenharmony_ci		status = 0;
98662306a36Sopenharmony_ci		if (slot != OCFS2_INVALID_SLOT)
98762306a36Sopenharmony_ci			ocfs2_init_meta_steal_slot(osb);
98862306a36Sopenharmony_ci		goto bail;
98962306a36Sopenharmony_ci	} else if (status < 0 && status != -ENOSPC) {
99062306a36Sopenharmony_ci		mlog_errno(status);
99162306a36Sopenharmony_ci		goto bail;
99262306a36Sopenharmony_ci	}
99362306a36Sopenharmony_ci
99462306a36Sopenharmony_ci	ocfs2_free_ac_resource(*ac);
99562306a36Sopenharmony_ci
99662306a36Sopenharmony_ciextent_steal:
99762306a36Sopenharmony_ci	status = ocfs2_steal_meta(osb, *ac);
99862306a36Sopenharmony_ci	atomic_inc(&osb->s_num_meta_stolen);
99962306a36Sopenharmony_ci	if (status < 0) {
100062306a36Sopenharmony_ci		if (status != -ENOSPC)
100162306a36Sopenharmony_ci			mlog_errno(status);
100262306a36Sopenharmony_ci		goto bail;
100362306a36Sopenharmony_ci	}
100462306a36Sopenharmony_ci
100562306a36Sopenharmony_ci	status = 0;
100662306a36Sopenharmony_cibail:
100762306a36Sopenharmony_ci	if ((status < 0) && *ac) {
100862306a36Sopenharmony_ci		ocfs2_free_alloc_context(*ac);
100962306a36Sopenharmony_ci		*ac = NULL;
101062306a36Sopenharmony_ci	}
101162306a36Sopenharmony_ci
101262306a36Sopenharmony_ci	if (status)
101362306a36Sopenharmony_ci		mlog_errno(status);
101462306a36Sopenharmony_ci	return status;
101562306a36Sopenharmony_ci}
101662306a36Sopenharmony_ci
/*
 * Reserve as many metadata blocks as ocfs2_extend_meta_needed() reports
 * for @root_el.  Return value and *ac are those of
 * ocfs2_reserve_new_metadata_blocks().
 */
int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
			       struct ocfs2_extent_list *root_el,
			       struct ocfs2_alloc_context **ac)
{
	int blocks = ocfs2_extend_meta_needed(root_el);

	return ocfs2_reserve_new_metadata_blocks(osb, blocks, ac);
}
102562306a36Sopenharmony_ci
102662306a36Sopenharmony_ciint ocfs2_reserve_new_inode(struct ocfs2_super *osb,
102762306a36Sopenharmony_ci			    struct ocfs2_alloc_context **ac)
102862306a36Sopenharmony_ci{
102962306a36Sopenharmony_ci	int status;
103062306a36Sopenharmony_ci	int slot = ocfs2_get_inode_steal_slot(osb);
103162306a36Sopenharmony_ci	u64 alloc_group;
103262306a36Sopenharmony_ci
103362306a36Sopenharmony_ci	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
103462306a36Sopenharmony_ci	if (!(*ac)) {
103562306a36Sopenharmony_ci		status = -ENOMEM;
103662306a36Sopenharmony_ci		mlog_errno(status);
103762306a36Sopenharmony_ci		goto bail;
103862306a36Sopenharmony_ci	}
103962306a36Sopenharmony_ci
104062306a36Sopenharmony_ci	(*ac)->ac_bits_wanted = 1;
104162306a36Sopenharmony_ci	(*ac)->ac_which = OCFS2_AC_USE_INODE;
104262306a36Sopenharmony_ci
104362306a36Sopenharmony_ci	(*ac)->ac_group_search = ocfs2_block_group_search;
104462306a36Sopenharmony_ci
104562306a36Sopenharmony_ci	/*
104662306a36Sopenharmony_ci	 * stat(2) can't handle i_ino > 32bits, so we tell the
104762306a36Sopenharmony_ci	 * lower levels not to allocate us a block group past that
104862306a36Sopenharmony_ci	 * limit.  The 'inode64' mount option avoids this behavior.
104962306a36Sopenharmony_ci	 */
105062306a36Sopenharmony_ci	if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64))
105162306a36Sopenharmony_ci		(*ac)->ac_max_block = (u32)~0U;
105262306a36Sopenharmony_ci
105362306a36Sopenharmony_ci	/*
105462306a36Sopenharmony_ci	 * slot is set when we successfully steal inode from other nodes.
105562306a36Sopenharmony_ci	 * It is reset in 3 places:
105662306a36Sopenharmony_ci	 * 1. when we flush the truncate log
105762306a36Sopenharmony_ci	 * 2. when we complete local alloc recovery.
105862306a36Sopenharmony_ci	 * 3. when we successfully allocate from our own slot.
105962306a36Sopenharmony_ci	 * After it is set, we will go on stealing inodes until we find the
106062306a36Sopenharmony_ci	 * need to check our slots to see whether there is some space for us.
106162306a36Sopenharmony_ci	 */
106262306a36Sopenharmony_ci	if (slot != OCFS2_INVALID_SLOT &&
106362306a36Sopenharmony_ci	    atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_TO_STEAL)
106462306a36Sopenharmony_ci		goto inode_steal;
106562306a36Sopenharmony_ci
106662306a36Sopenharmony_ci	atomic_set(&osb->s_num_inodes_stolen, 0);
106762306a36Sopenharmony_ci	alloc_group = osb->osb_inode_alloc_group;
106862306a36Sopenharmony_ci	status = ocfs2_reserve_suballoc_bits(osb, *ac,
106962306a36Sopenharmony_ci					     INODE_ALLOC_SYSTEM_INODE,
107062306a36Sopenharmony_ci					     (u32)osb->slot_num,
107162306a36Sopenharmony_ci					     &alloc_group,
107262306a36Sopenharmony_ci					     ALLOC_NEW_GROUP |
107362306a36Sopenharmony_ci					     ALLOC_GROUPS_FROM_GLOBAL);
107462306a36Sopenharmony_ci	if (status >= 0) {
107562306a36Sopenharmony_ci		status = 0;
107662306a36Sopenharmony_ci
107762306a36Sopenharmony_ci		spin_lock(&osb->osb_lock);
107862306a36Sopenharmony_ci		osb->osb_inode_alloc_group = alloc_group;
107962306a36Sopenharmony_ci		spin_unlock(&osb->osb_lock);
108062306a36Sopenharmony_ci		trace_ocfs2_reserve_new_inode_new_group(
108162306a36Sopenharmony_ci			(unsigned long long)alloc_group);
108262306a36Sopenharmony_ci
108362306a36Sopenharmony_ci		/*
108462306a36Sopenharmony_ci		 * Some inodes must be freed by us, so try to allocate
108562306a36Sopenharmony_ci		 * from our own next time.
108662306a36Sopenharmony_ci		 */
108762306a36Sopenharmony_ci		if (slot != OCFS2_INVALID_SLOT)
108862306a36Sopenharmony_ci			ocfs2_init_inode_steal_slot(osb);
108962306a36Sopenharmony_ci		goto bail;
109062306a36Sopenharmony_ci	} else if (status < 0 && status != -ENOSPC) {
109162306a36Sopenharmony_ci		mlog_errno(status);
109262306a36Sopenharmony_ci		goto bail;
109362306a36Sopenharmony_ci	}
109462306a36Sopenharmony_ci
109562306a36Sopenharmony_ci	ocfs2_free_ac_resource(*ac);
109662306a36Sopenharmony_ci
109762306a36Sopenharmony_ciinode_steal:
109862306a36Sopenharmony_ci	status = ocfs2_steal_inode(osb, *ac);
109962306a36Sopenharmony_ci	atomic_inc(&osb->s_num_inodes_stolen);
110062306a36Sopenharmony_ci	if (status < 0) {
110162306a36Sopenharmony_ci		if (status != -ENOSPC)
110262306a36Sopenharmony_ci			mlog_errno(status);
110362306a36Sopenharmony_ci		goto bail;
110462306a36Sopenharmony_ci	}
110562306a36Sopenharmony_ci
110662306a36Sopenharmony_ci	status = 0;
110762306a36Sopenharmony_cibail:
110862306a36Sopenharmony_ci	if ((status < 0) && *ac) {
110962306a36Sopenharmony_ci		ocfs2_free_alloc_context(*ac);
111062306a36Sopenharmony_ci		*ac = NULL;
111162306a36Sopenharmony_ci	}
111262306a36Sopenharmony_ci
111362306a36Sopenharmony_ci	if (status)
111462306a36Sopenharmony_ci		mlog_errno(status);
111562306a36Sopenharmony_ci	return status;
111662306a36Sopenharmony_ci}
111762306a36Sopenharmony_ci
111862306a36Sopenharmony_ci/* local alloc code has to do the same thing, so rather than do this
111962306a36Sopenharmony_ci * twice.. */
112062306a36Sopenharmony_ciint ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
112162306a36Sopenharmony_ci				      struct ocfs2_alloc_context *ac)
112262306a36Sopenharmony_ci{
112362306a36Sopenharmony_ci	int status;
112462306a36Sopenharmony_ci
112562306a36Sopenharmony_ci	ac->ac_which = OCFS2_AC_USE_MAIN;
112662306a36Sopenharmony_ci	ac->ac_group_search = ocfs2_cluster_group_search;
112762306a36Sopenharmony_ci
112862306a36Sopenharmony_ci	status = ocfs2_reserve_suballoc_bits(osb, ac,
112962306a36Sopenharmony_ci					     GLOBAL_BITMAP_SYSTEM_INODE,
113062306a36Sopenharmony_ci					     OCFS2_INVALID_SLOT, NULL,
113162306a36Sopenharmony_ci					     ALLOC_NEW_GROUP);
113262306a36Sopenharmony_ci	if (status < 0 && status != -ENOSPC)
113362306a36Sopenharmony_ci		mlog_errno(status);
113462306a36Sopenharmony_ci
113562306a36Sopenharmony_ci	return status;
113662306a36Sopenharmony_ci}
113762306a36Sopenharmony_ci
113862306a36Sopenharmony_ci/* Callers don't need to care which bitmap (local alloc or main) to
113962306a36Sopenharmony_ci * use so we figure it out for them, but unfortunately this clutters
114062306a36Sopenharmony_ci * things a bit. */
114162306a36Sopenharmony_cistatic int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
114262306a36Sopenharmony_ci					     u32 bits_wanted, u64 max_block,
114362306a36Sopenharmony_ci					     int flags,
114462306a36Sopenharmony_ci					     struct ocfs2_alloc_context **ac)
114562306a36Sopenharmony_ci{
114662306a36Sopenharmony_ci	int status, ret = 0;
114762306a36Sopenharmony_ci	int retried = 0;
114862306a36Sopenharmony_ci
114962306a36Sopenharmony_ci	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
115062306a36Sopenharmony_ci	if (!(*ac)) {
115162306a36Sopenharmony_ci		status = -ENOMEM;
115262306a36Sopenharmony_ci		mlog_errno(status);
115362306a36Sopenharmony_ci		goto bail;
115462306a36Sopenharmony_ci	}
115562306a36Sopenharmony_ci
115662306a36Sopenharmony_ci	(*ac)->ac_bits_wanted = bits_wanted;
115762306a36Sopenharmony_ci	(*ac)->ac_max_block = max_block;
115862306a36Sopenharmony_ci
115962306a36Sopenharmony_ci	status = -ENOSPC;
116062306a36Sopenharmony_ci	if (!(flags & ALLOC_GROUPS_FROM_GLOBAL) &&
116162306a36Sopenharmony_ci	    ocfs2_alloc_should_use_local(osb, bits_wanted)) {
116262306a36Sopenharmony_ci		status = ocfs2_reserve_local_alloc_bits(osb,
116362306a36Sopenharmony_ci							bits_wanted,
116462306a36Sopenharmony_ci							*ac);
116562306a36Sopenharmony_ci		if ((status < 0) && (status != -ENOSPC)) {
116662306a36Sopenharmony_ci			mlog_errno(status);
116762306a36Sopenharmony_ci			goto bail;
116862306a36Sopenharmony_ci		}
116962306a36Sopenharmony_ci	}
117062306a36Sopenharmony_ci
117162306a36Sopenharmony_ci	if (status == -ENOSPC) {
117262306a36Sopenharmony_ciretry:
117362306a36Sopenharmony_ci		status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
117462306a36Sopenharmony_ci		/* Retry if there is sufficient space cached in truncate log */
117562306a36Sopenharmony_ci		if (status == -ENOSPC && !retried) {
117662306a36Sopenharmony_ci			retried = 1;
117762306a36Sopenharmony_ci			ocfs2_inode_unlock((*ac)->ac_inode, 1);
117862306a36Sopenharmony_ci			inode_unlock((*ac)->ac_inode);
117962306a36Sopenharmony_ci
118062306a36Sopenharmony_ci			ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted);
118162306a36Sopenharmony_ci			if (ret == 1) {
118262306a36Sopenharmony_ci				iput((*ac)->ac_inode);
118362306a36Sopenharmony_ci				(*ac)->ac_inode = NULL;
118462306a36Sopenharmony_ci				goto retry;
118562306a36Sopenharmony_ci			}
118662306a36Sopenharmony_ci
118762306a36Sopenharmony_ci			if (ret < 0)
118862306a36Sopenharmony_ci				mlog_errno(ret);
118962306a36Sopenharmony_ci
119062306a36Sopenharmony_ci			inode_lock((*ac)->ac_inode);
119162306a36Sopenharmony_ci			ret = ocfs2_inode_lock((*ac)->ac_inode, NULL, 1);
119262306a36Sopenharmony_ci			if (ret < 0) {
119362306a36Sopenharmony_ci				mlog_errno(ret);
119462306a36Sopenharmony_ci				inode_unlock((*ac)->ac_inode);
119562306a36Sopenharmony_ci				iput((*ac)->ac_inode);
119662306a36Sopenharmony_ci				(*ac)->ac_inode = NULL;
119762306a36Sopenharmony_ci				goto bail;
119862306a36Sopenharmony_ci			}
119962306a36Sopenharmony_ci		}
120062306a36Sopenharmony_ci		if (status < 0) {
120162306a36Sopenharmony_ci			if (status != -ENOSPC)
120262306a36Sopenharmony_ci				mlog_errno(status);
120362306a36Sopenharmony_ci			goto bail;
120462306a36Sopenharmony_ci		}
120562306a36Sopenharmony_ci	}
120662306a36Sopenharmony_ci
120762306a36Sopenharmony_ci	status = 0;
120862306a36Sopenharmony_cibail:
120962306a36Sopenharmony_ci	if ((status < 0) && *ac) {
121062306a36Sopenharmony_ci		ocfs2_free_alloc_context(*ac);
121162306a36Sopenharmony_ci		*ac = NULL;
121262306a36Sopenharmony_ci	}
121362306a36Sopenharmony_ci
121462306a36Sopenharmony_ci	if (status)
121562306a36Sopenharmony_ci		mlog_errno(status);
121662306a36Sopenharmony_ci	return status;
121762306a36Sopenharmony_ci}
121862306a36Sopenharmony_ci
121962306a36Sopenharmony_ciint ocfs2_reserve_clusters(struct ocfs2_super *osb,
122062306a36Sopenharmony_ci			   u32 bits_wanted,
122162306a36Sopenharmony_ci			   struct ocfs2_alloc_context **ac)
122262306a36Sopenharmony_ci{
122362306a36Sopenharmony_ci	return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0,
122462306a36Sopenharmony_ci						 ALLOC_NEW_GROUP, ac);
122562306a36Sopenharmony_ci}
122662306a36Sopenharmony_ci
122762306a36Sopenharmony_ci/*
122862306a36Sopenharmony_ci * More or less lifted from ext3. I'll leave their description below:
122962306a36Sopenharmony_ci *
123062306a36Sopenharmony_ci * "For ext3 allocations, we must not reuse any blocks which are
123162306a36Sopenharmony_ci * allocated in the bitmap buffer's "last committed data" copy.  This
123262306a36Sopenharmony_ci * prevents deletes from freeing up the page for reuse until we have
123362306a36Sopenharmony_ci * committed the delete transaction.
123462306a36Sopenharmony_ci *
123562306a36Sopenharmony_ci * If we didn't do this, then deleting something and reallocating it as
123662306a36Sopenharmony_ci * data would allow the old block to be overwritten before the
123762306a36Sopenharmony_ci * transaction committed (because we force data to disk before commit).
123862306a36Sopenharmony_ci * This would lead to corruption if we crashed between overwriting the
123962306a36Sopenharmony_ci * data and committing the delete.
124062306a36Sopenharmony_ci *
124162306a36Sopenharmony_ci * @@@ We may want to make this allocation behaviour conditional on
124262306a36Sopenharmony_ci * data-writes at some point, and disable it for metadata allocations or
124362306a36Sopenharmony_ci * sync-data inodes."
124462306a36Sopenharmony_ci *
124562306a36Sopenharmony_ci * Note: OCFS2 already does this differently for metadata vs data
124662306a36Sopenharmony_ci * allocations, as those bitmaps are separate and undo access is never
124762306a36Sopenharmony_ci * called on a metadata group descriptor.
124862306a36Sopenharmony_ci */
124962306a36Sopenharmony_cistatic int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
125062306a36Sopenharmony_ci					 int nr)
125162306a36Sopenharmony_ci{
125262306a36Sopenharmony_ci	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
125362306a36Sopenharmony_ci	struct journal_head *jh;
125462306a36Sopenharmony_ci	int ret;
125562306a36Sopenharmony_ci
125662306a36Sopenharmony_ci	if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
125762306a36Sopenharmony_ci		return 0;
125862306a36Sopenharmony_ci
125962306a36Sopenharmony_ci	jh = jbd2_journal_grab_journal_head(bg_bh);
126062306a36Sopenharmony_ci	if (!jh)
126162306a36Sopenharmony_ci		return 1;
126262306a36Sopenharmony_ci
126362306a36Sopenharmony_ci	spin_lock(&jh->b_state_lock);
126462306a36Sopenharmony_ci	bg = (struct ocfs2_group_desc *) jh->b_committed_data;
126562306a36Sopenharmony_ci	if (bg)
126662306a36Sopenharmony_ci		ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
126762306a36Sopenharmony_ci	else
126862306a36Sopenharmony_ci		ret = 1;
126962306a36Sopenharmony_ci	spin_unlock(&jh->b_state_lock);
127062306a36Sopenharmony_ci	jbd2_journal_put_journal_head(jh);
127162306a36Sopenharmony_ci
127262306a36Sopenharmony_ci	return ret;
127362306a36Sopenharmony_ci}
127462306a36Sopenharmony_ci
127562306a36Sopenharmony_cistatic int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
127662306a36Sopenharmony_ci					     struct buffer_head *bg_bh,
127762306a36Sopenharmony_ci					     unsigned int bits_wanted,
127862306a36Sopenharmony_ci					     unsigned int total_bits,
127962306a36Sopenharmony_ci					     struct ocfs2_suballoc_result *res)
128062306a36Sopenharmony_ci{
128162306a36Sopenharmony_ci	void *bitmap;
128262306a36Sopenharmony_ci	u16 best_offset, best_size;
128362306a36Sopenharmony_ci	int offset, start, found, status = 0;
128462306a36Sopenharmony_ci	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
128562306a36Sopenharmony_ci
128662306a36Sopenharmony_ci	/* Callers got this descriptor from
128762306a36Sopenharmony_ci	 * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
128862306a36Sopenharmony_ci	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
128962306a36Sopenharmony_ci
129062306a36Sopenharmony_ci	found = start = best_offset = best_size = 0;
129162306a36Sopenharmony_ci	bitmap = bg->bg_bitmap;
129262306a36Sopenharmony_ci
129362306a36Sopenharmony_ci	while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
129462306a36Sopenharmony_ci		if (offset == total_bits)
129562306a36Sopenharmony_ci			break;
129662306a36Sopenharmony_ci
129762306a36Sopenharmony_ci		if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
129862306a36Sopenharmony_ci			/* We found a zero, but we can't use it as it
129962306a36Sopenharmony_ci			 * hasn't been put to disk yet! */
130062306a36Sopenharmony_ci			found = 0;
130162306a36Sopenharmony_ci			start = offset + 1;
130262306a36Sopenharmony_ci		} else if (offset == start) {
130362306a36Sopenharmony_ci			/* we found a zero */
130462306a36Sopenharmony_ci			found++;
130562306a36Sopenharmony_ci			/* move start to the next bit to test */
130662306a36Sopenharmony_ci			start++;
130762306a36Sopenharmony_ci		} else {
130862306a36Sopenharmony_ci			/* got a zero after some ones */
130962306a36Sopenharmony_ci			found = 1;
131062306a36Sopenharmony_ci			start = offset + 1;
131162306a36Sopenharmony_ci		}
131262306a36Sopenharmony_ci		if (found > best_size) {
131362306a36Sopenharmony_ci			best_size = found;
131462306a36Sopenharmony_ci			best_offset = start - found;
131562306a36Sopenharmony_ci		}
131662306a36Sopenharmony_ci		/* we got everything we needed */
131762306a36Sopenharmony_ci		if (found == bits_wanted) {
131862306a36Sopenharmony_ci			/* mlog(0, "Found it all!\n"); */
131962306a36Sopenharmony_ci			break;
132062306a36Sopenharmony_ci		}
132162306a36Sopenharmony_ci	}
132262306a36Sopenharmony_ci
132362306a36Sopenharmony_ci	if (best_size) {
132462306a36Sopenharmony_ci		res->sr_bit_offset = best_offset;
132562306a36Sopenharmony_ci		res->sr_bits = best_size;
132662306a36Sopenharmony_ci	} else {
132762306a36Sopenharmony_ci		status = -ENOSPC;
132862306a36Sopenharmony_ci		/* No error log here -- see the comment above
132962306a36Sopenharmony_ci		 * ocfs2_test_bg_bit_allocatable */
133062306a36Sopenharmony_ci	}
133162306a36Sopenharmony_ci
133262306a36Sopenharmony_ci	return status;
133362306a36Sopenharmony_ci}
133462306a36Sopenharmony_ci
133562306a36Sopenharmony_ciint ocfs2_block_group_set_bits(handle_t *handle,
133662306a36Sopenharmony_ci					     struct inode *alloc_inode,
133762306a36Sopenharmony_ci					     struct ocfs2_group_desc *bg,
133862306a36Sopenharmony_ci					     struct buffer_head *group_bh,
133962306a36Sopenharmony_ci					     unsigned int bit_off,
134062306a36Sopenharmony_ci					     unsigned int num_bits)
134162306a36Sopenharmony_ci{
134262306a36Sopenharmony_ci	int status;
134362306a36Sopenharmony_ci	void *bitmap = bg->bg_bitmap;
134462306a36Sopenharmony_ci	int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
134562306a36Sopenharmony_ci
134662306a36Sopenharmony_ci	/* All callers get the descriptor via
134762306a36Sopenharmony_ci	 * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
134862306a36Sopenharmony_ci	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
134962306a36Sopenharmony_ci	BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
135062306a36Sopenharmony_ci
135162306a36Sopenharmony_ci	trace_ocfs2_block_group_set_bits(bit_off, num_bits);
135262306a36Sopenharmony_ci
135362306a36Sopenharmony_ci	if (ocfs2_is_cluster_bitmap(alloc_inode))
135462306a36Sopenharmony_ci		journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
135562306a36Sopenharmony_ci
135662306a36Sopenharmony_ci	status = ocfs2_journal_access_gd(handle,
135762306a36Sopenharmony_ci					 INODE_CACHE(alloc_inode),
135862306a36Sopenharmony_ci					 group_bh,
135962306a36Sopenharmony_ci					 journal_type);
136062306a36Sopenharmony_ci	if (status < 0) {
136162306a36Sopenharmony_ci		mlog_errno(status);
136262306a36Sopenharmony_ci		goto bail;
136362306a36Sopenharmony_ci	}
136462306a36Sopenharmony_ci
136562306a36Sopenharmony_ci	le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
136662306a36Sopenharmony_ci	if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
136762306a36Sopenharmony_ci		return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n",
136862306a36Sopenharmony_ci				   (unsigned long long)le64_to_cpu(bg->bg_blkno),
136962306a36Sopenharmony_ci				   le16_to_cpu(bg->bg_bits),
137062306a36Sopenharmony_ci				   le16_to_cpu(bg->bg_free_bits_count),
137162306a36Sopenharmony_ci				   num_bits);
137262306a36Sopenharmony_ci	}
137362306a36Sopenharmony_ci	while(num_bits--)
137462306a36Sopenharmony_ci		ocfs2_set_bit(bit_off++, bitmap);
137562306a36Sopenharmony_ci
137662306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, group_bh);
137762306a36Sopenharmony_ci
137862306a36Sopenharmony_cibail:
137962306a36Sopenharmony_ci	return status;
138062306a36Sopenharmony_ci}
138162306a36Sopenharmony_ci
138262306a36Sopenharmony_ci/* find the one with the most empty bits */
138362306a36Sopenharmony_cistatic inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl)
138462306a36Sopenharmony_ci{
138562306a36Sopenharmony_ci	u16 curr, best;
138662306a36Sopenharmony_ci
138762306a36Sopenharmony_ci	BUG_ON(!cl->cl_next_free_rec);
138862306a36Sopenharmony_ci
138962306a36Sopenharmony_ci	best = curr = 0;
139062306a36Sopenharmony_ci	while (curr < le16_to_cpu(cl->cl_next_free_rec)) {
139162306a36Sopenharmony_ci		if (le32_to_cpu(cl->cl_recs[curr].c_free) >
139262306a36Sopenharmony_ci		    le32_to_cpu(cl->cl_recs[best].c_free))
139362306a36Sopenharmony_ci			best = curr;
139462306a36Sopenharmony_ci		curr++;
139562306a36Sopenharmony_ci	}
139662306a36Sopenharmony_ci
139762306a36Sopenharmony_ci	BUG_ON(best >= le16_to_cpu(cl->cl_next_free_rec));
139862306a36Sopenharmony_ci	return best;
139962306a36Sopenharmony_ci}
140062306a36Sopenharmony_ci
140162306a36Sopenharmony_cistatic int ocfs2_relink_block_group(handle_t *handle,
140262306a36Sopenharmony_ci				    struct inode *alloc_inode,
140362306a36Sopenharmony_ci				    struct buffer_head *fe_bh,
140462306a36Sopenharmony_ci				    struct buffer_head *bg_bh,
140562306a36Sopenharmony_ci				    struct buffer_head *prev_bg_bh,
140662306a36Sopenharmony_ci				    u16 chain)
140762306a36Sopenharmony_ci{
140862306a36Sopenharmony_ci	int status;
140962306a36Sopenharmony_ci	/* there is a really tiny chance the journal calls could fail,
141062306a36Sopenharmony_ci	 * but we wouldn't want inconsistent blocks in *any* case. */
141162306a36Sopenharmony_ci	u64 bg_ptr, prev_bg_ptr;
141262306a36Sopenharmony_ci	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
141362306a36Sopenharmony_ci	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
141462306a36Sopenharmony_ci	struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;
141562306a36Sopenharmony_ci
141662306a36Sopenharmony_ci	/* The caller got these descriptors from
141762306a36Sopenharmony_ci	 * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
141862306a36Sopenharmony_ci	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
141962306a36Sopenharmony_ci	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg));
142062306a36Sopenharmony_ci
142162306a36Sopenharmony_ci	trace_ocfs2_relink_block_group(
142262306a36Sopenharmony_ci		(unsigned long long)le64_to_cpu(fe->i_blkno), chain,
142362306a36Sopenharmony_ci		(unsigned long long)le64_to_cpu(bg->bg_blkno),
142462306a36Sopenharmony_ci		(unsigned long long)le64_to_cpu(prev_bg->bg_blkno));
142562306a36Sopenharmony_ci
142662306a36Sopenharmony_ci	bg_ptr = le64_to_cpu(bg->bg_next_group);
142762306a36Sopenharmony_ci	prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
142862306a36Sopenharmony_ci
142962306a36Sopenharmony_ci	status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
143062306a36Sopenharmony_ci					 prev_bg_bh,
143162306a36Sopenharmony_ci					 OCFS2_JOURNAL_ACCESS_WRITE);
143262306a36Sopenharmony_ci	if (status < 0)
143362306a36Sopenharmony_ci		goto out;
143462306a36Sopenharmony_ci
143562306a36Sopenharmony_ci	prev_bg->bg_next_group = bg->bg_next_group;
143662306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, prev_bg_bh);
143762306a36Sopenharmony_ci
143862306a36Sopenharmony_ci	status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
143962306a36Sopenharmony_ci					 bg_bh, OCFS2_JOURNAL_ACCESS_WRITE);
144062306a36Sopenharmony_ci	if (status < 0)
144162306a36Sopenharmony_ci		goto out_rollback_prev_bg;
144262306a36Sopenharmony_ci
144362306a36Sopenharmony_ci	bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
144462306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, bg_bh);
144562306a36Sopenharmony_ci
144662306a36Sopenharmony_ci	status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
144762306a36Sopenharmony_ci					 fe_bh, OCFS2_JOURNAL_ACCESS_WRITE);
144862306a36Sopenharmony_ci	if (status < 0)
144962306a36Sopenharmony_ci		goto out_rollback_bg;
145062306a36Sopenharmony_ci
145162306a36Sopenharmony_ci	fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
145262306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, fe_bh);
145362306a36Sopenharmony_ci
145462306a36Sopenharmony_ciout:
145562306a36Sopenharmony_ci	if (status < 0)
145662306a36Sopenharmony_ci		mlog_errno(status);
145762306a36Sopenharmony_ci	return status;
145862306a36Sopenharmony_ci
145962306a36Sopenharmony_ciout_rollback_bg:
146062306a36Sopenharmony_ci	bg->bg_next_group = cpu_to_le64(bg_ptr);
146162306a36Sopenharmony_ciout_rollback_prev_bg:
146262306a36Sopenharmony_ci	prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
146362306a36Sopenharmony_ci	goto out;
146462306a36Sopenharmony_ci}
146562306a36Sopenharmony_ci
146662306a36Sopenharmony_cistatic inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
146762306a36Sopenharmony_ci						     u32 wanted)
146862306a36Sopenharmony_ci{
146962306a36Sopenharmony_ci	return le16_to_cpu(bg->bg_free_bits_count) > wanted;
147062306a36Sopenharmony_ci}
147162306a36Sopenharmony_ci
147262306a36Sopenharmony_ci/* return 0 on success, -ENOSPC to keep searching and any other < 0
147362306a36Sopenharmony_ci * value on error. */
147462306a36Sopenharmony_cistatic int ocfs2_cluster_group_search(struct inode *inode,
147562306a36Sopenharmony_ci				      struct buffer_head *group_bh,
147662306a36Sopenharmony_ci				      u32 bits_wanted, u32 min_bits,
147762306a36Sopenharmony_ci				      u64 max_block,
147862306a36Sopenharmony_ci				      struct ocfs2_suballoc_result *res)
147962306a36Sopenharmony_ci{
148062306a36Sopenharmony_ci	int search = -ENOSPC;
148162306a36Sopenharmony_ci	int ret;
148262306a36Sopenharmony_ci	u64 blkoff;
148362306a36Sopenharmony_ci	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
148462306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
148562306a36Sopenharmony_ci	unsigned int max_bits, gd_cluster_off;
148662306a36Sopenharmony_ci
148762306a36Sopenharmony_ci	BUG_ON(!ocfs2_is_cluster_bitmap(inode));
148862306a36Sopenharmony_ci
148962306a36Sopenharmony_ci	if (gd->bg_free_bits_count) {
149062306a36Sopenharmony_ci		max_bits = le16_to_cpu(gd->bg_bits);
149162306a36Sopenharmony_ci
149262306a36Sopenharmony_ci		/* Tail groups in cluster bitmaps which aren't cpg
149362306a36Sopenharmony_ci		 * aligned are prone to partial extension by a failed
149462306a36Sopenharmony_ci		 * fs resize. If the file system resize never got to
149562306a36Sopenharmony_ci		 * update the dinode cluster count, then we don't want
149662306a36Sopenharmony_ci		 * to trust any clusters past it, regardless of what
149762306a36Sopenharmony_ci		 * the group descriptor says. */
149862306a36Sopenharmony_ci		gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
149962306a36Sopenharmony_ci							  le64_to_cpu(gd->bg_blkno));
150062306a36Sopenharmony_ci		if ((gd_cluster_off + max_bits) >
150162306a36Sopenharmony_ci		    OCFS2_I(inode)->ip_clusters) {
150262306a36Sopenharmony_ci			max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
150362306a36Sopenharmony_ci			trace_ocfs2_cluster_group_search_wrong_max_bits(
150462306a36Sopenharmony_ci				(unsigned long long)le64_to_cpu(gd->bg_blkno),
150562306a36Sopenharmony_ci				le16_to_cpu(gd->bg_bits),
150662306a36Sopenharmony_ci				OCFS2_I(inode)->ip_clusters, max_bits);
150762306a36Sopenharmony_ci		}
150862306a36Sopenharmony_ci
150962306a36Sopenharmony_ci		ret = ocfs2_block_group_find_clear_bits(osb,
151062306a36Sopenharmony_ci							group_bh, bits_wanted,
151162306a36Sopenharmony_ci							max_bits, res);
151262306a36Sopenharmony_ci		if (ret)
151362306a36Sopenharmony_ci			return ret;
151462306a36Sopenharmony_ci
151562306a36Sopenharmony_ci		if (max_block) {
151662306a36Sopenharmony_ci			blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
151762306a36Sopenharmony_ci							  gd_cluster_off +
151862306a36Sopenharmony_ci							  res->sr_bit_offset +
151962306a36Sopenharmony_ci							  res->sr_bits);
152062306a36Sopenharmony_ci			trace_ocfs2_cluster_group_search_max_block(
152162306a36Sopenharmony_ci				(unsigned long long)blkoff,
152262306a36Sopenharmony_ci				(unsigned long long)max_block);
152362306a36Sopenharmony_ci			if (blkoff > max_block)
152462306a36Sopenharmony_ci				return -ENOSPC;
152562306a36Sopenharmony_ci		}
152662306a36Sopenharmony_ci
152762306a36Sopenharmony_ci		/* ocfs2_block_group_find_clear_bits() might
152862306a36Sopenharmony_ci		 * return success, but we still want to return
152962306a36Sopenharmony_ci		 * -ENOSPC unless it found the minimum number
153062306a36Sopenharmony_ci		 * of bits. */
153162306a36Sopenharmony_ci		if (min_bits <= res->sr_bits)
153262306a36Sopenharmony_ci			search = 0; /* success */
153362306a36Sopenharmony_ci		else if (res->sr_bits) {
153462306a36Sopenharmony_ci			/*
153562306a36Sopenharmony_ci			 * Don't show bits which we'll be returning
153662306a36Sopenharmony_ci			 * for allocation to the local alloc bitmap.
153762306a36Sopenharmony_ci			 */
153862306a36Sopenharmony_ci			ocfs2_local_alloc_seen_free_bits(osb, res->sr_bits);
153962306a36Sopenharmony_ci		}
154062306a36Sopenharmony_ci	}
154162306a36Sopenharmony_ci
154262306a36Sopenharmony_ci	return search;
154362306a36Sopenharmony_ci}
154462306a36Sopenharmony_ci
154562306a36Sopenharmony_cistatic int ocfs2_block_group_search(struct inode *inode,
154662306a36Sopenharmony_ci				    struct buffer_head *group_bh,
154762306a36Sopenharmony_ci				    u32 bits_wanted, u32 min_bits,
154862306a36Sopenharmony_ci				    u64 max_block,
154962306a36Sopenharmony_ci				    struct ocfs2_suballoc_result *res)
155062306a36Sopenharmony_ci{
155162306a36Sopenharmony_ci	int ret = -ENOSPC;
155262306a36Sopenharmony_ci	u64 blkoff;
155362306a36Sopenharmony_ci	struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
155462306a36Sopenharmony_ci
155562306a36Sopenharmony_ci	BUG_ON(min_bits != 1);
155662306a36Sopenharmony_ci	BUG_ON(ocfs2_is_cluster_bitmap(inode));
155762306a36Sopenharmony_ci
155862306a36Sopenharmony_ci	if (bg->bg_free_bits_count) {
155962306a36Sopenharmony_ci		ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
156062306a36Sopenharmony_ci							group_bh, bits_wanted,
156162306a36Sopenharmony_ci							le16_to_cpu(bg->bg_bits),
156262306a36Sopenharmony_ci							res);
156362306a36Sopenharmony_ci		if (!ret && max_block) {
156462306a36Sopenharmony_ci			blkoff = le64_to_cpu(bg->bg_blkno) +
156562306a36Sopenharmony_ci				res->sr_bit_offset + res->sr_bits;
156662306a36Sopenharmony_ci			trace_ocfs2_block_group_search_max_block(
156762306a36Sopenharmony_ci				(unsigned long long)blkoff,
156862306a36Sopenharmony_ci				(unsigned long long)max_block);
156962306a36Sopenharmony_ci			if (blkoff > max_block)
157062306a36Sopenharmony_ci				ret = -ENOSPC;
157162306a36Sopenharmony_ci		}
157262306a36Sopenharmony_ci	}
157362306a36Sopenharmony_ci
157462306a36Sopenharmony_ci	return ret;
157562306a36Sopenharmony_ci}
157662306a36Sopenharmony_ci
157762306a36Sopenharmony_ciint ocfs2_alloc_dinode_update_counts(struct inode *inode,
157862306a36Sopenharmony_ci				       handle_t *handle,
157962306a36Sopenharmony_ci				       struct buffer_head *di_bh,
158062306a36Sopenharmony_ci				       u32 num_bits,
158162306a36Sopenharmony_ci				       u16 chain)
158262306a36Sopenharmony_ci{
158362306a36Sopenharmony_ci	int ret;
158462306a36Sopenharmony_ci	u32 tmp_used;
158562306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
158662306a36Sopenharmony_ci	struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
158762306a36Sopenharmony_ci
158862306a36Sopenharmony_ci	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
158962306a36Sopenharmony_ci				      OCFS2_JOURNAL_ACCESS_WRITE);
159062306a36Sopenharmony_ci	if (ret < 0) {
159162306a36Sopenharmony_ci		mlog_errno(ret);
159262306a36Sopenharmony_ci		goto out;
159362306a36Sopenharmony_ci	}
159462306a36Sopenharmony_ci
159562306a36Sopenharmony_ci	tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
159662306a36Sopenharmony_ci	di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
159762306a36Sopenharmony_ci	le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
159862306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, di_bh);
159962306a36Sopenharmony_ci
160062306a36Sopenharmony_ciout:
160162306a36Sopenharmony_ci	return ret;
160262306a36Sopenharmony_ci}
160362306a36Sopenharmony_ci
160462306a36Sopenharmony_civoid ocfs2_rollback_alloc_dinode_counts(struct inode *inode,
160562306a36Sopenharmony_ci				       struct buffer_head *di_bh,
160662306a36Sopenharmony_ci				       u32 num_bits,
160762306a36Sopenharmony_ci				       u16 chain)
160862306a36Sopenharmony_ci{
160962306a36Sopenharmony_ci	u32 tmp_used;
161062306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
161162306a36Sopenharmony_ci	struct ocfs2_chain_list *cl;
161262306a36Sopenharmony_ci
161362306a36Sopenharmony_ci	cl = (struct ocfs2_chain_list *)&di->id2.i_chain;
161462306a36Sopenharmony_ci	tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
161562306a36Sopenharmony_ci	di->id1.bitmap1.i_used = cpu_to_le32(tmp_used - num_bits);
161662306a36Sopenharmony_ci	le32_add_cpu(&cl->cl_recs[chain].c_free, num_bits);
161762306a36Sopenharmony_ci}
161862306a36Sopenharmony_ci
161962306a36Sopenharmony_cistatic int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res,
162062306a36Sopenharmony_ci					 struct ocfs2_extent_rec *rec,
162162306a36Sopenharmony_ci					 struct ocfs2_chain_list *cl)
162262306a36Sopenharmony_ci{
162362306a36Sopenharmony_ci	unsigned int bpc = le16_to_cpu(cl->cl_bpc);
162462306a36Sopenharmony_ci	unsigned int bitoff = le32_to_cpu(rec->e_cpos) * bpc;
162562306a36Sopenharmony_ci	unsigned int bitcount = le16_to_cpu(rec->e_leaf_clusters) * bpc;
162662306a36Sopenharmony_ci
162762306a36Sopenharmony_ci	if (res->sr_bit_offset < bitoff)
162862306a36Sopenharmony_ci		return 0;
162962306a36Sopenharmony_ci	if (res->sr_bit_offset >= (bitoff + bitcount))
163062306a36Sopenharmony_ci		return 0;
163162306a36Sopenharmony_ci	res->sr_blkno = le64_to_cpu(rec->e_blkno) +
163262306a36Sopenharmony_ci		(res->sr_bit_offset - bitoff);
163362306a36Sopenharmony_ci	if ((res->sr_bit_offset + res->sr_bits) > (bitoff + bitcount))
163462306a36Sopenharmony_ci		res->sr_bits = (bitoff + bitcount) - res->sr_bit_offset;
163562306a36Sopenharmony_ci	return 1;
163662306a36Sopenharmony_ci}
163762306a36Sopenharmony_ci
163862306a36Sopenharmony_cistatic void ocfs2_bg_discontig_fix_result(struct ocfs2_alloc_context *ac,
163962306a36Sopenharmony_ci					  struct ocfs2_group_desc *bg,
164062306a36Sopenharmony_ci					  struct ocfs2_suballoc_result *res)
164162306a36Sopenharmony_ci{
164262306a36Sopenharmony_ci	int i;
164362306a36Sopenharmony_ci	u64 bg_blkno = res->sr_bg_blkno;  /* Save off */
164462306a36Sopenharmony_ci	struct ocfs2_extent_rec *rec;
164562306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
164662306a36Sopenharmony_ci	struct ocfs2_chain_list *cl = &di->id2.i_chain;
164762306a36Sopenharmony_ci
164862306a36Sopenharmony_ci	if (ocfs2_is_cluster_bitmap(ac->ac_inode)) {
164962306a36Sopenharmony_ci		res->sr_blkno = 0;
165062306a36Sopenharmony_ci		return;
165162306a36Sopenharmony_ci	}
165262306a36Sopenharmony_ci
165362306a36Sopenharmony_ci	res->sr_blkno = res->sr_bg_blkno + res->sr_bit_offset;
165462306a36Sopenharmony_ci	res->sr_bg_blkno = 0;  /* Clear it for contig block groups */
165562306a36Sopenharmony_ci	if (!ocfs2_supports_discontig_bg(OCFS2_SB(ac->ac_inode->i_sb)) ||
165662306a36Sopenharmony_ci	    !bg->bg_list.l_next_free_rec)
165762306a36Sopenharmony_ci		return;
165862306a36Sopenharmony_ci
165962306a36Sopenharmony_ci	for (i = 0; i < le16_to_cpu(bg->bg_list.l_next_free_rec); i++) {
166062306a36Sopenharmony_ci		rec = &bg->bg_list.l_recs[i];
166162306a36Sopenharmony_ci		if (ocfs2_bg_discontig_fix_by_rec(res, rec, cl)) {
166262306a36Sopenharmony_ci			res->sr_bg_blkno = bg_blkno;  /* Restore */
166362306a36Sopenharmony_ci			break;
166462306a36Sopenharmony_ci		}
166562306a36Sopenharmony_ci	}
166662306a36Sopenharmony_ci}
166762306a36Sopenharmony_ci
166862306a36Sopenharmony_cistatic int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
166962306a36Sopenharmony_ci				  handle_t *handle,
167062306a36Sopenharmony_ci				  u32 bits_wanted,
167162306a36Sopenharmony_ci				  u32 min_bits,
167262306a36Sopenharmony_ci				  struct ocfs2_suballoc_result *res,
167362306a36Sopenharmony_ci				  u16 *bits_left)
167462306a36Sopenharmony_ci{
167562306a36Sopenharmony_ci	int ret;
167662306a36Sopenharmony_ci	struct buffer_head *group_bh = NULL;
167762306a36Sopenharmony_ci	struct ocfs2_group_desc *gd;
167862306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
167962306a36Sopenharmony_ci	struct inode *alloc_inode = ac->ac_inode;
168062306a36Sopenharmony_ci
168162306a36Sopenharmony_ci	ret = ocfs2_read_group_descriptor(alloc_inode, di,
168262306a36Sopenharmony_ci					  res->sr_bg_blkno, &group_bh);
168362306a36Sopenharmony_ci	if (ret < 0) {
168462306a36Sopenharmony_ci		mlog_errno(ret);
168562306a36Sopenharmony_ci		return ret;
168662306a36Sopenharmony_ci	}
168762306a36Sopenharmony_ci
168862306a36Sopenharmony_ci	gd = (struct ocfs2_group_desc *) group_bh->b_data;
168962306a36Sopenharmony_ci	ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
169062306a36Sopenharmony_ci				  ac->ac_max_block, res);
169162306a36Sopenharmony_ci	if (ret < 0) {
169262306a36Sopenharmony_ci		if (ret != -ENOSPC)
169362306a36Sopenharmony_ci			mlog_errno(ret);
169462306a36Sopenharmony_ci		goto out;
169562306a36Sopenharmony_ci	}
169662306a36Sopenharmony_ci
169762306a36Sopenharmony_ci	if (!ret)
169862306a36Sopenharmony_ci		ocfs2_bg_discontig_fix_result(ac, gd, res);
169962306a36Sopenharmony_ci
170062306a36Sopenharmony_ci	/*
170162306a36Sopenharmony_ci	 * sr_bg_blkno might have been changed by
170262306a36Sopenharmony_ci	 * ocfs2_bg_discontig_fix_result
170362306a36Sopenharmony_ci	 */
170462306a36Sopenharmony_ci	res->sr_bg_stable_blkno = group_bh->b_blocknr;
170562306a36Sopenharmony_ci
170662306a36Sopenharmony_ci	if (ac->ac_find_loc_only)
170762306a36Sopenharmony_ci		goto out_loc_only;
170862306a36Sopenharmony_ci
170962306a36Sopenharmony_ci	ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
171062306a36Sopenharmony_ci					       res->sr_bits,
171162306a36Sopenharmony_ci					       le16_to_cpu(gd->bg_chain));
171262306a36Sopenharmony_ci	if (ret < 0) {
171362306a36Sopenharmony_ci		mlog_errno(ret);
171462306a36Sopenharmony_ci		goto out;
171562306a36Sopenharmony_ci	}
171662306a36Sopenharmony_ci
171762306a36Sopenharmony_ci	ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
171862306a36Sopenharmony_ci					 res->sr_bit_offset, res->sr_bits);
171962306a36Sopenharmony_ci	if (ret < 0) {
172062306a36Sopenharmony_ci		ocfs2_rollback_alloc_dinode_counts(alloc_inode, ac->ac_bh,
172162306a36Sopenharmony_ci					       res->sr_bits,
172262306a36Sopenharmony_ci					       le16_to_cpu(gd->bg_chain));
172362306a36Sopenharmony_ci		mlog_errno(ret);
172462306a36Sopenharmony_ci	}
172562306a36Sopenharmony_ci
172662306a36Sopenharmony_ciout_loc_only:
172762306a36Sopenharmony_ci	*bits_left = le16_to_cpu(gd->bg_free_bits_count);
172862306a36Sopenharmony_ci
172962306a36Sopenharmony_ciout:
173062306a36Sopenharmony_ci	brelse(group_bh);
173162306a36Sopenharmony_ci
173262306a36Sopenharmony_ci	return ret;
173362306a36Sopenharmony_ci}
173462306a36Sopenharmony_ci
173562306a36Sopenharmony_cistatic int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
173662306a36Sopenharmony_ci			      handle_t *handle,
173762306a36Sopenharmony_ci			      u32 bits_wanted,
173862306a36Sopenharmony_ci			      u32 min_bits,
173962306a36Sopenharmony_ci			      struct ocfs2_suballoc_result *res,
174062306a36Sopenharmony_ci			      u16 *bits_left)
174162306a36Sopenharmony_ci{
174262306a36Sopenharmony_ci	int status;
174362306a36Sopenharmony_ci	u16 chain;
174462306a36Sopenharmony_ci	u64 next_group;
174562306a36Sopenharmony_ci	struct inode *alloc_inode = ac->ac_inode;
174662306a36Sopenharmony_ci	struct buffer_head *group_bh = NULL;
174762306a36Sopenharmony_ci	struct buffer_head *prev_group_bh = NULL;
174862306a36Sopenharmony_ci	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
174962306a36Sopenharmony_ci	struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
175062306a36Sopenharmony_ci	struct ocfs2_group_desc *bg;
175162306a36Sopenharmony_ci
175262306a36Sopenharmony_ci	chain = ac->ac_chain;
175362306a36Sopenharmony_ci	trace_ocfs2_search_chain_begin(
175462306a36Sopenharmony_ci		(unsigned long long)OCFS2_I(alloc_inode)->ip_blkno,
175562306a36Sopenharmony_ci		bits_wanted, chain);
175662306a36Sopenharmony_ci
175762306a36Sopenharmony_ci	status = ocfs2_read_group_descriptor(alloc_inode, fe,
175862306a36Sopenharmony_ci					     le64_to_cpu(cl->cl_recs[chain].c_blkno),
175962306a36Sopenharmony_ci					     &group_bh);
176062306a36Sopenharmony_ci	if (status < 0) {
176162306a36Sopenharmony_ci		mlog_errno(status);
176262306a36Sopenharmony_ci		goto bail;
176362306a36Sopenharmony_ci	}
176462306a36Sopenharmony_ci	bg = (struct ocfs2_group_desc *) group_bh->b_data;
176562306a36Sopenharmony_ci
176662306a36Sopenharmony_ci	status = -ENOSPC;
176762306a36Sopenharmony_ci	/* for now, the chain search is a bit simplistic. We just use
176862306a36Sopenharmony_ci	 * the 1st group with any empty bits. */
176962306a36Sopenharmony_ci	while ((status = ac->ac_group_search(alloc_inode, group_bh,
177062306a36Sopenharmony_ci					     bits_wanted, min_bits,
177162306a36Sopenharmony_ci					     ac->ac_max_block,
177262306a36Sopenharmony_ci					     res)) == -ENOSPC) {
177362306a36Sopenharmony_ci		if (!bg->bg_next_group)
177462306a36Sopenharmony_ci			break;
177562306a36Sopenharmony_ci
177662306a36Sopenharmony_ci		brelse(prev_group_bh);
177762306a36Sopenharmony_ci		prev_group_bh = NULL;
177862306a36Sopenharmony_ci
177962306a36Sopenharmony_ci		next_group = le64_to_cpu(bg->bg_next_group);
178062306a36Sopenharmony_ci		prev_group_bh = group_bh;
178162306a36Sopenharmony_ci		group_bh = NULL;
178262306a36Sopenharmony_ci		status = ocfs2_read_group_descriptor(alloc_inode, fe,
178362306a36Sopenharmony_ci						     next_group, &group_bh);
178462306a36Sopenharmony_ci		if (status < 0) {
178562306a36Sopenharmony_ci			mlog_errno(status);
178662306a36Sopenharmony_ci			goto bail;
178762306a36Sopenharmony_ci		}
178862306a36Sopenharmony_ci		bg = (struct ocfs2_group_desc *) group_bh->b_data;
178962306a36Sopenharmony_ci	}
179062306a36Sopenharmony_ci	if (status < 0) {
179162306a36Sopenharmony_ci		if (status != -ENOSPC)
179262306a36Sopenharmony_ci			mlog_errno(status);
179362306a36Sopenharmony_ci		goto bail;
179462306a36Sopenharmony_ci	}
179562306a36Sopenharmony_ci
179662306a36Sopenharmony_ci	trace_ocfs2_search_chain_succ(
179762306a36Sopenharmony_ci		(unsigned long long)le64_to_cpu(bg->bg_blkno), res->sr_bits);
179862306a36Sopenharmony_ci
179962306a36Sopenharmony_ci	res->sr_bg_blkno = le64_to_cpu(bg->bg_blkno);
180062306a36Sopenharmony_ci
180162306a36Sopenharmony_ci	BUG_ON(res->sr_bits == 0);
180262306a36Sopenharmony_ci	if (!status)
180362306a36Sopenharmony_ci		ocfs2_bg_discontig_fix_result(ac, bg, res);
180462306a36Sopenharmony_ci
180562306a36Sopenharmony_ci	/*
180662306a36Sopenharmony_ci	 * sr_bg_blkno might have been changed by
180762306a36Sopenharmony_ci	 * ocfs2_bg_discontig_fix_result
180862306a36Sopenharmony_ci	 */
180962306a36Sopenharmony_ci	res->sr_bg_stable_blkno = group_bh->b_blocknr;
181062306a36Sopenharmony_ci
181162306a36Sopenharmony_ci	/*
181262306a36Sopenharmony_ci	 * Keep track of previous block descriptor read. When
181362306a36Sopenharmony_ci	 * we find a target, if we have read more than X
181462306a36Sopenharmony_ci	 * number of descriptors, and the target is reasonably
181562306a36Sopenharmony_ci	 * empty, relink him to top of his chain.
181662306a36Sopenharmony_ci	 *
181762306a36Sopenharmony_ci	 * We've read 0 extra blocks and only send one more to
181862306a36Sopenharmony_ci	 * the transaction, yet the next guy to search has a
181962306a36Sopenharmony_ci	 * much easier time.
182062306a36Sopenharmony_ci	 *
182162306a36Sopenharmony_ci	 * Do this *after* figuring out how many bits we're taking out
182262306a36Sopenharmony_ci	 * of our target group.
182362306a36Sopenharmony_ci	 */
182462306a36Sopenharmony_ci	if (!ac->ac_disable_chain_relink &&
182562306a36Sopenharmony_ci	    (prev_group_bh) &&
182662306a36Sopenharmony_ci	    (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) {
182762306a36Sopenharmony_ci		status = ocfs2_relink_block_group(handle, alloc_inode,
182862306a36Sopenharmony_ci						  ac->ac_bh, group_bh,
182962306a36Sopenharmony_ci						  prev_group_bh, chain);
183062306a36Sopenharmony_ci		if (status < 0) {
183162306a36Sopenharmony_ci			mlog_errno(status);
183262306a36Sopenharmony_ci			goto bail;
183362306a36Sopenharmony_ci		}
183462306a36Sopenharmony_ci	}
183562306a36Sopenharmony_ci
183662306a36Sopenharmony_ci	if (ac->ac_find_loc_only)
183762306a36Sopenharmony_ci		goto out_loc_only;
183862306a36Sopenharmony_ci
183962306a36Sopenharmony_ci	status = ocfs2_alloc_dinode_update_counts(alloc_inode, handle,
184062306a36Sopenharmony_ci						  ac->ac_bh, res->sr_bits,
184162306a36Sopenharmony_ci						  chain);
184262306a36Sopenharmony_ci	if (status) {
184362306a36Sopenharmony_ci		mlog_errno(status);
184462306a36Sopenharmony_ci		goto bail;
184562306a36Sopenharmony_ci	}
184662306a36Sopenharmony_ci
184762306a36Sopenharmony_ci	status = ocfs2_block_group_set_bits(handle,
184862306a36Sopenharmony_ci					    alloc_inode,
184962306a36Sopenharmony_ci					    bg,
185062306a36Sopenharmony_ci					    group_bh,
185162306a36Sopenharmony_ci					    res->sr_bit_offset,
185262306a36Sopenharmony_ci					    res->sr_bits);
185362306a36Sopenharmony_ci	if (status < 0) {
185462306a36Sopenharmony_ci		ocfs2_rollback_alloc_dinode_counts(alloc_inode,
185562306a36Sopenharmony_ci					ac->ac_bh, res->sr_bits, chain);
185662306a36Sopenharmony_ci		mlog_errno(status);
185762306a36Sopenharmony_ci		goto bail;
185862306a36Sopenharmony_ci	}
185962306a36Sopenharmony_ci
186062306a36Sopenharmony_ci	trace_ocfs2_search_chain_end(
186162306a36Sopenharmony_ci			(unsigned long long)le64_to_cpu(fe->i_blkno),
186262306a36Sopenharmony_ci			res->sr_bits);
186362306a36Sopenharmony_ci
186462306a36Sopenharmony_ciout_loc_only:
186562306a36Sopenharmony_ci	*bits_left = le16_to_cpu(bg->bg_free_bits_count);
186662306a36Sopenharmony_cibail:
186762306a36Sopenharmony_ci	brelse(group_bh);
186862306a36Sopenharmony_ci	brelse(prev_group_bh);
186962306a36Sopenharmony_ci
187062306a36Sopenharmony_ci	if (status)
187162306a36Sopenharmony_ci		mlog_errno(status);
187262306a36Sopenharmony_ci	return status;
187362306a36Sopenharmony_ci}
187462306a36Sopenharmony_ci
187562306a36Sopenharmony_ci/* will give out up to bits_wanted contiguous bits. */
187662306a36Sopenharmony_cistatic int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
187762306a36Sopenharmony_ci				     handle_t *handle,
187862306a36Sopenharmony_ci				     u32 bits_wanted,
187962306a36Sopenharmony_ci				     u32 min_bits,
188062306a36Sopenharmony_ci				     struct ocfs2_suballoc_result *res)
188162306a36Sopenharmony_ci{
188262306a36Sopenharmony_ci	int status;
188362306a36Sopenharmony_ci	u16 victim, i;
188462306a36Sopenharmony_ci	u16 bits_left = 0;
188562306a36Sopenharmony_ci	u64 hint = ac->ac_last_group;
188662306a36Sopenharmony_ci	struct ocfs2_chain_list *cl;
188762306a36Sopenharmony_ci	struct ocfs2_dinode *fe;
188862306a36Sopenharmony_ci
188962306a36Sopenharmony_ci	BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
189062306a36Sopenharmony_ci	BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given));
189162306a36Sopenharmony_ci	BUG_ON(!ac->ac_bh);
189262306a36Sopenharmony_ci
189362306a36Sopenharmony_ci	fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
189462306a36Sopenharmony_ci
189562306a36Sopenharmony_ci	/* The bh was validated by the inode read during
189662306a36Sopenharmony_ci	 * ocfs2_reserve_suballoc_bits().  Any corruption is a code bug. */
189762306a36Sopenharmony_ci	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
189862306a36Sopenharmony_ci
189962306a36Sopenharmony_ci	if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
190062306a36Sopenharmony_ci	    le32_to_cpu(fe->id1.bitmap1.i_total)) {
190162306a36Sopenharmony_ci		status = ocfs2_error(ac->ac_inode->i_sb,
190262306a36Sopenharmony_ci				     "Chain allocator dinode %llu has %u used bits but only %u total\n",
190362306a36Sopenharmony_ci				     (unsigned long long)le64_to_cpu(fe->i_blkno),
190462306a36Sopenharmony_ci				     le32_to_cpu(fe->id1.bitmap1.i_used),
190562306a36Sopenharmony_ci				     le32_to_cpu(fe->id1.bitmap1.i_total));
190662306a36Sopenharmony_ci		goto bail;
190762306a36Sopenharmony_ci	}
190862306a36Sopenharmony_ci
190962306a36Sopenharmony_ci	res->sr_bg_blkno = hint;
191062306a36Sopenharmony_ci	if (res->sr_bg_blkno) {
191162306a36Sopenharmony_ci		/* Attempt to short-circuit the usual search mechanism
191262306a36Sopenharmony_ci		 * by jumping straight to the most recently used
191362306a36Sopenharmony_ci		 * allocation group. This helps us maintain some
191462306a36Sopenharmony_ci		 * contiguousness across allocations. */
191562306a36Sopenharmony_ci		status = ocfs2_search_one_group(ac, handle, bits_wanted,
191662306a36Sopenharmony_ci						min_bits, res, &bits_left);
191762306a36Sopenharmony_ci		if (!status)
191862306a36Sopenharmony_ci			goto set_hint;
191962306a36Sopenharmony_ci		if (status < 0 && status != -ENOSPC) {
192062306a36Sopenharmony_ci			mlog_errno(status);
192162306a36Sopenharmony_ci			goto bail;
192262306a36Sopenharmony_ci		}
192362306a36Sopenharmony_ci	}
192462306a36Sopenharmony_ci
192562306a36Sopenharmony_ci	cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
192662306a36Sopenharmony_ci
192762306a36Sopenharmony_ci	victim = ocfs2_find_victim_chain(cl);
192862306a36Sopenharmony_ci	ac->ac_chain = victim;
192962306a36Sopenharmony_ci
193062306a36Sopenharmony_ci	status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
193162306a36Sopenharmony_ci				    res, &bits_left);
193262306a36Sopenharmony_ci	if (!status) {
193362306a36Sopenharmony_ci		if (ocfs2_is_cluster_bitmap(ac->ac_inode))
193462306a36Sopenharmony_ci			hint = res->sr_bg_blkno;
193562306a36Sopenharmony_ci		else
193662306a36Sopenharmony_ci			hint = ocfs2_group_from_res(res);
193762306a36Sopenharmony_ci		goto set_hint;
193862306a36Sopenharmony_ci	}
193962306a36Sopenharmony_ci	if (status < 0 && status != -ENOSPC) {
194062306a36Sopenharmony_ci		mlog_errno(status);
194162306a36Sopenharmony_ci		goto bail;
194262306a36Sopenharmony_ci	}
194362306a36Sopenharmony_ci
194462306a36Sopenharmony_ci	trace_ocfs2_claim_suballoc_bits(victim);
194562306a36Sopenharmony_ci
194662306a36Sopenharmony_ci	/* If we didn't pick a good victim, then just default to
194762306a36Sopenharmony_ci	 * searching each chain in order. Don't allow chain relinking
194862306a36Sopenharmony_ci	 * because we only calculate enough journal credits for one
194962306a36Sopenharmony_ci	 * relink per alloc. */
195062306a36Sopenharmony_ci	ac->ac_disable_chain_relink = 1;
195162306a36Sopenharmony_ci	for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) {
195262306a36Sopenharmony_ci		if (i == victim)
195362306a36Sopenharmony_ci			continue;
195462306a36Sopenharmony_ci		if (!cl->cl_recs[i].c_free)
195562306a36Sopenharmony_ci			continue;
195662306a36Sopenharmony_ci
195762306a36Sopenharmony_ci		ac->ac_chain = i;
195862306a36Sopenharmony_ci		status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
195962306a36Sopenharmony_ci					    res, &bits_left);
196062306a36Sopenharmony_ci		if (!status) {
196162306a36Sopenharmony_ci			hint = ocfs2_group_from_res(res);
196262306a36Sopenharmony_ci			break;
196362306a36Sopenharmony_ci		}
196462306a36Sopenharmony_ci		if (status < 0 && status != -ENOSPC) {
196562306a36Sopenharmony_ci			mlog_errno(status);
196662306a36Sopenharmony_ci			goto bail;
196762306a36Sopenharmony_ci		}
196862306a36Sopenharmony_ci	}
196962306a36Sopenharmony_ci
197062306a36Sopenharmony_ciset_hint:
197162306a36Sopenharmony_ci	if (status != -ENOSPC) {
197262306a36Sopenharmony_ci		/* If the next search of this group is not likely to
197362306a36Sopenharmony_ci		 * yield a suitable extent, then we reset the last
197462306a36Sopenharmony_ci		 * group hint so as to not waste a disk read */
197562306a36Sopenharmony_ci		if (bits_left < min_bits)
197662306a36Sopenharmony_ci			ac->ac_last_group = 0;
197762306a36Sopenharmony_ci		else
197862306a36Sopenharmony_ci			ac->ac_last_group = hint;
197962306a36Sopenharmony_ci	}
198062306a36Sopenharmony_ci
198162306a36Sopenharmony_cibail:
198262306a36Sopenharmony_ci	if (status)
198362306a36Sopenharmony_ci		mlog_errno(status);
198462306a36Sopenharmony_ci	return status;
198562306a36Sopenharmony_ci}
198662306a36Sopenharmony_ci
198762306a36Sopenharmony_ciint ocfs2_claim_metadata(handle_t *handle,
198862306a36Sopenharmony_ci			 struct ocfs2_alloc_context *ac,
198962306a36Sopenharmony_ci			 u32 bits_wanted,
199062306a36Sopenharmony_ci			 u64 *suballoc_loc,
199162306a36Sopenharmony_ci			 u16 *suballoc_bit_start,
199262306a36Sopenharmony_ci			 unsigned int *num_bits,
199362306a36Sopenharmony_ci			 u64 *blkno_start)
199462306a36Sopenharmony_ci{
199562306a36Sopenharmony_ci	int status;
199662306a36Sopenharmony_ci	struct ocfs2_suballoc_result res = { .sr_blkno = 0, };
199762306a36Sopenharmony_ci
199862306a36Sopenharmony_ci	BUG_ON(!ac);
199962306a36Sopenharmony_ci	BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted));
200062306a36Sopenharmony_ci	BUG_ON(ac->ac_which != OCFS2_AC_USE_META);
200162306a36Sopenharmony_ci
200262306a36Sopenharmony_ci	status = ocfs2_claim_suballoc_bits(ac,
200362306a36Sopenharmony_ci					   handle,
200462306a36Sopenharmony_ci					   bits_wanted,
200562306a36Sopenharmony_ci					   1,
200662306a36Sopenharmony_ci					   &res);
200762306a36Sopenharmony_ci	if (status < 0) {
200862306a36Sopenharmony_ci		mlog_errno(status);
200962306a36Sopenharmony_ci		goto bail;
201062306a36Sopenharmony_ci	}
201162306a36Sopenharmony_ci	atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
201262306a36Sopenharmony_ci
201362306a36Sopenharmony_ci	*suballoc_loc = res.sr_bg_blkno;
201462306a36Sopenharmony_ci	*suballoc_bit_start = res.sr_bit_offset;
201562306a36Sopenharmony_ci	*blkno_start = res.sr_blkno;
201662306a36Sopenharmony_ci	ac->ac_bits_given += res.sr_bits;
201762306a36Sopenharmony_ci	*num_bits = res.sr_bits;
201862306a36Sopenharmony_ci	status = 0;
201962306a36Sopenharmony_cibail:
202062306a36Sopenharmony_ci	if (status)
202162306a36Sopenharmony_ci		mlog_errno(status);
202262306a36Sopenharmony_ci	return status;
202362306a36Sopenharmony_ci}
202462306a36Sopenharmony_ci
202562306a36Sopenharmony_cistatic void ocfs2_init_inode_ac_group(struct inode *dir,
202662306a36Sopenharmony_ci				      struct buffer_head *parent_di_bh,
202762306a36Sopenharmony_ci				      struct ocfs2_alloc_context *ac)
202862306a36Sopenharmony_ci{
202962306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)parent_di_bh->b_data;
203062306a36Sopenharmony_ci	/*
203162306a36Sopenharmony_ci	 * Try to allocate inodes from some specific group.
203262306a36Sopenharmony_ci	 *
203362306a36Sopenharmony_ci	 * If the parent dir has recorded the last group used in allocation,
203462306a36Sopenharmony_ci	 * cool, use it. Otherwise if we try to allocate new inode from the
203562306a36Sopenharmony_ci	 * same slot the parent dir belongs to, use the same chunk.
203662306a36Sopenharmony_ci	 *
203762306a36Sopenharmony_ci	 * We are very careful here to avoid the mistake of setting
203862306a36Sopenharmony_ci	 * ac_last_group to a group descriptor from a different (unlocked) slot.
203962306a36Sopenharmony_ci	 */
204062306a36Sopenharmony_ci	if (OCFS2_I(dir)->ip_last_used_group &&
204162306a36Sopenharmony_ci	    OCFS2_I(dir)->ip_last_used_slot == ac->ac_alloc_slot)
204262306a36Sopenharmony_ci		ac->ac_last_group = OCFS2_I(dir)->ip_last_used_group;
204362306a36Sopenharmony_ci	else if (le16_to_cpu(di->i_suballoc_slot) == ac->ac_alloc_slot) {
204462306a36Sopenharmony_ci		if (di->i_suballoc_loc)
204562306a36Sopenharmony_ci			ac->ac_last_group = le64_to_cpu(di->i_suballoc_loc);
204662306a36Sopenharmony_ci		else
204762306a36Sopenharmony_ci			ac->ac_last_group = ocfs2_which_suballoc_group(
204862306a36Sopenharmony_ci					le64_to_cpu(di->i_blkno),
204962306a36Sopenharmony_ci					le16_to_cpu(di->i_suballoc_bit));
205062306a36Sopenharmony_ci	}
205162306a36Sopenharmony_ci}
205262306a36Sopenharmony_ci
205362306a36Sopenharmony_cistatic inline void ocfs2_save_inode_ac_group(struct inode *dir,
205462306a36Sopenharmony_ci					     struct ocfs2_alloc_context *ac)
205562306a36Sopenharmony_ci{
205662306a36Sopenharmony_ci	OCFS2_I(dir)->ip_last_used_group = ac->ac_last_group;
205762306a36Sopenharmony_ci	OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot;
205862306a36Sopenharmony_ci}
205962306a36Sopenharmony_ci
206062306a36Sopenharmony_ciint ocfs2_find_new_inode_loc(struct inode *dir,
206162306a36Sopenharmony_ci			     struct buffer_head *parent_fe_bh,
206262306a36Sopenharmony_ci			     struct ocfs2_alloc_context *ac,
206362306a36Sopenharmony_ci			     u64 *fe_blkno)
206462306a36Sopenharmony_ci{
206562306a36Sopenharmony_ci	int ret;
206662306a36Sopenharmony_ci	handle_t *handle = NULL;
206762306a36Sopenharmony_ci	struct ocfs2_suballoc_result *res;
206862306a36Sopenharmony_ci
206962306a36Sopenharmony_ci	BUG_ON(!ac);
207062306a36Sopenharmony_ci	BUG_ON(ac->ac_bits_given != 0);
207162306a36Sopenharmony_ci	BUG_ON(ac->ac_bits_wanted != 1);
207262306a36Sopenharmony_ci	BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
207362306a36Sopenharmony_ci
207462306a36Sopenharmony_ci	res = kzalloc(sizeof(*res), GFP_NOFS);
207562306a36Sopenharmony_ci	if (res == NULL) {
207662306a36Sopenharmony_ci		ret = -ENOMEM;
207762306a36Sopenharmony_ci		mlog_errno(ret);
207862306a36Sopenharmony_ci		goto out;
207962306a36Sopenharmony_ci	}
208062306a36Sopenharmony_ci
208162306a36Sopenharmony_ci	ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);
208262306a36Sopenharmony_ci
208362306a36Sopenharmony_ci	/*
208462306a36Sopenharmony_ci	 * The handle started here is for chain relink. Alternatively,
208562306a36Sopenharmony_ci	 * we could just disable relink for these calls.
208662306a36Sopenharmony_ci	 */
208762306a36Sopenharmony_ci	handle = ocfs2_start_trans(OCFS2_SB(dir->i_sb), OCFS2_SUBALLOC_ALLOC);
208862306a36Sopenharmony_ci	if (IS_ERR(handle)) {
208962306a36Sopenharmony_ci		ret = PTR_ERR(handle);
209062306a36Sopenharmony_ci		handle = NULL;
209162306a36Sopenharmony_ci		mlog_errno(ret);
209262306a36Sopenharmony_ci		goto out;
209362306a36Sopenharmony_ci	}
209462306a36Sopenharmony_ci
209562306a36Sopenharmony_ci	/*
209662306a36Sopenharmony_ci	 * This will instruct ocfs2_claim_suballoc_bits and
209762306a36Sopenharmony_ci	 * ocfs2_search_one_group to search but save actual allocation
209862306a36Sopenharmony_ci	 * for later.
209962306a36Sopenharmony_ci	 */
210062306a36Sopenharmony_ci	ac->ac_find_loc_only = 1;
210162306a36Sopenharmony_ci
210262306a36Sopenharmony_ci	ret = ocfs2_claim_suballoc_bits(ac, handle, 1, 1, res);
210362306a36Sopenharmony_ci	if (ret < 0) {
210462306a36Sopenharmony_ci		mlog_errno(ret);
210562306a36Sopenharmony_ci		goto out;
210662306a36Sopenharmony_ci	}
210762306a36Sopenharmony_ci
210862306a36Sopenharmony_ci	ac->ac_find_loc_priv = res;
210962306a36Sopenharmony_ci	*fe_blkno = res->sr_blkno;
211062306a36Sopenharmony_ci	ocfs2_update_inode_fsync_trans(handle, dir, 0);
211162306a36Sopenharmony_ciout:
211262306a36Sopenharmony_ci	if (handle)
211362306a36Sopenharmony_ci		ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle);
211462306a36Sopenharmony_ci
211562306a36Sopenharmony_ci	if (ret)
211662306a36Sopenharmony_ci		kfree(res);
211762306a36Sopenharmony_ci
211862306a36Sopenharmony_ci	return ret;
211962306a36Sopenharmony_ci}
212062306a36Sopenharmony_ci
212162306a36Sopenharmony_ciint ocfs2_claim_new_inode_at_loc(handle_t *handle,
212262306a36Sopenharmony_ci				 struct inode *dir,
212362306a36Sopenharmony_ci				 struct ocfs2_alloc_context *ac,
212462306a36Sopenharmony_ci				 u64 *suballoc_loc,
212562306a36Sopenharmony_ci				 u16 *suballoc_bit,
212662306a36Sopenharmony_ci				 u64 di_blkno)
212762306a36Sopenharmony_ci{
212862306a36Sopenharmony_ci	int ret;
212962306a36Sopenharmony_ci	u16 chain;
213062306a36Sopenharmony_ci	struct ocfs2_suballoc_result *res = ac->ac_find_loc_priv;
213162306a36Sopenharmony_ci	struct buffer_head *bg_bh = NULL;
213262306a36Sopenharmony_ci	struct ocfs2_group_desc *bg;
213362306a36Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *) ac->ac_bh->b_data;
213462306a36Sopenharmony_ci
213562306a36Sopenharmony_ci	/*
213662306a36Sopenharmony_ci	 * Since di_blkno is being passed back in, we check for any
213762306a36Sopenharmony_ci	 * inconsistencies which may have happened between
213862306a36Sopenharmony_ci	 * calls. These are code bugs as di_blkno is not expected to
213962306a36Sopenharmony_ci	 * change once returned from ocfs2_find_new_inode_loc()
214062306a36Sopenharmony_ci	 */
214162306a36Sopenharmony_ci	BUG_ON(res->sr_blkno != di_blkno);
214262306a36Sopenharmony_ci
214362306a36Sopenharmony_ci	ret = ocfs2_read_group_descriptor(ac->ac_inode, di,
214462306a36Sopenharmony_ci					  res->sr_bg_stable_blkno, &bg_bh);
214562306a36Sopenharmony_ci	if (ret) {
214662306a36Sopenharmony_ci		mlog_errno(ret);
214762306a36Sopenharmony_ci		goto out;
214862306a36Sopenharmony_ci	}
214962306a36Sopenharmony_ci
215062306a36Sopenharmony_ci	bg = (struct ocfs2_group_desc *) bg_bh->b_data;
215162306a36Sopenharmony_ci	chain = le16_to_cpu(bg->bg_chain);
215262306a36Sopenharmony_ci
215362306a36Sopenharmony_ci	ret = ocfs2_alloc_dinode_update_counts(ac->ac_inode, handle,
215462306a36Sopenharmony_ci					       ac->ac_bh, res->sr_bits,
215562306a36Sopenharmony_ci					       chain);
215662306a36Sopenharmony_ci	if (ret) {
215762306a36Sopenharmony_ci		mlog_errno(ret);
215862306a36Sopenharmony_ci		goto out;
215962306a36Sopenharmony_ci	}
216062306a36Sopenharmony_ci
216162306a36Sopenharmony_ci	ret = ocfs2_block_group_set_bits(handle,
216262306a36Sopenharmony_ci					 ac->ac_inode,
216362306a36Sopenharmony_ci					 bg,
216462306a36Sopenharmony_ci					 bg_bh,
216562306a36Sopenharmony_ci					 res->sr_bit_offset,
216662306a36Sopenharmony_ci					 res->sr_bits);
216762306a36Sopenharmony_ci	if (ret < 0) {
216862306a36Sopenharmony_ci		ocfs2_rollback_alloc_dinode_counts(ac->ac_inode,
216962306a36Sopenharmony_ci					       ac->ac_bh, res->sr_bits, chain);
217062306a36Sopenharmony_ci		mlog_errno(ret);
217162306a36Sopenharmony_ci		goto out;
217262306a36Sopenharmony_ci	}
217362306a36Sopenharmony_ci
217462306a36Sopenharmony_ci	trace_ocfs2_claim_new_inode_at_loc((unsigned long long)di_blkno,
217562306a36Sopenharmony_ci					   res->sr_bits);
217662306a36Sopenharmony_ci
217762306a36Sopenharmony_ci	atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
217862306a36Sopenharmony_ci
217962306a36Sopenharmony_ci	BUG_ON(res->sr_bits != 1);
218062306a36Sopenharmony_ci
218162306a36Sopenharmony_ci	*suballoc_loc = res->sr_bg_blkno;
218262306a36Sopenharmony_ci	*suballoc_bit = res->sr_bit_offset;
218362306a36Sopenharmony_ci	ac->ac_bits_given++;
218462306a36Sopenharmony_ci	ocfs2_save_inode_ac_group(dir, ac);
218562306a36Sopenharmony_ci
218662306a36Sopenharmony_ciout:
218762306a36Sopenharmony_ci	brelse(bg_bh);
218862306a36Sopenharmony_ci
218962306a36Sopenharmony_ci	return ret;
219062306a36Sopenharmony_ci}
219162306a36Sopenharmony_ci
219262306a36Sopenharmony_ciint ocfs2_claim_new_inode(handle_t *handle,
219362306a36Sopenharmony_ci			  struct inode *dir,
219462306a36Sopenharmony_ci			  struct buffer_head *parent_fe_bh,
219562306a36Sopenharmony_ci			  struct ocfs2_alloc_context *ac,
219662306a36Sopenharmony_ci			  u64 *suballoc_loc,
219762306a36Sopenharmony_ci			  u16 *suballoc_bit,
219862306a36Sopenharmony_ci			  u64 *fe_blkno)
219962306a36Sopenharmony_ci{
220062306a36Sopenharmony_ci	int status;
220162306a36Sopenharmony_ci	struct ocfs2_suballoc_result res;
220262306a36Sopenharmony_ci
220362306a36Sopenharmony_ci	BUG_ON(!ac);
220462306a36Sopenharmony_ci	BUG_ON(ac->ac_bits_given != 0);
220562306a36Sopenharmony_ci	BUG_ON(ac->ac_bits_wanted != 1);
220662306a36Sopenharmony_ci	BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
220762306a36Sopenharmony_ci
220862306a36Sopenharmony_ci	ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);
220962306a36Sopenharmony_ci
221062306a36Sopenharmony_ci	status = ocfs2_claim_suballoc_bits(ac,
221162306a36Sopenharmony_ci					   handle,
221262306a36Sopenharmony_ci					   1,
221362306a36Sopenharmony_ci					   1,
221462306a36Sopenharmony_ci					   &res);
221562306a36Sopenharmony_ci	if (status < 0) {
221662306a36Sopenharmony_ci		mlog_errno(status);
221762306a36Sopenharmony_ci		goto bail;
221862306a36Sopenharmony_ci	}
221962306a36Sopenharmony_ci	atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
222062306a36Sopenharmony_ci
222162306a36Sopenharmony_ci	BUG_ON(res.sr_bits != 1);
222262306a36Sopenharmony_ci
222362306a36Sopenharmony_ci	*suballoc_loc = res.sr_bg_blkno;
222462306a36Sopenharmony_ci	*suballoc_bit = res.sr_bit_offset;
222562306a36Sopenharmony_ci	*fe_blkno = res.sr_blkno;
222662306a36Sopenharmony_ci	ac->ac_bits_given++;
222762306a36Sopenharmony_ci	ocfs2_save_inode_ac_group(dir, ac);
222862306a36Sopenharmony_ci	status = 0;
222962306a36Sopenharmony_cibail:
223062306a36Sopenharmony_ci	if (status)
223162306a36Sopenharmony_ci		mlog_errno(status);
223262306a36Sopenharmony_ci	return status;
223362306a36Sopenharmony_ci}
223462306a36Sopenharmony_ci
223562306a36Sopenharmony_ci/* translate a group desc. blkno and it's bitmap offset into
223662306a36Sopenharmony_ci * disk cluster offset. */
223762306a36Sopenharmony_cistatic inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
223862306a36Sopenharmony_ci						   u64 bg_blkno,
223962306a36Sopenharmony_ci						   u16 bg_bit_off)
224062306a36Sopenharmony_ci{
224162306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
224262306a36Sopenharmony_ci	u32 cluster = 0;
224362306a36Sopenharmony_ci
224462306a36Sopenharmony_ci	BUG_ON(!ocfs2_is_cluster_bitmap(inode));
224562306a36Sopenharmony_ci
224662306a36Sopenharmony_ci	if (bg_blkno != osb->first_cluster_group_blkno)
224762306a36Sopenharmony_ci		cluster = ocfs2_blocks_to_clusters(inode->i_sb, bg_blkno);
224862306a36Sopenharmony_ci	cluster += (u32) bg_bit_off;
224962306a36Sopenharmony_ci	return cluster;
225062306a36Sopenharmony_ci}
225162306a36Sopenharmony_ci
225262306a36Sopenharmony_ci/* given a cluster offset, calculate which block group it belongs to
225362306a36Sopenharmony_ci * and return that block offset. */
225462306a36Sopenharmony_ciu64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster)
225562306a36Sopenharmony_ci{
225662306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
225762306a36Sopenharmony_ci	u32 group_no;
225862306a36Sopenharmony_ci
225962306a36Sopenharmony_ci	BUG_ON(!ocfs2_is_cluster_bitmap(inode));
226062306a36Sopenharmony_ci
226162306a36Sopenharmony_ci	group_no = cluster / osb->bitmap_cpg;
226262306a36Sopenharmony_ci	if (!group_no)
226362306a36Sopenharmony_ci		return osb->first_cluster_group_blkno;
226462306a36Sopenharmony_ci	return ocfs2_clusters_to_blocks(inode->i_sb,
226562306a36Sopenharmony_ci					group_no * osb->bitmap_cpg);
226662306a36Sopenharmony_ci}
226762306a36Sopenharmony_ci
226862306a36Sopenharmony_ci/* given the block number of a cluster start, calculate which cluster
226962306a36Sopenharmony_ci * group and descriptor bitmap offset that corresponds to. */
227062306a36Sopenharmony_cistatic inline void ocfs2_block_to_cluster_group(struct inode *inode,
227162306a36Sopenharmony_ci						u64 data_blkno,
227262306a36Sopenharmony_ci						u64 *bg_blkno,
227362306a36Sopenharmony_ci						u16 *bg_bit_off)
227462306a36Sopenharmony_ci{
227562306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
227662306a36Sopenharmony_ci	u32 data_cluster = ocfs2_blocks_to_clusters(osb->sb, data_blkno);
227762306a36Sopenharmony_ci
227862306a36Sopenharmony_ci	BUG_ON(!ocfs2_is_cluster_bitmap(inode));
227962306a36Sopenharmony_ci
228062306a36Sopenharmony_ci	*bg_blkno = ocfs2_which_cluster_group(inode,
228162306a36Sopenharmony_ci					      data_cluster);
228262306a36Sopenharmony_ci
228362306a36Sopenharmony_ci	if (*bg_blkno == osb->first_cluster_group_blkno)
228462306a36Sopenharmony_ci		*bg_bit_off = (u16) data_cluster;
228562306a36Sopenharmony_ci	else
228662306a36Sopenharmony_ci		*bg_bit_off = (u16) ocfs2_blocks_to_clusters(osb->sb,
228762306a36Sopenharmony_ci							     data_blkno - *bg_blkno);
228862306a36Sopenharmony_ci}
228962306a36Sopenharmony_ci
229062306a36Sopenharmony_ci/*
229162306a36Sopenharmony_ci * min_bits - minimum contiguous chunk from this total allocation we
229262306a36Sopenharmony_ci * can handle. set to what we asked for originally for a full
229362306a36Sopenharmony_ci * contig. allocation, set to '1' to indicate we can deal with extents
229462306a36Sopenharmony_ci * of any size.
229562306a36Sopenharmony_ci */
229662306a36Sopenharmony_ciint __ocfs2_claim_clusters(handle_t *handle,
229762306a36Sopenharmony_ci			   struct ocfs2_alloc_context *ac,
229862306a36Sopenharmony_ci			   u32 min_clusters,
229962306a36Sopenharmony_ci			   u32 max_clusters,
230062306a36Sopenharmony_ci			   u32 *cluster_start,
230162306a36Sopenharmony_ci			   u32 *num_clusters)
230262306a36Sopenharmony_ci{
230362306a36Sopenharmony_ci	int status;
230462306a36Sopenharmony_ci	unsigned int bits_wanted = max_clusters;
230562306a36Sopenharmony_ci	struct ocfs2_suballoc_result res = { .sr_blkno = 0, };
230662306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(ac->ac_inode->i_sb);
230762306a36Sopenharmony_ci
230862306a36Sopenharmony_ci	BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
230962306a36Sopenharmony_ci
231062306a36Sopenharmony_ci	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
231162306a36Sopenharmony_ci	       && ac->ac_which != OCFS2_AC_USE_MAIN);
231262306a36Sopenharmony_ci
231362306a36Sopenharmony_ci	if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
231462306a36Sopenharmony_ci		WARN_ON(min_clusters > 1);
231562306a36Sopenharmony_ci
231662306a36Sopenharmony_ci		status = ocfs2_claim_local_alloc_bits(osb,
231762306a36Sopenharmony_ci						      handle,
231862306a36Sopenharmony_ci						      ac,
231962306a36Sopenharmony_ci						      bits_wanted,
232062306a36Sopenharmony_ci						      cluster_start,
232162306a36Sopenharmony_ci						      num_clusters);
232262306a36Sopenharmony_ci		if (!status)
232362306a36Sopenharmony_ci			atomic_inc(&osb->alloc_stats.local_data);
232462306a36Sopenharmony_ci	} else {
232562306a36Sopenharmony_ci		if (min_clusters > (osb->bitmap_cpg - 1)) {
232662306a36Sopenharmony_ci			/* The only paths asking for contiguousness
232762306a36Sopenharmony_ci			 * should know about this already. */
232862306a36Sopenharmony_ci			mlog(ML_ERROR, "minimum allocation requested %u exceeds "
232962306a36Sopenharmony_ci			     "group bitmap size %u!\n", min_clusters,
233062306a36Sopenharmony_ci			     osb->bitmap_cpg);
233162306a36Sopenharmony_ci			status = -ENOSPC;
233262306a36Sopenharmony_ci			goto bail;
233362306a36Sopenharmony_ci		}
233462306a36Sopenharmony_ci		/* clamp the current request down to a realistic size. */
233562306a36Sopenharmony_ci		if (bits_wanted > (osb->bitmap_cpg - 1))
233662306a36Sopenharmony_ci			bits_wanted = osb->bitmap_cpg - 1;
233762306a36Sopenharmony_ci
233862306a36Sopenharmony_ci		status = ocfs2_claim_suballoc_bits(ac,
233962306a36Sopenharmony_ci						   handle,
234062306a36Sopenharmony_ci						   bits_wanted,
234162306a36Sopenharmony_ci						   min_clusters,
234262306a36Sopenharmony_ci						   &res);
234362306a36Sopenharmony_ci		if (!status) {
234462306a36Sopenharmony_ci			BUG_ON(res.sr_blkno); /* cluster alloc can't set */
234562306a36Sopenharmony_ci			*cluster_start =
234662306a36Sopenharmony_ci				ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode,
234762306a36Sopenharmony_ci								 res.sr_bg_blkno,
234862306a36Sopenharmony_ci								 res.sr_bit_offset);
234962306a36Sopenharmony_ci			atomic_inc(&osb->alloc_stats.bitmap_data);
235062306a36Sopenharmony_ci			*num_clusters = res.sr_bits;
235162306a36Sopenharmony_ci		}
235262306a36Sopenharmony_ci	}
235362306a36Sopenharmony_ci	if (status < 0) {
235462306a36Sopenharmony_ci		if (status != -ENOSPC)
235562306a36Sopenharmony_ci			mlog_errno(status);
235662306a36Sopenharmony_ci		goto bail;
235762306a36Sopenharmony_ci	}
235862306a36Sopenharmony_ci
235962306a36Sopenharmony_ci	ac->ac_bits_given += *num_clusters;
236062306a36Sopenharmony_ci
236162306a36Sopenharmony_cibail:
236262306a36Sopenharmony_ci	if (status)
236362306a36Sopenharmony_ci		mlog_errno(status);
236462306a36Sopenharmony_ci	return status;
236562306a36Sopenharmony_ci}
236662306a36Sopenharmony_ci
236762306a36Sopenharmony_ciint ocfs2_claim_clusters(handle_t *handle,
236862306a36Sopenharmony_ci			 struct ocfs2_alloc_context *ac,
236962306a36Sopenharmony_ci			 u32 min_clusters,
237062306a36Sopenharmony_ci			 u32 *cluster_start,
237162306a36Sopenharmony_ci			 u32 *num_clusters)
237262306a36Sopenharmony_ci{
237362306a36Sopenharmony_ci	unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
237462306a36Sopenharmony_ci
237562306a36Sopenharmony_ci	return __ocfs2_claim_clusters(handle, ac, min_clusters,
237662306a36Sopenharmony_ci				      bits_wanted, cluster_start, num_clusters);
237762306a36Sopenharmony_ci}
237862306a36Sopenharmony_ci
237962306a36Sopenharmony_cistatic int ocfs2_block_group_clear_bits(handle_t *handle,
238062306a36Sopenharmony_ci					struct inode *alloc_inode,
238162306a36Sopenharmony_ci					struct ocfs2_group_desc *bg,
238262306a36Sopenharmony_ci					struct buffer_head *group_bh,
238362306a36Sopenharmony_ci					unsigned int bit_off,
238462306a36Sopenharmony_ci					unsigned int num_bits,
238562306a36Sopenharmony_ci					void (*undo_fn)(unsigned int bit,
238662306a36Sopenharmony_ci							unsigned long *bmap))
238762306a36Sopenharmony_ci{
238862306a36Sopenharmony_ci	int status;
238962306a36Sopenharmony_ci	unsigned int tmp;
239062306a36Sopenharmony_ci	struct ocfs2_group_desc *undo_bg = NULL;
239162306a36Sopenharmony_ci	struct journal_head *jh;
239262306a36Sopenharmony_ci
239362306a36Sopenharmony_ci	/* The caller got this descriptor from
239462306a36Sopenharmony_ci	 * ocfs2_read_group_descriptor().  Any corruption is a code bug. */
239562306a36Sopenharmony_ci	BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
239662306a36Sopenharmony_ci
239762306a36Sopenharmony_ci	trace_ocfs2_block_group_clear_bits(bit_off, num_bits);
239862306a36Sopenharmony_ci
239962306a36Sopenharmony_ci	BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode));
240062306a36Sopenharmony_ci	status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
240162306a36Sopenharmony_ci					 group_bh,
240262306a36Sopenharmony_ci					 undo_fn ?
240362306a36Sopenharmony_ci					 OCFS2_JOURNAL_ACCESS_UNDO :
240462306a36Sopenharmony_ci					 OCFS2_JOURNAL_ACCESS_WRITE);
240562306a36Sopenharmony_ci	if (status < 0) {
240662306a36Sopenharmony_ci		mlog_errno(status);
240762306a36Sopenharmony_ci		goto bail;
240862306a36Sopenharmony_ci	}
240962306a36Sopenharmony_ci
241062306a36Sopenharmony_ci	jh = bh2jh(group_bh);
241162306a36Sopenharmony_ci	if (undo_fn) {
241262306a36Sopenharmony_ci		spin_lock(&jh->b_state_lock);
241362306a36Sopenharmony_ci		undo_bg = (struct ocfs2_group_desc *) jh->b_committed_data;
241462306a36Sopenharmony_ci		BUG_ON(!undo_bg);
241562306a36Sopenharmony_ci	}
241662306a36Sopenharmony_ci
241762306a36Sopenharmony_ci	tmp = num_bits;
241862306a36Sopenharmony_ci	while(tmp--) {
241962306a36Sopenharmony_ci		ocfs2_clear_bit((bit_off + tmp),
242062306a36Sopenharmony_ci				(unsigned long *) bg->bg_bitmap);
242162306a36Sopenharmony_ci		if (undo_fn)
242262306a36Sopenharmony_ci			undo_fn(bit_off + tmp,
242362306a36Sopenharmony_ci				(unsigned long *) undo_bg->bg_bitmap);
242462306a36Sopenharmony_ci	}
242562306a36Sopenharmony_ci	le16_add_cpu(&bg->bg_free_bits_count, num_bits);
242662306a36Sopenharmony_ci	if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
242762306a36Sopenharmony_ci		if (undo_fn)
242862306a36Sopenharmony_ci			spin_unlock(&jh->b_state_lock);
242962306a36Sopenharmony_ci		return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n",
243062306a36Sopenharmony_ci				   (unsigned long long)le64_to_cpu(bg->bg_blkno),
243162306a36Sopenharmony_ci				   le16_to_cpu(bg->bg_bits),
243262306a36Sopenharmony_ci				   le16_to_cpu(bg->bg_free_bits_count),
243362306a36Sopenharmony_ci				   num_bits);
243462306a36Sopenharmony_ci	}
243562306a36Sopenharmony_ci
243662306a36Sopenharmony_ci	if (undo_fn)
243762306a36Sopenharmony_ci		spin_unlock(&jh->b_state_lock);
243862306a36Sopenharmony_ci
243962306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, group_bh);
244062306a36Sopenharmony_cibail:
244162306a36Sopenharmony_ci	return status;
244262306a36Sopenharmony_ci}
244362306a36Sopenharmony_ci
244462306a36Sopenharmony_ci/*
244562306a36Sopenharmony_ci * expects the suballoc inode to already be locked.
244662306a36Sopenharmony_ci */
244762306a36Sopenharmony_cistatic int _ocfs2_free_suballoc_bits(handle_t *handle,
244862306a36Sopenharmony_ci				     struct inode *alloc_inode,
244962306a36Sopenharmony_ci				     struct buffer_head *alloc_bh,
245062306a36Sopenharmony_ci				     unsigned int start_bit,
245162306a36Sopenharmony_ci				     u64 bg_blkno,
245262306a36Sopenharmony_ci				     unsigned int count,
245362306a36Sopenharmony_ci				     void (*undo_fn)(unsigned int bit,
245462306a36Sopenharmony_ci						     unsigned long *bitmap))
245562306a36Sopenharmony_ci{
245662306a36Sopenharmony_ci	int status = 0;
245762306a36Sopenharmony_ci	u32 tmp_used;
245862306a36Sopenharmony_ci	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data;
245962306a36Sopenharmony_ci	struct ocfs2_chain_list *cl = &fe->id2.i_chain;
246062306a36Sopenharmony_ci	struct buffer_head *group_bh = NULL;
246162306a36Sopenharmony_ci	struct ocfs2_group_desc *group;
246262306a36Sopenharmony_ci
246362306a36Sopenharmony_ci	/* The alloc_bh comes from ocfs2_free_dinode() or
246462306a36Sopenharmony_ci	 * ocfs2_free_clusters().  The callers have all locked the
246562306a36Sopenharmony_ci	 * allocator and gotten alloc_bh from the lock call.  This
246662306a36Sopenharmony_ci	 * validates the dinode buffer.  Any corruption that has happened
246762306a36Sopenharmony_ci	 * is a code bug. */
246862306a36Sopenharmony_ci	BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
246962306a36Sopenharmony_ci	BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
247062306a36Sopenharmony_ci
247162306a36Sopenharmony_ci	trace_ocfs2_free_suballoc_bits(
247262306a36Sopenharmony_ci		(unsigned long long)OCFS2_I(alloc_inode)->ip_blkno,
247362306a36Sopenharmony_ci		(unsigned long long)bg_blkno,
247462306a36Sopenharmony_ci		start_bit, count);
247562306a36Sopenharmony_ci
247662306a36Sopenharmony_ci	status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno,
247762306a36Sopenharmony_ci					     &group_bh);
247862306a36Sopenharmony_ci	if (status < 0) {
247962306a36Sopenharmony_ci		mlog_errno(status);
248062306a36Sopenharmony_ci		goto bail;
248162306a36Sopenharmony_ci	}
248262306a36Sopenharmony_ci	group = (struct ocfs2_group_desc *) group_bh->b_data;
248362306a36Sopenharmony_ci
248462306a36Sopenharmony_ci	BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
248562306a36Sopenharmony_ci
248662306a36Sopenharmony_ci	status = ocfs2_block_group_clear_bits(handle, alloc_inode,
248762306a36Sopenharmony_ci					      group, group_bh,
248862306a36Sopenharmony_ci					      start_bit, count, undo_fn);
248962306a36Sopenharmony_ci	if (status < 0) {
249062306a36Sopenharmony_ci		mlog_errno(status);
249162306a36Sopenharmony_ci		goto bail;
249262306a36Sopenharmony_ci	}
249362306a36Sopenharmony_ci
249462306a36Sopenharmony_ci	status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
249562306a36Sopenharmony_ci					 alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE);
249662306a36Sopenharmony_ci	if (status < 0) {
249762306a36Sopenharmony_ci		mlog_errno(status);
249862306a36Sopenharmony_ci		ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh,
249962306a36Sopenharmony_ci				start_bit, count);
250062306a36Sopenharmony_ci		goto bail;
250162306a36Sopenharmony_ci	}
250262306a36Sopenharmony_ci
250362306a36Sopenharmony_ci	le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free,
250462306a36Sopenharmony_ci		     count);
250562306a36Sopenharmony_ci	tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
250662306a36Sopenharmony_ci	fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count);
250762306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, alloc_bh);
250862306a36Sopenharmony_ci
250962306a36Sopenharmony_cibail:
251062306a36Sopenharmony_ci	brelse(group_bh);
251162306a36Sopenharmony_ci	return status;
251262306a36Sopenharmony_ci}
251362306a36Sopenharmony_ci
251462306a36Sopenharmony_ciint ocfs2_free_suballoc_bits(handle_t *handle,
251562306a36Sopenharmony_ci			     struct inode *alloc_inode,
251662306a36Sopenharmony_ci			     struct buffer_head *alloc_bh,
251762306a36Sopenharmony_ci			     unsigned int start_bit,
251862306a36Sopenharmony_ci			     u64 bg_blkno,
251962306a36Sopenharmony_ci			     unsigned int count)
252062306a36Sopenharmony_ci{
252162306a36Sopenharmony_ci	return _ocfs2_free_suballoc_bits(handle, alloc_inode, alloc_bh,
252262306a36Sopenharmony_ci					 start_bit, bg_blkno, count, NULL);
252362306a36Sopenharmony_ci}
252462306a36Sopenharmony_ci
252562306a36Sopenharmony_ciint ocfs2_free_dinode(handle_t *handle,
252662306a36Sopenharmony_ci		      struct inode *inode_alloc_inode,
252762306a36Sopenharmony_ci		      struct buffer_head *inode_alloc_bh,
252862306a36Sopenharmony_ci		      struct ocfs2_dinode *di)
252962306a36Sopenharmony_ci{
253062306a36Sopenharmony_ci	u64 blk = le64_to_cpu(di->i_blkno);
253162306a36Sopenharmony_ci	u16 bit = le16_to_cpu(di->i_suballoc_bit);
253262306a36Sopenharmony_ci	u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
253362306a36Sopenharmony_ci
253462306a36Sopenharmony_ci	if (di->i_suballoc_loc)
253562306a36Sopenharmony_ci		bg_blkno = le64_to_cpu(di->i_suballoc_loc);
253662306a36Sopenharmony_ci	return ocfs2_free_suballoc_bits(handle, inode_alloc_inode,
253762306a36Sopenharmony_ci					inode_alloc_bh, bit, bg_blkno, 1);
253862306a36Sopenharmony_ci}
253962306a36Sopenharmony_ci
254062306a36Sopenharmony_cistatic int _ocfs2_free_clusters(handle_t *handle,
254162306a36Sopenharmony_ci				struct inode *bitmap_inode,
254262306a36Sopenharmony_ci				struct buffer_head *bitmap_bh,
254362306a36Sopenharmony_ci				u64 start_blk,
254462306a36Sopenharmony_ci				unsigned int num_clusters,
254562306a36Sopenharmony_ci				void (*undo_fn)(unsigned int bit,
254662306a36Sopenharmony_ci						unsigned long *bitmap))
254762306a36Sopenharmony_ci{
254862306a36Sopenharmony_ci	int status;
254962306a36Sopenharmony_ci	u16 bg_start_bit;
255062306a36Sopenharmony_ci	u64 bg_blkno;
255162306a36Sopenharmony_ci
255262306a36Sopenharmony_ci	/* You can't ever have a contiguous set of clusters
255362306a36Sopenharmony_ci	 * bigger than a block group bitmap so we never have to worry
255462306a36Sopenharmony_ci	 * about looping on them.
255562306a36Sopenharmony_ci	 * This is expensive. We can safely remove once this stuff has
255662306a36Sopenharmony_ci	 * gotten tested really well. */
255762306a36Sopenharmony_ci	BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb,
255862306a36Sopenharmony_ci				ocfs2_blocks_to_clusters(bitmap_inode->i_sb,
255962306a36Sopenharmony_ci							 start_blk)));
256062306a36Sopenharmony_ci
256162306a36Sopenharmony_ci
256262306a36Sopenharmony_ci	ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
256362306a36Sopenharmony_ci				     &bg_start_bit);
256462306a36Sopenharmony_ci
256562306a36Sopenharmony_ci	trace_ocfs2_free_clusters((unsigned long long)bg_blkno,
256662306a36Sopenharmony_ci			(unsigned long long)start_blk,
256762306a36Sopenharmony_ci			bg_start_bit, num_clusters);
256862306a36Sopenharmony_ci
256962306a36Sopenharmony_ci	status = _ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
257062306a36Sopenharmony_ci					   bg_start_bit, bg_blkno,
257162306a36Sopenharmony_ci					   num_clusters, undo_fn);
257262306a36Sopenharmony_ci	if (status < 0) {
257362306a36Sopenharmony_ci		mlog_errno(status);
257462306a36Sopenharmony_ci		goto out;
257562306a36Sopenharmony_ci	}
257662306a36Sopenharmony_ci
257762306a36Sopenharmony_ci	ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb),
257862306a36Sopenharmony_ci					 num_clusters);
257962306a36Sopenharmony_ci
258062306a36Sopenharmony_ciout:
258162306a36Sopenharmony_ci	return status;
258262306a36Sopenharmony_ci}
258362306a36Sopenharmony_ci
258462306a36Sopenharmony_ciint ocfs2_free_clusters(handle_t *handle,
258562306a36Sopenharmony_ci			struct inode *bitmap_inode,
258662306a36Sopenharmony_ci			struct buffer_head *bitmap_bh,
258762306a36Sopenharmony_ci			u64 start_blk,
258862306a36Sopenharmony_ci			unsigned int num_clusters)
258962306a36Sopenharmony_ci{
259062306a36Sopenharmony_ci	return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh,
259162306a36Sopenharmony_ci				    start_blk, num_clusters,
259262306a36Sopenharmony_ci				    _ocfs2_set_bit);
259362306a36Sopenharmony_ci}
259462306a36Sopenharmony_ci
259562306a36Sopenharmony_ci/*
259662306a36Sopenharmony_ci * Give never-used clusters back to the global bitmap.  We don't need
259762306a36Sopenharmony_ci * to protect these bits in the undo buffer.
259862306a36Sopenharmony_ci */
259962306a36Sopenharmony_ciint ocfs2_release_clusters(handle_t *handle,
260062306a36Sopenharmony_ci			   struct inode *bitmap_inode,
260162306a36Sopenharmony_ci			   struct buffer_head *bitmap_bh,
260262306a36Sopenharmony_ci			   u64 start_blk,
260362306a36Sopenharmony_ci			   unsigned int num_clusters)
260462306a36Sopenharmony_ci{
260562306a36Sopenharmony_ci	return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh,
260662306a36Sopenharmony_ci				    start_blk, num_clusters,
260762306a36Sopenharmony_ci				    _ocfs2_clear_bit);
260862306a36Sopenharmony_ci}
260962306a36Sopenharmony_ci
261062306a36Sopenharmony_ci/*
261162306a36Sopenharmony_ci * For a given allocation, determine which allocators will need to be
261262306a36Sopenharmony_ci * accessed, and lock them, reserving the appropriate number of bits.
261362306a36Sopenharmony_ci *
261462306a36Sopenharmony_ci * Sparse file systems call this from ocfs2_write_begin_nolock()
261562306a36Sopenharmony_ci * and ocfs2_allocate_unwritten_extents().
261662306a36Sopenharmony_ci *
261762306a36Sopenharmony_ci * File systems which don't support holes call this from
261862306a36Sopenharmony_ci * ocfs2_extend_allocation().
261962306a36Sopenharmony_ci */
262062306a36Sopenharmony_ciint ocfs2_lock_allocators(struct inode *inode,
262162306a36Sopenharmony_ci			  struct ocfs2_extent_tree *et,
262262306a36Sopenharmony_ci			  u32 clusters_to_add, u32 extents_to_split,
262362306a36Sopenharmony_ci			  struct ocfs2_alloc_context **data_ac,
262462306a36Sopenharmony_ci			  struct ocfs2_alloc_context **meta_ac)
262562306a36Sopenharmony_ci{
262662306a36Sopenharmony_ci	int ret = 0, num_free_extents;
262762306a36Sopenharmony_ci	unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
262862306a36Sopenharmony_ci	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
262962306a36Sopenharmony_ci
263062306a36Sopenharmony_ci	*meta_ac = NULL;
263162306a36Sopenharmony_ci	if (data_ac)
263262306a36Sopenharmony_ci		*data_ac = NULL;
263362306a36Sopenharmony_ci
263462306a36Sopenharmony_ci	BUG_ON(clusters_to_add != 0 && data_ac == NULL);
263562306a36Sopenharmony_ci
263662306a36Sopenharmony_ci	num_free_extents = ocfs2_num_free_extents(et);
263762306a36Sopenharmony_ci	if (num_free_extents < 0) {
263862306a36Sopenharmony_ci		ret = num_free_extents;
263962306a36Sopenharmony_ci		mlog_errno(ret);
264062306a36Sopenharmony_ci		goto out;
264162306a36Sopenharmony_ci	}
264262306a36Sopenharmony_ci
264362306a36Sopenharmony_ci	/*
264462306a36Sopenharmony_ci	 * Sparse allocation file systems need to be more conservative
264562306a36Sopenharmony_ci	 * with reserving room for expansion - the actual allocation
264662306a36Sopenharmony_ci	 * happens while we've got a journal handle open so re-taking
264762306a36Sopenharmony_ci	 * a cluster lock (because we ran out of room for another
264862306a36Sopenharmony_ci	 * extent) will violate ordering rules.
264962306a36Sopenharmony_ci	 *
265062306a36Sopenharmony_ci	 * Most of the time we'll only be seeing this 1 cluster at a time
265162306a36Sopenharmony_ci	 * anyway.
265262306a36Sopenharmony_ci	 *
265362306a36Sopenharmony_ci	 * Always lock for any unwritten extents - we might want to
265462306a36Sopenharmony_ci	 * add blocks during a split.
265562306a36Sopenharmony_ci	 */
265662306a36Sopenharmony_ci	if (!num_free_extents ||
265762306a36Sopenharmony_ci	    (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
265862306a36Sopenharmony_ci		ret = ocfs2_reserve_new_metadata(osb, et->et_root_el, meta_ac);
265962306a36Sopenharmony_ci		if (ret < 0) {
266062306a36Sopenharmony_ci			if (ret != -ENOSPC)
266162306a36Sopenharmony_ci				mlog_errno(ret);
266262306a36Sopenharmony_ci			goto out;
266362306a36Sopenharmony_ci		}
266462306a36Sopenharmony_ci	}
266562306a36Sopenharmony_ci
266662306a36Sopenharmony_ci	if (clusters_to_add == 0)
266762306a36Sopenharmony_ci		goto out;
266862306a36Sopenharmony_ci
266962306a36Sopenharmony_ci	ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
267062306a36Sopenharmony_ci	if (ret < 0) {
267162306a36Sopenharmony_ci		if (ret != -ENOSPC)
267262306a36Sopenharmony_ci			mlog_errno(ret);
267362306a36Sopenharmony_ci		goto out;
267462306a36Sopenharmony_ci	}
267562306a36Sopenharmony_ci
267662306a36Sopenharmony_ciout:
267762306a36Sopenharmony_ci	if (ret) {
267862306a36Sopenharmony_ci		if (*meta_ac) {
267962306a36Sopenharmony_ci			ocfs2_free_alloc_context(*meta_ac);
268062306a36Sopenharmony_ci			*meta_ac = NULL;
268162306a36Sopenharmony_ci		}
268262306a36Sopenharmony_ci
268362306a36Sopenharmony_ci		/*
268462306a36Sopenharmony_ci		 * We cannot have an error and a non null *data_ac.
268562306a36Sopenharmony_ci		 */
268662306a36Sopenharmony_ci	}
268762306a36Sopenharmony_ci
268862306a36Sopenharmony_ci	return ret;
268962306a36Sopenharmony_ci}
269062306a36Sopenharmony_ci
269162306a36Sopenharmony_ci/*
269262306a36Sopenharmony_ci * Read the inode specified by blkno to get suballoc_slot and
269362306a36Sopenharmony_ci * suballoc_bit.
269462306a36Sopenharmony_ci */
269562306a36Sopenharmony_cistatic int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
269662306a36Sopenharmony_ci				       u16 *suballoc_slot, u64 *group_blkno,
269762306a36Sopenharmony_ci				       u16 *suballoc_bit)
269862306a36Sopenharmony_ci{
269962306a36Sopenharmony_ci	int status;
270062306a36Sopenharmony_ci	struct buffer_head *inode_bh = NULL;
270162306a36Sopenharmony_ci	struct ocfs2_dinode *inode_fe;
270262306a36Sopenharmony_ci
270362306a36Sopenharmony_ci	trace_ocfs2_get_suballoc_slot_bit((unsigned long long)blkno);
270462306a36Sopenharmony_ci
270562306a36Sopenharmony_ci	/* dirty read disk */
270662306a36Sopenharmony_ci	status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh);
270762306a36Sopenharmony_ci	if (status < 0) {
270862306a36Sopenharmony_ci		mlog(ML_ERROR, "read block %llu failed %d\n",
270962306a36Sopenharmony_ci		     (unsigned long long)blkno, status);
271062306a36Sopenharmony_ci		goto bail;
271162306a36Sopenharmony_ci	}
271262306a36Sopenharmony_ci
271362306a36Sopenharmony_ci	inode_fe = (struct ocfs2_dinode *) inode_bh->b_data;
271462306a36Sopenharmony_ci	if (!OCFS2_IS_VALID_DINODE(inode_fe)) {
271562306a36Sopenharmony_ci		mlog(ML_ERROR, "invalid inode %llu requested\n",
271662306a36Sopenharmony_ci		     (unsigned long long)blkno);
271762306a36Sopenharmony_ci		status = -EINVAL;
271862306a36Sopenharmony_ci		goto bail;
271962306a36Sopenharmony_ci	}
272062306a36Sopenharmony_ci
272162306a36Sopenharmony_ci	if (le16_to_cpu(inode_fe->i_suballoc_slot) != (u16)OCFS2_INVALID_SLOT &&
272262306a36Sopenharmony_ci	    (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) {
272362306a36Sopenharmony_ci		mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n",
272462306a36Sopenharmony_ci		     (unsigned long long)blkno,
272562306a36Sopenharmony_ci		     (u32)le16_to_cpu(inode_fe->i_suballoc_slot));
272662306a36Sopenharmony_ci		status = -EINVAL;
272762306a36Sopenharmony_ci		goto bail;
272862306a36Sopenharmony_ci	}
272962306a36Sopenharmony_ci
273062306a36Sopenharmony_ci	if (suballoc_slot)
273162306a36Sopenharmony_ci		*suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot);
273262306a36Sopenharmony_ci	if (suballoc_bit)
273362306a36Sopenharmony_ci		*suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit);
273462306a36Sopenharmony_ci	if (group_blkno)
273562306a36Sopenharmony_ci		*group_blkno = le64_to_cpu(inode_fe->i_suballoc_loc);
273662306a36Sopenharmony_ci
273762306a36Sopenharmony_cibail:
273862306a36Sopenharmony_ci	brelse(inode_bh);
273962306a36Sopenharmony_ci
274062306a36Sopenharmony_ci	if (status)
274162306a36Sopenharmony_ci		mlog_errno(status);
274262306a36Sopenharmony_ci	return status;
274362306a36Sopenharmony_ci}
274462306a36Sopenharmony_ci
274562306a36Sopenharmony_ci/*
274662306a36Sopenharmony_ci * test whether bit is SET in allocator bitmap or not.  on success, 0
274762306a36Sopenharmony_ci * is returned and *res is 1 for SET; 0 otherwise.  when fails, errno
274862306a36Sopenharmony_ci * is returned and *res is meaningless.  Call this after you have
274962306a36Sopenharmony_ci * cluster locked against suballoc, or you may get a result based on
275062306a36Sopenharmony_ci * non-up2date contents
275162306a36Sopenharmony_ci */
275262306a36Sopenharmony_cistatic int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
275362306a36Sopenharmony_ci				   struct inode *suballoc,
275462306a36Sopenharmony_ci				   struct buffer_head *alloc_bh,
275562306a36Sopenharmony_ci				   u64 group_blkno, u64 blkno,
275662306a36Sopenharmony_ci				   u16 bit, int *res)
275762306a36Sopenharmony_ci{
275862306a36Sopenharmony_ci	struct ocfs2_dinode *alloc_di;
275962306a36Sopenharmony_ci	struct ocfs2_group_desc *group;
276062306a36Sopenharmony_ci	struct buffer_head *group_bh = NULL;
276162306a36Sopenharmony_ci	u64 bg_blkno;
276262306a36Sopenharmony_ci	int status;
276362306a36Sopenharmony_ci
276462306a36Sopenharmony_ci	trace_ocfs2_test_suballoc_bit((unsigned long long)blkno,
276562306a36Sopenharmony_ci				      (unsigned int)bit);
276662306a36Sopenharmony_ci
276762306a36Sopenharmony_ci	alloc_di = (struct ocfs2_dinode *)alloc_bh->b_data;
276862306a36Sopenharmony_ci	if ((bit + 1) > ocfs2_bits_per_group(&alloc_di->id2.i_chain)) {
276962306a36Sopenharmony_ci		mlog(ML_ERROR, "suballoc bit %u out of range of %u\n",
277062306a36Sopenharmony_ci		     (unsigned int)bit,
277162306a36Sopenharmony_ci		     ocfs2_bits_per_group(&alloc_di->id2.i_chain));
277262306a36Sopenharmony_ci		status = -EINVAL;
277362306a36Sopenharmony_ci		goto bail;
277462306a36Sopenharmony_ci	}
277562306a36Sopenharmony_ci
277662306a36Sopenharmony_ci	bg_blkno = group_blkno ? group_blkno :
277762306a36Sopenharmony_ci		   ocfs2_which_suballoc_group(blkno, bit);
277862306a36Sopenharmony_ci	status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno,
277962306a36Sopenharmony_ci					     &group_bh);
278062306a36Sopenharmony_ci	if (status < 0) {
278162306a36Sopenharmony_ci		mlog(ML_ERROR, "read group %llu failed %d\n",
278262306a36Sopenharmony_ci		     (unsigned long long)bg_blkno, status);
278362306a36Sopenharmony_ci		goto bail;
278462306a36Sopenharmony_ci	}
278562306a36Sopenharmony_ci
278662306a36Sopenharmony_ci	group = (struct ocfs2_group_desc *) group_bh->b_data;
278762306a36Sopenharmony_ci	*res = ocfs2_test_bit(bit, (unsigned long *)group->bg_bitmap);
278862306a36Sopenharmony_ci
278962306a36Sopenharmony_cibail:
279062306a36Sopenharmony_ci	brelse(group_bh);
279162306a36Sopenharmony_ci
279262306a36Sopenharmony_ci	if (status)
279362306a36Sopenharmony_ci		mlog_errno(status);
279462306a36Sopenharmony_ci	return status;
279562306a36Sopenharmony_ci}
279662306a36Sopenharmony_ci
279762306a36Sopenharmony_ci/*
279862306a36Sopenharmony_ci * Test if the bit representing this inode (blkno) is set in the
279962306a36Sopenharmony_ci * suballocator.
280062306a36Sopenharmony_ci *
280162306a36Sopenharmony_ci * On success, 0 is returned and *res is 1 for SET; 0 otherwise.
280262306a36Sopenharmony_ci *
280362306a36Sopenharmony_ci * In the event of failure, a negative value is returned and *res is
280462306a36Sopenharmony_ci * meaningless.
280562306a36Sopenharmony_ci *
280662306a36Sopenharmony_ci * Callers must make sure to hold nfs_sync_lock to prevent
280762306a36Sopenharmony_ci * ocfs2_delete_inode() on another node from accessing the same
280862306a36Sopenharmony_ci * suballocator concurrently.
280962306a36Sopenharmony_ci */
281062306a36Sopenharmony_ciint ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
281162306a36Sopenharmony_ci{
281262306a36Sopenharmony_ci	int status;
281362306a36Sopenharmony_ci	u64 group_blkno = 0;
281462306a36Sopenharmony_ci	u16 suballoc_bit = 0, suballoc_slot = 0;
281562306a36Sopenharmony_ci	struct inode *inode_alloc_inode;
281662306a36Sopenharmony_ci	struct buffer_head *alloc_bh = NULL;
281762306a36Sopenharmony_ci
281862306a36Sopenharmony_ci	trace_ocfs2_test_inode_bit((unsigned long long)blkno);
281962306a36Sopenharmony_ci
282062306a36Sopenharmony_ci	status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
282162306a36Sopenharmony_ci					     &group_blkno, &suballoc_bit);
282262306a36Sopenharmony_ci	if (status < 0) {
282362306a36Sopenharmony_ci		mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status);
282462306a36Sopenharmony_ci		goto bail;
282562306a36Sopenharmony_ci	}
282662306a36Sopenharmony_ci
282762306a36Sopenharmony_ci	if (suballoc_slot == (u16)OCFS2_INVALID_SLOT)
282862306a36Sopenharmony_ci		inode_alloc_inode = ocfs2_get_system_file_inode(osb,
282962306a36Sopenharmony_ci			GLOBAL_INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
283062306a36Sopenharmony_ci	else
283162306a36Sopenharmony_ci		inode_alloc_inode = ocfs2_get_system_file_inode(osb,
283262306a36Sopenharmony_ci			INODE_ALLOC_SYSTEM_INODE, suballoc_slot);
283362306a36Sopenharmony_ci	if (!inode_alloc_inode) {
283462306a36Sopenharmony_ci		/* the error code could be inaccurate, but we are not able to
283562306a36Sopenharmony_ci		 * get the correct one. */
283662306a36Sopenharmony_ci		status = -EINVAL;
283762306a36Sopenharmony_ci		mlog(ML_ERROR, "unable to get alloc inode in slot %u\n",
283862306a36Sopenharmony_ci		     (u32)suballoc_slot);
283962306a36Sopenharmony_ci		goto bail;
284062306a36Sopenharmony_ci	}
284162306a36Sopenharmony_ci
284262306a36Sopenharmony_ci	inode_lock(inode_alloc_inode);
284362306a36Sopenharmony_ci	status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
284462306a36Sopenharmony_ci	if (status < 0) {
284562306a36Sopenharmony_ci		inode_unlock(inode_alloc_inode);
284662306a36Sopenharmony_ci		iput(inode_alloc_inode);
284762306a36Sopenharmony_ci		mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
284862306a36Sopenharmony_ci		     (u32)suballoc_slot, status);
284962306a36Sopenharmony_ci		goto bail;
285062306a36Sopenharmony_ci	}
285162306a36Sopenharmony_ci
285262306a36Sopenharmony_ci	status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
285362306a36Sopenharmony_ci					 group_blkno, blkno, suballoc_bit, res);
285462306a36Sopenharmony_ci	if (status < 0)
285562306a36Sopenharmony_ci		mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
285662306a36Sopenharmony_ci
285762306a36Sopenharmony_ci	ocfs2_inode_unlock(inode_alloc_inode, 0);
285862306a36Sopenharmony_ci	inode_unlock(inode_alloc_inode);
285962306a36Sopenharmony_ci
286062306a36Sopenharmony_ci	iput(inode_alloc_inode);
286162306a36Sopenharmony_ci	brelse(alloc_bh);
286262306a36Sopenharmony_cibail:
286362306a36Sopenharmony_ci	if (status)
286462306a36Sopenharmony_ci		mlog_errno(status);
286562306a36Sopenharmony_ci	return status;
286662306a36Sopenharmony_ci}
2867