162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * localalloc.c
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Node local data allocation
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
862306a36Sopenharmony_ci */
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <linux/fs.h>
1162306a36Sopenharmony_ci#include <linux/types.h>
1262306a36Sopenharmony_ci#include <linux/slab.h>
1362306a36Sopenharmony_ci#include <linux/highmem.h>
1462306a36Sopenharmony_ci#include <linux/bitops.h>
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_ci#include <cluster/masklog.h>
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ci#include "ocfs2.h"
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_ci#include "alloc.h"
2162306a36Sopenharmony_ci#include "blockcheck.h"
2262306a36Sopenharmony_ci#include "dlmglue.h"
2362306a36Sopenharmony_ci#include "inode.h"
2462306a36Sopenharmony_ci#include "journal.h"
2562306a36Sopenharmony_ci#include "localalloc.h"
2662306a36Sopenharmony_ci#include "suballoc.h"
2762306a36Sopenharmony_ci#include "super.h"
2862306a36Sopenharmony_ci#include "sysfile.h"
2962306a36Sopenharmony_ci#include "ocfs2_trace.h"
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_ci#include "buffer_head_io.h"
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci#define OCFS2_LOCAL_ALLOC(dinode)	(&((dinode)->id2.i_lab))
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_cistatic u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_cistatic int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
3862306a36Sopenharmony_ci					     struct ocfs2_dinode *alloc,
3962306a36Sopenharmony_ci					     u32 *numbits,
4062306a36Sopenharmony_ci					     struct ocfs2_alloc_reservation *resv);
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_cistatic void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_cistatic int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
4562306a36Sopenharmony_ci				    handle_t *handle,
4662306a36Sopenharmony_ci				    struct ocfs2_dinode *alloc,
4762306a36Sopenharmony_ci				    struct inode *main_bm_inode,
4862306a36Sopenharmony_ci				    struct buffer_head *main_bm_bh);
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_cistatic int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
5162306a36Sopenharmony_ci						struct ocfs2_alloc_context **ac,
5262306a36Sopenharmony_ci						struct inode **bitmap_inode,
5362306a36Sopenharmony_ci						struct buffer_head **bitmap_bh);
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_cistatic int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
5662306a36Sopenharmony_ci					handle_t *handle,
5762306a36Sopenharmony_ci					struct ocfs2_alloc_context *ac);
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_cistatic int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
6062306a36Sopenharmony_ci					  struct inode *local_alloc_inode);
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci/*
6362306a36Sopenharmony_ci * ocfs2_la_default_mb() - determine a default size, in megabytes of
6462306a36Sopenharmony_ci * the local alloc.
6562306a36Sopenharmony_ci *
6662306a36Sopenharmony_ci * Generally, we'd like to pick as large a local alloc as
6762306a36Sopenharmony_ci * possible. Performance on large workloads tends to scale
6862306a36Sopenharmony_ci * proportionally to la size. In addition to that, the reservations
6962306a36Sopenharmony_ci * code functions more efficiently as it can reserve more windows for
7062306a36Sopenharmony_ci * write.
7162306a36Sopenharmony_ci *
7262306a36Sopenharmony_ci * Some things work against us when trying to choose a large local alloc:
7362306a36Sopenharmony_ci *
7462306a36Sopenharmony_ci * - We need to ensure our sizing is picked to leave enough space in
7562306a36Sopenharmony_ci *   group descriptors for other allocations (such as block groups,
7662306a36Sopenharmony_ci *   etc). Picking default sizes which are a multiple of 4 could help
7762306a36Sopenharmony_ci *   - block groups are allocated in 2mb and 4mb chunks.
7862306a36Sopenharmony_ci *
7962306a36Sopenharmony_ci * - Likewise, we don't want to starve other nodes of bits on small
8062306a36Sopenharmony_ci *   file systems. This can easily be taken care of by limiting our
8162306a36Sopenharmony_ci *   default to a reasonable size (256M) on larger cluster sizes.
8262306a36Sopenharmony_ci *
8362306a36Sopenharmony_ci * - Some file systems can't support very large sizes - 4k and 8k in
8462306a36Sopenharmony_ci *   particular are limited to less than 128 and 256 megabytes respectively.
8562306a36Sopenharmony_ci *
8662306a36Sopenharmony_ci * The following reference table shows group descriptor and local
8762306a36Sopenharmony_ci * alloc maximums at various cluster sizes (4k blocksize)
8862306a36Sopenharmony_ci *
8962306a36Sopenharmony_ci * csize: 4K	group: 126M	la: 121M
9062306a36Sopenharmony_ci * csize: 8K	group: 252M	la: 243M
9162306a36Sopenharmony_ci * csize: 16K	group: 504M	la: 486M
9262306a36Sopenharmony_ci * csize: 32K	group: 1008M	la: 972M
9362306a36Sopenharmony_ci * csize: 64K	group: 2016M	la: 1944M
9462306a36Sopenharmony_ci * csize: 128K	group: 4032M	la: 3888M
9562306a36Sopenharmony_ci * csize: 256K	group: 8064M	la: 7776M
9662306a36Sopenharmony_ci * csize: 512K	group: 16128M	la: 15552M
9762306a36Sopenharmony_ci * csize: 1024K	group: 32256M	la: 31104M
9862306a36Sopenharmony_ci */
9962306a36Sopenharmony_ci#define	OCFS2_LA_MAX_DEFAULT_MB	256
10062306a36Sopenharmony_ci#define	OCFS2_LA_OLD_DEFAULT	8
10162306a36Sopenharmony_ciunsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
10262306a36Sopenharmony_ci{
10362306a36Sopenharmony_ci	unsigned int la_mb;
10462306a36Sopenharmony_ci	unsigned int gd_mb;
10562306a36Sopenharmony_ci	unsigned int la_max_mb;
10662306a36Sopenharmony_ci	unsigned int megs_per_slot;
10762306a36Sopenharmony_ci	struct super_block *sb = osb->sb;
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_ci	gd_mb = ocfs2_clusters_to_megabytes(osb->sb,
11062306a36Sopenharmony_ci		8 * ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat));
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci	/*
11362306a36Sopenharmony_ci	 * This takes care of files systems with very small group
11462306a36Sopenharmony_ci	 * descriptors - 512 byte blocksize at cluster sizes lower
11562306a36Sopenharmony_ci	 * than 16K and also 1k blocksize with 4k cluster size.
11662306a36Sopenharmony_ci	 */
11762306a36Sopenharmony_ci	if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192)
11862306a36Sopenharmony_ci	    || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096))
11962306a36Sopenharmony_ci		return OCFS2_LA_OLD_DEFAULT;
12062306a36Sopenharmony_ci
12162306a36Sopenharmony_ci	/*
12262306a36Sopenharmony_ci	 * Leave enough room for some block groups and make the final
12362306a36Sopenharmony_ci	 * value we work from a multiple of 4.
12462306a36Sopenharmony_ci	 */
12562306a36Sopenharmony_ci	gd_mb -= 16;
12662306a36Sopenharmony_ci	gd_mb &= 0xFFFFFFFB;
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci	la_mb = gd_mb;
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	/*
13162306a36Sopenharmony_ci	 * Keep window sizes down to a reasonable default
13262306a36Sopenharmony_ci	 */
13362306a36Sopenharmony_ci	if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) {
13462306a36Sopenharmony_ci		/*
13562306a36Sopenharmony_ci		 * Some clustersize / blocksize combinations will have
13662306a36Sopenharmony_ci		 * given us a larger than OCFS2_LA_MAX_DEFAULT_MB
13762306a36Sopenharmony_ci		 * default size, but get poor distribution when
13862306a36Sopenharmony_ci		 * limited to exactly 256 megabytes.
13962306a36Sopenharmony_ci		 *
14062306a36Sopenharmony_ci		 * As an example, 16K clustersize at 4K blocksize
14162306a36Sopenharmony_ci		 * gives us a cluster group size of 504M. Paring the
14262306a36Sopenharmony_ci		 * local alloc size down to 256 however, would give us
14362306a36Sopenharmony_ci		 * only one window and around 200MB left in the
14462306a36Sopenharmony_ci		 * cluster group. Instead, find the first size below
14562306a36Sopenharmony_ci		 * 256 which would give us an even distribution.
14662306a36Sopenharmony_ci		 *
14762306a36Sopenharmony_ci		 * Larger cluster group sizes actually work out pretty
14862306a36Sopenharmony_ci		 * well when pared to 256, so we don't have to do this
14962306a36Sopenharmony_ci		 * for any group that fits more than two
15062306a36Sopenharmony_ci		 * OCFS2_LA_MAX_DEFAULT_MB windows.
15162306a36Sopenharmony_ci		 */
15262306a36Sopenharmony_ci		if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB))
15362306a36Sopenharmony_ci			la_mb = 256;
15462306a36Sopenharmony_ci		else {
15562306a36Sopenharmony_ci			unsigned int gd_mult = gd_mb;
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_ci			while (gd_mult > 256)
15862306a36Sopenharmony_ci				gd_mult = gd_mult >> 1;
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci			la_mb = gd_mult;
16162306a36Sopenharmony_ci		}
16262306a36Sopenharmony_ci	}
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci	megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots;
16562306a36Sopenharmony_ci	megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot);
16662306a36Sopenharmony_ci	/* Too many nodes, too few disk clusters. */
16762306a36Sopenharmony_ci	if (megs_per_slot < la_mb)
16862306a36Sopenharmony_ci		la_mb = megs_per_slot;
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci	/* We can't store more bits than we can in a block. */
17162306a36Sopenharmony_ci	la_max_mb = ocfs2_clusters_to_megabytes(osb->sb,
17262306a36Sopenharmony_ci						ocfs2_local_alloc_size(sb) * 8);
17362306a36Sopenharmony_ci	if (la_mb > la_max_mb)
17462306a36Sopenharmony_ci		la_mb = la_max_mb;
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	return la_mb;
17762306a36Sopenharmony_ci}
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_civoid ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb)
18062306a36Sopenharmony_ci{
18162306a36Sopenharmony_ci	struct super_block *sb = osb->sb;
18262306a36Sopenharmony_ci	unsigned int la_default_mb = ocfs2_la_default_mb(osb);
18362306a36Sopenharmony_ci	unsigned int la_max_mb;
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci	la_max_mb = ocfs2_clusters_to_megabytes(sb,
18662306a36Sopenharmony_ci						ocfs2_local_alloc_size(sb) * 8);
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	trace_ocfs2_la_set_sizes(requested_mb, la_max_mb, la_default_mb);
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci	if (requested_mb == -1) {
19162306a36Sopenharmony_ci		/* No user request - use defaults */
19262306a36Sopenharmony_ci		osb->local_alloc_default_bits =
19362306a36Sopenharmony_ci			ocfs2_megabytes_to_clusters(sb, la_default_mb);
19462306a36Sopenharmony_ci	} else if (requested_mb > la_max_mb) {
19562306a36Sopenharmony_ci		/* Request is too big, we give the maximum available */
19662306a36Sopenharmony_ci		osb->local_alloc_default_bits =
19762306a36Sopenharmony_ci			ocfs2_megabytes_to_clusters(sb, la_max_mb);
19862306a36Sopenharmony_ci	} else {
19962306a36Sopenharmony_ci		osb->local_alloc_default_bits =
20062306a36Sopenharmony_ci			ocfs2_megabytes_to_clusters(sb, requested_mb);
20162306a36Sopenharmony_ci	}
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	osb->local_alloc_bits = osb->local_alloc_default_bits;
20462306a36Sopenharmony_ci}
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_cistatic inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
20762306a36Sopenharmony_ci{
20862306a36Sopenharmony_ci	return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
20962306a36Sopenharmony_ci		osb->local_alloc_state == OCFS2_LA_ENABLED);
21062306a36Sopenharmony_ci}
21162306a36Sopenharmony_ci
21262306a36Sopenharmony_civoid ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
21362306a36Sopenharmony_ci				      unsigned int num_clusters)
21462306a36Sopenharmony_ci{
21562306a36Sopenharmony_ci	spin_lock(&osb->osb_lock);
21662306a36Sopenharmony_ci	if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
21762306a36Sopenharmony_ci	    osb->local_alloc_state == OCFS2_LA_THROTTLED)
21862306a36Sopenharmony_ci		if (num_clusters >= osb->local_alloc_default_bits) {
21962306a36Sopenharmony_ci			cancel_delayed_work(&osb->la_enable_wq);
22062306a36Sopenharmony_ci			osb->local_alloc_state = OCFS2_LA_ENABLED;
22162306a36Sopenharmony_ci		}
22262306a36Sopenharmony_ci	spin_unlock(&osb->osb_lock);
22362306a36Sopenharmony_ci}
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_civoid ocfs2_la_enable_worker(struct work_struct *work)
22662306a36Sopenharmony_ci{
22762306a36Sopenharmony_ci	struct ocfs2_super *osb =
22862306a36Sopenharmony_ci		container_of(work, struct ocfs2_super,
22962306a36Sopenharmony_ci			     la_enable_wq.work);
23062306a36Sopenharmony_ci	spin_lock(&osb->osb_lock);
23162306a36Sopenharmony_ci	osb->local_alloc_state = OCFS2_LA_ENABLED;
23262306a36Sopenharmony_ci	spin_unlock(&osb->osb_lock);
23362306a36Sopenharmony_ci}
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci/*
23662306a36Sopenharmony_ci * Tell us whether a given allocation should use the local alloc
23762306a36Sopenharmony_ci * file. Otherwise, it has to go to the main bitmap.
23862306a36Sopenharmony_ci *
23962306a36Sopenharmony_ci * This function does semi-dirty reads of local alloc size and state!
24062306a36Sopenharmony_ci * This is ok however, as the values are re-checked once under mutex.
24162306a36Sopenharmony_ci */
24262306a36Sopenharmony_ciint ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
24362306a36Sopenharmony_ci{
24462306a36Sopenharmony_ci	int ret = 0;
24562306a36Sopenharmony_ci	int la_bits;
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci	spin_lock(&osb->osb_lock);
24862306a36Sopenharmony_ci	la_bits = osb->local_alloc_bits;
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_ci	if (!ocfs2_la_state_enabled(osb))
25162306a36Sopenharmony_ci		goto bail;
25262306a36Sopenharmony_ci
25362306a36Sopenharmony_ci	/* la_bits should be at least twice the size (in clusters) of
25462306a36Sopenharmony_ci	 * a new block group. We want to be sure block group
25562306a36Sopenharmony_ci	 * allocations go through the local alloc, so allow an
25662306a36Sopenharmony_ci	 * allocation to take up to half the bitmap. */
25762306a36Sopenharmony_ci	if (bits > (la_bits / 2))
25862306a36Sopenharmony_ci		goto bail;
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci	ret = 1;
26162306a36Sopenharmony_cibail:
26262306a36Sopenharmony_ci	trace_ocfs2_alloc_should_use_local(
26362306a36Sopenharmony_ci	     (unsigned long long)bits, osb->local_alloc_state, la_bits, ret);
26462306a36Sopenharmony_ci	spin_unlock(&osb->osb_lock);
26562306a36Sopenharmony_ci	return ret;
26662306a36Sopenharmony_ci}
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ciint ocfs2_load_local_alloc(struct ocfs2_super *osb)
26962306a36Sopenharmony_ci{
27062306a36Sopenharmony_ci	int status = 0;
27162306a36Sopenharmony_ci	struct ocfs2_dinode *alloc = NULL;
27262306a36Sopenharmony_ci	struct buffer_head *alloc_bh = NULL;
27362306a36Sopenharmony_ci	u32 num_used;
27462306a36Sopenharmony_ci	struct inode *inode = NULL;
27562306a36Sopenharmony_ci	struct ocfs2_local_alloc *la;
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci	if (osb->local_alloc_bits == 0)
27862306a36Sopenharmony_ci		goto bail;
27962306a36Sopenharmony_ci
28062306a36Sopenharmony_ci	if (osb->local_alloc_bits >= osb->bitmap_cpg) {
28162306a36Sopenharmony_ci		mlog(ML_NOTICE, "Requested local alloc window %d is larger "
28262306a36Sopenharmony_ci		     "than max possible %u. Using defaults.\n",
28362306a36Sopenharmony_ci		     osb->local_alloc_bits, (osb->bitmap_cpg - 1));
28462306a36Sopenharmony_ci		osb->local_alloc_bits =
28562306a36Sopenharmony_ci			ocfs2_megabytes_to_clusters(osb->sb,
28662306a36Sopenharmony_ci						    ocfs2_la_default_mb(osb));
28762306a36Sopenharmony_ci	}
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci	/* read the alloc off disk */
29062306a36Sopenharmony_ci	inode = ocfs2_get_system_file_inode(osb, LOCAL_ALLOC_SYSTEM_INODE,
29162306a36Sopenharmony_ci					    osb->slot_num);
29262306a36Sopenharmony_ci	if (!inode) {
29362306a36Sopenharmony_ci		status = -EINVAL;
29462306a36Sopenharmony_ci		mlog_errno(status);
29562306a36Sopenharmony_ci		goto bail;
29662306a36Sopenharmony_ci	}
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci	status = ocfs2_read_inode_block_full(inode, &alloc_bh,
29962306a36Sopenharmony_ci					     OCFS2_BH_IGNORE_CACHE);
30062306a36Sopenharmony_ci	if (status < 0) {
30162306a36Sopenharmony_ci		mlog_errno(status);
30262306a36Sopenharmony_ci		goto bail;
30362306a36Sopenharmony_ci	}
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_ci	alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
30662306a36Sopenharmony_ci	la = OCFS2_LOCAL_ALLOC(alloc);
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_ci	if (!(le32_to_cpu(alloc->i_flags) &
30962306a36Sopenharmony_ci	    (OCFS2_LOCAL_ALLOC_FL|OCFS2_BITMAP_FL))) {
31062306a36Sopenharmony_ci		mlog(ML_ERROR, "Invalid local alloc inode, %llu\n",
31162306a36Sopenharmony_ci		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
31262306a36Sopenharmony_ci		status = -EINVAL;
31362306a36Sopenharmony_ci		goto bail;
31462306a36Sopenharmony_ci	}
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	if ((la->la_size == 0) ||
31762306a36Sopenharmony_ci	    (le16_to_cpu(la->la_size) > ocfs2_local_alloc_size(inode->i_sb))) {
31862306a36Sopenharmony_ci		mlog(ML_ERROR, "Local alloc size is invalid (la_size = %u)\n",
31962306a36Sopenharmony_ci		     le16_to_cpu(la->la_size));
32062306a36Sopenharmony_ci		status = -EINVAL;
32162306a36Sopenharmony_ci		goto bail;
32262306a36Sopenharmony_ci	}
32362306a36Sopenharmony_ci
32462306a36Sopenharmony_ci	/* do a little verification. */
32562306a36Sopenharmony_ci	num_used = ocfs2_local_alloc_count_bits(alloc);
32662306a36Sopenharmony_ci
32762306a36Sopenharmony_ci	/* hopefully the local alloc has always been recovered before
32862306a36Sopenharmony_ci	 * we load it. */
32962306a36Sopenharmony_ci	if (num_used
33062306a36Sopenharmony_ci	    || alloc->id1.bitmap1.i_used
33162306a36Sopenharmony_ci	    || alloc->id1.bitmap1.i_total
33262306a36Sopenharmony_ci	    || la->la_bm_off) {
33362306a36Sopenharmony_ci		mlog(ML_ERROR, "inconsistent detected, clean journal with"
33462306a36Sopenharmony_ci		     " unrecovered local alloc, please run fsck.ocfs2!\n"
33562306a36Sopenharmony_ci		     "found = %u, set = %u, taken = %u, off = %u\n",
33662306a36Sopenharmony_ci		     num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
33762306a36Sopenharmony_ci		     le32_to_cpu(alloc->id1.bitmap1.i_total),
33862306a36Sopenharmony_ci		     OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci		status = -EINVAL;
34162306a36Sopenharmony_ci		goto bail;
34262306a36Sopenharmony_ci	}
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ci	osb->local_alloc_bh = alloc_bh;
34562306a36Sopenharmony_ci	osb->local_alloc_state = OCFS2_LA_ENABLED;
34662306a36Sopenharmony_ci
34762306a36Sopenharmony_cibail:
34862306a36Sopenharmony_ci	if (status < 0)
34962306a36Sopenharmony_ci		brelse(alloc_bh);
35062306a36Sopenharmony_ci	iput(inode);
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_ci	trace_ocfs2_load_local_alloc(osb->local_alloc_bits);
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ci	if (status)
35562306a36Sopenharmony_ci		mlog_errno(status);
35662306a36Sopenharmony_ci	return status;
35762306a36Sopenharmony_ci}
35862306a36Sopenharmony_ci
35962306a36Sopenharmony_ci/*
36062306a36Sopenharmony_ci * return any unused bits to the bitmap and write out a clean
36162306a36Sopenharmony_ci * local_alloc.
36262306a36Sopenharmony_ci *
36362306a36Sopenharmony_ci * local_alloc_bh is optional. If not passed, we will simply use the
36462306a36Sopenharmony_ci * one off osb. If you do pass it however, be warned that it *will* be
36562306a36Sopenharmony_ci * returned brelse'd and NULL'd out.*/
36662306a36Sopenharmony_civoid ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
36762306a36Sopenharmony_ci{
36862306a36Sopenharmony_ci	int status;
36962306a36Sopenharmony_ci	handle_t *handle;
37062306a36Sopenharmony_ci	struct inode *local_alloc_inode = NULL;
37162306a36Sopenharmony_ci	struct buffer_head *bh = NULL;
37262306a36Sopenharmony_ci	struct buffer_head *main_bm_bh = NULL;
37362306a36Sopenharmony_ci	struct inode *main_bm_inode = NULL;
37462306a36Sopenharmony_ci	struct ocfs2_dinode *alloc_copy = NULL;
37562306a36Sopenharmony_ci	struct ocfs2_dinode *alloc = NULL;
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_ci	cancel_delayed_work(&osb->la_enable_wq);
37862306a36Sopenharmony_ci	if (osb->ocfs2_wq)
37962306a36Sopenharmony_ci		flush_workqueue(osb->ocfs2_wq);
38062306a36Sopenharmony_ci
38162306a36Sopenharmony_ci	if (osb->local_alloc_state == OCFS2_LA_UNUSED)
38262306a36Sopenharmony_ci		goto out;
38362306a36Sopenharmony_ci
38462306a36Sopenharmony_ci	local_alloc_inode =
38562306a36Sopenharmony_ci		ocfs2_get_system_file_inode(osb,
38662306a36Sopenharmony_ci					    LOCAL_ALLOC_SYSTEM_INODE,
38762306a36Sopenharmony_ci					    osb->slot_num);
38862306a36Sopenharmony_ci	if (!local_alloc_inode) {
38962306a36Sopenharmony_ci		status = -ENOENT;
39062306a36Sopenharmony_ci		mlog_errno(status);
39162306a36Sopenharmony_ci		goto out;
39262306a36Sopenharmony_ci	}
39362306a36Sopenharmony_ci
39462306a36Sopenharmony_ci	osb->local_alloc_state = OCFS2_LA_DISABLED;
39562306a36Sopenharmony_ci
39662306a36Sopenharmony_ci	ocfs2_resmap_uninit(&osb->osb_la_resmap);
39762306a36Sopenharmony_ci
39862306a36Sopenharmony_ci	main_bm_inode = ocfs2_get_system_file_inode(osb,
39962306a36Sopenharmony_ci						    GLOBAL_BITMAP_SYSTEM_INODE,
40062306a36Sopenharmony_ci						    OCFS2_INVALID_SLOT);
40162306a36Sopenharmony_ci	if (!main_bm_inode) {
40262306a36Sopenharmony_ci		status = -EINVAL;
40362306a36Sopenharmony_ci		mlog_errno(status);
40462306a36Sopenharmony_ci		goto out;
40562306a36Sopenharmony_ci	}
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_ci	inode_lock(main_bm_inode);
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_ci	status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
41062306a36Sopenharmony_ci	if (status < 0) {
41162306a36Sopenharmony_ci		mlog_errno(status);
41262306a36Sopenharmony_ci		goto out_mutex;
41362306a36Sopenharmony_ci	}
41462306a36Sopenharmony_ci
41562306a36Sopenharmony_ci	/* WINDOW_MOVE_CREDITS is a bit heavy... */
41662306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
41762306a36Sopenharmony_ci	if (IS_ERR(handle)) {
41862306a36Sopenharmony_ci		mlog_errno(PTR_ERR(handle));
41962306a36Sopenharmony_ci		handle = NULL;
42062306a36Sopenharmony_ci		goto out_unlock;
42162306a36Sopenharmony_ci	}
42262306a36Sopenharmony_ci
42362306a36Sopenharmony_ci	bh = osb->local_alloc_bh;
42462306a36Sopenharmony_ci	alloc = (struct ocfs2_dinode *) bh->b_data;
42562306a36Sopenharmony_ci
42662306a36Sopenharmony_ci	alloc_copy = kmemdup(alloc, bh->b_size, GFP_NOFS);
42762306a36Sopenharmony_ci	if (!alloc_copy) {
42862306a36Sopenharmony_ci		status = -ENOMEM;
42962306a36Sopenharmony_ci		goto out_commit;
43062306a36Sopenharmony_ci	}
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode),
43362306a36Sopenharmony_ci					 bh, OCFS2_JOURNAL_ACCESS_WRITE);
43462306a36Sopenharmony_ci	if (status < 0) {
43562306a36Sopenharmony_ci		mlog_errno(status);
43662306a36Sopenharmony_ci		goto out_commit;
43762306a36Sopenharmony_ci	}
43862306a36Sopenharmony_ci
43962306a36Sopenharmony_ci	ocfs2_clear_local_alloc(alloc);
44062306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, bh);
44162306a36Sopenharmony_ci
44262306a36Sopenharmony_ci	brelse(bh);
44362306a36Sopenharmony_ci	osb->local_alloc_bh = NULL;
44462306a36Sopenharmony_ci	osb->local_alloc_state = OCFS2_LA_UNUSED;
44562306a36Sopenharmony_ci
44662306a36Sopenharmony_ci	status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
44762306a36Sopenharmony_ci					  main_bm_inode, main_bm_bh);
44862306a36Sopenharmony_ci	if (status < 0)
44962306a36Sopenharmony_ci		mlog_errno(status);
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_ciout_commit:
45262306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
45362306a36Sopenharmony_ci
45462306a36Sopenharmony_ciout_unlock:
45562306a36Sopenharmony_ci	brelse(main_bm_bh);
45662306a36Sopenharmony_ci
45762306a36Sopenharmony_ci	ocfs2_inode_unlock(main_bm_inode, 1);
45862306a36Sopenharmony_ci
45962306a36Sopenharmony_ciout_mutex:
46062306a36Sopenharmony_ci	inode_unlock(main_bm_inode);
46162306a36Sopenharmony_ci	iput(main_bm_inode);
46262306a36Sopenharmony_ci
46362306a36Sopenharmony_ciout:
46462306a36Sopenharmony_ci	iput(local_alloc_inode);
46562306a36Sopenharmony_ci
46662306a36Sopenharmony_ci	kfree(alloc_copy);
46762306a36Sopenharmony_ci}
46862306a36Sopenharmony_ci
46962306a36Sopenharmony_ci/*
47062306a36Sopenharmony_ci * We want to free the bitmap bits outside of any recovery context as
47162306a36Sopenharmony_ci * we'll need a cluster lock to do so, but we must clear the local
47262306a36Sopenharmony_ci * alloc before giving up the recovered nodes journal. To solve this,
47362306a36Sopenharmony_ci * we kmalloc a copy of the local alloc before it's change for the
47462306a36Sopenharmony_ci * caller to process with ocfs2_complete_local_alloc_recovery
47562306a36Sopenharmony_ci */
47662306a36Sopenharmony_ciint ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
47762306a36Sopenharmony_ci				     int slot_num,
47862306a36Sopenharmony_ci				     struct ocfs2_dinode **alloc_copy)
47962306a36Sopenharmony_ci{
48062306a36Sopenharmony_ci	int status = 0;
48162306a36Sopenharmony_ci	struct buffer_head *alloc_bh = NULL;
48262306a36Sopenharmony_ci	struct inode *inode = NULL;
48362306a36Sopenharmony_ci	struct ocfs2_dinode *alloc;
48462306a36Sopenharmony_ci
48562306a36Sopenharmony_ci	trace_ocfs2_begin_local_alloc_recovery(slot_num);
48662306a36Sopenharmony_ci
48762306a36Sopenharmony_ci	*alloc_copy = NULL;
48862306a36Sopenharmony_ci
48962306a36Sopenharmony_ci	inode = ocfs2_get_system_file_inode(osb,
49062306a36Sopenharmony_ci					    LOCAL_ALLOC_SYSTEM_INODE,
49162306a36Sopenharmony_ci					    slot_num);
49262306a36Sopenharmony_ci	if (!inode) {
49362306a36Sopenharmony_ci		status = -EINVAL;
49462306a36Sopenharmony_ci		mlog_errno(status);
49562306a36Sopenharmony_ci		goto bail;
49662306a36Sopenharmony_ci	}
49762306a36Sopenharmony_ci
49862306a36Sopenharmony_ci	inode_lock(inode);
49962306a36Sopenharmony_ci
50062306a36Sopenharmony_ci	status = ocfs2_read_inode_block_full(inode, &alloc_bh,
50162306a36Sopenharmony_ci					     OCFS2_BH_IGNORE_CACHE);
50262306a36Sopenharmony_ci	if (status < 0) {
50362306a36Sopenharmony_ci		mlog_errno(status);
50462306a36Sopenharmony_ci		goto bail;
50562306a36Sopenharmony_ci	}
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci	*alloc_copy = kmalloc(alloc_bh->b_size, GFP_KERNEL);
50862306a36Sopenharmony_ci	if (!(*alloc_copy)) {
50962306a36Sopenharmony_ci		status = -ENOMEM;
51062306a36Sopenharmony_ci		goto bail;
51162306a36Sopenharmony_ci	}
51262306a36Sopenharmony_ci	memcpy((*alloc_copy), alloc_bh->b_data, alloc_bh->b_size);
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_ci	alloc = (struct ocfs2_dinode *) alloc_bh->b_data;
51562306a36Sopenharmony_ci	ocfs2_clear_local_alloc(alloc);
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci	ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check);
51862306a36Sopenharmony_ci	status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode));
51962306a36Sopenharmony_ci	if (status < 0)
52062306a36Sopenharmony_ci		mlog_errno(status);
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_cibail:
52362306a36Sopenharmony_ci	if (status < 0) {
52462306a36Sopenharmony_ci		kfree(*alloc_copy);
52562306a36Sopenharmony_ci		*alloc_copy = NULL;
52662306a36Sopenharmony_ci	}
52762306a36Sopenharmony_ci
52862306a36Sopenharmony_ci	brelse(alloc_bh);
52962306a36Sopenharmony_ci
53062306a36Sopenharmony_ci	if (inode) {
53162306a36Sopenharmony_ci		inode_unlock(inode);
53262306a36Sopenharmony_ci		iput(inode);
53362306a36Sopenharmony_ci	}
53462306a36Sopenharmony_ci
53562306a36Sopenharmony_ci	if (status)
53662306a36Sopenharmony_ci		mlog_errno(status);
53762306a36Sopenharmony_ci	return status;
53862306a36Sopenharmony_ci}
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci/*
54162306a36Sopenharmony_ci * Step 2: By now, we've completed the journal recovery, we've stamped
54262306a36Sopenharmony_ci * a clean local alloc on disk and dropped the node out of the
54362306a36Sopenharmony_ci * recovery map. Dlm locks will no longer stall, so lets clear out the
54462306a36Sopenharmony_ci * main bitmap.
54562306a36Sopenharmony_ci */
54662306a36Sopenharmony_ciint ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
54762306a36Sopenharmony_ci					struct ocfs2_dinode *alloc)
54862306a36Sopenharmony_ci{
54962306a36Sopenharmony_ci	int status;
55062306a36Sopenharmony_ci	handle_t *handle;
55162306a36Sopenharmony_ci	struct buffer_head *main_bm_bh = NULL;
55262306a36Sopenharmony_ci	struct inode *main_bm_inode;
55362306a36Sopenharmony_ci
55462306a36Sopenharmony_ci	main_bm_inode = ocfs2_get_system_file_inode(osb,
55562306a36Sopenharmony_ci						    GLOBAL_BITMAP_SYSTEM_INODE,
55662306a36Sopenharmony_ci						    OCFS2_INVALID_SLOT);
55762306a36Sopenharmony_ci	if (!main_bm_inode) {
55862306a36Sopenharmony_ci		status = -EINVAL;
55962306a36Sopenharmony_ci		mlog_errno(status);
56062306a36Sopenharmony_ci		goto out;
56162306a36Sopenharmony_ci	}
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ci	inode_lock(main_bm_inode);
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci	status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
56662306a36Sopenharmony_ci	if (status < 0) {
56762306a36Sopenharmony_ci		mlog_errno(status);
56862306a36Sopenharmony_ci		goto out_mutex;
56962306a36Sopenharmony_ci	}
57062306a36Sopenharmony_ci
57162306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
57262306a36Sopenharmony_ci	if (IS_ERR(handle)) {
57362306a36Sopenharmony_ci		status = PTR_ERR(handle);
57462306a36Sopenharmony_ci		handle = NULL;
57562306a36Sopenharmony_ci		mlog_errno(status);
57662306a36Sopenharmony_ci		goto out_unlock;
57762306a36Sopenharmony_ci	}
57862306a36Sopenharmony_ci
57962306a36Sopenharmony_ci	/* we want the bitmap change to be recorded on disk asap */
58062306a36Sopenharmony_ci	handle->h_sync = 1;
58162306a36Sopenharmony_ci
58262306a36Sopenharmony_ci	status = ocfs2_sync_local_to_main(osb, handle, alloc,
58362306a36Sopenharmony_ci					  main_bm_inode, main_bm_bh);
58462306a36Sopenharmony_ci	if (status < 0)
58562306a36Sopenharmony_ci		mlog_errno(status);
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_ci	ocfs2_commit_trans(osb, handle);
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ciout_unlock:
59062306a36Sopenharmony_ci	ocfs2_inode_unlock(main_bm_inode, 1);
59162306a36Sopenharmony_ci
59262306a36Sopenharmony_ciout_mutex:
59362306a36Sopenharmony_ci	inode_unlock(main_bm_inode);
59462306a36Sopenharmony_ci
59562306a36Sopenharmony_ci	brelse(main_bm_bh);
59662306a36Sopenharmony_ci
59762306a36Sopenharmony_ci	iput(main_bm_inode);
59862306a36Sopenharmony_ci
59962306a36Sopenharmony_ciout:
60062306a36Sopenharmony_ci	if (!status)
60162306a36Sopenharmony_ci		ocfs2_init_steal_slots(osb);
60262306a36Sopenharmony_ci	if (status)
60362306a36Sopenharmony_ci		mlog_errno(status);
60462306a36Sopenharmony_ci	return status;
60562306a36Sopenharmony_ci}
60662306a36Sopenharmony_ci
60762306a36Sopenharmony_ci/*
60862306a36Sopenharmony_ci * make sure we've got at least bits_wanted contiguous bits in the
60962306a36Sopenharmony_ci * local alloc. You lose them when you drop i_rwsem.
61062306a36Sopenharmony_ci *
61162306a36Sopenharmony_ci * We will add ourselves to the transaction passed in, but may start
61262306a36Sopenharmony_ci * our own in order to shift windows.
61362306a36Sopenharmony_ci */
61462306a36Sopenharmony_ciint ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
61562306a36Sopenharmony_ci				   u32 bits_wanted,
61662306a36Sopenharmony_ci				   struct ocfs2_alloc_context *ac)
61762306a36Sopenharmony_ci{
61862306a36Sopenharmony_ci	int status;
61962306a36Sopenharmony_ci	struct ocfs2_dinode *alloc;
62062306a36Sopenharmony_ci	struct inode *local_alloc_inode;
62162306a36Sopenharmony_ci	unsigned int free_bits;
62262306a36Sopenharmony_ci
62362306a36Sopenharmony_ci	BUG_ON(!ac);
62462306a36Sopenharmony_ci
62562306a36Sopenharmony_ci	local_alloc_inode =
62662306a36Sopenharmony_ci		ocfs2_get_system_file_inode(osb,
62762306a36Sopenharmony_ci					    LOCAL_ALLOC_SYSTEM_INODE,
62862306a36Sopenharmony_ci					    osb->slot_num);
62962306a36Sopenharmony_ci	if (!local_alloc_inode) {
63062306a36Sopenharmony_ci		status = -ENOENT;
63162306a36Sopenharmony_ci		mlog_errno(status);
63262306a36Sopenharmony_ci		goto bail;
63362306a36Sopenharmony_ci	}
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_ci	inode_lock(local_alloc_inode);
63662306a36Sopenharmony_ci
63762306a36Sopenharmony_ci	/*
63862306a36Sopenharmony_ci	 * We must double check state and allocator bits because
63962306a36Sopenharmony_ci	 * another process may have changed them while holding i_rwsem.
64062306a36Sopenharmony_ci	 */
64162306a36Sopenharmony_ci	spin_lock(&osb->osb_lock);
64262306a36Sopenharmony_ci	if (!ocfs2_la_state_enabled(osb) ||
64362306a36Sopenharmony_ci	    (bits_wanted > osb->local_alloc_bits)) {
64462306a36Sopenharmony_ci		spin_unlock(&osb->osb_lock);
64562306a36Sopenharmony_ci		status = -ENOSPC;
64662306a36Sopenharmony_ci		goto bail;
64762306a36Sopenharmony_ci	}
64862306a36Sopenharmony_ci	spin_unlock(&osb->osb_lock);
64962306a36Sopenharmony_ci
65062306a36Sopenharmony_ci	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
65162306a36Sopenharmony_ci
65262306a36Sopenharmony_ci#ifdef CONFIG_OCFS2_DEBUG_FS
65362306a36Sopenharmony_ci	if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
65462306a36Sopenharmony_ci	    ocfs2_local_alloc_count_bits(alloc)) {
65562306a36Sopenharmony_ci		status = ocfs2_error(osb->sb, "local alloc inode %llu says it has %u used bits, but a count shows %u\n",
65662306a36Sopenharmony_ci				(unsigned long long)le64_to_cpu(alloc->i_blkno),
65762306a36Sopenharmony_ci				le32_to_cpu(alloc->id1.bitmap1.i_used),
65862306a36Sopenharmony_ci				ocfs2_local_alloc_count_bits(alloc));
65962306a36Sopenharmony_ci		goto bail;
66062306a36Sopenharmony_ci	}
66162306a36Sopenharmony_ci#endif
66262306a36Sopenharmony_ci
66362306a36Sopenharmony_ci	free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
66462306a36Sopenharmony_ci		le32_to_cpu(alloc->id1.bitmap1.i_used);
66562306a36Sopenharmony_ci	if (bits_wanted > free_bits) {
66662306a36Sopenharmony_ci		/* uhoh, window change time. */
66762306a36Sopenharmony_ci		status =
66862306a36Sopenharmony_ci			ocfs2_local_alloc_slide_window(osb, local_alloc_inode);
66962306a36Sopenharmony_ci		if (status < 0) {
67062306a36Sopenharmony_ci			if (status != -ENOSPC)
67162306a36Sopenharmony_ci				mlog_errno(status);
67262306a36Sopenharmony_ci			goto bail;
67362306a36Sopenharmony_ci		}
67462306a36Sopenharmony_ci
67562306a36Sopenharmony_ci		/*
67662306a36Sopenharmony_ci		 * Under certain conditions, the window slide code
67762306a36Sopenharmony_ci		 * might have reduced the number of bits available or
67862306a36Sopenharmony_ci		 * disabled the local alloc entirely. Re-check
67962306a36Sopenharmony_ci		 * here and return -ENOSPC if necessary.
68062306a36Sopenharmony_ci		 */
68162306a36Sopenharmony_ci		status = -ENOSPC;
68262306a36Sopenharmony_ci		if (!ocfs2_la_state_enabled(osb))
68362306a36Sopenharmony_ci			goto bail;
68462306a36Sopenharmony_ci
68562306a36Sopenharmony_ci		free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
68662306a36Sopenharmony_ci			le32_to_cpu(alloc->id1.bitmap1.i_used);
68762306a36Sopenharmony_ci		if (bits_wanted > free_bits)
68862306a36Sopenharmony_ci			goto bail;
68962306a36Sopenharmony_ci	}
69062306a36Sopenharmony_ci
69162306a36Sopenharmony_ci	ac->ac_inode = local_alloc_inode;
69262306a36Sopenharmony_ci	/* We should never use localalloc from another slot */
69362306a36Sopenharmony_ci	ac->ac_alloc_slot = osb->slot_num;
69462306a36Sopenharmony_ci	ac->ac_which = OCFS2_AC_USE_LOCAL;
69562306a36Sopenharmony_ci	get_bh(osb->local_alloc_bh);
69662306a36Sopenharmony_ci	ac->ac_bh = osb->local_alloc_bh;
69762306a36Sopenharmony_ci	status = 0;
69862306a36Sopenharmony_cibail:
69962306a36Sopenharmony_ci	if (status < 0 && local_alloc_inode) {
70062306a36Sopenharmony_ci		inode_unlock(local_alloc_inode);
70162306a36Sopenharmony_ci		iput(local_alloc_inode);
70262306a36Sopenharmony_ci	}
70362306a36Sopenharmony_ci
70462306a36Sopenharmony_ci	trace_ocfs2_reserve_local_alloc_bits(
70562306a36Sopenharmony_ci		(unsigned long long)ac->ac_max_block,
70662306a36Sopenharmony_ci		bits_wanted, osb->slot_num, status);
70762306a36Sopenharmony_ci
70862306a36Sopenharmony_ci	if (status)
70962306a36Sopenharmony_ci		mlog_errno(status);
71062306a36Sopenharmony_ci	return status;
71162306a36Sopenharmony_ci}
71262306a36Sopenharmony_ci
71362306a36Sopenharmony_ciint ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
71462306a36Sopenharmony_ci				 handle_t *handle,
71562306a36Sopenharmony_ci				 struct ocfs2_alloc_context *ac,
71662306a36Sopenharmony_ci				 u32 bits_wanted,
71762306a36Sopenharmony_ci				 u32 *bit_off,
71862306a36Sopenharmony_ci				 u32 *num_bits)
71962306a36Sopenharmony_ci{
72062306a36Sopenharmony_ci	int status, start;
72162306a36Sopenharmony_ci	struct inode *local_alloc_inode;
72262306a36Sopenharmony_ci	void *bitmap;
72362306a36Sopenharmony_ci	struct ocfs2_dinode *alloc;
72462306a36Sopenharmony_ci	struct ocfs2_local_alloc *la;
72562306a36Sopenharmony_ci
72662306a36Sopenharmony_ci	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
72762306a36Sopenharmony_ci
72862306a36Sopenharmony_ci	local_alloc_inode = ac->ac_inode;
72962306a36Sopenharmony_ci	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
73062306a36Sopenharmony_ci	la = OCFS2_LOCAL_ALLOC(alloc);
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci	start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted,
73362306a36Sopenharmony_ci						  ac->ac_resv);
73462306a36Sopenharmony_ci	if (start == -1) {
73562306a36Sopenharmony_ci		/* TODO: Shouldn't we just BUG here? */
73662306a36Sopenharmony_ci		status = -ENOSPC;
73762306a36Sopenharmony_ci		mlog_errno(status);
73862306a36Sopenharmony_ci		goto bail;
73962306a36Sopenharmony_ci	}
74062306a36Sopenharmony_ci
74162306a36Sopenharmony_ci	bitmap = la->la_bitmap;
74262306a36Sopenharmony_ci	*bit_off = le32_to_cpu(la->la_bm_off) + start;
74362306a36Sopenharmony_ci	*num_bits = bits_wanted;
74462306a36Sopenharmony_ci
74562306a36Sopenharmony_ci	status = ocfs2_journal_access_di(handle,
74662306a36Sopenharmony_ci					 INODE_CACHE(local_alloc_inode),
74762306a36Sopenharmony_ci					 osb->local_alloc_bh,
74862306a36Sopenharmony_ci					 OCFS2_JOURNAL_ACCESS_WRITE);
74962306a36Sopenharmony_ci	if (status < 0) {
75062306a36Sopenharmony_ci		mlog_errno(status);
75162306a36Sopenharmony_ci		goto bail;
75262306a36Sopenharmony_ci	}
75362306a36Sopenharmony_ci
75462306a36Sopenharmony_ci	ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start,
75562306a36Sopenharmony_ci				  bits_wanted);
75662306a36Sopenharmony_ci
75762306a36Sopenharmony_ci	while(bits_wanted--)
75862306a36Sopenharmony_ci		ocfs2_set_bit(start++, bitmap);
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_ci	le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
76162306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, osb->local_alloc_bh);
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_cibail:
76462306a36Sopenharmony_ci	if (status)
76562306a36Sopenharmony_ci		mlog_errno(status);
76662306a36Sopenharmony_ci	return status;
76762306a36Sopenharmony_ci}
76862306a36Sopenharmony_ci
76962306a36Sopenharmony_ciint ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
77062306a36Sopenharmony_ci				handle_t *handle,
77162306a36Sopenharmony_ci				struct ocfs2_alloc_context *ac,
77262306a36Sopenharmony_ci				u32 bit_off,
77362306a36Sopenharmony_ci				u32 num_bits)
77462306a36Sopenharmony_ci{
77562306a36Sopenharmony_ci	int status, start;
77662306a36Sopenharmony_ci	u32 clear_bits;
77762306a36Sopenharmony_ci	struct inode *local_alloc_inode;
77862306a36Sopenharmony_ci	void *bitmap;
77962306a36Sopenharmony_ci	struct ocfs2_dinode *alloc;
78062306a36Sopenharmony_ci	struct ocfs2_local_alloc *la;
78162306a36Sopenharmony_ci
78262306a36Sopenharmony_ci	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
78362306a36Sopenharmony_ci
78462306a36Sopenharmony_ci	local_alloc_inode = ac->ac_inode;
78562306a36Sopenharmony_ci	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
78662306a36Sopenharmony_ci	la = OCFS2_LOCAL_ALLOC(alloc);
78762306a36Sopenharmony_ci
78862306a36Sopenharmony_ci	bitmap = la->la_bitmap;
78962306a36Sopenharmony_ci	start = bit_off - le32_to_cpu(la->la_bm_off);
79062306a36Sopenharmony_ci	clear_bits = num_bits;
79162306a36Sopenharmony_ci
79262306a36Sopenharmony_ci	status = ocfs2_journal_access_di(handle,
79362306a36Sopenharmony_ci			INODE_CACHE(local_alloc_inode),
79462306a36Sopenharmony_ci			osb->local_alloc_bh,
79562306a36Sopenharmony_ci			OCFS2_JOURNAL_ACCESS_WRITE);
79662306a36Sopenharmony_ci	if (status < 0) {
79762306a36Sopenharmony_ci		mlog_errno(status);
79862306a36Sopenharmony_ci		goto bail;
79962306a36Sopenharmony_ci	}
80062306a36Sopenharmony_ci
80162306a36Sopenharmony_ci	while (clear_bits--)
80262306a36Sopenharmony_ci		ocfs2_clear_bit(start++, bitmap);
80362306a36Sopenharmony_ci
80462306a36Sopenharmony_ci	le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits);
80562306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, osb->local_alloc_bh);
80662306a36Sopenharmony_ci
80762306a36Sopenharmony_cibail:
80862306a36Sopenharmony_ci	return status;
80962306a36Sopenharmony_ci}
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_cistatic u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
81262306a36Sopenharmony_ci{
81362306a36Sopenharmony_ci	u32 count;
81462306a36Sopenharmony_ci	struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
81562306a36Sopenharmony_ci
81662306a36Sopenharmony_ci	count = memweight(la->la_bitmap, le16_to_cpu(la->la_size));
81762306a36Sopenharmony_ci
81862306a36Sopenharmony_ci	trace_ocfs2_local_alloc_count_bits(count);
81962306a36Sopenharmony_ci	return count;
82062306a36Sopenharmony_ci}
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_cistatic int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
82362306a36Sopenharmony_ci				     struct ocfs2_dinode *alloc,
82462306a36Sopenharmony_ci				     u32 *numbits,
82562306a36Sopenharmony_ci				     struct ocfs2_alloc_reservation *resv)
82662306a36Sopenharmony_ci{
82762306a36Sopenharmony_ci	int numfound = 0, bitoff, left, startoff;
82862306a36Sopenharmony_ci	int local_resv = 0;
82962306a36Sopenharmony_ci	struct ocfs2_alloc_reservation r;
83062306a36Sopenharmony_ci	void *bitmap = NULL;
83162306a36Sopenharmony_ci	struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap;
83262306a36Sopenharmony_ci
83362306a36Sopenharmony_ci	if (!alloc->id1.bitmap1.i_total) {
83462306a36Sopenharmony_ci		bitoff = -1;
83562306a36Sopenharmony_ci		goto bail;
83662306a36Sopenharmony_ci	}
83762306a36Sopenharmony_ci
83862306a36Sopenharmony_ci	if (!resv) {
83962306a36Sopenharmony_ci		local_resv = 1;
84062306a36Sopenharmony_ci		ocfs2_resv_init_once(&r);
84162306a36Sopenharmony_ci		ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP);
84262306a36Sopenharmony_ci		resv = &r;
84362306a36Sopenharmony_ci	}
84462306a36Sopenharmony_ci
84562306a36Sopenharmony_ci	numfound = *numbits;
84662306a36Sopenharmony_ci	if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) {
84762306a36Sopenharmony_ci		if (numfound < *numbits)
84862306a36Sopenharmony_ci			*numbits = numfound;
84962306a36Sopenharmony_ci		goto bail;
85062306a36Sopenharmony_ci	}
85162306a36Sopenharmony_ci
85262306a36Sopenharmony_ci	/*
85362306a36Sopenharmony_ci	 * Code error. While reservations are enabled, local
85462306a36Sopenharmony_ci	 * allocation should _always_ go through them.
85562306a36Sopenharmony_ci	 */
85662306a36Sopenharmony_ci	BUG_ON(osb->osb_resv_level != 0);
85762306a36Sopenharmony_ci
85862306a36Sopenharmony_ci	/*
85962306a36Sopenharmony_ci	 * Reservations are disabled. Handle this the old way.
86062306a36Sopenharmony_ci	 */
86162306a36Sopenharmony_ci
86262306a36Sopenharmony_ci	bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
86362306a36Sopenharmony_ci
86462306a36Sopenharmony_ci	numfound = bitoff = startoff = 0;
86562306a36Sopenharmony_ci	left = le32_to_cpu(alloc->id1.bitmap1.i_total);
86662306a36Sopenharmony_ci	while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
86762306a36Sopenharmony_ci		if (bitoff == left) {
86862306a36Sopenharmony_ci			/* mlog(0, "bitoff (%d) == left", bitoff); */
86962306a36Sopenharmony_ci			break;
87062306a36Sopenharmony_ci		}
87162306a36Sopenharmony_ci		/* mlog(0, "Found a zero: bitoff = %d, startoff = %d, "
87262306a36Sopenharmony_ci		   "numfound = %d\n", bitoff, startoff, numfound);*/
87362306a36Sopenharmony_ci
87462306a36Sopenharmony_ci		/* Ok, we found a zero bit... is it contig. or do we
87562306a36Sopenharmony_ci		 * start over?*/
87662306a36Sopenharmony_ci		if (bitoff == startoff) {
87762306a36Sopenharmony_ci			/* we found a zero */
87862306a36Sopenharmony_ci			numfound++;
87962306a36Sopenharmony_ci			startoff++;
88062306a36Sopenharmony_ci		} else {
88162306a36Sopenharmony_ci			/* got a zero after some ones */
88262306a36Sopenharmony_ci			numfound = 1;
88362306a36Sopenharmony_ci			startoff = bitoff+1;
88462306a36Sopenharmony_ci		}
88562306a36Sopenharmony_ci		/* we got everything we needed */
88662306a36Sopenharmony_ci		if (numfound == *numbits) {
88762306a36Sopenharmony_ci			/* mlog(0, "Found it all!\n"); */
88862306a36Sopenharmony_ci			break;
88962306a36Sopenharmony_ci		}
89062306a36Sopenharmony_ci	}
89162306a36Sopenharmony_ci
89262306a36Sopenharmony_ci	trace_ocfs2_local_alloc_find_clear_bits_search_bitmap(bitoff, numfound);
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_ci	if (numfound == *numbits)
89562306a36Sopenharmony_ci		bitoff = startoff - numfound;
89662306a36Sopenharmony_ci	else
89762306a36Sopenharmony_ci		bitoff = -1;
89862306a36Sopenharmony_ci
89962306a36Sopenharmony_cibail:
90062306a36Sopenharmony_ci	if (local_resv)
90162306a36Sopenharmony_ci		ocfs2_resv_discard(resmap, resv);
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_ci	trace_ocfs2_local_alloc_find_clear_bits(*numbits,
90462306a36Sopenharmony_ci		le32_to_cpu(alloc->id1.bitmap1.i_total),
90562306a36Sopenharmony_ci		bitoff, numfound);
90662306a36Sopenharmony_ci
90762306a36Sopenharmony_ci	return bitoff;
90862306a36Sopenharmony_ci}
90962306a36Sopenharmony_ci
91062306a36Sopenharmony_cistatic void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc)
91162306a36Sopenharmony_ci{
91262306a36Sopenharmony_ci	struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
91362306a36Sopenharmony_ci	int i;
91462306a36Sopenharmony_ci
91562306a36Sopenharmony_ci	alloc->id1.bitmap1.i_total = 0;
91662306a36Sopenharmony_ci	alloc->id1.bitmap1.i_used = 0;
91762306a36Sopenharmony_ci	la->la_bm_off = 0;
91862306a36Sopenharmony_ci	for(i = 0; i < le16_to_cpu(la->la_size); i++)
91962306a36Sopenharmony_ci		la->la_bitmap[i] = 0;
92062306a36Sopenharmony_ci}
92162306a36Sopenharmony_ci
#if 0
/*
 * Debugging aid for window shifts (compiled out; enable it together with
 * its call sites). BUGs if any of the count bits starting at start is set.
 */
static void ocfs2_verify_zero_bits(unsigned long *bitmap,
				   unsigned int start,
				   unsigned int count)
{
	unsigned int i;

	/* Walk the range from its last bit down to its first. */
	for (i = count; i-- > 0; ) {
		if (!ocfs2_test_bit(start + i, bitmap))
			continue;

		printk("ocfs2_verify_zero_bits: start = %u, count = "
		       "%u\n", start, count);
		printk("ocfs2_verify_zero_bits: bit %u is set!",
		       start + i);
		BUG();
	}
}
#endif
94062306a36Sopenharmony_ci
94162306a36Sopenharmony_ci/*
94262306a36Sopenharmony_ci * sync the local alloc to main bitmap.
94362306a36Sopenharmony_ci *
94462306a36Sopenharmony_ci * assumes you've already locked the main bitmap -- the bitmap inode
94562306a36Sopenharmony_ci * passed is used for caching.
94662306a36Sopenharmony_ci */
94762306a36Sopenharmony_cistatic int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
94862306a36Sopenharmony_ci				    handle_t *handle,
94962306a36Sopenharmony_ci				    struct ocfs2_dinode *alloc,
95062306a36Sopenharmony_ci				    struct inode *main_bm_inode,
95162306a36Sopenharmony_ci				    struct buffer_head *main_bm_bh)
95262306a36Sopenharmony_ci{
95362306a36Sopenharmony_ci	int status = 0;
95462306a36Sopenharmony_ci	int bit_off, left, count, start;
95562306a36Sopenharmony_ci	u64 la_start_blk;
95662306a36Sopenharmony_ci	u64 blkno;
95762306a36Sopenharmony_ci	void *bitmap;
95862306a36Sopenharmony_ci	struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
95962306a36Sopenharmony_ci
96062306a36Sopenharmony_ci	trace_ocfs2_sync_local_to_main(
96162306a36Sopenharmony_ci	     le32_to_cpu(alloc->id1.bitmap1.i_total),
96262306a36Sopenharmony_ci	     le32_to_cpu(alloc->id1.bitmap1.i_used));
96362306a36Sopenharmony_ci
96462306a36Sopenharmony_ci	if (!alloc->id1.bitmap1.i_total) {
96562306a36Sopenharmony_ci		goto bail;
96662306a36Sopenharmony_ci	}
96762306a36Sopenharmony_ci
96862306a36Sopenharmony_ci	if (le32_to_cpu(alloc->id1.bitmap1.i_used) ==
96962306a36Sopenharmony_ci	    le32_to_cpu(alloc->id1.bitmap1.i_total)) {
97062306a36Sopenharmony_ci		goto bail;
97162306a36Sopenharmony_ci	}
97262306a36Sopenharmony_ci
97362306a36Sopenharmony_ci	la_start_blk = ocfs2_clusters_to_blocks(osb->sb,
97462306a36Sopenharmony_ci						le32_to_cpu(la->la_bm_off));
97562306a36Sopenharmony_ci	bitmap = la->la_bitmap;
97662306a36Sopenharmony_ci	start = count = 0;
97762306a36Sopenharmony_ci	left = le32_to_cpu(alloc->id1.bitmap1.i_total);
97862306a36Sopenharmony_ci
97962306a36Sopenharmony_ci	while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
98062306a36Sopenharmony_ci	       != -1) {
98162306a36Sopenharmony_ci		if ((bit_off < left) && (bit_off == start)) {
98262306a36Sopenharmony_ci			count++;
98362306a36Sopenharmony_ci			start++;
98462306a36Sopenharmony_ci			continue;
98562306a36Sopenharmony_ci		}
98662306a36Sopenharmony_ci		if (count) {
98762306a36Sopenharmony_ci			blkno = la_start_blk +
98862306a36Sopenharmony_ci				ocfs2_clusters_to_blocks(osb->sb,
98962306a36Sopenharmony_ci							 start - count);
99062306a36Sopenharmony_ci
99162306a36Sopenharmony_ci			trace_ocfs2_sync_local_to_main_free(
99262306a36Sopenharmony_ci			     count, start - count,
99362306a36Sopenharmony_ci			     (unsigned long long)la_start_blk,
99462306a36Sopenharmony_ci			     (unsigned long long)blkno);
99562306a36Sopenharmony_ci
99662306a36Sopenharmony_ci			status = ocfs2_release_clusters(handle,
99762306a36Sopenharmony_ci							main_bm_inode,
99862306a36Sopenharmony_ci							main_bm_bh, blkno,
99962306a36Sopenharmony_ci							count);
100062306a36Sopenharmony_ci			if (status < 0) {
100162306a36Sopenharmony_ci				mlog_errno(status);
100262306a36Sopenharmony_ci				goto bail;
100362306a36Sopenharmony_ci			}
100462306a36Sopenharmony_ci		}
100562306a36Sopenharmony_ci		if (bit_off >= left)
100662306a36Sopenharmony_ci			break;
100762306a36Sopenharmony_ci		count = 1;
100862306a36Sopenharmony_ci		start = bit_off + 1;
100962306a36Sopenharmony_ci	}
101062306a36Sopenharmony_ci
101162306a36Sopenharmony_cibail:
101262306a36Sopenharmony_ci	if (status)
101362306a36Sopenharmony_ci		mlog_errno(status);
101462306a36Sopenharmony_ci	return status;
101562306a36Sopenharmony_ci}
101662306a36Sopenharmony_ci
/* Events that drive the sizing of the next local alloc window. */
enum ocfs2_la_event {
	/* Normal window slide. */
	OCFS2_LA_EVENT_SLIDE,
	/*
	 * The global bitmap has enough bits theoretically free, but a
	 * contiguous allocation could not be found.
	 */
	OCFS2_LA_EVENT_FRAGMENTED,
	/*
	 * Global bitmap doesn't have enough bits free to satisfy our
	 * request.
	 */
	OCFS2_LA_EVENT_ENOSPC,
};

/* Delay used when queueing the la_enable_wq work after a throttle/disable. */
#define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
102962306a36Sopenharmony_ci/*
103062306a36Sopenharmony_ci * Given an event, calculate the size of our next local alloc window.
103162306a36Sopenharmony_ci *
103262306a36Sopenharmony_ci * This should always be called under i_rwsem of the local alloc inode
103362306a36Sopenharmony_ci * so that local alloc disabling doesn't race with processes trying to
103462306a36Sopenharmony_ci * use the allocator.
103562306a36Sopenharmony_ci *
103662306a36Sopenharmony_ci * Returns the state which the local alloc was left in. This value can
103762306a36Sopenharmony_ci * be ignored by some paths.
103862306a36Sopenharmony_ci */
103962306a36Sopenharmony_cistatic int ocfs2_recalc_la_window(struct ocfs2_super *osb,
104062306a36Sopenharmony_ci				  enum ocfs2_la_event event)
104162306a36Sopenharmony_ci{
104262306a36Sopenharmony_ci	unsigned int bits;
104362306a36Sopenharmony_ci	int state;
104462306a36Sopenharmony_ci
104562306a36Sopenharmony_ci	spin_lock(&osb->osb_lock);
104662306a36Sopenharmony_ci	if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
104762306a36Sopenharmony_ci		WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
104862306a36Sopenharmony_ci		goto out_unlock;
104962306a36Sopenharmony_ci	}
105062306a36Sopenharmony_ci
105162306a36Sopenharmony_ci	/*
105262306a36Sopenharmony_ci	 * ENOSPC and fragmentation are treated similarly for now.
105362306a36Sopenharmony_ci	 */
105462306a36Sopenharmony_ci	if (event == OCFS2_LA_EVENT_ENOSPC ||
105562306a36Sopenharmony_ci	    event == OCFS2_LA_EVENT_FRAGMENTED) {
105662306a36Sopenharmony_ci		/*
105762306a36Sopenharmony_ci		 * We ran out of contiguous space in the primary
105862306a36Sopenharmony_ci		 * bitmap. Drastically reduce the number of bits used
105962306a36Sopenharmony_ci		 * by local alloc until we have to disable it.
106062306a36Sopenharmony_ci		 */
106162306a36Sopenharmony_ci		bits = osb->local_alloc_bits >> 1;
106262306a36Sopenharmony_ci		if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
106362306a36Sopenharmony_ci			/*
106462306a36Sopenharmony_ci			 * By setting state to THROTTLED, we'll keep
106562306a36Sopenharmony_ci			 * the number of local alloc bits used down
106662306a36Sopenharmony_ci			 * until an event occurs which would give us
106762306a36Sopenharmony_ci			 * reason to assume the bitmap situation might
106862306a36Sopenharmony_ci			 * have changed.
106962306a36Sopenharmony_ci			 */
107062306a36Sopenharmony_ci			osb->local_alloc_state = OCFS2_LA_THROTTLED;
107162306a36Sopenharmony_ci			osb->local_alloc_bits = bits;
107262306a36Sopenharmony_ci		} else {
107362306a36Sopenharmony_ci			osb->local_alloc_state = OCFS2_LA_DISABLED;
107462306a36Sopenharmony_ci		}
107562306a36Sopenharmony_ci		queue_delayed_work(osb->ocfs2_wq, &osb->la_enable_wq,
107662306a36Sopenharmony_ci				   OCFS2_LA_ENABLE_INTERVAL);
107762306a36Sopenharmony_ci		goto out_unlock;
107862306a36Sopenharmony_ci	}
107962306a36Sopenharmony_ci
108062306a36Sopenharmony_ci	/*
108162306a36Sopenharmony_ci	 * Don't increase the size of the local alloc window until we
108262306a36Sopenharmony_ci	 * know we might be able to fulfill the request. Otherwise, we
108362306a36Sopenharmony_ci	 * risk bouncing around the global bitmap during periods of
108462306a36Sopenharmony_ci	 * low space.
108562306a36Sopenharmony_ci	 */
108662306a36Sopenharmony_ci	if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
108762306a36Sopenharmony_ci		osb->local_alloc_bits = osb->local_alloc_default_bits;
108862306a36Sopenharmony_ci
108962306a36Sopenharmony_ciout_unlock:
109062306a36Sopenharmony_ci	state = osb->local_alloc_state;
109162306a36Sopenharmony_ci	spin_unlock(&osb->osb_lock);
109262306a36Sopenharmony_ci
109362306a36Sopenharmony_ci	return state;
109462306a36Sopenharmony_ci}
109562306a36Sopenharmony_ci
109662306a36Sopenharmony_cistatic int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
109762306a36Sopenharmony_ci						struct ocfs2_alloc_context **ac,
109862306a36Sopenharmony_ci						struct inode **bitmap_inode,
109962306a36Sopenharmony_ci						struct buffer_head **bitmap_bh)
110062306a36Sopenharmony_ci{
110162306a36Sopenharmony_ci	int status;
110262306a36Sopenharmony_ci
110362306a36Sopenharmony_ci	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
110462306a36Sopenharmony_ci	if (!(*ac)) {
110562306a36Sopenharmony_ci		status = -ENOMEM;
110662306a36Sopenharmony_ci		mlog_errno(status);
110762306a36Sopenharmony_ci		goto bail;
110862306a36Sopenharmony_ci	}
110962306a36Sopenharmony_ci
111062306a36Sopenharmony_ciretry_enospc:
111162306a36Sopenharmony_ci	(*ac)->ac_bits_wanted = osb->local_alloc_bits;
111262306a36Sopenharmony_ci	status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
111362306a36Sopenharmony_ci	if (status == -ENOSPC) {
111462306a36Sopenharmony_ci		if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
111562306a36Sopenharmony_ci		    OCFS2_LA_DISABLED)
111662306a36Sopenharmony_ci			goto bail;
111762306a36Sopenharmony_ci
111862306a36Sopenharmony_ci		ocfs2_free_ac_resource(*ac);
111962306a36Sopenharmony_ci		memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
112062306a36Sopenharmony_ci		goto retry_enospc;
112162306a36Sopenharmony_ci	}
112262306a36Sopenharmony_ci	if (status < 0) {
112362306a36Sopenharmony_ci		mlog_errno(status);
112462306a36Sopenharmony_ci		goto bail;
112562306a36Sopenharmony_ci	}
112662306a36Sopenharmony_ci
112762306a36Sopenharmony_ci	*bitmap_inode = (*ac)->ac_inode;
112862306a36Sopenharmony_ci	igrab(*bitmap_inode);
112962306a36Sopenharmony_ci	*bitmap_bh = (*ac)->ac_bh;
113062306a36Sopenharmony_ci	get_bh(*bitmap_bh);
113162306a36Sopenharmony_ci	status = 0;
113262306a36Sopenharmony_cibail:
113362306a36Sopenharmony_ci	if ((status < 0) && *ac) {
113462306a36Sopenharmony_ci		ocfs2_free_alloc_context(*ac);
113562306a36Sopenharmony_ci		*ac = NULL;
113662306a36Sopenharmony_ci	}
113762306a36Sopenharmony_ci
113862306a36Sopenharmony_ci	if (status)
113962306a36Sopenharmony_ci		mlog_errno(status);
114062306a36Sopenharmony_ci	return status;
114162306a36Sopenharmony_ci}
114262306a36Sopenharmony_ci
114362306a36Sopenharmony_ci/*
114462306a36Sopenharmony_ci * pass it the bitmap lock in lock_bh if you have it.
114562306a36Sopenharmony_ci */
114662306a36Sopenharmony_cistatic int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
114762306a36Sopenharmony_ci					handle_t *handle,
114862306a36Sopenharmony_ci					struct ocfs2_alloc_context *ac)
114962306a36Sopenharmony_ci{
115062306a36Sopenharmony_ci	int status = 0;
115162306a36Sopenharmony_ci	u32 cluster_off, cluster_count;
115262306a36Sopenharmony_ci	struct ocfs2_dinode *alloc = NULL;
115362306a36Sopenharmony_ci	struct ocfs2_local_alloc *la;
115462306a36Sopenharmony_ci
115562306a36Sopenharmony_ci	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
115662306a36Sopenharmony_ci	la = OCFS2_LOCAL_ALLOC(alloc);
115762306a36Sopenharmony_ci
115862306a36Sopenharmony_ci	trace_ocfs2_local_alloc_new_window(
115962306a36Sopenharmony_ci		le32_to_cpu(alloc->id1.bitmap1.i_total),
116062306a36Sopenharmony_ci		osb->local_alloc_bits);
116162306a36Sopenharmony_ci
116262306a36Sopenharmony_ci	/* Instruct the allocation code to try the most recently used
116362306a36Sopenharmony_ci	 * cluster group. We'll re-record the group used this pass
116462306a36Sopenharmony_ci	 * below. */
116562306a36Sopenharmony_ci	ac->ac_last_group = osb->la_last_gd;
116662306a36Sopenharmony_ci
116762306a36Sopenharmony_ci	/* we used the generic suballoc reserve function, but we set
116862306a36Sopenharmony_ci	 * everything up nicely, so there's no reason why we can't use
116962306a36Sopenharmony_ci	 * the more specific cluster api to claim bits. */
117062306a36Sopenharmony_ci	status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits,
117162306a36Sopenharmony_ci				      &cluster_off, &cluster_count);
117262306a36Sopenharmony_ci	if (status == -ENOSPC) {
117362306a36Sopenharmony_ciretry_enospc:
117462306a36Sopenharmony_ci		/*
117562306a36Sopenharmony_ci		 * Note: We could also try syncing the journal here to
117662306a36Sopenharmony_ci		 * allow use of any free bits which the current
117762306a36Sopenharmony_ci		 * transaction can't give us access to. --Mark
117862306a36Sopenharmony_ci		 */
117962306a36Sopenharmony_ci		if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
118062306a36Sopenharmony_ci		    OCFS2_LA_DISABLED)
118162306a36Sopenharmony_ci			goto bail;
118262306a36Sopenharmony_ci
118362306a36Sopenharmony_ci		ac->ac_bits_wanted = osb->local_alloc_bits;
118462306a36Sopenharmony_ci		status = ocfs2_claim_clusters(handle, ac,
118562306a36Sopenharmony_ci					      osb->local_alloc_bits,
118662306a36Sopenharmony_ci					      &cluster_off,
118762306a36Sopenharmony_ci					      &cluster_count);
118862306a36Sopenharmony_ci		if (status == -ENOSPC)
118962306a36Sopenharmony_ci			goto retry_enospc;
119062306a36Sopenharmony_ci		/*
119162306a36Sopenharmony_ci		 * We only shrunk the *minimum* number of in our
119262306a36Sopenharmony_ci		 * request - it's entirely possible that the allocator
119362306a36Sopenharmony_ci		 * might give us more than we asked for.
119462306a36Sopenharmony_ci		 */
119562306a36Sopenharmony_ci		if (status == 0) {
119662306a36Sopenharmony_ci			spin_lock(&osb->osb_lock);
119762306a36Sopenharmony_ci			osb->local_alloc_bits = cluster_count;
119862306a36Sopenharmony_ci			spin_unlock(&osb->osb_lock);
119962306a36Sopenharmony_ci		}
120062306a36Sopenharmony_ci	}
120162306a36Sopenharmony_ci	if (status < 0) {
120262306a36Sopenharmony_ci		if (status != -ENOSPC)
120362306a36Sopenharmony_ci			mlog_errno(status);
120462306a36Sopenharmony_ci		goto bail;
120562306a36Sopenharmony_ci	}
120662306a36Sopenharmony_ci
120762306a36Sopenharmony_ci	osb->la_last_gd = ac->ac_last_group;
120862306a36Sopenharmony_ci
120962306a36Sopenharmony_ci	la->la_bm_off = cpu_to_le32(cluster_off);
121062306a36Sopenharmony_ci	alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
121162306a36Sopenharmony_ci	/* just in case... In the future when we find space ourselves,
121262306a36Sopenharmony_ci	 * we don't have to get all contiguous -- but we'll have to
121362306a36Sopenharmony_ci	 * set all previously used bits in bitmap and update
121462306a36Sopenharmony_ci	 * la_bits_set before setting the bits in the main bitmap. */
121562306a36Sopenharmony_ci	alloc->id1.bitmap1.i_used = 0;
121662306a36Sopenharmony_ci	memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
121762306a36Sopenharmony_ci	       le16_to_cpu(la->la_size));
121862306a36Sopenharmony_ci
121962306a36Sopenharmony_ci	ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count,
122062306a36Sopenharmony_ci			     OCFS2_LOCAL_ALLOC(alloc)->la_bitmap);
122162306a36Sopenharmony_ci
122262306a36Sopenharmony_ci	trace_ocfs2_local_alloc_new_window_result(
122362306a36Sopenharmony_ci		OCFS2_LOCAL_ALLOC(alloc)->la_bm_off,
122462306a36Sopenharmony_ci		le32_to_cpu(alloc->id1.bitmap1.i_total));
122562306a36Sopenharmony_ci
122662306a36Sopenharmony_cibail:
122762306a36Sopenharmony_ci	if (status)
122862306a36Sopenharmony_ci		mlog_errno(status);
122962306a36Sopenharmony_ci	return status;
123062306a36Sopenharmony_ci}
123162306a36Sopenharmony_ci
123262306a36Sopenharmony_ci/* Note that we do *NOT* lock the local alloc inode here as
123362306a36Sopenharmony_ci * it's been locked already for us. */
123462306a36Sopenharmony_cistatic int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
123562306a36Sopenharmony_ci					  struct inode *local_alloc_inode)
123662306a36Sopenharmony_ci{
123762306a36Sopenharmony_ci	int status = 0;
123862306a36Sopenharmony_ci	struct buffer_head *main_bm_bh = NULL;
123962306a36Sopenharmony_ci	struct inode *main_bm_inode = NULL;
124062306a36Sopenharmony_ci	handle_t *handle = NULL;
124162306a36Sopenharmony_ci	struct ocfs2_dinode *alloc;
124262306a36Sopenharmony_ci	struct ocfs2_dinode *alloc_copy = NULL;
124362306a36Sopenharmony_ci	struct ocfs2_alloc_context *ac = NULL;
124462306a36Sopenharmony_ci
124562306a36Sopenharmony_ci	ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
124662306a36Sopenharmony_ci
124762306a36Sopenharmony_ci	/* This will lock the main bitmap for us. */
124862306a36Sopenharmony_ci	status = ocfs2_local_alloc_reserve_for_window(osb,
124962306a36Sopenharmony_ci						      &ac,
125062306a36Sopenharmony_ci						      &main_bm_inode,
125162306a36Sopenharmony_ci						      &main_bm_bh);
125262306a36Sopenharmony_ci	if (status < 0) {
125362306a36Sopenharmony_ci		if (status != -ENOSPC)
125462306a36Sopenharmony_ci			mlog_errno(status);
125562306a36Sopenharmony_ci		goto bail;
125662306a36Sopenharmony_ci	}
125762306a36Sopenharmony_ci
125862306a36Sopenharmony_ci	handle = ocfs2_start_trans(osb, OCFS2_WINDOW_MOVE_CREDITS);
125962306a36Sopenharmony_ci	if (IS_ERR(handle)) {
126062306a36Sopenharmony_ci		status = PTR_ERR(handle);
126162306a36Sopenharmony_ci		handle = NULL;
126262306a36Sopenharmony_ci		mlog_errno(status);
126362306a36Sopenharmony_ci		goto bail;
126462306a36Sopenharmony_ci	}
126562306a36Sopenharmony_ci
126662306a36Sopenharmony_ci	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
126762306a36Sopenharmony_ci
126862306a36Sopenharmony_ci	/* We want to clear the local alloc before doing anything
126962306a36Sopenharmony_ci	 * else, so that if we error later during this operation,
127062306a36Sopenharmony_ci	 * local alloc shutdown won't try to double free main bitmap
127162306a36Sopenharmony_ci	 * bits. Make a copy so the sync function knows which bits to
127262306a36Sopenharmony_ci	 * free. */
127362306a36Sopenharmony_ci	alloc_copy = kmemdup(alloc, osb->local_alloc_bh->b_size, GFP_NOFS);
127462306a36Sopenharmony_ci	if (!alloc_copy) {
127562306a36Sopenharmony_ci		status = -ENOMEM;
127662306a36Sopenharmony_ci		mlog_errno(status);
127762306a36Sopenharmony_ci		goto bail;
127862306a36Sopenharmony_ci	}
127962306a36Sopenharmony_ci
128062306a36Sopenharmony_ci	status = ocfs2_journal_access_di(handle,
128162306a36Sopenharmony_ci					 INODE_CACHE(local_alloc_inode),
128262306a36Sopenharmony_ci					 osb->local_alloc_bh,
128362306a36Sopenharmony_ci					 OCFS2_JOURNAL_ACCESS_WRITE);
128462306a36Sopenharmony_ci	if (status < 0) {
128562306a36Sopenharmony_ci		mlog_errno(status);
128662306a36Sopenharmony_ci		goto bail;
128762306a36Sopenharmony_ci	}
128862306a36Sopenharmony_ci
128962306a36Sopenharmony_ci	ocfs2_clear_local_alloc(alloc);
129062306a36Sopenharmony_ci	ocfs2_journal_dirty(handle, osb->local_alloc_bh);
129162306a36Sopenharmony_ci
129262306a36Sopenharmony_ci	status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
129362306a36Sopenharmony_ci					  main_bm_inode, main_bm_bh);
129462306a36Sopenharmony_ci	if (status < 0) {
129562306a36Sopenharmony_ci		mlog_errno(status);
129662306a36Sopenharmony_ci		goto bail;
129762306a36Sopenharmony_ci	}
129862306a36Sopenharmony_ci
129962306a36Sopenharmony_ci	status = ocfs2_local_alloc_new_window(osb, handle, ac);
130062306a36Sopenharmony_ci	if (status < 0) {
130162306a36Sopenharmony_ci		if (status != -ENOSPC)
130262306a36Sopenharmony_ci			mlog_errno(status);
130362306a36Sopenharmony_ci		goto bail;
130462306a36Sopenharmony_ci	}
130562306a36Sopenharmony_ci
130662306a36Sopenharmony_ci	atomic_inc(&osb->alloc_stats.moves);
130762306a36Sopenharmony_ci
130862306a36Sopenharmony_cibail:
130962306a36Sopenharmony_ci	if (handle)
131062306a36Sopenharmony_ci		ocfs2_commit_trans(osb, handle);
131162306a36Sopenharmony_ci
131262306a36Sopenharmony_ci	brelse(main_bm_bh);
131362306a36Sopenharmony_ci
131462306a36Sopenharmony_ci	iput(main_bm_inode);
131562306a36Sopenharmony_ci	kfree(alloc_copy);
131662306a36Sopenharmony_ci
131762306a36Sopenharmony_ci	if (ac)
131862306a36Sopenharmony_ci		ocfs2_free_alloc_context(ac);
131962306a36Sopenharmony_ci
132062306a36Sopenharmony_ci	if (status)
132162306a36Sopenharmony_ci		mlog_errno(status);
132262306a36Sopenharmony_ci	return status;
132362306a36Sopenharmony_ci}
132462306a36Sopenharmony_ci
1325