162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci *   Copyright (C) International Business Machines Corp., 2000-2004
462306a36Sopenharmony_ci *   Portions Copyright (C) Tino Reichardt, 2012
562306a36Sopenharmony_ci */
662306a36Sopenharmony_ci
762306a36Sopenharmony_ci#include <linux/fs.h>
862306a36Sopenharmony_ci#include <linux/slab.h>
962306a36Sopenharmony_ci#include "jfs_incore.h"
1062306a36Sopenharmony_ci#include "jfs_superblock.h"
1162306a36Sopenharmony_ci#include "jfs_dmap.h"
1262306a36Sopenharmony_ci#include "jfs_imap.h"
1362306a36Sopenharmony_ci#include "jfs_lock.h"
1462306a36Sopenharmony_ci#include "jfs_metapage.h"
1562306a36Sopenharmony_ci#include "jfs_debug.h"
1662306a36Sopenharmony_ci#include "jfs_discard.h"
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ci/*
1962306a36Sopenharmony_ci *	SERIALIZATION of the Block Allocation Map.
2062306a36Sopenharmony_ci *
2162306a36Sopenharmony_ci *	the working state of the block allocation map is accessed in
2262306a36Sopenharmony_ci *	two directions:
2362306a36Sopenharmony_ci *
2462306a36Sopenharmony_ci *	1) allocation and free requests that start at the dmap
2562306a36Sopenharmony_ci *	   level and move up through the dmap control pages (i.e.
2662306a36Sopenharmony_ci *	   the vast majority of requests).
2762306a36Sopenharmony_ci *
2862306a36Sopenharmony_ci *	2) allocation requests that start at dmap control page
2962306a36Sopenharmony_ci *	   level and work down towards the dmaps.
3062306a36Sopenharmony_ci *
3162306a36Sopenharmony_ci *	the serialization scheme used here is as follows.
3262306a36Sopenharmony_ci *
3362306a36Sopenharmony_ci *	requests which start at the bottom are serialized against each
3462306a36Sopenharmony_ci *	other through buffers and each requests holds onto its buffers
3562306a36Sopenharmony_ci *	as it works it way up from a single dmap to the required level
3662306a36Sopenharmony_ci *	of dmap control page.
3762306a36Sopenharmony_ci *	requests that start at the top are serialized against each other
3862306a36Sopenharmony_ci *	and request that start from the bottom by the multiple read/single
3962306a36Sopenharmony_ci *	write inode lock of the bmap inode. requests starting at the top
4062306a36Sopenharmony_ci *	take this lock in write mode while request starting at the bottom
4162306a36Sopenharmony_ci *	take the lock in read mode.  a single top-down request may proceed
4262306a36Sopenharmony_ci *	exclusively while multiple bottoms-up requests may proceed
4362306a36Sopenharmony_ci *	simultaneously (under the protection of busy buffers).
4462306a36Sopenharmony_ci *
4562306a36Sopenharmony_ci *	in addition to information found in dmaps and dmap control pages,
4662306a36Sopenharmony_ci *	the working state of the block allocation map also includes read/
4762306a36Sopenharmony_ci *	write information maintained in the bmap descriptor (i.e. total
4862306a36Sopenharmony_ci *	free block count, allocation group level free block counts).
4962306a36Sopenharmony_ci *	a single exclusive lock (BMAP_LOCK) is used to guard this information
5062306a36Sopenharmony_ci *	in the face of multiple-bottoms up requests.
5162306a36Sopenharmony_ci *	(lock ordering: IREAD_LOCK, BMAP_LOCK);
5262306a36Sopenharmony_ci *
5362306a36Sopenharmony_ci *	accesses to the persistent state of the block allocation map (limited
5462306a36Sopenharmony_ci *	to the persistent bitmaps in dmaps) is guarded by (busy) buffers.
5562306a36Sopenharmony_ci */
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_ci#define BMAP_LOCK_INIT(bmp)	mutex_init(&bmp->db_bmaplock)
5862306a36Sopenharmony_ci#define BMAP_LOCK(bmp)		mutex_lock(&bmp->db_bmaplock)
5962306a36Sopenharmony_ci#define BMAP_UNLOCK(bmp)	mutex_unlock(&bmp->db_bmaplock)
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci/*
6262306a36Sopenharmony_ci * forward references
6362306a36Sopenharmony_ci */
6462306a36Sopenharmony_cistatic void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
6562306a36Sopenharmony_ci			int nblocks);
6662306a36Sopenharmony_cistatic void dbSplit(dmtree_t *tp, int leafno, int splitsz, int newval, bool is_ctl);
6762306a36Sopenharmony_cistatic int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl);
6862306a36Sopenharmony_cistatic int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl);
6962306a36Sopenharmony_cistatic void dbAdjTree(dmtree_t *tp, int leafno, int newval, bool is_ctl);
7062306a36Sopenharmony_cistatic int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc,
7162306a36Sopenharmony_ci		    int level);
7262306a36Sopenharmony_cistatic int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results);
7362306a36Sopenharmony_cistatic int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
7462306a36Sopenharmony_ci		       int nblocks);
7562306a36Sopenharmony_cistatic int dbAllocNear(struct bmap * bmp, struct dmap * dp, s64 blkno,
7662306a36Sopenharmony_ci		       int nblocks,
7762306a36Sopenharmony_ci		       int l2nb, s64 * results);
7862306a36Sopenharmony_cistatic int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
7962306a36Sopenharmony_ci		       int nblocks);
8062306a36Sopenharmony_cistatic int dbAllocDmapLev(struct bmap * bmp, struct dmap * dp, int nblocks,
8162306a36Sopenharmony_ci			  int l2nb,
8262306a36Sopenharmony_ci			  s64 * results);
8362306a36Sopenharmony_cistatic int dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb,
8462306a36Sopenharmony_ci		     s64 * results);
8562306a36Sopenharmony_cistatic int dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno,
8662306a36Sopenharmony_ci		      s64 * results);
8762306a36Sopenharmony_cistatic int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks);
8862306a36Sopenharmony_cistatic int dbFindBits(u32 word, int l2nb);
8962306a36Sopenharmony_cistatic int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno);
9062306a36Sopenharmony_cistatic int dbFindLeaf(dmtree_t *tp, int l2nb, int *leafidx, bool is_ctl);
9162306a36Sopenharmony_cistatic int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
9262306a36Sopenharmony_ci		      int nblocks);
9362306a36Sopenharmony_cistatic int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
9462306a36Sopenharmony_ci		      int nblocks);
9562306a36Sopenharmony_cistatic int dbMaxBud(u8 * cp);
9662306a36Sopenharmony_cistatic int blkstol2(s64 nb);
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_cistatic int cntlz(u32 value);
9962306a36Sopenharmony_cistatic int cnttz(u32 word);
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_cistatic int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
10262306a36Sopenharmony_ci			 int nblocks);
10362306a36Sopenharmony_cistatic int dbInitDmap(struct dmap * dp, s64 blkno, int nblocks);
10462306a36Sopenharmony_cistatic int dbInitDmapTree(struct dmap * dp);
10562306a36Sopenharmony_cistatic int dbInitTree(struct dmaptree * dtp);
10662306a36Sopenharmony_cistatic int dbInitDmapCtl(struct dmapctl * dcp, int level, int i);
10762306a36Sopenharmony_cistatic int dbGetL2AGSize(s64 nblocks);
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_ci/*
11062306a36Sopenharmony_ci *	buddy table
11162306a36Sopenharmony_ci *
11262306a36Sopenharmony_ci * table used for determining buddy sizes within characters of
11362306a36Sopenharmony_ci * dmap bitmap words.  the characters themselves serve as indexes
11462306a36Sopenharmony_ci * into the table, with the table elements yielding the maximum
11562306a36Sopenharmony_ci * binary buddy of free bits within the character.
11662306a36Sopenharmony_ci */
11762306a36Sopenharmony_cistatic const s8 budtab[256] = {
11862306a36Sopenharmony_ci	3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
11962306a36Sopenharmony_ci	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12062306a36Sopenharmony_ci	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12162306a36Sopenharmony_ci	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12262306a36Sopenharmony_ci	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12362306a36Sopenharmony_ci	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
12462306a36Sopenharmony_ci	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
12562306a36Sopenharmony_ci	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
12662306a36Sopenharmony_ci	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
12762306a36Sopenharmony_ci	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
12862306a36Sopenharmony_ci	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
12962306a36Sopenharmony_ci	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13062306a36Sopenharmony_ci	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13162306a36Sopenharmony_ci	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13262306a36Sopenharmony_ci	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13362306a36Sopenharmony_ci	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1
13462306a36Sopenharmony_ci};
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci/*
13762306a36Sopenharmony_ci * NAME:	dbMount()
13862306a36Sopenharmony_ci *
13962306a36Sopenharmony_ci * FUNCTION:	initializate the block allocation map.
14062306a36Sopenharmony_ci *
14162306a36Sopenharmony_ci *		memory is allocated for the in-core bmap descriptor and
14262306a36Sopenharmony_ci *		the in-core descriptor is initialized from disk.
14362306a36Sopenharmony_ci *
14462306a36Sopenharmony_ci * PARAMETERS:
14562306a36Sopenharmony_ci *	ipbmap	- pointer to in-core inode for the block map.
14662306a36Sopenharmony_ci *
14762306a36Sopenharmony_ci * RETURN VALUES:
14862306a36Sopenharmony_ci *	0	- success
14962306a36Sopenharmony_ci *	-ENOMEM	- insufficient memory
15062306a36Sopenharmony_ci *	-EIO	- i/o error
15162306a36Sopenharmony_ci *	-EINVAL - wrong bmap data
15262306a36Sopenharmony_ci */
15362306a36Sopenharmony_ciint dbMount(struct inode *ipbmap)
15462306a36Sopenharmony_ci{
15562306a36Sopenharmony_ci	struct bmap *bmp;
15662306a36Sopenharmony_ci	struct dbmap_disk *dbmp_le;
15762306a36Sopenharmony_ci	struct metapage *mp;
15862306a36Sopenharmony_ci	int i, err;
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	/*
16162306a36Sopenharmony_ci	 * allocate/initialize the in-memory bmap descriptor
16262306a36Sopenharmony_ci	 */
16362306a36Sopenharmony_ci	/* allocate memory for the in-memory bmap descriptor */
16462306a36Sopenharmony_ci	bmp = kmalloc(sizeof(struct bmap), GFP_KERNEL);
16562306a36Sopenharmony_ci	if (bmp == NULL)
16662306a36Sopenharmony_ci		return -ENOMEM;
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	/* read the on-disk bmap descriptor. */
16962306a36Sopenharmony_ci	mp = read_metapage(ipbmap,
17062306a36Sopenharmony_ci			   BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage,
17162306a36Sopenharmony_ci			   PSIZE, 0);
17262306a36Sopenharmony_ci	if (mp == NULL) {
17362306a36Sopenharmony_ci		err = -EIO;
17462306a36Sopenharmony_ci		goto err_kfree_bmp;
17562306a36Sopenharmony_ci	}
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_ci	/* copy the on-disk bmap descriptor to its in-memory version. */
17862306a36Sopenharmony_ci	dbmp_le = (struct dbmap_disk *) mp->data;
17962306a36Sopenharmony_ci	bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize);
18062306a36Sopenharmony_ci	bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree);
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage);
18362306a36Sopenharmony_ci	if (bmp->db_l2nbperpage > L2PSIZE - L2MINBLOCKSIZE ||
18462306a36Sopenharmony_ci		bmp->db_l2nbperpage < 0) {
18562306a36Sopenharmony_ci		err = -EINVAL;
18662306a36Sopenharmony_ci		goto err_release_metapage;
18762306a36Sopenharmony_ci	}
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci	bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag);
19062306a36Sopenharmony_ci	if (!bmp->db_numag) {
19162306a36Sopenharmony_ci		err = -EINVAL;
19262306a36Sopenharmony_ci		goto err_release_metapage;
19362306a36Sopenharmony_ci	}
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel);
19662306a36Sopenharmony_ci	bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag);
19762306a36Sopenharmony_ci	bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref);
19862306a36Sopenharmony_ci	if (bmp->db_maxag >= MAXAG || bmp->db_maxag < 0 ||
19962306a36Sopenharmony_ci		bmp->db_agpref >= MAXAG || bmp->db_agpref < 0) {
20062306a36Sopenharmony_ci		err = -EINVAL;
20162306a36Sopenharmony_ci		goto err_release_metapage;
20262306a36Sopenharmony_ci	}
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci	bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel);
20562306a36Sopenharmony_ci	bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight);
20662306a36Sopenharmony_ci	bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
20762306a36Sopenharmony_ci	bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart);
20862306a36Sopenharmony_ci	bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size);
20962306a36Sopenharmony_ci	if (bmp->db_agl2size > L2MAXL2SIZE - L2MAXAG ||
21062306a36Sopenharmony_ci	    bmp->db_agl2size < 0) {
21162306a36Sopenharmony_ci		err = -EINVAL;
21262306a36Sopenharmony_ci		goto err_release_metapage;
21362306a36Sopenharmony_ci	}
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci	if (((bmp->db_mapsize - 1) >> bmp->db_agl2size) > MAXAG) {
21662306a36Sopenharmony_ci		err = -EINVAL;
21762306a36Sopenharmony_ci		goto err_release_metapage;
21862306a36Sopenharmony_ci	}
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci	for (i = 0; i < MAXAG; i++)
22162306a36Sopenharmony_ci		bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]);
22262306a36Sopenharmony_ci	bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize);
22362306a36Sopenharmony_ci	bmp->db_maxfreebud = dbmp_le->dn_maxfreebud;
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci	/* release the buffer. */
22662306a36Sopenharmony_ci	release_metapage(mp);
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci	/* bind the bmap inode and the bmap descriptor to each other. */
22962306a36Sopenharmony_ci	bmp->db_ipbmap = ipbmap;
23062306a36Sopenharmony_ci	JFS_SBI(ipbmap->i_sb)->bmap = bmp;
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ci	memset(bmp->db_active, 0, sizeof(bmp->db_active));
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci	/*
23562306a36Sopenharmony_ci	 * allocate/initialize the bmap lock
23662306a36Sopenharmony_ci	 */
23762306a36Sopenharmony_ci	BMAP_LOCK_INIT(bmp);
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci	return (0);
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_cierr_release_metapage:
24262306a36Sopenharmony_ci	release_metapage(mp);
24362306a36Sopenharmony_cierr_kfree_bmp:
24462306a36Sopenharmony_ci	kfree(bmp);
24562306a36Sopenharmony_ci	return err;
24662306a36Sopenharmony_ci}
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci/*
25062306a36Sopenharmony_ci * NAME:	dbUnmount()
25162306a36Sopenharmony_ci *
25262306a36Sopenharmony_ci * FUNCTION:	terminate the block allocation map in preparation for
25362306a36Sopenharmony_ci *		file system unmount.
25462306a36Sopenharmony_ci *
25562306a36Sopenharmony_ci *		the in-core bmap descriptor is written to disk and
25662306a36Sopenharmony_ci *		the memory for this descriptor is freed.
25762306a36Sopenharmony_ci *
25862306a36Sopenharmony_ci * PARAMETERS:
25962306a36Sopenharmony_ci *	ipbmap	- pointer to in-core inode for the block map.
26062306a36Sopenharmony_ci *
26162306a36Sopenharmony_ci * RETURN VALUES:
26262306a36Sopenharmony_ci *	0	- success
26362306a36Sopenharmony_ci *	-EIO	- i/o error
26462306a36Sopenharmony_ci */
26562306a36Sopenharmony_ciint dbUnmount(struct inode *ipbmap, int mounterror)
26662306a36Sopenharmony_ci{
26762306a36Sopenharmony_ci	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci	if (!(mounterror || isReadOnly(ipbmap)))
27062306a36Sopenharmony_ci		dbSync(ipbmap);
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ci	/*
27362306a36Sopenharmony_ci	 * Invalidate the page cache buffers
27462306a36Sopenharmony_ci	 */
27562306a36Sopenharmony_ci	truncate_inode_pages(ipbmap->i_mapping, 0);
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci	/* free the memory for the in-memory bmap. */
27862306a36Sopenharmony_ci	kfree(bmp);
27962306a36Sopenharmony_ci	JFS_SBI(ipbmap->i_sb)->bmap = NULL;
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_ci	return (0);
28262306a36Sopenharmony_ci}
28362306a36Sopenharmony_ci
28462306a36Sopenharmony_ci/*
28562306a36Sopenharmony_ci *	dbSync()
28662306a36Sopenharmony_ci */
28762306a36Sopenharmony_ciint dbSync(struct inode *ipbmap)
28862306a36Sopenharmony_ci{
28962306a36Sopenharmony_ci	struct dbmap_disk *dbmp_le;
29062306a36Sopenharmony_ci	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
29162306a36Sopenharmony_ci	struct metapage *mp;
29262306a36Sopenharmony_ci	int i;
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_ci	/*
29562306a36Sopenharmony_ci	 * write bmap global control page
29662306a36Sopenharmony_ci	 */
29762306a36Sopenharmony_ci	/* get the buffer for the on-disk bmap descriptor. */
29862306a36Sopenharmony_ci	mp = read_metapage(ipbmap,
29962306a36Sopenharmony_ci			   BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage,
30062306a36Sopenharmony_ci			   PSIZE, 0);
30162306a36Sopenharmony_ci	if (mp == NULL) {
30262306a36Sopenharmony_ci		jfs_err("dbSync: read_metapage failed!");
30362306a36Sopenharmony_ci		return -EIO;
30462306a36Sopenharmony_ci	}
30562306a36Sopenharmony_ci	/* copy the in-memory version of the bmap to the on-disk version */
30662306a36Sopenharmony_ci	dbmp_le = (struct dbmap_disk *) mp->data;
30762306a36Sopenharmony_ci	dbmp_le->dn_mapsize = cpu_to_le64(bmp->db_mapsize);
30862306a36Sopenharmony_ci	dbmp_le->dn_nfree = cpu_to_le64(bmp->db_nfree);
30962306a36Sopenharmony_ci	dbmp_le->dn_l2nbperpage = cpu_to_le32(bmp->db_l2nbperpage);
31062306a36Sopenharmony_ci	dbmp_le->dn_numag = cpu_to_le32(bmp->db_numag);
31162306a36Sopenharmony_ci	dbmp_le->dn_maxlevel = cpu_to_le32(bmp->db_maxlevel);
31262306a36Sopenharmony_ci	dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag);
31362306a36Sopenharmony_ci	dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref);
31462306a36Sopenharmony_ci	dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel);
31562306a36Sopenharmony_ci	dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight);
31662306a36Sopenharmony_ci	dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth);
31762306a36Sopenharmony_ci	dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart);
31862306a36Sopenharmony_ci	dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size);
31962306a36Sopenharmony_ci	for (i = 0; i < MAXAG; i++)
32062306a36Sopenharmony_ci		dbmp_le->dn_agfree[i] = cpu_to_le64(bmp->db_agfree[i]);
32162306a36Sopenharmony_ci	dbmp_le->dn_agsize = cpu_to_le64(bmp->db_agsize);
32262306a36Sopenharmony_ci	dbmp_le->dn_maxfreebud = bmp->db_maxfreebud;
32362306a36Sopenharmony_ci
32462306a36Sopenharmony_ci	/* write the buffer */
32562306a36Sopenharmony_ci	write_metapage(mp);
32662306a36Sopenharmony_ci
32762306a36Sopenharmony_ci	/*
32862306a36Sopenharmony_ci	 * write out dirty pages of bmap
32962306a36Sopenharmony_ci	 */
33062306a36Sopenharmony_ci	filemap_write_and_wait(ipbmap->i_mapping);
33162306a36Sopenharmony_ci
33262306a36Sopenharmony_ci	diWriteSpecial(ipbmap, 0);
33362306a36Sopenharmony_ci
33462306a36Sopenharmony_ci	return (0);
33562306a36Sopenharmony_ci}
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_ci/*
33862306a36Sopenharmony_ci * NAME:	dbFree()
33962306a36Sopenharmony_ci *
34062306a36Sopenharmony_ci * FUNCTION:	free the specified block range from the working block
34162306a36Sopenharmony_ci *		allocation map.
34262306a36Sopenharmony_ci *
34362306a36Sopenharmony_ci *		the blocks will be free from the working map one dmap
34462306a36Sopenharmony_ci *		at a time.
34562306a36Sopenharmony_ci *
34662306a36Sopenharmony_ci * PARAMETERS:
34762306a36Sopenharmony_ci *	ip	- pointer to in-core inode;
34862306a36Sopenharmony_ci *	blkno	- starting block number to be freed.
34962306a36Sopenharmony_ci *	nblocks	- number of blocks to be freed.
35062306a36Sopenharmony_ci *
35162306a36Sopenharmony_ci * RETURN VALUES:
35262306a36Sopenharmony_ci *	0	- success
35362306a36Sopenharmony_ci *	-EIO	- i/o error
35462306a36Sopenharmony_ci */
35562306a36Sopenharmony_ciint dbFree(struct inode *ip, s64 blkno, s64 nblocks)
35662306a36Sopenharmony_ci{
35762306a36Sopenharmony_ci	struct metapage *mp;
35862306a36Sopenharmony_ci	struct dmap *dp;
35962306a36Sopenharmony_ci	int nb, rc;
36062306a36Sopenharmony_ci	s64 lblkno, rem;
36162306a36Sopenharmony_ci	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
36262306a36Sopenharmony_ci	struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
36362306a36Sopenharmony_ci	struct super_block *sb = ipbmap->i_sb;
36462306a36Sopenharmony_ci
36562306a36Sopenharmony_ci	IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_ci	/* block to be freed better be within the mapsize. */
36862306a36Sopenharmony_ci	if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) {
36962306a36Sopenharmony_ci		IREAD_UNLOCK(ipbmap);
37062306a36Sopenharmony_ci		printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n",
37162306a36Sopenharmony_ci		       (unsigned long long) blkno,
37262306a36Sopenharmony_ci		       (unsigned long long) nblocks);
37362306a36Sopenharmony_ci		jfs_error(ip->i_sb, "block to be freed is outside the map\n");
37462306a36Sopenharmony_ci		return -EIO;
37562306a36Sopenharmony_ci	}
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_ci	/**
37862306a36Sopenharmony_ci	 * TRIM the blocks, when mounted with discard option
37962306a36Sopenharmony_ci	 */
38062306a36Sopenharmony_ci	if (JFS_SBI(sb)->flag & JFS_DISCARD)
38162306a36Sopenharmony_ci		if (JFS_SBI(sb)->minblks_trim <= nblocks)
38262306a36Sopenharmony_ci			jfs_issue_discard(ipbmap, blkno, nblocks);
38362306a36Sopenharmony_ci
38462306a36Sopenharmony_ci	/*
38562306a36Sopenharmony_ci	 * free the blocks a dmap at a time.
38662306a36Sopenharmony_ci	 */
38762306a36Sopenharmony_ci	mp = NULL;
38862306a36Sopenharmony_ci	for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) {
38962306a36Sopenharmony_ci		/* release previous dmap if any */
39062306a36Sopenharmony_ci		if (mp) {
39162306a36Sopenharmony_ci			write_metapage(mp);
39262306a36Sopenharmony_ci		}
39362306a36Sopenharmony_ci
39462306a36Sopenharmony_ci		/* get the buffer for the current dmap. */
39562306a36Sopenharmony_ci		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
39662306a36Sopenharmony_ci		mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
39762306a36Sopenharmony_ci		if (mp == NULL) {
39862306a36Sopenharmony_ci			IREAD_UNLOCK(ipbmap);
39962306a36Sopenharmony_ci			return -EIO;
40062306a36Sopenharmony_ci		}
40162306a36Sopenharmony_ci		dp = (struct dmap *) mp->data;
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci		/* determine the number of blocks to be freed from
40462306a36Sopenharmony_ci		 * this dmap.
40562306a36Sopenharmony_ci		 */
40662306a36Sopenharmony_ci		nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
40762306a36Sopenharmony_ci
40862306a36Sopenharmony_ci		/* free the blocks. */
40962306a36Sopenharmony_ci		if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) {
41062306a36Sopenharmony_ci			jfs_error(ip->i_sb, "error in block map\n");
41162306a36Sopenharmony_ci			release_metapage(mp);
41262306a36Sopenharmony_ci			IREAD_UNLOCK(ipbmap);
41362306a36Sopenharmony_ci			return (rc);
41462306a36Sopenharmony_ci		}
41562306a36Sopenharmony_ci	}
41662306a36Sopenharmony_ci
41762306a36Sopenharmony_ci	/* write the last buffer. */
41862306a36Sopenharmony_ci	if (mp)
41962306a36Sopenharmony_ci		write_metapage(mp);
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_ci	IREAD_UNLOCK(ipbmap);
42262306a36Sopenharmony_ci
42362306a36Sopenharmony_ci	return (0);
42462306a36Sopenharmony_ci}
42562306a36Sopenharmony_ci
42662306a36Sopenharmony_ci
42762306a36Sopenharmony_ci/*
42862306a36Sopenharmony_ci * NAME:	dbUpdatePMap()
42962306a36Sopenharmony_ci *
43062306a36Sopenharmony_ci * FUNCTION:	update the allocation state (free or allocate) of the
43162306a36Sopenharmony_ci *		specified block range in the persistent block allocation map.
43262306a36Sopenharmony_ci *
43362306a36Sopenharmony_ci *		the blocks will be updated in the persistent map one
43462306a36Sopenharmony_ci *		dmap at a time.
43562306a36Sopenharmony_ci *
43662306a36Sopenharmony_ci * PARAMETERS:
43762306a36Sopenharmony_ci *	ipbmap	- pointer to in-core inode for the block map.
43862306a36Sopenharmony_ci *	free	- 'true' if block range is to be freed from the persistent
43962306a36Sopenharmony_ci *		  map; 'false' if it is to be allocated.
44062306a36Sopenharmony_ci *	blkno	- starting block number of the range.
44162306a36Sopenharmony_ci *	nblocks	- number of contiguous blocks in the range.
44262306a36Sopenharmony_ci *	tblk	- transaction block;
44362306a36Sopenharmony_ci *
44462306a36Sopenharmony_ci * RETURN VALUES:
44562306a36Sopenharmony_ci *	0	- success
44662306a36Sopenharmony_ci *	-EIO	- i/o error
44762306a36Sopenharmony_ci */
44862306a36Sopenharmony_ciint
44962306a36Sopenharmony_cidbUpdatePMap(struct inode *ipbmap,
45062306a36Sopenharmony_ci	     int free, s64 blkno, s64 nblocks, struct tblock * tblk)
45162306a36Sopenharmony_ci{
45262306a36Sopenharmony_ci	int nblks, dbitno, wbitno, rbits;
45362306a36Sopenharmony_ci	int word, nbits, nwords;
45462306a36Sopenharmony_ci	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
45562306a36Sopenharmony_ci	s64 lblkno, rem, lastlblkno;
45662306a36Sopenharmony_ci	u32 mask;
45762306a36Sopenharmony_ci	struct dmap *dp;
45862306a36Sopenharmony_ci	struct metapage *mp;
45962306a36Sopenharmony_ci	struct jfs_log *log;
46062306a36Sopenharmony_ci	int lsn, difft, diffp;
46162306a36Sopenharmony_ci	unsigned long flags;
46262306a36Sopenharmony_ci
46362306a36Sopenharmony_ci	/* the blocks better be within the mapsize. */
46462306a36Sopenharmony_ci	if (blkno + nblocks > bmp->db_mapsize) {
46562306a36Sopenharmony_ci		printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n",
46662306a36Sopenharmony_ci		       (unsigned long long) blkno,
46762306a36Sopenharmony_ci		       (unsigned long long) nblocks);
46862306a36Sopenharmony_ci		jfs_error(ipbmap->i_sb, "blocks are outside the map\n");
46962306a36Sopenharmony_ci		return -EIO;
47062306a36Sopenharmony_ci	}
47162306a36Sopenharmony_ci
47262306a36Sopenharmony_ci	/* compute delta of transaction lsn from log syncpt */
47362306a36Sopenharmony_ci	lsn = tblk->lsn;
47462306a36Sopenharmony_ci	log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
47562306a36Sopenharmony_ci	logdiff(difft, lsn, log);
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci	/*
47862306a36Sopenharmony_ci	 * update the block state a dmap at a time.
47962306a36Sopenharmony_ci	 */
48062306a36Sopenharmony_ci	mp = NULL;
48162306a36Sopenharmony_ci	lastlblkno = 0;
48262306a36Sopenharmony_ci	for (rem = nblocks; rem > 0; rem -= nblks, blkno += nblks) {
48362306a36Sopenharmony_ci		/* get the buffer for the current dmap. */
48462306a36Sopenharmony_ci		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
48562306a36Sopenharmony_ci		if (lblkno != lastlblkno) {
48662306a36Sopenharmony_ci			if (mp) {
48762306a36Sopenharmony_ci				write_metapage(mp);
48862306a36Sopenharmony_ci			}
48962306a36Sopenharmony_ci
49062306a36Sopenharmony_ci			mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE,
49162306a36Sopenharmony_ci					   0);
49262306a36Sopenharmony_ci			if (mp == NULL)
49362306a36Sopenharmony_ci				return -EIO;
49462306a36Sopenharmony_ci			metapage_wait_for_io(mp);
49562306a36Sopenharmony_ci		}
49662306a36Sopenharmony_ci		dp = (struct dmap *) mp->data;
49762306a36Sopenharmony_ci
49862306a36Sopenharmony_ci		/* determine the bit number and word within the dmap of
49962306a36Sopenharmony_ci		 * the starting block.  also determine how many blocks
50062306a36Sopenharmony_ci		 * are to be updated within this dmap.
50162306a36Sopenharmony_ci		 */
50262306a36Sopenharmony_ci		dbitno = blkno & (BPERDMAP - 1);
50362306a36Sopenharmony_ci		word = dbitno >> L2DBWORD;
50462306a36Sopenharmony_ci		nblks = min(rem, (s64)BPERDMAP - dbitno);
50562306a36Sopenharmony_ci
50662306a36Sopenharmony_ci		/* update the bits of the dmap words. the first and last
50762306a36Sopenharmony_ci		 * words may only have a subset of their bits updated. if
50862306a36Sopenharmony_ci		 * this is the case, we'll work against that word (i.e.
50962306a36Sopenharmony_ci		 * partial first and/or last) only in a single pass.  a
51062306a36Sopenharmony_ci		 * single pass will also be used to update all words that
51162306a36Sopenharmony_ci		 * are to have all their bits updated.
51262306a36Sopenharmony_ci		 */
51362306a36Sopenharmony_ci		for (rbits = nblks; rbits > 0;
51462306a36Sopenharmony_ci		     rbits -= nbits, dbitno += nbits) {
51562306a36Sopenharmony_ci			/* determine the bit number within the word and
51662306a36Sopenharmony_ci			 * the number of bits within the word.
51762306a36Sopenharmony_ci			 */
51862306a36Sopenharmony_ci			wbitno = dbitno & (DBWORD - 1);
51962306a36Sopenharmony_ci			nbits = min(rbits, DBWORD - wbitno);
52062306a36Sopenharmony_ci
52162306a36Sopenharmony_ci			/* check if only part of the word is to be updated. */
52262306a36Sopenharmony_ci			if (nbits < DBWORD) {
52362306a36Sopenharmony_ci				/* update (free or allocate) the bits
52462306a36Sopenharmony_ci				 * in this word.
52562306a36Sopenharmony_ci				 */
52662306a36Sopenharmony_ci				mask =
52762306a36Sopenharmony_ci				    (ONES << (DBWORD - nbits) >> wbitno);
52862306a36Sopenharmony_ci				if (free)
52962306a36Sopenharmony_ci					dp->pmap[word] &=
53062306a36Sopenharmony_ci					    cpu_to_le32(~mask);
53162306a36Sopenharmony_ci				else
53262306a36Sopenharmony_ci					dp->pmap[word] |=
53362306a36Sopenharmony_ci					    cpu_to_le32(mask);
53462306a36Sopenharmony_ci
53562306a36Sopenharmony_ci				word += 1;
53662306a36Sopenharmony_ci			} else {
53762306a36Sopenharmony_ci				/* one or more words are to have all
53862306a36Sopenharmony_ci				 * their bits updated.  determine how
53962306a36Sopenharmony_ci				 * many words and how many bits.
54062306a36Sopenharmony_ci				 */
54162306a36Sopenharmony_ci				nwords = rbits >> L2DBWORD;
54262306a36Sopenharmony_ci				nbits = nwords << L2DBWORD;
54362306a36Sopenharmony_ci
54462306a36Sopenharmony_ci				/* update (free or allocate) the bits
54562306a36Sopenharmony_ci				 * in these words.
54662306a36Sopenharmony_ci				 */
54762306a36Sopenharmony_ci				if (free)
54862306a36Sopenharmony_ci					memset(&dp->pmap[word], 0,
54962306a36Sopenharmony_ci					       nwords * 4);
55062306a36Sopenharmony_ci				else
55162306a36Sopenharmony_ci					memset(&dp->pmap[word], (int) ONES,
55262306a36Sopenharmony_ci					       nwords * 4);
55362306a36Sopenharmony_ci
55462306a36Sopenharmony_ci				word += nwords;
55562306a36Sopenharmony_ci			}
55662306a36Sopenharmony_ci		}
55762306a36Sopenharmony_ci
55862306a36Sopenharmony_ci		/*
55962306a36Sopenharmony_ci		 * update dmap lsn
56062306a36Sopenharmony_ci		 */
56162306a36Sopenharmony_ci		if (lblkno == lastlblkno)
56262306a36Sopenharmony_ci			continue;
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci		lastlblkno = lblkno;
56562306a36Sopenharmony_ci
56662306a36Sopenharmony_ci		LOGSYNC_LOCK(log, flags);
56762306a36Sopenharmony_ci		if (mp->lsn != 0) {
56862306a36Sopenharmony_ci			/* inherit older/smaller lsn */
56962306a36Sopenharmony_ci			logdiff(diffp, mp->lsn, log);
57062306a36Sopenharmony_ci			if (difft < diffp) {
57162306a36Sopenharmony_ci				mp->lsn = lsn;
57262306a36Sopenharmony_ci
57362306a36Sopenharmony_ci				/* move bp after tblock in logsync list */
57462306a36Sopenharmony_ci				list_move(&mp->synclist, &tblk->synclist);
57562306a36Sopenharmony_ci			}
57662306a36Sopenharmony_ci
57762306a36Sopenharmony_ci			/* inherit younger/larger clsn */
57862306a36Sopenharmony_ci			logdiff(difft, tblk->clsn, log);
57962306a36Sopenharmony_ci			logdiff(diffp, mp->clsn, log);
58062306a36Sopenharmony_ci			if (difft > diffp)
58162306a36Sopenharmony_ci				mp->clsn = tblk->clsn;
58262306a36Sopenharmony_ci		} else {
58362306a36Sopenharmony_ci			mp->log = log;
58462306a36Sopenharmony_ci			mp->lsn = lsn;
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_ci			/* insert bp after tblock in logsync list */
58762306a36Sopenharmony_ci			log->count++;
58862306a36Sopenharmony_ci			list_add(&mp->synclist, &tblk->synclist);
58962306a36Sopenharmony_ci
59062306a36Sopenharmony_ci			mp->clsn = tblk->clsn;
59162306a36Sopenharmony_ci		}
59262306a36Sopenharmony_ci		LOGSYNC_UNLOCK(log, flags);
59362306a36Sopenharmony_ci	}
59462306a36Sopenharmony_ci
59562306a36Sopenharmony_ci	/* write the last buffer. */
59662306a36Sopenharmony_ci	if (mp) {
59762306a36Sopenharmony_ci		write_metapage(mp);
59862306a36Sopenharmony_ci	}
59962306a36Sopenharmony_ci
60062306a36Sopenharmony_ci	return (0);
60162306a36Sopenharmony_ci}
60262306a36Sopenharmony_ci
60362306a36Sopenharmony_ci
60462306a36Sopenharmony_ci/*
60562306a36Sopenharmony_ci * NAME:	dbNextAG()
60662306a36Sopenharmony_ci *
60762306a36Sopenharmony_ci * FUNCTION:	find the preferred allocation group for new allocations.
60862306a36Sopenharmony_ci *
60962306a36Sopenharmony_ci *		Within the allocation groups, we maintain a preferred
61062306a36Sopenharmony_ci *		allocation group which consists of a group with at least
61162306a36Sopenharmony_ci *		average free space.  It is the preferred group that we target
61262306a36Sopenharmony_ci *		new inode allocation towards.  The tie-in between inode
61362306a36Sopenharmony_ci *		allocation and block allocation occurs as we allocate the
61462306a36Sopenharmony_ci *		first (data) block of an inode and specify the inode (block)
61562306a36Sopenharmony_ci *		as the allocation hint for this block.
61662306a36Sopenharmony_ci *
61762306a36Sopenharmony_ci *		We try to avoid having more than one open file growing in
61862306a36Sopenharmony_ci *		an allocation group, as this will lead to fragmentation.
61962306a36Sopenharmony_ci *		This differs from the old OS/2 method of trying to keep
62062306a36Sopenharmony_ci *		empty ags around for large allocations.
62162306a36Sopenharmony_ci *
62262306a36Sopenharmony_ci * PARAMETERS:
62362306a36Sopenharmony_ci *	ipbmap	- pointer to in-core inode for the block map.
62462306a36Sopenharmony_ci *
62562306a36Sopenharmony_ci * RETURN VALUES:
62662306a36Sopenharmony_ci *	the preferred allocation group number.
62762306a36Sopenharmony_ci */
62862306a36Sopenharmony_ciint dbNextAG(struct inode *ipbmap)
62962306a36Sopenharmony_ci{
63062306a36Sopenharmony_ci	s64 avgfree;
63162306a36Sopenharmony_ci	int agpref;
63262306a36Sopenharmony_ci	s64 hwm = 0;
63362306a36Sopenharmony_ci	int i;
63462306a36Sopenharmony_ci	int next_best = -1;
63562306a36Sopenharmony_ci	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
63662306a36Sopenharmony_ci
63762306a36Sopenharmony_ci	BMAP_LOCK(bmp);
63862306a36Sopenharmony_ci
63962306a36Sopenharmony_ci	/* determine the average number of free blocks within the ags. */
64062306a36Sopenharmony_ci	avgfree = (u32)bmp->db_nfree / bmp->db_numag;
64162306a36Sopenharmony_ci
64262306a36Sopenharmony_ci	/*
64362306a36Sopenharmony_ci	 * if the current preferred ag does not have an active allocator
64462306a36Sopenharmony_ci	 * and has at least average freespace, return it
64562306a36Sopenharmony_ci	 */
64662306a36Sopenharmony_ci	agpref = bmp->db_agpref;
64762306a36Sopenharmony_ci	if ((atomic_read(&bmp->db_active[agpref]) == 0) &&
64862306a36Sopenharmony_ci	    (bmp->db_agfree[agpref] >= avgfree))
64962306a36Sopenharmony_ci		goto unlock;
65062306a36Sopenharmony_ci
65162306a36Sopenharmony_ci	/* From the last preferred ag, find the next one with at least
65262306a36Sopenharmony_ci	 * average free space.
65362306a36Sopenharmony_ci	 */
65462306a36Sopenharmony_ci	for (i = 0 ; i < bmp->db_numag; i++, agpref++) {
65562306a36Sopenharmony_ci		if (agpref == bmp->db_numag)
65662306a36Sopenharmony_ci			agpref = 0;
65762306a36Sopenharmony_ci
65862306a36Sopenharmony_ci		if (atomic_read(&bmp->db_active[agpref]))
65962306a36Sopenharmony_ci			/* open file is currently growing in this ag */
66062306a36Sopenharmony_ci			continue;
66162306a36Sopenharmony_ci		if (bmp->db_agfree[agpref] >= avgfree) {
66262306a36Sopenharmony_ci			/* Return this one */
66362306a36Sopenharmony_ci			bmp->db_agpref = agpref;
66462306a36Sopenharmony_ci			goto unlock;
66562306a36Sopenharmony_ci		} else if (bmp->db_agfree[agpref] > hwm) {
66662306a36Sopenharmony_ci			/* Less than avg. freespace, but best so far */
66762306a36Sopenharmony_ci			hwm = bmp->db_agfree[agpref];
66862306a36Sopenharmony_ci			next_best = agpref;
66962306a36Sopenharmony_ci		}
67062306a36Sopenharmony_ci	}
67162306a36Sopenharmony_ci
67262306a36Sopenharmony_ci	/*
67362306a36Sopenharmony_ci	 * If no inactive ag was found with average freespace, use the
67462306a36Sopenharmony_ci	 * next best
67562306a36Sopenharmony_ci	 */
67662306a36Sopenharmony_ci	if (next_best != -1)
67762306a36Sopenharmony_ci		bmp->db_agpref = next_best;
67862306a36Sopenharmony_ci	/* else leave db_agpref unchanged */
67962306a36Sopenharmony_ciunlock:
68062306a36Sopenharmony_ci	BMAP_UNLOCK(bmp);
68162306a36Sopenharmony_ci
68262306a36Sopenharmony_ci	/* return the preferred group.
68362306a36Sopenharmony_ci	 */
68462306a36Sopenharmony_ci	return (bmp->db_agpref);
68562306a36Sopenharmony_ci}
68662306a36Sopenharmony_ci
68762306a36Sopenharmony_ci/*
68862306a36Sopenharmony_ci * NAME:	dbAlloc()
68962306a36Sopenharmony_ci *
69062306a36Sopenharmony_ci * FUNCTION:	attempt to allocate a specified number of contiguous free
69162306a36Sopenharmony_ci *		blocks from the working allocation block map.
69262306a36Sopenharmony_ci *
69362306a36Sopenharmony_ci *		the block allocation policy uses hints and a multi-step
69462306a36Sopenharmony_ci *		approach.
69562306a36Sopenharmony_ci *
69662306a36Sopenharmony_ci *		for allocation requests smaller than the number of blocks
69762306a36Sopenharmony_ci *		per dmap, we first try to allocate the new blocks
69862306a36Sopenharmony_ci *		immediately following the hint.  if these blocks are not
69962306a36Sopenharmony_ci *		available, we try to allocate blocks near the hint.  if
70062306a36Sopenharmony_ci *		no blocks near the hint are available, we next try to
70162306a36Sopenharmony_ci *		allocate within the same dmap as contains the hint.
70262306a36Sopenharmony_ci *
70362306a36Sopenharmony_ci *		if no blocks are available in the dmap or the allocation
70462306a36Sopenharmony_ci *		request is larger than the dmap size, we try to allocate
70562306a36Sopenharmony_ci *		within the same allocation group as contains the hint. if
70662306a36Sopenharmony_ci *		this does not succeed, we finally try to allocate anywhere
70762306a36Sopenharmony_ci *		within the aggregate.
70862306a36Sopenharmony_ci *
70962306a36Sopenharmony_ci *		we also try to allocate anywhere within the aggregate
71062306a36Sopenharmony_ci *		for allocation requests larger than the allocation group
71162306a36Sopenharmony_ci *		size or requests that specify no hint value.
71262306a36Sopenharmony_ci *
71362306a36Sopenharmony_ci * PARAMETERS:
71462306a36Sopenharmony_ci *	ip	- pointer to in-core inode;
71562306a36Sopenharmony_ci *	hint	- allocation hint.
71662306a36Sopenharmony_ci *	nblocks	- number of contiguous blocks in the range.
71762306a36Sopenharmony_ci *	results	- on successful return, set to the starting block number
71862306a36Sopenharmony_ci *		  of the newly allocated contiguous range.
71962306a36Sopenharmony_ci *
72062306a36Sopenharmony_ci * RETURN VALUES:
72162306a36Sopenharmony_ci *	0	- success
72262306a36Sopenharmony_ci *	-ENOSPC	- insufficient disk resources
72362306a36Sopenharmony_ci *	-EIO	- i/o error
72462306a36Sopenharmony_ci */
72562306a36Sopenharmony_ciint dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
72662306a36Sopenharmony_ci{
72762306a36Sopenharmony_ci	int rc, agno;
72862306a36Sopenharmony_ci	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
72962306a36Sopenharmony_ci	struct bmap *bmp;
73062306a36Sopenharmony_ci	struct metapage *mp;
73162306a36Sopenharmony_ci	s64 lblkno, blkno;
73262306a36Sopenharmony_ci	struct dmap *dp;
73362306a36Sopenharmony_ci	int l2nb;
73462306a36Sopenharmony_ci	s64 mapSize;
73562306a36Sopenharmony_ci	int writers;
73662306a36Sopenharmony_ci
73762306a36Sopenharmony_ci	/* assert that nblocks is valid */
73862306a36Sopenharmony_ci	assert(nblocks > 0);
73962306a36Sopenharmony_ci
74062306a36Sopenharmony_ci	/* get the log2 number of blocks to be allocated.
74162306a36Sopenharmony_ci	 * if the number of blocks is not a log2 multiple,
74262306a36Sopenharmony_ci	 * it will be rounded up to the next log2 multiple.
74362306a36Sopenharmony_ci	 */
74462306a36Sopenharmony_ci	l2nb = BLKSTOL2(nblocks);
74562306a36Sopenharmony_ci
74662306a36Sopenharmony_ci	bmp = JFS_SBI(ip->i_sb)->bmap;
74762306a36Sopenharmony_ci
74862306a36Sopenharmony_ci	mapSize = bmp->db_mapsize;
74962306a36Sopenharmony_ci
75062306a36Sopenharmony_ci	/* the hint should be within the map */
75162306a36Sopenharmony_ci	if (hint >= mapSize) {
75262306a36Sopenharmony_ci		jfs_error(ip->i_sb, "the hint is outside the map\n");
75362306a36Sopenharmony_ci		return -EIO;
75462306a36Sopenharmony_ci	}
75562306a36Sopenharmony_ci
75662306a36Sopenharmony_ci	/* if the number of blocks to be allocated is greater than the
75762306a36Sopenharmony_ci	 * allocation group size, try to allocate anywhere.
75862306a36Sopenharmony_ci	 */
75962306a36Sopenharmony_ci	if (l2nb > bmp->db_agl2size) {
76062306a36Sopenharmony_ci		IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
76162306a36Sopenharmony_ci
76262306a36Sopenharmony_ci		rc = dbAllocAny(bmp, nblocks, l2nb, results);
76362306a36Sopenharmony_ci
76462306a36Sopenharmony_ci		goto write_unlock;
76562306a36Sopenharmony_ci	}
76662306a36Sopenharmony_ci
76762306a36Sopenharmony_ci	/*
76862306a36Sopenharmony_ci	 * If no hint, let dbNextAG recommend an allocation group
76962306a36Sopenharmony_ci	 */
77062306a36Sopenharmony_ci	if (hint == 0)
77162306a36Sopenharmony_ci		goto pref_ag;
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_ci	/* we would like to allocate close to the hint.  adjust the
77462306a36Sopenharmony_ci	 * hint to the block following the hint since the allocators
77562306a36Sopenharmony_ci	 * will start looking for free space starting at this point.
77662306a36Sopenharmony_ci	 */
77762306a36Sopenharmony_ci	blkno = hint + 1;
77862306a36Sopenharmony_ci
77962306a36Sopenharmony_ci	if (blkno >= bmp->db_mapsize)
78062306a36Sopenharmony_ci		goto pref_ag;
78162306a36Sopenharmony_ci
78262306a36Sopenharmony_ci	agno = blkno >> bmp->db_agl2size;
78362306a36Sopenharmony_ci
78462306a36Sopenharmony_ci	/* check if blkno crosses over into a new allocation group.
78562306a36Sopenharmony_ci	 * if so, check if we should allow allocations within this
78662306a36Sopenharmony_ci	 * allocation group.
78762306a36Sopenharmony_ci	 */
78862306a36Sopenharmony_ci	if ((blkno & (bmp->db_agsize - 1)) == 0)
78962306a36Sopenharmony_ci		/* check if the AG is currently being written to.
79062306a36Sopenharmony_ci		 * if so, call dbNextAG() to find a non-busy
79162306a36Sopenharmony_ci		 * AG with sufficient free space.
79262306a36Sopenharmony_ci		 */
79362306a36Sopenharmony_ci		if (atomic_read(&bmp->db_active[agno]))
79462306a36Sopenharmony_ci			goto pref_ag;
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci	/* check if the allocation request size can be satisfied from a
79762306a36Sopenharmony_ci	 * single dmap.  if so, try to allocate from the dmap containing
79862306a36Sopenharmony_ci	 * the hint using a tiered strategy.
79962306a36Sopenharmony_ci	 */
80062306a36Sopenharmony_ci	if (nblocks <= BPERDMAP) {
80162306a36Sopenharmony_ci		IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ci		/* get the buffer for the dmap containing the hint.
80462306a36Sopenharmony_ci		 */
80562306a36Sopenharmony_ci		rc = -EIO;
80662306a36Sopenharmony_ci		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
80762306a36Sopenharmony_ci		mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
80862306a36Sopenharmony_ci		if (mp == NULL)
80962306a36Sopenharmony_ci			goto read_unlock;
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_ci		dp = (struct dmap *) mp->data;
81262306a36Sopenharmony_ci
81362306a36Sopenharmony_ci		/* first, try to satisfy the allocation request with the
81462306a36Sopenharmony_ci		 * blocks beginning at the hint.
81562306a36Sopenharmony_ci		 */
81662306a36Sopenharmony_ci		if ((rc = dbAllocNext(bmp, dp, blkno, (int) nblocks))
81762306a36Sopenharmony_ci		    != -ENOSPC) {
81862306a36Sopenharmony_ci			if (rc == 0) {
81962306a36Sopenharmony_ci				*results = blkno;
82062306a36Sopenharmony_ci				mark_metapage_dirty(mp);
82162306a36Sopenharmony_ci			}
82262306a36Sopenharmony_ci
82362306a36Sopenharmony_ci			release_metapage(mp);
82462306a36Sopenharmony_ci			goto read_unlock;
82562306a36Sopenharmony_ci		}
82662306a36Sopenharmony_ci
82762306a36Sopenharmony_ci		writers = atomic_read(&bmp->db_active[agno]);
82862306a36Sopenharmony_ci		if ((writers > 1) ||
82962306a36Sopenharmony_ci		    ((writers == 1) && (JFS_IP(ip)->active_ag != agno))) {
83062306a36Sopenharmony_ci			/*
83162306a36Sopenharmony_ci			 * Someone else is writing in this allocation
83262306a36Sopenharmony_ci			 * group.  To avoid fragmenting, try another ag
83362306a36Sopenharmony_ci			 */
83462306a36Sopenharmony_ci			release_metapage(mp);
83562306a36Sopenharmony_ci			IREAD_UNLOCK(ipbmap);
83662306a36Sopenharmony_ci			goto pref_ag;
83762306a36Sopenharmony_ci		}
83862306a36Sopenharmony_ci
83962306a36Sopenharmony_ci		/* next, try to satisfy the allocation request with blocks
84062306a36Sopenharmony_ci		 * near the hint.
84162306a36Sopenharmony_ci		 */
84262306a36Sopenharmony_ci		if ((rc =
84362306a36Sopenharmony_ci		     dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results))
84462306a36Sopenharmony_ci		    != -ENOSPC) {
84562306a36Sopenharmony_ci			if (rc == 0)
84662306a36Sopenharmony_ci				mark_metapage_dirty(mp);
84762306a36Sopenharmony_ci
84862306a36Sopenharmony_ci			release_metapage(mp);
84962306a36Sopenharmony_ci			goto read_unlock;
85062306a36Sopenharmony_ci		}
85162306a36Sopenharmony_ci
85262306a36Sopenharmony_ci		/* try to satisfy the allocation request with blocks within
85362306a36Sopenharmony_ci		 * the same dmap as the hint.
85462306a36Sopenharmony_ci		 */
85562306a36Sopenharmony_ci		if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results))
85662306a36Sopenharmony_ci		    != -ENOSPC) {
85762306a36Sopenharmony_ci			if (rc == 0)
85862306a36Sopenharmony_ci				mark_metapage_dirty(mp);
85962306a36Sopenharmony_ci
86062306a36Sopenharmony_ci			release_metapage(mp);
86162306a36Sopenharmony_ci			goto read_unlock;
86262306a36Sopenharmony_ci		}
86362306a36Sopenharmony_ci
86462306a36Sopenharmony_ci		release_metapage(mp);
86562306a36Sopenharmony_ci		IREAD_UNLOCK(ipbmap);
86662306a36Sopenharmony_ci	}
86762306a36Sopenharmony_ci
86862306a36Sopenharmony_ci	/* try to satisfy the allocation request with blocks within
86962306a36Sopenharmony_ci	 * the same allocation group as the hint.
87062306a36Sopenharmony_ci	 */
87162306a36Sopenharmony_ci	IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
87262306a36Sopenharmony_ci	if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC)
87362306a36Sopenharmony_ci		goto write_unlock;
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_ci	IWRITE_UNLOCK(ipbmap);
87662306a36Sopenharmony_ci
87762306a36Sopenharmony_ci
87862306a36Sopenharmony_ci      pref_ag:
87962306a36Sopenharmony_ci	/*
88062306a36Sopenharmony_ci	 * Let dbNextAG recommend a preferred allocation group
88162306a36Sopenharmony_ci	 */
88262306a36Sopenharmony_ci	agno = dbNextAG(ipbmap);
88362306a36Sopenharmony_ci	IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci	/* Try to allocate within this allocation group.  if that fails, try to
88662306a36Sopenharmony_ci	 * allocate anywhere in the map.
88762306a36Sopenharmony_ci	 */
88862306a36Sopenharmony_ci	if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == -ENOSPC)
88962306a36Sopenharmony_ci		rc = dbAllocAny(bmp, nblocks, l2nb, results);
89062306a36Sopenharmony_ci
89162306a36Sopenharmony_ci      write_unlock:
89262306a36Sopenharmony_ci	IWRITE_UNLOCK(ipbmap);
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_ci	return (rc);
89562306a36Sopenharmony_ci
89662306a36Sopenharmony_ci      read_unlock:
89762306a36Sopenharmony_ci	IREAD_UNLOCK(ipbmap);
89862306a36Sopenharmony_ci
89962306a36Sopenharmony_ci	return (rc);
90062306a36Sopenharmony_ci}
90162306a36Sopenharmony_ci
90262306a36Sopenharmony_ci/*
90362306a36Sopenharmony_ci * NAME:	dbReAlloc()
90462306a36Sopenharmony_ci *
90562306a36Sopenharmony_ci * FUNCTION:	attempt to extend a current allocation by a specified
90662306a36Sopenharmony_ci *		number of blocks.
90762306a36Sopenharmony_ci *
90862306a36Sopenharmony_ci *		this routine attempts to satisfy the allocation request
90962306a36Sopenharmony_ci *		by first trying to extend the existing allocation in
91062306a36Sopenharmony_ci *		place by allocating the additional blocks as the blocks
91162306a36Sopenharmony_ci *		immediately following the current allocation.  if these
91262306a36Sopenharmony_ci *		blocks are not available, this routine will attempt to
91362306a36Sopenharmony_ci *		allocate a new set of contiguous blocks large enough
91462306a36Sopenharmony_ci *		to cover the existing allocation plus the additional
91562306a36Sopenharmony_ci *		number of blocks required.
91662306a36Sopenharmony_ci *
91762306a36Sopenharmony_ci * PARAMETERS:
91862306a36Sopenharmony_ci *	ip	    -  pointer to in-core inode requiring allocation.
91962306a36Sopenharmony_ci *	blkno	    -  starting block of the current allocation.
92062306a36Sopenharmony_ci *	nblocks	    -  number of contiguous blocks within the current
92162306a36Sopenharmony_ci *		       allocation.
92262306a36Sopenharmony_ci *	addnblocks  -  number of blocks to add to the allocation.
92362306a36Sopenharmony_ci *	results	-      on successful return, set to the starting block number
92462306a36Sopenharmony_ci *		       of the existing allocation if the existing allocation
92562306a36Sopenharmony_ci *		       was extended in place or to a newly allocated contiguous
92662306a36Sopenharmony_ci *		       range if the existing allocation could not be extended
92762306a36Sopenharmony_ci *		       in place.
92862306a36Sopenharmony_ci *
92962306a36Sopenharmony_ci * RETURN VALUES:
93062306a36Sopenharmony_ci *	0	- success
93162306a36Sopenharmony_ci *	-ENOSPC	- insufficient disk resources
93262306a36Sopenharmony_ci *	-EIO	- i/o error
93362306a36Sopenharmony_ci */
93462306a36Sopenharmony_ciint
93562306a36Sopenharmony_cidbReAlloc(struct inode *ip,
93662306a36Sopenharmony_ci	  s64 blkno, s64 nblocks, s64 addnblocks, s64 * results)
93762306a36Sopenharmony_ci{
93862306a36Sopenharmony_ci	int rc;
93962306a36Sopenharmony_ci
94062306a36Sopenharmony_ci	/* try to extend the allocation in place.
94162306a36Sopenharmony_ci	 */
94262306a36Sopenharmony_ci	if ((rc = dbExtend(ip, blkno, nblocks, addnblocks)) == 0) {
94362306a36Sopenharmony_ci		*results = blkno;
94462306a36Sopenharmony_ci		return (0);
94562306a36Sopenharmony_ci	} else {
94662306a36Sopenharmony_ci		if (rc != -ENOSPC)
94762306a36Sopenharmony_ci			return (rc);
94862306a36Sopenharmony_ci	}
94962306a36Sopenharmony_ci
95062306a36Sopenharmony_ci	/* could not extend the allocation in place, so allocate a
95162306a36Sopenharmony_ci	 * new set of blocks for the entire request (i.e. try to get
95262306a36Sopenharmony_ci	 * a range of contiguous blocks large enough to cover the
95362306a36Sopenharmony_ci	 * existing allocation plus the additional blocks.)
95462306a36Sopenharmony_ci	 */
95562306a36Sopenharmony_ci	return (dbAlloc
95662306a36Sopenharmony_ci		(ip, blkno + nblocks - 1, addnblocks + nblocks, results));
95762306a36Sopenharmony_ci}
95862306a36Sopenharmony_ci
95962306a36Sopenharmony_ci
96062306a36Sopenharmony_ci/*
96162306a36Sopenharmony_ci * NAME:	dbExtend()
96262306a36Sopenharmony_ci *
96362306a36Sopenharmony_ci * FUNCTION:	attempt to extend a current allocation by a specified
96462306a36Sopenharmony_ci *		number of blocks.
96562306a36Sopenharmony_ci *
96662306a36Sopenharmony_ci *		this routine attempts to satisfy the allocation request
96762306a36Sopenharmony_ci *		by first trying to extend the existing allocation in
96862306a36Sopenharmony_ci *		place by allocating the additional blocks as the blocks
96962306a36Sopenharmony_ci *		immediately following the current allocation.
97062306a36Sopenharmony_ci *
97162306a36Sopenharmony_ci * PARAMETERS:
97262306a36Sopenharmony_ci *	ip	    -  pointer to in-core inode requiring allocation.
97362306a36Sopenharmony_ci *	blkno	    -  starting block of the current allocation.
97462306a36Sopenharmony_ci *	nblocks	    -  number of contiguous blocks within the current
97562306a36Sopenharmony_ci *		       allocation.
97662306a36Sopenharmony_ci *	addnblocks  -  number of blocks to add to the allocation.
97762306a36Sopenharmony_ci *
97862306a36Sopenharmony_ci * RETURN VALUES:
97962306a36Sopenharmony_ci *	0	- success
98062306a36Sopenharmony_ci *	-ENOSPC	- insufficient disk resources
98162306a36Sopenharmony_ci *	-EIO	- i/o error
98262306a36Sopenharmony_ci */
98362306a36Sopenharmony_cistatic int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
98462306a36Sopenharmony_ci{
98562306a36Sopenharmony_ci	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
98662306a36Sopenharmony_ci	s64 lblkno, lastblkno, extblkno;
98762306a36Sopenharmony_ci	uint rel_block;
98862306a36Sopenharmony_ci	struct metapage *mp;
98962306a36Sopenharmony_ci	struct dmap *dp;
99062306a36Sopenharmony_ci	int rc;
99162306a36Sopenharmony_ci	struct inode *ipbmap = sbi->ipbmap;
99262306a36Sopenharmony_ci	struct bmap *bmp;
99362306a36Sopenharmony_ci
99462306a36Sopenharmony_ci	/*
99562306a36Sopenharmony_ci	 * We don't want a non-aligned extent to cross a page boundary
99662306a36Sopenharmony_ci	 */
99762306a36Sopenharmony_ci	if (((rel_block = blkno & (sbi->nbperpage - 1))) &&
99862306a36Sopenharmony_ci	    (rel_block + nblocks + addnblocks > sbi->nbperpage))
99962306a36Sopenharmony_ci		return -ENOSPC;
100062306a36Sopenharmony_ci
100162306a36Sopenharmony_ci	/* get the last block of the current allocation */
100262306a36Sopenharmony_ci	lastblkno = blkno + nblocks - 1;
100362306a36Sopenharmony_ci
100462306a36Sopenharmony_ci	/* determine the block number of the block following
100562306a36Sopenharmony_ci	 * the existing allocation.
100662306a36Sopenharmony_ci	 */
100762306a36Sopenharmony_ci	extblkno = lastblkno + 1;
100862306a36Sopenharmony_ci
100962306a36Sopenharmony_ci	IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
101062306a36Sopenharmony_ci
101162306a36Sopenharmony_ci	/* better be within the file system */
101262306a36Sopenharmony_ci	bmp = sbi->bmap;
101362306a36Sopenharmony_ci	if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) {
101462306a36Sopenharmony_ci		IREAD_UNLOCK(ipbmap);
101562306a36Sopenharmony_ci		jfs_error(ip->i_sb, "the block is outside the filesystem\n");
101662306a36Sopenharmony_ci		return -EIO;
101762306a36Sopenharmony_ci	}
101862306a36Sopenharmony_ci
101962306a36Sopenharmony_ci	/* we'll attempt to extend the current allocation in place by
102062306a36Sopenharmony_ci	 * allocating the additional blocks as the blocks immediately
102162306a36Sopenharmony_ci	 * following the current allocation.  we only try to extend the
102262306a36Sopenharmony_ci	 * current allocation in place if the number of additional blocks
102362306a36Sopenharmony_ci	 * can fit into a dmap, the last block of the current allocation
102462306a36Sopenharmony_ci	 * is not the last block of the file system, and the start of the
102562306a36Sopenharmony_ci	 * inplace extension is not on an allocation group boundary.
102662306a36Sopenharmony_ci	 */
102762306a36Sopenharmony_ci	if (addnblocks > BPERDMAP || extblkno >= bmp->db_mapsize ||
102862306a36Sopenharmony_ci	    (extblkno & (bmp->db_agsize - 1)) == 0) {
102962306a36Sopenharmony_ci		IREAD_UNLOCK(ipbmap);
103062306a36Sopenharmony_ci		return -ENOSPC;
103162306a36Sopenharmony_ci	}
103262306a36Sopenharmony_ci
103362306a36Sopenharmony_ci	/* get the buffer for the dmap containing the first block
103462306a36Sopenharmony_ci	 * of the extension.
103562306a36Sopenharmony_ci	 */
103662306a36Sopenharmony_ci	lblkno = BLKTODMAP(extblkno, bmp->db_l2nbperpage);
103762306a36Sopenharmony_ci	mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
103862306a36Sopenharmony_ci	if (mp == NULL) {
103962306a36Sopenharmony_ci		IREAD_UNLOCK(ipbmap);
104062306a36Sopenharmony_ci		return -EIO;
104162306a36Sopenharmony_ci	}
104262306a36Sopenharmony_ci
104362306a36Sopenharmony_ci	dp = (struct dmap *) mp->data;
104462306a36Sopenharmony_ci
104562306a36Sopenharmony_ci	/* try to allocate the blocks immediately following the
104662306a36Sopenharmony_ci	 * current allocation.
104762306a36Sopenharmony_ci	 */
104862306a36Sopenharmony_ci	rc = dbAllocNext(bmp, dp, extblkno, (int) addnblocks);
104962306a36Sopenharmony_ci
105062306a36Sopenharmony_ci	IREAD_UNLOCK(ipbmap);
105162306a36Sopenharmony_ci
105262306a36Sopenharmony_ci	/* were we successful ? */
105362306a36Sopenharmony_ci	if (rc == 0)
105462306a36Sopenharmony_ci		write_metapage(mp);
105562306a36Sopenharmony_ci	else
105662306a36Sopenharmony_ci		/* we were not successful */
105762306a36Sopenharmony_ci		release_metapage(mp);
105862306a36Sopenharmony_ci
105962306a36Sopenharmony_ci	return (rc);
106062306a36Sopenharmony_ci}
106162306a36Sopenharmony_ci
106262306a36Sopenharmony_ci
106362306a36Sopenharmony_ci/*
106462306a36Sopenharmony_ci * NAME:	dbAllocNext()
106562306a36Sopenharmony_ci *
106662306a36Sopenharmony_ci * FUNCTION:	attempt to allocate the blocks of the specified block
106762306a36Sopenharmony_ci *		range within a dmap.
106862306a36Sopenharmony_ci *
106962306a36Sopenharmony_ci * PARAMETERS:
107062306a36Sopenharmony_ci *	bmp	-  pointer to bmap descriptor
107162306a36Sopenharmony_ci *	dp	-  pointer to dmap.
107262306a36Sopenharmony_ci *	blkno	-  starting block number of the range.
107362306a36Sopenharmony_ci *	nblocks	-  number of contiguous free blocks of the range.
107462306a36Sopenharmony_ci *
107562306a36Sopenharmony_ci * RETURN VALUES:
107662306a36Sopenharmony_ci *	0	- success
107762306a36Sopenharmony_ci *	-ENOSPC	- insufficient disk resources
107862306a36Sopenharmony_ci *	-EIO	- i/o error
107962306a36Sopenharmony_ci *
108062306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
108162306a36Sopenharmony_ci */
108262306a36Sopenharmony_cistatic int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
108362306a36Sopenharmony_ci		       int nblocks)
108462306a36Sopenharmony_ci{
108562306a36Sopenharmony_ci	int dbitno, word, rembits, nb, nwords, wbitno, nw;
108662306a36Sopenharmony_ci	int l2size;
108762306a36Sopenharmony_ci	s8 *leaf;
108862306a36Sopenharmony_ci	u32 mask;
108962306a36Sopenharmony_ci
109062306a36Sopenharmony_ci	if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) {
109162306a36Sopenharmony_ci		jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n");
109262306a36Sopenharmony_ci		return -EIO;
109362306a36Sopenharmony_ci	}
109462306a36Sopenharmony_ci
109562306a36Sopenharmony_ci	/* pick up a pointer to the leaves of the dmap tree.
109662306a36Sopenharmony_ci	 */
109762306a36Sopenharmony_ci	leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);
109862306a36Sopenharmony_ci
109962306a36Sopenharmony_ci	/* determine the bit number and word within the dmap of the
110062306a36Sopenharmony_ci	 * starting block.
110162306a36Sopenharmony_ci	 */
110262306a36Sopenharmony_ci	dbitno = blkno & (BPERDMAP - 1);
110362306a36Sopenharmony_ci	word = dbitno >> L2DBWORD;
110462306a36Sopenharmony_ci
110562306a36Sopenharmony_ci	/* check if the specified block range is contained within
110662306a36Sopenharmony_ci	 * this dmap.
110762306a36Sopenharmony_ci	 */
110862306a36Sopenharmony_ci	if (dbitno + nblocks > BPERDMAP)
110962306a36Sopenharmony_ci		return -ENOSPC;
111062306a36Sopenharmony_ci
111162306a36Sopenharmony_ci	/* check if the starting leaf indicates that anything
111262306a36Sopenharmony_ci	 * is free.
111362306a36Sopenharmony_ci	 */
111462306a36Sopenharmony_ci	if (leaf[word] == NOFREE)
111562306a36Sopenharmony_ci		return -ENOSPC;
111662306a36Sopenharmony_ci
111762306a36Sopenharmony_ci	/* check the dmaps words corresponding to block range to see
111862306a36Sopenharmony_ci	 * if the block range is free.  not all bits of the first and
111962306a36Sopenharmony_ci	 * last words may be contained within the block range.  if this
112062306a36Sopenharmony_ci	 * is the case, we'll work against those words (i.e. partial first
112162306a36Sopenharmony_ci	 * and/or last) on an individual basis (a single pass) and examine
112262306a36Sopenharmony_ci	 * the actual bits to determine if they are free.  a single pass
112362306a36Sopenharmony_ci	 * will be used for all dmap words fully contained within the
112462306a36Sopenharmony_ci	 * specified range.  within this pass, the leaves of the dmap
112562306a36Sopenharmony_ci	 * tree will be examined to determine if the blocks are free. a
112662306a36Sopenharmony_ci	 * single leaf may describe the free space of multiple dmap
112762306a36Sopenharmony_ci	 * words, so we may visit only a subset of the actual leaves
112862306a36Sopenharmony_ci	 * corresponding to the dmap words of the block range.
112962306a36Sopenharmony_ci	 */
113062306a36Sopenharmony_ci	for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
113162306a36Sopenharmony_ci		/* determine the bit number within the word and
113262306a36Sopenharmony_ci		 * the number of bits within the word.
113362306a36Sopenharmony_ci		 */
113462306a36Sopenharmony_ci		wbitno = dbitno & (DBWORD - 1);
113562306a36Sopenharmony_ci		nb = min(rembits, DBWORD - wbitno);
113662306a36Sopenharmony_ci
113762306a36Sopenharmony_ci		/* check if only part of the word is to be examined.
113862306a36Sopenharmony_ci		 */
113962306a36Sopenharmony_ci		if (nb < DBWORD) {
114062306a36Sopenharmony_ci			/* check if the bits are free.
114162306a36Sopenharmony_ci			 */
114262306a36Sopenharmony_ci			mask = (ONES << (DBWORD - nb) >> wbitno);
114362306a36Sopenharmony_ci			if ((mask & ~le32_to_cpu(dp->wmap[word])) != mask)
114462306a36Sopenharmony_ci				return -ENOSPC;
114562306a36Sopenharmony_ci
114662306a36Sopenharmony_ci			word += 1;
114762306a36Sopenharmony_ci		} else {
114862306a36Sopenharmony_ci			/* one or more dmap words are fully contained
114962306a36Sopenharmony_ci			 * within the block range.  determine how many
115062306a36Sopenharmony_ci			 * words and how many bits.
115162306a36Sopenharmony_ci			 */
115262306a36Sopenharmony_ci			nwords = rembits >> L2DBWORD;
115362306a36Sopenharmony_ci			nb = nwords << L2DBWORD;
115462306a36Sopenharmony_ci
115562306a36Sopenharmony_ci			/* now examine the appropriate leaves to determine
115662306a36Sopenharmony_ci			 * if the blocks are free.
115762306a36Sopenharmony_ci			 */
115862306a36Sopenharmony_ci			while (nwords > 0) {
115962306a36Sopenharmony_ci				/* does the leaf describe any free space ?
116062306a36Sopenharmony_ci				 */
116162306a36Sopenharmony_ci				if (leaf[word] < BUDMIN)
116262306a36Sopenharmony_ci					return -ENOSPC;
116362306a36Sopenharmony_ci
116462306a36Sopenharmony_ci				/* determine the l2 number of bits provided
116562306a36Sopenharmony_ci				 * by this leaf.
116662306a36Sopenharmony_ci				 */
116762306a36Sopenharmony_ci				l2size =
116862306a36Sopenharmony_ci				    min_t(int, leaf[word], NLSTOL2BSZ(nwords));
116962306a36Sopenharmony_ci
117062306a36Sopenharmony_ci				/* determine how many words were handled.
117162306a36Sopenharmony_ci				 */
117262306a36Sopenharmony_ci				nw = BUDSIZE(l2size, BUDMIN);
117362306a36Sopenharmony_ci
117462306a36Sopenharmony_ci				nwords -= nw;
117562306a36Sopenharmony_ci				word += nw;
117662306a36Sopenharmony_ci			}
117762306a36Sopenharmony_ci		}
117862306a36Sopenharmony_ci	}
117962306a36Sopenharmony_ci
118062306a36Sopenharmony_ci	/* allocate the blocks.
118162306a36Sopenharmony_ci	 */
118262306a36Sopenharmony_ci	return (dbAllocDmap(bmp, dp, blkno, nblocks));
118362306a36Sopenharmony_ci}
118462306a36Sopenharmony_ci
118562306a36Sopenharmony_ci
118662306a36Sopenharmony_ci/*
118762306a36Sopenharmony_ci * NAME:	dbAllocNear()
118862306a36Sopenharmony_ci *
118962306a36Sopenharmony_ci * FUNCTION:	attempt to allocate a number of contiguous free blocks near
119062306a36Sopenharmony_ci *		a specified block (hint) within a dmap.
119162306a36Sopenharmony_ci *
119262306a36Sopenharmony_ci *		starting with the dmap leaf that covers the hint, we'll
119362306a36Sopenharmony_ci *		check the next four contiguous leaves for sufficient free
119462306a36Sopenharmony_ci *		space.  if sufficient free space is found, we'll allocate
119562306a36Sopenharmony_ci *		the desired free space.
119662306a36Sopenharmony_ci *
119762306a36Sopenharmony_ci * PARAMETERS:
119862306a36Sopenharmony_ci *	bmp	-  pointer to bmap descriptor
119962306a36Sopenharmony_ci *	dp	-  pointer to dmap.
120062306a36Sopenharmony_ci *	blkno	-  block number to allocate near.
120162306a36Sopenharmony_ci *	nblocks	-  actual number of contiguous free blocks desired.
120262306a36Sopenharmony_ci *	l2nb	-  log2 number of contiguous free blocks desired.
120362306a36Sopenharmony_ci *	results	-  on successful return, set to the starting block number
120462306a36Sopenharmony_ci *		   of the newly allocated range.
120562306a36Sopenharmony_ci *
120662306a36Sopenharmony_ci * RETURN VALUES:
120762306a36Sopenharmony_ci *	0	- success
120862306a36Sopenharmony_ci *	-ENOSPC	- insufficient disk resources
120962306a36Sopenharmony_ci *	-EIO	- i/o error
121062306a36Sopenharmony_ci *
121162306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
121262306a36Sopenharmony_ci */
121362306a36Sopenharmony_cistatic int
121462306a36Sopenharmony_cidbAllocNear(struct bmap * bmp,
121562306a36Sopenharmony_ci	    struct dmap * dp, s64 blkno, int nblocks, int l2nb, s64 * results)
121662306a36Sopenharmony_ci{
121762306a36Sopenharmony_ci	int word, lword, rc;
121862306a36Sopenharmony_ci	s8 *leaf;
121962306a36Sopenharmony_ci
122062306a36Sopenharmony_ci	if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) {
122162306a36Sopenharmony_ci		jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n");
122262306a36Sopenharmony_ci		return -EIO;
122362306a36Sopenharmony_ci	}
122462306a36Sopenharmony_ci
122562306a36Sopenharmony_ci	leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);
122662306a36Sopenharmony_ci
122762306a36Sopenharmony_ci	/* determine the word within the dmap that holds the hint
122862306a36Sopenharmony_ci	 * (i.e. blkno).  also, determine the last word in the dmap
122962306a36Sopenharmony_ci	 * that we'll include in our examination.
123062306a36Sopenharmony_ci	 */
123162306a36Sopenharmony_ci	word = (blkno & (BPERDMAP - 1)) >> L2DBWORD;
123262306a36Sopenharmony_ci	lword = min(word + 4, LPERDMAP);
123362306a36Sopenharmony_ci
123462306a36Sopenharmony_ci	/* examine the leaves for sufficient free space.
123562306a36Sopenharmony_ci	 */
123662306a36Sopenharmony_ci	for (; word < lword; word++) {
123762306a36Sopenharmony_ci		/* does the leaf describe sufficient free space ?
123862306a36Sopenharmony_ci		 */
123962306a36Sopenharmony_ci		if (leaf[word] < l2nb)
124062306a36Sopenharmony_ci			continue;
124162306a36Sopenharmony_ci
124262306a36Sopenharmony_ci		/* determine the block number within the file system
124362306a36Sopenharmony_ci		 * of the first block described by this dmap word.
124462306a36Sopenharmony_ci		 */
124562306a36Sopenharmony_ci		blkno = le64_to_cpu(dp->start) + (word << L2DBWORD);
124662306a36Sopenharmony_ci
124762306a36Sopenharmony_ci		/* if not all bits of the dmap word are free, get the
124862306a36Sopenharmony_ci		 * starting bit number within the dmap word of the required
124962306a36Sopenharmony_ci		 * string of free bits and adjust the block number with the
125062306a36Sopenharmony_ci		 * value.
125162306a36Sopenharmony_ci		 */
125262306a36Sopenharmony_ci		if (leaf[word] < BUDMIN)
125362306a36Sopenharmony_ci			blkno +=
125462306a36Sopenharmony_ci			    dbFindBits(le32_to_cpu(dp->wmap[word]), l2nb);
125562306a36Sopenharmony_ci
125662306a36Sopenharmony_ci		/* allocate the blocks.
125762306a36Sopenharmony_ci		 */
125862306a36Sopenharmony_ci		if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0)
125962306a36Sopenharmony_ci			*results = blkno;
126062306a36Sopenharmony_ci
126162306a36Sopenharmony_ci		return (rc);
126262306a36Sopenharmony_ci	}
126362306a36Sopenharmony_ci
126462306a36Sopenharmony_ci	return -ENOSPC;
126562306a36Sopenharmony_ci}
126662306a36Sopenharmony_ci
126762306a36Sopenharmony_ci
126862306a36Sopenharmony_ci/*
126962306a36Sopenharmony_ci * NAME:	dbAllocAG()
127062306a36Sopenharmony_ci *
127162306a36Sopenharmony_ci * FUNCTION:	attempt to allocate the specified number of contiguous
127262306a36Sopenharmony_ci *		free blocks within the specified allocation group.
127362306a36Sopenharmony_ci *
127462306a36Sopenharmony_ci *		unless the allocation group size is equal to the number
127562306a36Sopenharmony_ci *		of blocks per dmap, the dmap control pages will be used to
127662306a36Sopenharmony_ci *		find the required free space, if available.  we start the
127762306a36Sopenharmony_ci *		search at the highest dmap control page level which
127862306a36Sopenharmony_ci *		distinctly describes the allocation group's free space
127962306a36Sopenharmony_ci *		(i.e. the highest level at which the allocation group's
128062306a36Sopenharmony_ci *		free space is not mixed in with that of any other group).
128162306a36Sopenharmony_ci *		in addition, we start the search within this level at a
128262306a36Sopenharmony_ci *		height of the dmapctl dmtree at which the nodes distinctly
128362306a36Sopenharmony_ci *		describe the allocation group's free space.  at this height,
128462306a36Sopenharmony_ci *		the allocation group's free space may be represented by 1
128562306a36Sopenharmony_ci *		or two sub-trees, depending on the allocation group size.
128662306a36Sopenharmony_ci *		we search the top nodes of these subtrees left to right for
128762306a36Sopenharmony_ci *		sufficient free space.  if sufficient free space is found,
128862306a36Sopenharmony_ci *		the subtree is searched to find the leftmost leaf that
128962306a36Sopenharmony_ci *		has free space.  once we have made it to the leaf, we
129062306a36Sopenharmony_ci *		move the search to the next lower level dmap control page
129162306a36Sopenharmony_ci *		corresponding to this leaf.  we continue down the dmap control
129262306a36Sopenharmony_ci *		pages until we find the dmap that contains or starts the
129362306a36Sopenharmony_ci *		sufficient free space and we allocate at this dmap.
129462306a36Sopenharmony_ci *
129562306a36Sopenharmony_ci *		if the allocation group size is equal to the dmap size,
129662306a36Sopenharmony_ci *		we'll start at the dmap corresponding to the allocation
129762306a36Sopenharmony_ci *		group and attempt the allocation at this level.
129862306a36Sopenharmony_ci *
129962306a36Sopenharmony_ci *		the dmap control page search is also not performed if the
130062306a36Sopenharmony_ci *		allocation group is completely free and we go to the first
130162306a36Sopenharmony_ci *		dmap of the allocation group to do the allocation.  this is
130262306a36Sopenharmony_ci *		done because the allocation group may be part (not the first
130362306a36Sopenharmony_ci *		part) of a larger binary buddy system, causing the dmap
130462306a36Sopenharmony_ci *		control pages to indicate no free space (NOFREE) within
130562306a36Sopenharmony_ci *		the allocation group.
130662306a36Sopenharmony_ci *
130762306a36Sopenharmony_ci * PARAMETERS:
130862306a36Sopenharmony_ci *	bmp	-  pointer to bmap descriptor
130962306a36Sopenharmony_ci *	agno	- allocation group number.
131062306a36Sopenharmony_ci *	nblocks	-  actual number of contiguous free blocks desired.
131162306a36Sopenharmony_ci *	l2nb	-  log2 number of contiguous free blocks desired.
131262306a36Sopenharmony_ci *	results	-  on successful return, set to the starting block number
131362306a36Sopenharmony_ci *		   of the newly allocated range.
131462306a36Sopenharmony_ci *
131562306a36Sopenharmony_ci * RETURN VALUES:
131662306a36Sopenharmony_ci *	0	- success
131762306a36Sopenharmony_ci *	-ENOSPC	- insufficient disk resources
131862306a36Sopenharmony_ci *	-EIO	- i/o error
131962306a36Sopenharmony_ci *
132062306a36Sopenharmony_ci * note: IWRITE_LOCK(ipmap) held on entry/exit;
132162306a36Sopenharmony_ci */
132262306a36Sopenharmony_cistatic int
132362306a36Sopenharmony_cidbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
132462306a36Sopenharmony_ci{
132562306a36Sopenharmony_ci	struct metapage *mp;
132662306a36Sopenharmony_ci	struct dmapctl *dcp;
132762306a36Sopenharmony_ci	int rc, ti, i, k, m, n, agperlev;
132862306a36Sopenharmony_ci	s64 blkno, lblkno;
132962306a36Sopenharmony_ci	int budmin;
133062306a36Sopenharmony_ci
133162306a36Sopenharmony_ci	/* allocation request should not be for more than the
133262306a36Sopenharmony_ci	 * allocation group size.
133362306a36Sopenharmony_ci	 */
133462306a36Sopenharmony_ci	if (l2nb > bmp->db_agl2size) {
133562306a36Sopenharmony_ci		jfs_error(bmp->db_ipbmap->i_sb,
133662306a36Sopenharmony_ci			  "allocation request is larger than the allocation group size\n");
133762306a36Sopenharmony_ci		return -EIO;
133862306a36Sopenharmony_ci	}
133962306a36Sopenharmony_ci
134062306a36Sopenharmony_ci	/* determine the starting block number of the allocation
134162306a36Sopenharmony_ci	 * group.
134262306a36Sopenharmony_ci	 */
134362306a36Sopenharmony_ci	blkno = (s64) agno << bmp->db_agl2size;
134462306a36Sopenharmony_ci
134562306a36Sopenharmony_ci	/* check if the allocation group size is the minimum allocation
134662306a36Sopenharmony_ci	 * group size or if the allocation group is completely free. if
134762306a36Sopenharmony_ci	 * the allocation group size is the minimum size of BPERDMAP (i.e.
134862306a36Sopenharmony_ci	 * 1 dmap), there is no need to search the dmap control page (below)
134962306a36Sopenharmony_ci	 * that fully describes the allocation group since the allocation
135062306a36Sopenharmony_ci	 * group is already fully described by a dmap.  in this case, we
135162306a36Sopenharmony_ci	 * just call dbAllocCtl() to search the dmap tree and allocate the
135262306a36Sopenharmony_ci	 * required space if available.
135362306a36Sopenharmony_ci	 *
135462306a36Sopenharmony_ci	 * if the allocation group is completely free, dbAllocCtl() is
135562306a36Sopenharmony_ci	 * also called to allocate the required space.  this is done for
135662306a36Sopenharmony_ci	 * two reasons.  first, it makes no sense searching the dmap control
135762306a36Sopenharmony_ci	 * pages for free space when we know that free space exists.  second,
135862306a36Sopenharmony_ci	 * the dmap control pages may indicate that the allocation group
135962306a36Sopenharmony_ci	 * has no free space if the allocation group is part (not the first
136062306a36Sopenharmony_ci	 * part) of a larger binary buddy system.
136162306a36Sopenharmony_ci	 */
136262306a36Sopenharmony_ci	if (bmp->db_agsize == BPERDMAP
136362306a36Sopenharmony_ci	    || bmp->db_agfree[agno] == bmp->db_agsize) {
136462306a36Sopenharmony_ci		rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
136562306a36Sopenharmony_ci		if ((rc == -ENOSPC) &&
136662306a36Sopenharmony_ci		    (bmp->db_agfree[agno] == bmp->db_agsize)) {
136762306a36Sopenharmony_ci			printk(KERN_ERR "blkno = %Lx, blocks = %Lx\n",
136862306a36Sopenharmony_ci			       (unsigned long long) blkno,
136962306a36Sopenharmony_ci			       (unsigned long long) nblocks);
137062306a36Sopenharmony_ci			jfs_error(bmp->db_ipbmap->i_sb,
137162306a36Sopenharmony_ci				  "dbAllocCtl failed in free AG\n");
137262306a36Sopenharmony_ci		}
137362306a36Sopenharmony_ci		return (rc);
137462306a36Sopenharmony_ci	}
137562306a36Sopenharmony_ci
137662306a36Sopenharmony_ci	/* the buffer for the dmap control page that fully describes the
137762306a36Sopenharmony_ci	 * allocation group.
137862306a36Sopenharmony_ci	 */
137962306a36Sopenharmony_ci	lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, bmp->db_aglevel);
138062306a36Sopenharmony_ci	mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
138162306a36Sopenharmony_ci	if (mp == NULL)
138262306a36Sopenharmony_ci		return -EIO;
138362306a36Sopenharmony_ci	dcp = (struct dmapctl *) mp->data;
138462306a36Sopenharmony_ci	budmin = dcp->budmin;
138562306a36Sopenharmony_ci
138662306a36Sopenharmony_ci	if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
138762306a36Sopenharmony_ci		jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n");
138862306a36Sopenharmony_ci		release_metapage(mp);
138962306a36Sopenharmony_ci		return -EIO;
139062306a36Sopenharmony_ci	}
139162306a36Sopenharmony_ci
139262306a36Sopenharmony_ci	/* search the subtree(s) of the dmap control page that describes
139362306a36Sopenharmony_ci	 * the allocation group, looking for sufficient free space.  to begin,
139462306a36Sopenharmony_ci	 * determine how many allocation groups are represented in a dmap
139562306a36Sopenharmony_ci	 * control page at the control page level (i.e. L0, L1, L2) that
139662306a36Sopenharmony_ci	 * fully describes an allocation group. next, determine the starting
139762306a36Sopenharmony_ci	 * tree index of this allocation group within the control page.
139862306a36Sopenharmony_ci	 */
139962306a36Sopenharmony_ci	agperlev =
140062306a36Sopenharmony_ci	    (1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth;
140162306a36Sopenharmony_ci	ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1));
140262306a36Sopenharmony_ci
140362306a36Sopenharmony_ci	/* dmap control page trees fan-out by 4 and a single allocation
140462306a36Sopenharmony_ci	 * group may be described by 1 or 2 subtrees within the ag level
140562306a36Sopenharmony_ci	 * dmap control page, depending upon the ag size. examine the ag's
140662306a36Sopenharmony_ci	 * subtrees for sufficient free space, starting with the leftmost
140762306a36Sopenharmony_ci	 * subtree.
140862306a36Sopenharmony_ci	 */
140962306a36Sopenharmony_ci	for (i = 0; i < bmp->db_agwidth; i++, ti++) {
141062306a36Sopenharmony_ci		/* is there sufficient free space ?
141162306a36Sopenharmony_ci		 */
141262306a36Sopenharmony_ci		if (l2nb > dcp->stree[ti])
141362306a36Sopenharmony_ci			continue;
141462306a36Sopenharmony_ci
141562306a36Sopenharmony_ci		/* sufficient free space found in a subtree. now search down
141662306a36Sopenharmony_ci		 * the subtree to find the leftmost leaf that describes this
141762306a36Sopenharmony_ci		 * free space.
141862306a36Sopenharmony_ci		 */
141962306a36Sopenharmony_ci		for (k = bmp->db_agheight; k > 0; k--) {
142062306a36Sopenharmony_ci			for (n = 0, m = (ti << 2) + 1; n < 4; n++) {
142162306a36Sopenharmony_ci				if (l2nb <= dcp->stree[m + n]) {
142262306a36Sopenharmony_ci					ti = m + n;
142362306a36Sopenharmony_ci					break;
142462306a36Sopenharmony_ci				}
142562306a36Sopenharmony_ci			}
142662306a36Sopenharmony_ci			if (n == 4) {
142762306a36Sopenharmony_ci				jfs_error(bmp->db_ipbmap->i_sb,
142862306a36Sopenharmony_ci					  "failed descending stree\n");
142962306a36Sopenharmony_ci				release_metapage(mp);
143062306a36Sopenharmony_ci				return -EIO;
143162306a36Sopenharmony_ci			}
143262306a36Sopenharmony_ci		}
143362306a36Sopenharmony_ci
143462306a36Sopenharmony_ci		/* determine the block number within the file system
143562306a36Sopenharmony_ci		 * that corresponds to this leaf.
143662306a36Sopenharmony_ci		 */
143762306a36Sopenharmony_ci		if (bmp->db_aglevel == 2)
143862306a36Sopenharmony_ci			blkno = 0;
143962306a36Sopenharmony_ci		else if (bmp->db_aglevel == 1)
144062306a36Sopenharmony_ci			blkno &= ~(MAXL1SIZE - 1);
144162306a36Sopenharmony_ci		else		/* bmp->db_aglevel == 0 */
144262306a36Sopenharmony_ci			blkno &= ~(MAXL0SIZE - 1);
144362306a36Sopenharmony_ci
144462306a36Sopenharmony_ci		blkno +=
144562306a36Sopenharmony_ci		    ((s64) (ti - le32_to_cpu(dcp->leafidx))) << budmin;
144662306a36Sopenharmony_ci
144762306a36Sopenharmony_ci		/* release the buffer in preparation for going down
144862306a36Sopenharmony_ci		 * the next level of dmap control pages.
144962306a36Sopenharmony_ci		 */
145062306a36Sopenharmony_ci		release_metapage(mp);
145162306a36Sopenharmony_ci
145262306a36Sopenharmony_ci		/* check if we need to continue to search down the lower
145362306a36Sopenharmony_ci		 * level dmap control pages.  we need to if the number of
145462306a36Sopenharmony_ci		 * blocks required is less than maximum number of blocks
145562306a36Sopenharmony_ci		 * described at the next lower level.
145662306a36Sopenharmony_ci		 */
145762306a36Sopenharmony_ci		if (l2nb < budmin) {
145862306a36Sopenharmony_ci
145962306a36Sopenharmony_ci			/* search the lower level dmap control pages to get
146062306a36Sopenharmony_ci			 * the starting block number of the dmap that
146162306a36Sopenharmony_ci			 * contains or starts off the free space.
146262306a36Sopenharmony_ci			 */
146362306a36Sopenharmony_ci			if ((rc =
146462306a36Sopenharmony_ci			     dbFindCtl(bmp, l2nb, bmp->db_aglevel - 1,
146562306a36Sopenharmony_ci				       &blkno))) {
146662306a36Sopenharmony_ci				if (rc == -ENOSPC) {
146762306a36Sopenharmony_ci					jfs_error(bmp->db_ipbmap->i_sb,
146862306a36Sopenharmony_ci						  "control page inconsistent\n");
146962306a36Sopenharmony_ci					return -EIO;
147062306a36Sopenharmony_ci				}
147162306a36Sopenharmony_ci				return (rc);
147262306a36Sopenharmony_ci			}
147362306a36Sopenharmony_ci		}
147462306a36Sopenharmony_ci
147562306a36Sopenharmony_ci		/* allocate the blocks.
147662306a36Sopenharmony_ci		 */
147762306a36Sopenharmony_ci		rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
147862306a36Sopenharmony_ci		if (rc == -ENOSPC) {
147962306a36Sopenharmony_ci			jfs_error(bmp->db_ipbmap->i_sb,
148062306a36Sopenharmony_ci				  "unable to allocate blocks\n");
148162306a36Sopenharmony_ci			rc = -EIO;
148262306a36Sopenharmony_ci		}
148362306a36Sopenharmony_ci		return (rc);
148462306a36Sopenharmony_ci	}
148562306a36Sopenharmony_ci
148662306a36Sopenharmony_ci	/* no space in the allocation group.  release the buffer and
148762306a36Sopenharmony_ci	 * return -ENOSPC.
148862306a36Sopenharmony_ci	 */
148962306a36Sopenharmony_ci	release_metapage(mp);
149062306a36Sopenharmony_ci
149162306a36Sopenharmony_ci	return -ENOSPC;
149262306a36Sopenharmony_ci}
149362306a36Sopenharmony_ci
149462306a36Sopenharmony_ci
149562306a36Sopenharmony_ci/*
149662306a36Sopenharmony_ci * NAME:	dbAllocAny()
149762306a36Sopenharmony_ci *
149862306a36Sopenharmony_ci * FUNCTION:	attempt to allocate the specified number of contiguous
149962306a36Sopenharmony_ci *		free blocks anywhere in the file system.
150062306a36Sopenharmony_ci *
150162306a36Sopenharmony_ci *		dbAllocAny() attempts to find the sufficient free space by
150262306a36Sopenharmony_ci *		searching down the dmap control pages, starting with the
150362306a36Sopenharmony_ci *		highest level (i.e. L0, L1, L2) control page.  if free space
150462306a36Sopenharmony_ci *		large enough to satisfy the desired free space is found, the
150562306a36Sopenharmony_ci *		desired free space is allocated.
150662306a36Sopenharmony_ci *
150762306a36Sopenharmony_ci * PARAMETERS:
150862306a36Sopenharmony_ci *	bmp	-  pointer to bmap descriptor
150962306a36Sopenharmony_ci *	nblocks	 -  actual number of contiguous free blocks desired.
151062306a36Sopenharmony_ci *	l2nb	 -  log2 number of contiguous free blocks desired.
151162306a36Sopenharmony_ci *	results	-  on successful return, set to the starting block number
151262306a36Sopenharmony_ci *		   of the newly allocated range.
151362306a36Sopenharmony_ci *
151462306a36Sopenharmony_ci * RETURN VALUES:
151562306a36Sopenharmony_ci *	0	- success
151662306a36Sopenharmony_ci *	-ENOSPC	- insufficient disk resources
151762306a36Sopenharmony_ci *	-EIO	- i/o error
151862306a36Sopenharmony_ci *
151962306a36Sopenharmony_ci * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
152062306a36Sopenharmony_ci */
152162306a36Sopenharmony_cistatic int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
152262306a36Sopenharmony_ci{
152362306a36Sopenharmony_ci	int rc;
152462306a36Sopenharmony_ci	s64 blkno = 0;
152562306a36Sopenharmony_ci
152662306a36Sopenharmony_ci	/* starting with the top level dmap control page, search
152762306a36Sopenharmony_ci	 * down the dmap control levels for sufficient free space.
152862306a36Sopenharmony_ci	 * if free space is found, dbFindCtl() returns the starting
152962306a36Sopenharmony_ci	 * block number of the dmap that contains or starts off the
153062306a36Sopenharmony_ci	 * range of free space.
153162306a36Sopenharmony_ci	 */
153262306a36Sopenharmony_ci	if ((rc = dbFindCtl(bmp, l2nb, bmp->db_maxlevel, &blkno)))
153362306a36Sopenharmony_ci		return (rc);
153462306a36Sopenharmony_ci
153562306a36Sopenharmony_ci	/* allocate the blocks.
153662306a36Sopenharmony_ci	 */
153762306a36Sopenharmony_ci	rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
153862306a36Sopenharmony_ci	if (rc == -ENOSPC) {
153962306a36Sopenharmony_ci		jfs_error(bmp->db_ipbmap->i_sb, "unable to allocate blocks\n");
154062306a36Sopenharmony_ci		return -EIO;
154162306a36Sopenharmony_ci	}
154262306a36Sopenharmony_ci	return (rc);
154362306a36Sopenharmony_ci}
154462306a36Sopenharmony_ci
154562306a36Sopenharmony_ci
154662306a36Sopenharmony_ci/*
154762306a36Sopenharmony_ci * NAME:	dbDiscardAG()
154862306a36Sopenharmony_ci *
154962306a36Sopenharmony_ci * FUNCTION:	attempt to discard (TRIM) all free blocks of specific AG
155062306a36Sopenharmony_ci *
155162306a36Sopenharmony_ci *		algorithm:
155262306a36Sopenharmony_ci *		1) allocate blocks, as large as possible and save them
155362306a36Sopenharmony_ci *		   while holding IWRITE_LOCK on ipbmap
155462306a36Sopenharmony_ci *		2) trim all these saved block/length values
155562306a36Sopenharmony_ci *		3) mark the blocks free again
155662306a36Sopenharmony_ci *
155762306a36Sopenharmony_ci *		benefit:
155862306a36Sopenharmony_ci *		- we work only on one ag at some time, minimizing how long we
155962306a36Sopenharmony_ci *		  need to lock ipbmap
156062306a36Sopenharmony_ci *		- reading / writing the fs is possible most time, even on
156162306a36Sopenharmony_ci *		  trimming
156262306a36Sopenharmony_ci *
156362306a36Sopenharmony_ci *		downside:
156462306a36Sopenharmony_ci *		- we write two times to the dmapctl and dmap pages
156562306a36Sopenharmony_ci *		- but for me, this seems the best way, better ideas?
156662306a36Sopenharmony_ci *		/TR 2012
156762306a36Sopenharmony_ci *
156862306a36Sopenharmony_ci * PARAMETERS:
156962306a36Sopenharmony_ci *	ip	- pointer to in-core inode
157062306a36Sopenharmony_ci *	agno	- ag to trim
157162306a36Sopenharmony_ci *	minlen	- minimum value of contiguous blocks
157262306a36Sopenharmony_ci *
157362306a36Sopenharmony_ci * RETURN VALUES:
157462306a36Sopenharmony_ci *	s64	- actual number of blocks trimmed
157562306a36Sopenharmony_ci */
157662306a36Sopenharmony_cis64 dbDiscardAG(struct inode *ip, int agno, s64 minlen)
157762306a36Sopenharmony_ci{
157862306a36Sopenharmony_ci	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
157962306a36Sopenharmony_ci	struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
158062306a36Sopenharmony_ci	s64 nblocks, blkno;
158162306a36Sopenharmony_ci	u64 trimmed = 0;
158262306a36Sopenharmony_ci	int rc, l2nb;
158362306a36Sopenharmony_ci	struct super_block *sb = ipbmap->i_sb;
158462306a36Sopenharmony_ci
158562306a36Sopenharmony_ci	struct range2trim {
158662306a36Sopenharmony_ci		u64 blkno;
158762306a36Sopenharmony_ci		u64 nblocks;
158862306a36Sopenharmony_ci	} *totrim, *tt;
158962306a36Sopenharmony_ci
159062306a36Sopenharmony_ci	/* max blkno / nblocks pairs to trim */
159162306a36Sopenharmony_ci	int count = 0, range_cnt;
159262306a36Sopenharmony_ci	u64 max_ranges;
159362306a36Sopenharmony_ci
159462306a36Sopenharmony_ci	/* prevent others from writing new stuff here, while trimming */
159562306a36Sopenharmony_ci	IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
159662306a36Sopenharmony_ci
159762306a36Sopenharmony_ci	nblocks = bmp->db_agfree[agno];
159862306a36Sopenharmony_ci	max_ranges = nblocks;
159962306a36Sopenharmony_ci	do_div(max_ranges, minlen);
160062306a36Sopenharmony_ci	range_cnt = min_t(u64, max_ranges + 1, 32 * 1024);
160162306a36Sopenharmony_ci	totrim = kmalloc_array(range_cnt, sizeof(struct range2trim), GFP_NOFS);
160262306a36Sopenharmony_ci	if (totrim == NULL) {
160362306a36Sopenharmony_ci		jfs_error(bmp->db_ipbmap->i_sb, "no memory for trim array\n");
160462306a36Sopenharmony_ci		IWRITE_UNLOCK(ipbmap);
160562306a36Sopenharmony_ci		return 0;
160662306a36Sopenharmony_ci	}
160762306a36Sopenharmony_ci
160862306a36Sopenharmony_ci	tt = totrim;
160962306a36Sopenharmony_ci	while (nblocks >= minlen) {
161062306a36Sopenharmony_ci		l2nb = BLKSTOL2(nblocks);
161162306a36Sopenharmony_ci
161262306a36Sopenharmony_ci		/* 0 = okay, -EIO = fatal, -ENOSPC -> try smaller block */
161362306a36Sopenharmony_ci		rc = dbAllocAG(bmp, agno, nblocks, l2nb, &blkno);
161462306a36Sopenharmony_ci		if (rc == 0) {
161562306a36Sopenharmony_ci			tt->blkno = blkno;
161662306a36Sopenharmony_ci			tt->nblocks = nblocks;
161762306a36Sopenharmony_ci			tt++; count++;
161862306a36Sopenharmony_ci
161962306a36Sopenharmony_ci			/* the whole ag is free, trim now */
162062306a36Sopenharmony_ci			if (bmp->db_agfree[agno] == 0)
162162306a36Sopenharmony_ci				break;
162262306a36Sopenharmony_ci
162362306a36Sopenharmony_ci			/* give a hint for the next while */
162462306a36Sopenharmony_ci			nblocks = bmp->db_agfree[agno];
162562306a36Sopenharmony_ci			continue;
162662306a36Sopenharmony_ci		} else if (rc == -ENOSPC) {
162762306a36Sopenharmony_ci			/* search for next smaller log2 block */
162862306a36Sopenharmony_ci			l2nb = BLKSTOL2(nblocks) - 1;
162962306a36Sopenharmony_ci			nblocks = 1LL << l2nb;
163062306a36Sopenharmony_ci		} else {
163162306a36Sopenharmony_ci			/* Trim any already allocated blocks */
163262306a36Sopenharmony_ci			jfs_error(bmp->db_ipbmap->i_sb, "-EIO\n");
163362306a36Sopenharmony_ci			break;
163462306a36Sopenharmony_ci		}
163562306a36Sopenharmony_ci
163662306a36Sopenharmony_ci		/* check, if our trim array is full */
163762306a36Sopenharmony_ci		if (unlikely(count >= range_cnt - 1))
163862306a36Sopenharmony_ci			break;
163962306a36Sopenharmony_ci	}
164062306a36Sopenharmony_ci	IWRITE_UNLOCK(ipbmap);
164162306a36Sopenharmony_ci
164262306a36Sopenharmony_ci	tt->nblocks = 0; /* mark the current end */
164362306a36Sopenharmony_ci	for (tt = totrim; tt->nblocks != 0; tt++) {
164462306a36Sopenharmony_ci		/* when mounted with online discard, dbFree() will
164562306a36Sopenharmony_ci		 * call jfs_issue_discard() itself */
164662306a36Sopenharmony_ci		if (!(JFS_SBI(sb)->flag & JFS_DISCARD))
164762306a36Sopenharmony_ci			jfs_issue_discard(ip, tt->blkno, tt->nblocks);
164862306a36Sopenharmony_ci		dbFree(ip, tt->blkno, tt->nblocks);
164962306a36Sopenharmony_ci		trimmed += tt->nblocks;
165062306a36Sopenharmony_ci	}
165162306a36Sopenharmony_ci	kfree(totrim);
165262306a36Sopenharmony_ci
165362306a36Sopenharmony_ci	return trimmed;
165462306a36Sopenharmony_ci}
165562306a36Sopenharmony_ci
165662306a36Sopenharmony_ci/*
165762306a36Sopenharmony_ci * NAME:	dbFindCtl()
165862306a36Sopenharmony_ci *
165962306a36Sopenharmony_ci * FUNCTION:	starting at a specified dmap control page level and block
166062306a36Sopenharmony_ci *		number, search down the dmap control levels for a range of
166162306a36Sopenharmony_ci *		contiguous free blocks large enough to satisfy an allocation
166262306a36Sopenharmony_ci *		request for the specified number of free blocks.
166362306a36Sopenharmony_ci *
166462306a36Sopenharmony_ci *		if sufficient contiguous free blocks are found, this routine
166562306a36Sopenharmony_ci *		returns the starting block number within a dmap page that
166662306a36Sopenharmony_ci *		contains or starts a range of contiqious free blocks that
166762306a36Sopenharmony_ci *		is sufficient in size.
166862306a36Sopenharmony_ci *
166962306a36Sopenharmony_ci * PARAMETERS:
167062306a36Sopenharmony_ci *	bmp	-  pointer to bmap descriptor
167162306a36Sopenharmony_ci *	level	-  starting dmap control page level.
167262306a36Sopenharmony_ci *	l2nb	-  log2 number of contiguous free blocks desired.
167362306a36Sopenharmony_ci *	*blkno	-  on entry, starting block number for conducting the search.
167462306a36Sopenharmony_ci *		   on successful return, the first block within a dmap page
167562306a36Sopenharmony_ci *		   that contains or starts a range of contiguous free blocks.
167662306a36Sopenharmony_ci *
167762306a36Sopenharmony_ci * RETURN VALUES:
167862306a36Sopenharmony_ci *	0	- success
167962306a36Sopenharmony_ci *	-ENOSPC	- insufficient disk resources
168062306a36Sopenharmony_ci *	-EIO	- i/o error
168162306a36Sopenharmony_ci *
168262306a36Sopenharmony_ci * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
168362306a36Sopenharmony_ci */
168462306a36Sopenharmony_cistatic int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
168562306a36Sopenharmony_ci{
168662306a36Sopenharmony_ci	int rc, leafidx, lev;
168762306a36Sopenharmony_ci	s64 b, lblkno;
168862306a36Sopenharmony_ci	struct dmapctl *dcp;
168962306a36Sopenharmony_ci	int budmin;
169062306a36Sopenharmony_ci	struct metapage *mp;
169162306a36Sopenharmony_ci
169262306a36Sopenharmony_ci	/* starting at the specified dmap control page level and block
169362306a36Sopenharmony_ci	 * number, search down the dmap control levels for the starting
169462306a36Sopenharmony_ci	 * block number of a dmap page that contains or starts off
169562306a36Sopenharmony_ci	 * sufficient free blocks.
169662306a36Sopenharmony_ci	 */
169762306a36Sopenharmony_ci	for (lev = level, b = *blkno; lev >= 0; lev--) {
169862306a36Sopenharmony_ci		/* get the buffer of the dmap control page for the block
169962306a36Sopenharmony_ci		 * number and level (i.e. L0, L1, L2).
170062306a36Sopenharmony_ci		 */
170162306a36Sopenharmony_ci		lblkno = BLKTOCTL(b, bmp->db_l2nbperpage, lev);
170262306a36Sopenharmony_ci		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
170362306a36Sopenharmony_ci		if (mp == NULL)
170462306a36Sopenharmony_ci			return -EIO;
170562306a36Sopenharmony_ci		dcp = (struct dmapctl *) mp->data;
170662306a36Sopenharmony_ci		budmin = dcp->budmin;
170762306a36Sopenharmony_ci
170862306a36Sopenharmony_ci		if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
170962306a36Sopenharmony_ci			jfs_error(bmp->db_ipbmap->i_sb,
171062306a36Sopenharmony_ci				  "Corrupt dmapctl page\n");
171162306a36Sopenharmony_ci			release_metapage(mp);
171262306a36Sopenharmony_ci			return -EIO;
171362306a36Sopenharmony_ci		}
171462306a36Sopenharmony_ci
171562306a36Sopenharmony_ci		/* search the tree within the dmap control page for
171662306a36Sopenharmony_ci		 * sufficient free space.  if sufficient free space is found,
171762306a36Sopenharmony_ci		 * dbFindLeaf() returns the index of the leaf at which
171862306a36Sopenharmony_ci		 * free space was found.
171962306a36Sopenharmony_ci		 */
172062306a36Sopenharmony_ci		rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx, true);
172162306a36Sopenharmony_ci
172262306a36Sopenharmony_ci		/* release the buffer.
172362306a36Sopenharmony_ci		 */
172462306a36Sopenharmony_ci		release_metapage(mp);
172562306a36Sopenharmony_ci
172662306a36Sopenharmony_ci		/* space found ?
172762306a36Sopenharmony_ci		 */
172862306a36Sopenharmony_ci		if (rc) {
172962306a36Sopenharmony_ci			if (lev != level) {
173062306a36Sopenharmony_ci				jfs_error(bmp->db_ipbmap->i_sb,
173162306a36Sopenharmony_ci					  "dmap inconsistent\n");
173262306a36Sopenharmony_ci				return -EIO;
173362306a36Sopenharmony_ci			}
173462306a36Sopenharmony_ci			return -ENOSPC;
173562306a36Sopenharmony_ci		}
173662306a36Sopenharmony_ci
173762306a36Sopenharmony_ci		/* adjust the block number to reflect the location within
173862306a36Sopenharmony_ci		 * the dmap control page (i.e. the leaf) at which free
173962306a36Sopenharmony_ci		 * space was found.
174062306a36Sopenharmony_ci		 */
174162306a36Sopenharmony_ci		b += (((s64) leafidx) << budmin);
174262306a36Sopenharmony_ci
174362306a36Sopenharmony_ci		/* we stop the search at this dmap control page level if
174462306a36Sopenharmony_ci		 * the number of blocks required is greater than or equal
174562306a36Sopenharmony_ci		 * to the maximum number of blocks described at the next
174662306a36Sopenharmony_ci		 * (lower) level.
174762306a36Sopenharmony_ci		 */
174862306a36Sopenharmony_ci		if (l2nb >= budmin)
174962306a36Sopenharmony_ci			break;
175062306a36Sopenharmony_ci	}
175162306a36Sopenharmony_ci
175262306a36Sopenharmony_ci	*blkno = b;
175362306a36Sopenharmony_ci	return (0);
175462306a36Sopenharmony_ci}
175562306a36Sopenharmony_ci
175662306a36Sopenharmony_ci
175762306a36Sopenharmony_ci/*
175862306a36Sopenharmony_ci * NAME:	dbAllocCtl()
175962306a36Sopenharmony_ci *
176062306a36Sopenharmony_ci * FUNCTION:	attempt to allocate a specified number of contiguous
176162306a36Sopenharmony_ci *		blocks starting within a specific dmap.
176262306a36Sopenharmony_ci *
176362306a36Sopenharmony_ci *		this routine is called by higher level routines that search
176462306a36Sopenharmony_ci *		the dmap control pages above the actual dmaps for contiguous
176562306a36Sopenharmony_ci *		free space.  the result of successful searches by these
176662306a36Sopenharmony_ci *		routines are the starting block numbers within dmaps, with
176762306a36Sopenharmony_ci *		the dmaps themselves containing the desired contiguous free
176862306a36Sopenharmony_ci *		space or starting a contiguous free space of desired size
176962306a36Sopenharmony_ci *		that is made up of the blocks of one or more dmaps. these
177062306a36Sopenharmony_ci *		calls should not fail due to insufficent resources.
177162306a36Sopenharmony_ci *
177262306a36Sopenharmony_ci *		this routine is called in some cases where it is not known
177362306a36Sopenharmony_ci *		whether it will fail due to insufficient resources.  more
177462306a36Sopenharmony_ci *		specifically, this occurs when allocating from an allocation
177562306a36Sopenharmony_ci *		group whose size is equal to the number of blocks per dmap.
177662306a36Sopenharmony_ci *		in this case, the dmap control pages are not examined prior
177762306a36Sopenharmony_ci *		to calling this routine (to save pathlength) and the call
177862306a36Sopenharmony_ci *		might fail.
177962306a36Sopenharmony_ci *
178062306a36Sopenharmony_ci *		for a request size that fits within a dmap, this routine relies
178162306a36Sopenharmony_ci *		upon the dmap's dmtree to find the requested contiguous free
178262306a36Sopenharmony_ci *		space.  for request sizes that are larger than a dmap, the
178362306a36Sopenharmony_ci *		requested free space will start at the first block of the
178462306a36Sopenharmony_ci *		first dmap (i.e. blkno).
178562306a36Sopenharmony_ci *
178662306a36Sopenharmony_ci * PARAMETERS:
178762306a36Sopenharmony_ci *	bmp	-  pointer to bmap descriptor
178862306a36Sopenharmony_ci *	nblocks	 -  actual number of contiguous free blocks to allocate.
178962306a36Sopenharmony_ci *	l2nb	 -  log2 number of contiguous free blocks to allocate.
179062306a36Sopenharmony_ci *	blkno	 -  starting block number of the dmap to start the allocation
179162306a36Sopenharmony_ci *		    from.
179262306a36Sopenharmony_ci *	results	-  on successful return, set to the starting block number
179362306a36Sopenharmony_ci *		   of the newly allocated range.
179462306a36Sopenharmony_ci *
179562306a36Sopenharmony_ci * RETURN VALUES:
179662306a36Sopenharmony_ci *	0	- success
179762306a36Sopenharmony_ci *	-ENOSPC	- insufficient disk resources
179862306a36Sopenharmony_ci *	-EIO	- i/o error
179962306a36Sopenharmony_ci *
180062306a36Sopenharmony_ci * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
180162306a36Sopenharmony_ci */
180262306a36Sopenharmony_cistatic int
180362306a36Sopenharmony_cidbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
180462306a36Sopenharmony_ci{
180562306a36Sopenharmony_ci	int rc, nb;
180662306a36Sopenharmony_ci	s64 b, lblkno, n;
180762306a36Sopenharmony_ci	struct metapage *mp;
180862306a36Sopenharmony_ci	struct dmap *dp;
180962306a36Sopenharmony_ci
181062306a36Sopenharmony_ci	/* check if the allocation request is confined to a single dmap.
181162306a36Sopenharmony_ci	 */
181262306a36Sopenharmony_ci	if (l2nb <= L2BPERDMAP) {
181362306a36Sopenharmony_ci		/* get the buffer for the dmap.
181462306a36Sopenharmony_ci		 */
181562306a36Sopenharmony_ci		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
181662306a36Sopenharmony_ci		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
181762306a36Sopenharmony_ci		if (mp == NULL)
181862306a36Sopenharmony_ci			return -EIO;
181962306a36Sopenharmony_ci		dp = (struct dmap *) mp->data;
182062306a36Sopenharmony_ci
182162306a36Sopenharmony_ci		/* try to allocate the blocks.
182262306a36Sopenharmony_ci		 */
182362306a36Sopenharmony_ci		rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results);
182462306a36Sopenharmony_ci		if (rc == 0)
182562306a36Sopenharmony_ci			mark_metapage_dirty(mp);
182662306a36Sopenharmony_ci
182762306a36Sopenharmony_ci		release_metapage(mp);
182862306a36Sopenharmony_ci
182962306a36Sopenharmony_ci		return (rc);
183062306a36Sopenharmony_ci	}
183162306a36Sopenharmony_ci
183262306a36Sopenharmony_ci	/* allocation request involving multiple dmaps. it must start on
183362306a36Sopenharmony_ci	 * a dmap boundary.
183462306a36Sopenharmony_ci	 */
183562306a36Sopenharmony_ci	assert((blkno & (BPERDMAP - 1)) == 0);
183662306a36Sopenharmony_ci
183762306a36Sopenharmony_ci	/* allocate the blocks dmap by dmap.
183862306a36Sopenharmony_ci	 */
183962306a36Sopenharmony_ci	for (n = nblocks, b = blkno; n > 0; n -= nb, b += nb) {
184062306a36Sopenharmony_ci		/* get the buffer for the dmap.
184162306a36Sopenharmony_ci		 */
184262306a36Sopenharmony_ci		lblkno = BLKTODMAP(b, bmp->db_l2nbperpage);
184362306a36Sopenharmony_ci		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
184462306a36Sopenharmony_ci		if (mp == NULL) {
184562306a36Sopenharmony_ci			rc = -EIO;
184662306a36Sopenharmony_ci			goto backout;
184762306a36Sopenharmony_ci		}
184862306a36Sopenharmony_ci		dp = (struct dmap *) mp->data;
184962306a36Sopenharmony_ci
185062306a36Sopenharmony_ci		/* the dmap better be all free.
185162306a36Sopenharmony_ci		 */
185262306a36Sopenharmony_ci		if (dp->tree.stree[ROOT] != L2BPERDMAP) {
185362306a36Sopenharmony_ci			release_metapage(mp);
185462306a36Sopenharmony_ci			jfs_error(bmp->db_ipbmap->i_sb,
185562306a36Sopenharmony_ci				  "the dmap is not all free\n");
185662306a36Sopenharmony_ci			rc = -EIO;
185762306a36Sopenharmony_ci			goto backout;
185862306a36Sopenharmony_ci		}
185962306a36Sopenharmony_ci
186062306a36Sopenharmony_ci		/* determine how many blocks to allocate from this dmap.
186162306a36Sopenharmony_ci		 */
186262306a36Sopenharmony_ci		nb = min_t(s64, n, BPERDMAP);
186362306a36Sopenharmony_ci
186462306a36Sopenharmony_ci		/* allocate the blocks from the dmap.
186562306a36Sopenharmony_ci		 */
186662306a36Sopenharmony_ci		if ((rc = dbAllocDmap(bmp, dp, b, nb))) {
186762306a36Sopenharmony_ci			release_metapage(mp);
186862306a36Sopenharmony_ci			goto backout;
186962306a36Sopenharmony_ci		}
187062306a36Sopenharmony_ci
187162306a36Sopenharmony_ci		/* write the buffer.
187262306a36Sopenharmony_ci		 */
187362306a36Sopenharmony_ci		write_metapage(mp);
187462306a36Sopenharmony_ci	}
187562306a36Sopenharmony_ci
187662306a36Sopenharmony_ci	/* set the results (starting block number) and return.
187762306a36Sopenharmony_ci	 */
187862306a36Sopenharmony_ci	*results = blkno;
187962306a36Sopenharmony_ci	return (0);
188062306a36Sopenharmony_ci
188162306a36Sopenharmony_ci	/* something failed in handling an allocation request involving
188262306a36Sopenharmony_ci	 * multiple dmaps.  we'll try to clean up by backing out any
188362306a36Sopenharmony_ci	 * allocation that has already happened for this request.  if
188462306a36Sopenharmony_ci	 * we fail in backing out the allocation, we'll mark the file
188562306a36Sopenharmony_ci	 * system to indicate that blocks have been leaked.
188662306a36Sopenharmony_ci	 */
188762306a36Sopenharmony_ci      backout:
188862306a36Sopenharmony_ci
188962306a36Sopenharmony_ci	/* try to backout the allocations dmap by dmap.
189062306a36Sopenharmony_ci	 */
189162306a36Sopenharmony_ci	for (n = nblocks - n, b = blkno; n > 0;
189262306a36Sopenharmony_ci	     n -= BPERDMAP, b += BPERDMAP) {
189362306a36Sopenharmony_ci		/* get the buffer for this dmap.
189462306a36Sopenharmony_ci		 */
189562306a36Sopenharmony_ci		lblkno = BLKTODMAP(b, bmp->db_l2nbperpage);
189662306a36Sopenharmony_ci		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
189762306a36Sopenharmony_ci		if (mp == NULL) {
189862306a36Sopenharmony_ci			/* could not back out.  mark the file system
189962306a36Sopenharmony_ci			 * to indicate that we have leaked blocks.
190062306a36Sopenharmony_ci			 */
190162306a36Sopenharmony_ci			jfs_error(bmp->db_ipbmap->i_sb,
190262306a36Sopenharmony_ci				  "I/O Error: Block Leakage\n");
190362306a36Sopenharmony_ci			continue;
190462306a36Sopenharmony_ci		}
190562306a36Sopenharmony_ci		dp = (struct dmap *) mp->data;
190662306a36Sopenharmony_ci
190762306a36Sopenharmony_ci		/* free the blocks is this dmap.
190862306a36Sopenharmony_ci		 */
190962306a36Sopenharmony_ci		if (dbFreeDmap(bmp, dp, b, BPERDMAP)) {
191062306a36Sopenharmony_ci			/* could not back out.  mark the file system
191162306a36Sopenharmony_ci			 * to indicate that we have leaked blocks.
191262306a36Sopenharmony_ci			 */
191362306a36Sopenharmony_ci			release_metapage(mp);
191462306a36Sopenharmony_ci			jfs_error(bmp->db_ipbmap->i_sb, "Block Leakage\n");
191562306a36Sopenharmony_ci			continue;
191662306a36Sopenharmony_ci		}
191762306a36Sopenharmony_ci
191862306a36Sopenharmony_ci		/* write the buffer.
191962306a36Sopenharmony_ci		 */
192062306a36Sopenharmony_ci		write_metapage(mp);
192162306a36Sopenharmony_ci	}
192262306a36Sopenharmony_ci
192362306a36Sopenharmony_ci	return (rc);
192462306a36Sopenharmony_ci}
192562306a36Sopenharmony_ci
192662306a36Sopenharmony_ci
192762306a36Sopenharmony_ci/*
192862306a36Sopenharmony_ci * NAME:	dbAllocDmapLev()
192962306a36Sopenharmony_ci *
193062306a36Sopenharmony_ci * FUNCTION:	attempt to allocate a specified number of contiguous blocks
193162306a36Sopenharmony_ci *		from a specified dmap.
193262306a36Sopenharmony_ci *
193362306a36Sopenharmony_ci *		this routine checks if the contiguous blocks are available.
193462306a36Sopenharmony_ci *		if so, nblocks of blocks are allocated; otherwise, ENOSPC is
193562306a36Sopenharmony_ci *		returned.
193662306a36Sopenharmony_ci *
193762306a36Sopenharmony_ci * PARAMETERS:
193862306a36Sopenharmony_ci *	mp	-  pointer to bmap descriptor
193962306a36Sopenharmony_ci *	dp	-  pointer to dmap to attempt to allocate blocks from.
194062306a36Sopenharmony_ci *	l2nb	-  log2 number of contiguous block desired.
194162306a36Sopenharmony_ci *	nblocks	-  actual number of contiguous block desired.
194262306a36Sopenharmony_ci *	results	-  on successful return, set to the starting block number
194362306a36Sopenharmony_ci *		   of the newly allocated range.
194462306a36Sopenharmony_ci *
194562306a36Sopenharmony_ci * RETURN VALUES:
194662306a36Sopenharmony_ci *	0	- success
194762306a36Sopenharmony_ci *	-ENOSPC	- insufficient disk resources
194862306a36Sopenharmony_ci *	-EIO	- i/o error
194962306a36Sopenharmony_ci *
195062306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or
195162306a36Sopenharmony_ci *	IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit;
195262306a36Sopenharmony_ci */
195362306a36Sopenharmony_cistatic int
195462306a36Sopenharmony_cidbAllocDmapLev(struct bmap * bmp,
195562306a36Sopenharmony_ci	       struct dmap * dp, int nblocks, int l2nb, s64 * results)
195662306a36Sopenharmony_ci{
195762306a36Sopenharmony_ci	s64 blkno;
195862306a36Sopenharmony_ci	int leafidx, rc;
195962306a36Sopenharmony_ci
196062306a36Sopenharmony_ci	/* can't be more than a dmaps worth of blocks */
196162306a36Sopenharmony_ci	assert(l2nb <= L2BPERDMAP);
196262306a36Sopenharmony_ci
196362306a36Sopenharmony_ci	/* search the tree within the dmap page for sufficient
196462306a36Sopenharmony_ci	 * free space.  if sufficient free space is found, dbFindLeaf()
196562306a36Sopenharmony_ci	 * returns the index of the leaf at which free space was found.
196662306a36Sopenharmony_ci	 */
196762306a36Sopenharmony_ci	if (dbFindLeaf((dmtree_t *) &dp->tree, l2nb, &leafidx, false))
196862306a36Sopenharmony_ci		return -ENOSPC;
196962306a36Sopenharmony_ci
197062306a36Sopenharmony_ci	if (leafidx < 0)
197162306a36Sopenharmony_ci		return -EIO;
197262306a36Sopenharmony_ci
197362306a36Sopenharmony_ci	/* determine the block number within the file system corresponding
197462306a36Sopenharmony_ci	 * to the leaf at which free space was found.
197562306a36Sopenharmony_ci	 */
197662306a36Sopenharmony_ci	blkno = le64_to_cpu(dp->start) + (leafidx << L2DBWORD);
197762306a36Sopenharmony_ci
197862306a36Sopenharmony_ci	/* if not all bits of the dmap word are free, get the starting
197962306a36Sopenharmony_ci	 * bit number within the dmap word of the required string of free
198062306a36Sopenharmony_ci	 * bits and adjust the block number with this value.
198162306a36Sopenharmony_ci	 */
198262306a36Sopenharmony_ci	if (dp->tree.stree[leafidx + LEAFIND] < BUDMIN)
198362306a36Sopenharmony_ci		blkno += dbFindBits(le32_to_cpu(dp->wmap[leafidx]), l2nb);
198462306a36Sopenharmony_ci
198562306a36Sopenharmony_ci	/* allocate the blocks */
198662306a36Sopenharmony_ci	if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0)
198762306a36Sopenharmony_ci		*results = blkno;
198862306a36Sopenharmony_ci
198962306a36Sopenharmony_ci	return (rc);
199062306a36Sopenharmony_ci}
199162306a36Sopenharmony_ci
199262306a36Sopenharmony_ci
199362306a36Sopenharmony_ci/*
199462306a36Sopenharmony_ci * NAME:	dbAllocDmap()
199562306a36Sopenharmony_ci *
199662306a36Sopenharmony_ci * FUNCTION:	adjust the disk allocation map to reflect the allocation
199762306a36Sopenharmony_ci *		of a specified block range within a dmap.
199862306a36Sopenharmony_ci *
199962306a36Sopenharmony_ci *		this routine allocates the specified blocks from the dmap
200062306a36Sopenharmony_ci *		through a call to dbAllocBits(). if the allocation of the
200162306a36Sopenharmony_ci *		block range causes the maximum string of free blocks within
200262306a36Sopenharmony_ci *		the dmap to change (i.e. the value of the root of the dmap's
200362306a36Sopenharmony_ci *		dmtree), this routine will cause this change to be reflected
200462306a36Sopenharmony_ci *		up through the appropriate levels of the dmap control pages
200562306a36Sopenharmony_ci *		by a call to dbAdjCtl() for the L0 dmap control page that
200662306a36Sopenharmony_ci *		covers this dmap.
200762306a36Sopenharmony_ci *
200862306a36Sopenharmony_ci * PARAMETERS:
200962306a36Sopenharmony_ci *	bmp	-  pointer to bmap descriptor
201062306a36Sopenharmony_ci *	dp	-  pointer to dmap to allocate the block range from.
201162306a36Sopenharmony_ci *	blkno	-  starting block number of the block to be allocated.
201262306a36Sopenharmony_ci *	nblocks	-  number of blocks to be allocated.
201362306a36Sopenharmony_ci *
201462306a36Sopenharmony_ci * RETURN VALUES:
201562306a36Sopenharmony_ci *	0	- success
201662306a36Sopenharmony_ci *	-EIO	- i/o error
201762306a36Sopenharmony_ci *
201862306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
201962306a36Sopenharmony_ci */
202062306a36Sopenharmony_cistatic int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
202162306a36Sopenharmony_ci		       int nblocks)
202262306a36Sopenharmony_ci{
202362306a36Sopenharmony_ci	s8 oldroot;
202462306a36Sopenharmony_ci	int rc;
202562306a36Sopenharmony_ci
202662306a36Sopenharmony_ci	/* save the current value of the root (i.e. maximum free string)
202762306a36Sopenharmony_ci	 * of the dmap tree.
202862306a36Sopenharmony_ci	 */
202962306a36Sopenharmony_ci	oldroot = dp->tree.stree[ROOT];
203062306a36Sopenharmony_ci
203162306a36Sopenharmony_ci	/* allocate the specified (blocks) bits */
203262306a36Sopenharmony_ci	dbAllocBits(bmp, dp, blkno, nblocks);
203362306a36Sopenharmony_ci
203462306a36Sopenharmony_ci	/* if the root has not changed, done. */
203562306a36Sopenharmony_ci	if (dp->tree.stree[ROOT] == oldroot)
203662306a36Sopenharmony_ci		return (0);
203762306a36Sopenharmony_ci
203862306a36Sopenharmony_ci	/* root changed. bubble the change up to the dmap control pages.
203962306a36Sopenharmony_ci	 * if the adjustment of the upper level control pages fails,
204062306a36Sopenharmony_ci	 * backout the bit allocation (thus making everything consistent).
204162306a36Sopenharmony_ci	 */
204262306a36Sopenharmony_ci	if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 1, 0)))
204362306a36Sopenharmony_ci		dbFreeBits(bmp, dp, blkno, nblocks);
204462306a36Sopenharmony_ci
204562306a36Sopenharmony_ci	return (rc);
204662306a36Sopenharmony_ci}
204762306a36Sopenharmony_ci
204862306a36Sopenharmony_ci
204962306a36Sopenharmony_ci/*
205062306a36Sopenharmony_ci * NAME:	dbFreeDmap()
205162306a36Sopenharmony_ci *
205262306a36Sopenharmony_ci * FUNCTION:	adjust the disk allocation map to reflect the allocation
205362306a36Sopenharmony_ci *		of a specified block range within a dmap.
205462306a36Sopenharmony_ci *
205562306a36Sopenharmony_ci *		this routine frees the specified blocks from the dmap through
205662306a36Sopenharmony_ci *		a call to dbFreeBits(). if the deallocation of the block range
205762306a36Sopenharmony_ci *		causes the maximum string of free blocks within the dmap to
205862306a36Sopenharmony_ci *		change (i.e. the value of the root of the dmap's dmtree), this
205962306a36Sopenharmony_ci *		routine will cause this change to be reflected up through the
206062306a36Sopenharmony_ci *		appropriate levels of the dmap control pages by a call to
206162306a36Sopenharmony_ci *		dbAdjCtl() for the L0 dmap control page that covers this dmap.
206262306a36Sopenharmony_ci *
206362306a36Sopenharmony_ci * PARAMETERS:
206462306a36Sopenharmony_ci *	bmp	-  pointer to bmap descriptor
206562306a36Sopenharmony_ci *	dp	-  pointer to dmap to free the block range from.
206662306a36Sopenharmony_ci *	blkno	-  starting block number of the block to be freed.
206762306a36Sopenharmony_ci *	nblocks	-  number of blocks to be freed.
206862306a36Sopenharmony_ci *
206962306a36Sopenharmony_ci * RETURN VALUES:
207062306a36Sopenharmony_ci *	0	- success
207162306a36Sopenharmony_ci *	-EIO	- i/o error
207262306a36Sopenharmony_ci *
207362306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
207462306a36Sopenharmony_ci */
207562306a36Sopenharmony_cistatic int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
207662306a36Sopenharmony_ci		      int nblocks)
207762306a36Sopenharmony_ci{
207862306a36Sopenharmony_ci	s8 oldroot;
207962306a36Sopenharmony_ci	int rc = 0, word;
208062306a36Sopenharmony_ci
208162306a36Sopenharmony_ci	/* save the current value of the root (i.e. maximum free string)
208262306a36Sopenharmony_ci	 * of the dmap tree.
208362306a36Sopenharmony_ci	 */
208462306a36Sopenharmony_ci	oldroot = dp->tree.stree[ROOT];
208562306a36Sopenharmony_ci
208662306a36Sopenharmony_ci	/* free the specified (blocks) bits */
208762306a36Sopenharmony_ci	rc = dbFreeBits(bmp, dp, blkno, nblocks);
208862306a36Sopenharmony_ci
208962306a36Sopenharmony_ci	/* if error or the root has not changed, done. */
209062306a36Sopenharmony_ci	if (rc || (dp->tree.stree[ROOT] == oldroot))
209162306a36Sopenharmony_ci		return (rc);
209262306a36Sopenharmony_ci
209362306a36Sopenharmony_ci	/* root changed. bubble the change up to the dmap control pages.
209462306a36Sopenharmony_ci	 * if the adjustment of the upper level control pages fails,
209562306a36Sopenharmony_ci	 * backout the deallocation.
209662306a36Sopenharmony_ci	 */
209762306a36Sopenharmony_ci	if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) {
209862306a36Sopenharmony_ci		word = (blkno & (BPERDMAP - 1)) >> L2DBWORD;
209962306a36Sopenharmony_ci
210062306a36Sopenharmony_ci		/* as part of backing out the deallocation, we will have
210162306a36Sopenharmony_ci		 * to back split the dmap tree if the deallocation caused
210262306a36Sopenharmony_ci		 * the freed blocks to become part of a larger binary buddy
210362306a36Sopenharmony_ci		 * system.
210462306a36Sopenharmony_ci		 */
210562306a36Sopenharmony_ci		if (dp->tree.stree[word] == NOFREE)
210662306a36Sopenharmony_ci			dbBackSplit((dmtree_t *)&dp->tree, word, false);
210762306a36Sopenharmony_ci
210862306a36Sopenharmony_ci		dbAllocBits(bmp, dp, blkno, nblocks);
210962306a36Sopenharmony_ci	}
211062306a36Sopenharmony_ci
211162306a36Sopenharmony_ci	return (rc);
211262306a36Sopenharmony_ci}
211362306a36Sopenharmony_ci
211462306a36Sopenharmony_ci
211562306a36Sopenharmony_ci/*
211662306a36Sopenharmony_ci * NAME:	dbAllocBits()
211762306a36Sopenharmony_ci *
211862306a36Sopenharmony_ci * FUNCTION:	allocate a specified block range from a dmap.
211962306a36Sopenharmony_ci *
212062306a36Sopenharmony_ci *		this routine updates the dmap to reflect the working
212162306a36Sopenharmony_ci *		state allocation of the specified block range. it directly
212262306a36Sopenharmony_ci *		updates the bits of the working map and causes the adjustment
212362306a36Sopenharmony_ci *		of the binary buddy system described by the dmap's dmtree
212462306a36Sopenharmony_ci *		leaves to reflect the bits allocated.  it also causes the
212562306a36Sopenharmony_ci *		dmap's dmtree, as a whole, to reflect the allocated range.
212662306a36Sopenharmony_ci *
212762306a36Sopenharmony_ci * PARAMETERS:
212862306a36Sopenharmony_ci *	bmp	-  pointer to bmap descriptor
212962306a36Sopenharmony_ci *	dp	-  pointer to dmap to allocate bits from.
213062306a36Sopenharmony_ci *	blkno	-  starting block number of the bits to be allocated.
213162306a36Sopenharmony_ci *	nblocks	-  number of bits to be allocated.
213262306a36Sopenharmony_ci *
213362306a36Sopenharmony_ci * RETURN VALUES: none
213462306a36Sopenharmony_ci *
213562306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
213662306a36Sopenharmony_ci */
213762306a36Sopenharmony_cistatic void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
213862306a36Sopenharmony_ci			int nblocks)
213962306a36Sopenharmony_ci{
214062306a36Sopenharmony_ci	int dbitno, word, rembits, nb, nwords, wbitno, nw, agno;
214162306a36Sopenharmony_ci	dmtree_t *tp = (dmtree_t *) & dp->tree;
214262306a36Sopenharmony_ci	int size;
214362306a36Sopenharmony_ci	s8 *leaf;
214462306a36Sopenharmony_ci
214562306a36Sopenharmony_ci	/* pick up a pointer to the leaves of the dmap tree */
214662306a36Sopenharmony_ci	leaf = dp->tree.stree + LEAFIND;
214762306a36Sopenharmony_ci
214862306a36Sopenharmony_ci	/* determine the bit number and word within the dmap of the
214962306a36Sopenharmony_ci	 * starting block.
215062306a36Sopenharmony_ci	 */
215162306a36Sopenharmony_ci	dbitno = blkno & (BPERDMAP - 1);
215262306a36Sopenharmony_ci	word = dbitno >> L2DBWORD;
215362306a36Sopenharmony_ci
215462306a36Sopenharmony_ci	/* block range better be within the dmap */
215562306a36Sopenharmony_ci	assert(dbitno + nblocks <= BPERDMAP);
215662306a36Sopenharmony_ci
215762306a36Sopenharmony_ci	/* allocate the bits of the dmap's words corresponding to the block
215862306a36Sopenharmony_ci	 * range. not all bits of the first and last words may be contained
215962306a36Sopenharmony_ci	 * within the block range.  if this is the case, we'll work against
216062306a36Sopenharmony_ci	 * those words (i.e. partial first and/or last) on an individual basis
216162306a36Sopenharmony_ci	 * (a single pass), allocating the bits of interest by hand and
216262306a36Sopenharmony_ci	 * updating the leaf corresponding to the dmap word. a single pass
216362306a36Sopenharmony_ci	 * will be used for all dmap words fully contained within the
216462306a36Sopenharmony_ci	 * specified range.  within this pass, the bits of all fully contained
216562306a36Sopenharmony_ci	 * dmap words will be marked as free in a single shot and the leaves
216662306a36Sopenharmony_ci	 * will be updated. a single leaf may describe the free space of
216762306a36Sopenharmony_ci	 * multiple dmap words, so we may update only a subset of the actual
216862306a36Sopenharmony_ci	 * leaves corresponding to the dmap words of the block range.
216962306a36Sopenharmony_ci	 */
217062306a36Sopenharmony_ci	for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
217162306a36Sopenharmony_ci		/* determine the bit number within the word and
217262306a36Sopenharmony_ci		 * the number of bits within the word.
217362306a36Sopenharmony_ci		 */
217462306a36Sopenharmony_ci		wbitno = dbitno & (DBWORD - 1);
217562306a36Sopenharmony_ci		nb = min(rembits, DBWORD - wbitno);
217662306a36Sopenharmony_ci
217762306a36Sopenharmony_ci		/* check if only part of a word is to be allocated.
217862306a36Sopenharmony_ci		 */
217962306a36Sopenharmony_ci		if (nb < DBWORD) {
218062306a36Sopenharmony_ci			/* allocate (set to 1) the appropriate bits within
218162306a36Sopenharmony_ci			 * this dmap word.
218262306a36Sopenharmony_ci			 */
218362306a36Sopenharmony_ci			dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb)
218462306a36Sopenharmony_ci						      >> wbitno);
218562306a36Sopenharmony_ci
218662306a36Sopenharmony_ci			/* update the leaf for this dmap word. in addition
218762306a36Sopenharmony_ci			 * to setting the leaf value to the binary buddy max
218862306a36Sopenharmony_ci			 * of the updated dmap word, dbSplit() will split
218962306a36Sopenharmony_ci			 * the binary system of the leaves if need be.
219062306a36Sopenharmony_ci			 */
219162306a36Sopenharmony_ci			dbSplit(tp, word, BUDMIN,
219262306a36Sopenharmony_ci				dbMaxBud((u8 *)&dp->wmap[word]), false);
219362306a36Sopenharmony_ci
219462306a36Sopenharmony_ci			word += 1;
219562306a36Sopenharmony_ci		} else {
219662306a36Sopenharmony_ci			/* one or more dmap words are fully contained
219762306a36Sopenharmony_ci			 * within the block range.  determine how many
219862306a36Sopenharmony_ci			 * words and allocate (set to 1) the bits of these
219962306a36Sopenharmony_ci			 * words.
220062306a36Sopenharmony_ci			 */
220162306a36Sopenharmony_ci			nwords = rembits >> L2DBWORD;
220262306a36Sopenharmony_ci			memset(&dp->wmap[word], (int) ONES, nwords * 4);
220362306a36Sopenharmony_ci
220462306a36Sopenharmony_ci			/* determine how many bits.
220562306a36Sopenharmony_ci			 */
220662306a36Sopenharmony_ci			nb = nwords << L2DBWORD;
220762306a36Sopenharmony_ci
220862306a36Sopenharmony_ci			/* now update the appropriate leaves to reflect
220962306a36Sopenharmony_ci			 * the allocated words.
221062306a36Sopenharmony_ci			 */
221162306a36Sopenharmony_ci			for (; nwords > 0; nwords -= nw) {
221262306a36Sopenharmony_ci				if (leaf[word] < BUDMIN) {
221362306a36Sopenharmony_ci					jfs_error(bmp->db_ipbmap->i_sb,
221462306a36Sopenharmony_ci						  "leaf page corrupt\n");
221562306a36Sopenharmony_ci					break;
221662306a36Sopenharmony_ci				}
221762306a36Sopenharmony_ci
221862306a36Sopenharmony_ci				/* determine what the leaf value should be
221962306a36Sopenharmony_ci				 * updated to as the minimum of the l2 number
222062306a36Sopenharmony_ci				 * of bits being allocated and the l2 number
222162306a36Sopenharmony_ci				 * of bits currently described by this leaf.
222262306a36Sopenharmony_ci				 */
222362306a36Sopenharmony_ci				size = min_t(int, leaf[word],
222462306a36Sopenharmony_ci					     NLSTOL2BSZ(nwords));
222562306a36Sopenharmony_ci
222662306a36Sopenharmony_ci				/* update the leaf to reflect the allocation.
222762306a36Sopenharmony_ci				 * in addition to setting the leaf value to
222862306a36Sopenharmony_ci				 * NOFREE, dbSplit() will split the binary
222962306a36Sopenharmony_ci				 * system of the leaves to reflect the current
223062306a36Sopenharmony_ci				 * allocation (size).
223162306a36Sopenharmony_ci				 */
223262306a36Sopenharmony_ci				dbSplit(tp, word, size, NOFREE, false);
223362306a36Sopenharmony_ci
223462306a36Sopenharmony_ci				/* get the number of dmap words handled */
223562306a36Sopenharmony_ci				nw = BUDSIZE(size, BUDMIN);
223662306a36Sopenharmony_ci				word += nw;
223762306a36Sopenharmony_ci			}
223862306a36Sopenharmony_ci		}
223962306a36Sopenharmony_ci	}
224062306a36Sopenharmony_ci
224162306a36Sopenharmony_ci	/* update the free count for this dmap */
224262306a36Sopenharmony_ci	le32_add_cpu(&dp->nfree, -nblocks);
224362306a36Sopenharmony_ci
224462306a36Sopenharmony_ci	BMAP_LOCK(bmp);
224562306a36Sopenharmony_ci
224662306a36Sopenharmony_ci	/* if this allocation group is completely free,
224762306a36Sopenharmony_ci	 * update the maximum allocation group number if this allocation
224862306a36Sopenharmony_ci	 * group is the new max.
224962306a36Sopenharmony_ci	 */
225062306a36Sopenharmony_ci	agno = blkno >> bmp->db_agl2size;
225162306a36Sopenharmony_ci	if (agno > bmp->db_maxag)
225262306a36Sopenharmony_ci		bmp->db_maxag = agno;
225362306a36Sopenharmony_ci
225462306a36Sopenharmony_ci	/* update the free count for the allocation group and map */
225562306a36Sopenharmony_ci	bmp->db_agfree[agno] -= nblocks;
225662306a36Sopenharmony_ci	bmp->db_nfree -= nblocks;
225762306a36Sopenharmony_ci
225862306a36Sopenharmony_ci	BMAP_UNLOCK(bmp);
225962306a36Sopenharmony_ci}
226062306a36Sopenharmony_ci
226162306a36Sopenharmony_ci
226262306a36Sopenharmony_ci/*
226362306a36Sopenharmony_ci * NAME:	dbFreeBits()
226462306a36Sopenharmony_ci *
226562306a36Sopenharmony_ci * FUNCTION:	free a specified block range from a dmap.
226662306a36Sopenharmony_ci *
226762306a36Sopenharmony_ci *		this routine updates the dmap to reflect the working
226862306a36Sopenharmony_ci *		state allocation of the specified block range. it directly
226962306a36Sopenharmony_ci *		updates the bits of the working map and causes the adjustment
227062306a36Sopenharmony_ci *		of the binary buddy system described by the dmap's dmtree
227162306a36Sopenharmony_ci *		leaves to reflect the bits freed.  it also causes the dmap's
227262306a36Sopenharmony_ci *		dmtree, as a whole, to reflect the deallocated range.
227362306a36Sopenharmony_ci *
227462306a36Sopenharmony_ci * PARAMETERS:
227562306a36Sopenharmony_ci *	bmp	-  pointer to bmap descriptor
227662306a36Sopenharmony_ci *	dp	-  pointer to dmap to free bits from.
227762306a36Sopenharmony_ci *	blkno	-  starting block number of the bits to be freed.
227862306a36Sopenharmony_ci *	nblocks	-  number of bits to be freed.
227962306a36Sopenharmony_ci *
228062306a36Sopenharmony_ci * RETURN VALUES: 0 for success
228162306a36Sopenharmony_ci *
228262306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
228362306a36Sopenharmony_ci */
228462306a36Sopenharmony_cistatic int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
228562306a36Sopenharmony_ci		       int nblocks)
228662306a36Sopenharmony_ci{
228762306a36Sopenharmony_ci	int dbitno, word, rembits, nb, nwords, wbitno, nw, agno;
228862306a36Sopenharmony_ci	dmtree_t *tp = (dmtree_t *) & dp->tree;
228962306a36Sopenharmony_ci	int rc = 0;
229062306a36Sopenharmony_ci	int size;
229162306a36Sopenharmony_ci
229262306a36Sopenharmony_ci	/* determine the bit number and word within the dmap of the
229362306a36Sopenharmony_ci	 * starting block.
229462306a36Sopenharmony_ci	 */
229562306a36Sopenharmony_ci	dbitno = blkno & (BPERDMAP - 1);
229662306a36Sopenharmony_ci	word = dbitno >> L2DBWORD;
229762306a36Sopenharmony_ci
229862306a36Sopenharmony_ci	/* block range better be within the dmap.
229962306a36Sopenharmony_ci	 */
230062306a36Sopenharmony_ci	assert(dbitno + nblocks <= BPERDMAP);
230162306a36Sopenharmony_ci
230262306a36Sopenharmony_ci	/* free the bits of the dmaps words corresponding to the block range.
230362306a36Sopenharmony_ci	 * not all bits of the first and last words may be contained within
230462306a36Sopenharmony_ci	 * the block range.  if this is the case, we'll work against those
230562306a36Sopenharmony_ci	 * words (i.e. partial first and/or last) on an individual basis
230662306a36Sopenharmony_ci	 * (a single pass), freeing the bits of interest by hand and updating
230762306a36Sopenharmony_ci	 * the leaf corresponding to the dmap word. a single pass will be used
230862306a36Sopenharmony_ci	 * for all dmap words fully contained within the specified range.
230962306a36Sopenharmony_ci	 * within this pass, the bits of all fully contained dmap words will
231062306a36Sopenharmony_ci	 * be marked as free in a single shot and the leaves will be updated. a
231162306a36Sopenharmony_ci	 * single leaf may describe the free space of multiple dmap words,
231262306a36Sopenharmony_ci	 * so we may update only a subset of the actual leaves corresponding
231362306a36Sopenharmony_ci	 * to the dmap words of the block range.
231462306a36Sopenharmony_ci	 *
231562306a36Sopenharmony_ci	 * dbJoin() is used to update leaf values and will join the binary
231662306a36Sopenharmony_ci	 * buddy system of the leaves if the new leaf values indicate this
231762306a36Sopenharmony_ci	 * should be done.
231862306a36Sopenharmony_ci	 */
231962306a36Sopenharmony_ci	for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
232062306a36Sopenharmony_ci		/* determine the bit number within the word and
232162306a36Sopenharmony_ci		 * the number of bits within the word.
232262306a36Sopenharmony_ci		 */
232362306a36Sopenharmony_ci		wbitno = dbitno & (DBWORD - 1);
232462306a36Sopenharmony_ci		nb = min(rembits, DBWORD - wbitno);
232562306a36Sopenharmony_ci
232662306a36Sopenharmony_ci		/* check if only part of a word is to be freed.
232762306a36Sopenharmony_ci		 */
232862306a36Sopenharmony_ci		if (nb < DBWORD) {
232962306a36Sopenharmony_ci			/* free (zero) the appropriate bits within this
233062306a36Sopenharmony_ci			 * dmap word.
233162306a36Sopenharmony_ci			 */
233262306a36Sopenharmony_ci			dp->wmap[word] &=
233362306a36Sopenharmony_ci			    cpu_to_le32(~(ONES << (DBWORD - nb)
233462306a36Sopenharmony_ci					  >> wbitno));
233562306a36Sopenharmony_ci
233662306a36Sopenharmony_ci			/* update the leaf for this dmap word.
233762306a36Sopenharmony_ci			 */
233862306a36Sopenharmony_ci			rc = dbJoin(tp, word,
233962306a36Sopenharmony_ci				    dbMaxBud((u8 *)&dp->wmap[word]), false);
234062306a36Sopenharmony_ci			if (rc)
234162306a36Sopenharmony_ci				return rc;
234262306a36Sopenharmony_ci
234362306a36Sopenharmony_ci			word += 1;
234462306a36Sopenharmony_ci		} else {
234562306a36Sopenharmony_ci			/* one or more dmap words are fully contained
234662306a36Sopenharmony_ci			 * within the block range.  determine how many
234762306a36Sopenharmony_ci			 * words and free (zero) the bits of these words.
234862306a36Sopenharmony_ci			 */
234962306a36Sopenharmony_ci			nwords = rembits >> L2DBWORD;
235062306a36Sopenharmony_ci			memset(&dp->wmap[word], 0, nwords * 4);
235162306a36Sopenharmony_ci
235262306a36Sopenharmony_ci			/* determine how many bits.
235362306a36Sopenharmony_ci			 */
235462306a36Sopenharmony_ci			nb = nwords << L2DBWORD;
235562306a36Sopenharmony_ci
235662306a36Sopenharmony_ci			/* now update the appropriate leaves to reflect
235762306a36Sopenharmony_ci			 * the freed words.
235862306a36Sopenharmony_ci			 */
235962306a36Sopenharmony_ci			for (; nwords > 0; nwords -= nw) {
236062306a36Sopenharmony_ci				/* determine what the leaf value should be
236162306a36Sopenharmony_ci				 * updated to as the minimum of the l2 number
236262306a36Sopenharmony_ci				 * of bits being freed and the l2 (max) number
236362306a36Sopenharmony_ci				 * of bits that can be described by this leaf.
236462306a36Sopenharmony_ci				 */
236562306a36Sopenharmony_ci				size =
236662306a36Sopenharmony_ci				    min(LITOL2BSZ
236762306a36Sopenharmony_ci					(word, L2LPERDMAP, BUDMIN),
236862306a36Sopenharmony_ci					NLSTOL2BSZ(nwords));
236962306a36Sopenharmony_ci
237062306a36Sopenharmony_ci				/* update the leaf.
237162306a36Sopenharmony_ci				 */
237262306a36Sopenharmony_ci				rc = dbJoin(tp, word, size, false);
237362306a36Sopenharmony_ci				if (rc)
237462306a36Sopenharmony_ci					return rc;
237562306a36Sopenharmony_ci
237662306a36Sopenharmony_ci				/* get the number of dmap words handled.
237762306a36Sopenharmony_ci				 */
237862306a36Sopenharmony_ci				nw = BUDSIZE(size, BUDMIN);
237962306a36Sopenharmony_ci				word += nw;
238062306a36Sopenharmony_ci			}
238162306a36Sopenharmony_ci		}
238262306a36Sopenharmony_ci	}
238362306a36Sopenharmony_ci
238462306a36Sopenharmony_ci	/* update the free count for this dmap.
238562306a36Sopenharmony_ci	 */
238662306a36Sopenharmony_ci	le32_add_cpu(&dp->nfree, nblocks);
238762306a36Sopenharmony_ci
238862306a36Sopenharmony_ci	BMAP_LOCK(bmp);
238962306a36Sopenharmony_ci
239062306a36Sopenharmony_ci	/* update the free count for the allocation group and
239162306a36Sopenharmony_ci	 * map.
239262306a36Sopenharmony_ci	 */
239362306a36Sopenharmony_ci	agno = blkno >> bmp->db_agl2size;
239462306a36Sopenharmony_ci	bmp->db_nfree += nblocks;
239562306a36Sopenharmony_ci	bmp->db_agfree[agno] += nblocks;
239662306a36Sopenharmony_ci
239762306a36Sopenharmony_ci	/* check if this allocation group is not completely free and
239862306a36Sopenharmony_ci	 * if it is currently the maximum (rightmost) allocation group.
239962306a36Sopenharmony_ci	 * if so, establish the new maximum allocation group number by
240062306a36Sopenharmony_ci	 * searching left for the first allocation group with allocation.
240162306a36Sopenharmony_ci	 */
240262306a36Sopenharmony_ci	if ((bmp->db_agfree[agno] == bmp->db_agsize && agno == bmp->db_maxag) ||
240362306a36Sopenharmony_ci	    (agno == bmp->db_numag - 1 &&
240462306a36Sopenharmony_ci	     bmp->db_agfree[agno] == (bmp-> db_mapsize & (BPERDMAP - 1)))) {
240562306a36Sopenharmony_ci		while (bmp->db_maxag > 0) {
240662306a36Sopenharmony_ci			bmp->db_maxag -= 1;
240762306a36Sopenharmony_ci			if (bmp->db_agfree[bmp->db_maxag] !=
240862306a36Sopenharmony_ci			    bmp->db_agsize)
240962306a36Sopenharmony_ci				break;
241062306a36Sopenharmony_ci		}
241162306a36Sopenharmony_ci
241262306a36Sopenharmony_ci		/* re-establish the allocation group preference if the
241362306a36Sopenharmony_ci		 * current preference is right of the maximum allocation
241462306a36Sopenharmony_ci		 * group.
241562306a36Sopenharmony_ci		 */
241662306a36Sopenharmony_ci		if (bmp->db_agpref > bmp->db_maxag)
241762306a36Sopenharmony_ci			bmp->db_agpref = bmp->db_maxag;
241862306a36Sopenharmony_ci	}
241962306a36Sopenharmony_ci
242062306a36Sopenharmony_ci	BMAP_UNLOCK(bmp);
242162306a36Sopenharmony_ci
242262306a36Sopenharmony_ci	return 0;
242362306a36Sopenharmony_ci}
242462306a36Sopenharmony_ci
242562306a36Sopenharmony_ci
242662306a36Sopenharmony_ci/*
242762306a36Sopenharmony_ci * NAME:	dbAdjCtl()
242862306a36Sopenharmony_ci *
242962306a36Sopenharmony_ci * FUNCTION:	adjust a dmap control page at a specified level to reflect
243062306a36Sopenharmony_ci *		the change in a lower level dmap or dmap control page's
243162306a36Sopenharmony_ci *		maximum string of free blocks (i.e. a change in the root
243262306a36Sopenharmony_ci *		of the lower level object's dmtree) due to the allocation
243362306a36Sopenharmony_ci *		or deallocation of a range of blocks with a single dmap.
243462306a36Sopenharmony_ci *
243562306a36Sopenharmony_ci *		on entry, this routine is provided with the new value of
243662306a36Sopenharmony_ci *		the lower level dmap or dmap control page root and the
243762306a36Sopenharmony_ci *		starting block number of the block range whose allocation
243862306a36Sopenharmony_ci *		or deallocation resulted in the root change.  this range
243962306a36Sopenharmony_ci *		is respresented by a single leaf of the current dmapctl
244062306a36Sopenharmony_ci *		and the leaf will be updated with this value, possibly
244162306a36Sopenharmony_ci *		causing a binary buddy system within the leaves to be
244262306a36Sopenharmony_ci *		split or joined.  the update may also cause the dmapctl's
244362306a36Sopenharmony_ci *		dmtree to be updated.
244462306a36Sopenharmony_ci *
244562306a36Sopenharmony_ci *		if the adjustment of the dmap control page, itself, causes its
244662306a36Sopenharmony_ci *		root to change, this change will be bubbled up to the next dmap
244762306a36Sopenharmony_ci *		control level by a recursive call to this routine, specifying
244862306a36Sopenharmony_ci *		the new root value and the next dmap control page level to
244962306a36Sopenharmony_ci *		be adjusted.
245062306a36Sopenharmony_ci * PARAMETERS:
245162306a36Sopenharmony_ci *	bmp	-  pointer to bmap descriptor
245262306a36Sopenharmony_ci *	blkno	-  the first block of a block range within a dmap.  it is
245362306a36Sopenharmony_ci *		   the allocation or deallocation of this block range that
245462306a36Sopenharmony_ci *		   requires the dmap control page to be adjusted.
245562306a36Sopenharmony_ci *	newval	-  the new value of the lower level dmap or dmap control
245662306a36Sopenharmony_ci *		   page root.
245762306a36Sopenharmony_ci *	alloc	-  'true' if adjustment is due to an allocation.
245862306a36Sopenharmony_ci *	level	-  current level of dmap control page (i.e. L0, L1, L2) to
245962306a36Sopenharmony_ci *		   be adjusted.
246062306a36Sopenharmony_ci *
246162306a36Sopenharmony_ci * RETURN VALUES:
246262306a36Sopenharmony_ci *	0	- success
246362306a36Sopenharmony_ci *	-EIO	- i/o error
246462306a36Sopenharmony_ci *
246562306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
246662306a36Sopenharmony_ci */
246762306a36Sopenharmony_cistatic int
246862306a36Sopenharmony_cidbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
246962306a36Sopenharmony_ci{
247062306a36Sopenharmony_ci	struct metapage *mp;
247162306a36Sopenharmony_ci	s8 oldroot;
247262306a36Sopenharmony_ci	int oldval;
247362306a36Sopenharmony_ci	s64 lblkno;
247462306a36Sopenharmony_ci	struct dmapctl *dcp;
247562306a36Sopenharmony_ci	int rc, leafno, ti;
247662306a36Sopenharmony_ci
247762306a36Sopenharmony_ci	/* get the buffer for the dmap control page for the specified
247862306a36Sopenharmony_ci	 * block number and control page level.
247962306a36Sopenharmony_ci	 */
248062306a36Sopenharmony_ci	lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, level);
248162306a36Sopenharmony_ci	mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
248262306a36Sopenharmony_ci	if (mp == NULL)
248362306a36Sopenharmony_ci		return -EIO;
248462306a36Sopenharmony_ci	dcp = (struct dmapctl *) mp->data;
248562306a36Sopenharmony_ci
248662306a36Sopenharmony_ci	if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
248762306a36Sopenharmony_ci		jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n");
248862306a36Sopenharmony_ci		release_metapage(mp);
248962306a36Sopenharmony_ci		return -EIO;
249062306a36Sopenharmony_ci	}
249162306a36Sopenharmony_ci
249262306a36Sopenharmony_ci	/* determine the leaf number corresponding to the block and
249362306a36Sopenharmony_ci	 * the index within the dmap control tree.
249462306a36Sopenharmony_ci	 */
249562306a36Sopenharmony_ci	leafno = BLKTOCTLLEAF(blkno, dcp->budmin);
249662306a36Sopenharmony_ci	ti = leafno + le32_to_cpu(dcp->leafidx);
249762306a36Sopenharmony_ci
249862306a36Sopenharmony_ci	/* save the current leaf value and the current root level (i.e.
249962306a36Sopenharmony_ci	 * maximum l2 free string described by this dmapctl).
250062306a36Sopenharmony_ci	 */
250162306a36Sopenharmony_ci	oldval = dcp->stree[ti];
250262306a36Sopenharmony_ci	oldroot = dcp->stree[ROOT];
250362306a36Sopenharmony_ci
250462306a36Sopenharmony_ci	/* check if this is a control page update for an allocation.
250562306a36Sopenharmony_ci	 * if so, update the leaf to reflect the new leaf value using
250662306a36Sopenharmony_ci	 * dbSplit(); otherwise (deallocation), use dbJoin() to update
250762306a36Sopenharmony_ci	 * the leaf with the new value.  in addition to updating the
250862306a36Sopenharmony_ci	 * leaf, dbSplit() will also split the binary buddy system of
250962306a36Sopenharmony_ci	 * the leaves, if required, and bubble new values within the
251062306a36Sopenharmony_ci	 * dmapctl tree, if required.  similarly, dbJoin() will join
251162306a36Sopenharmony_ci	 * the binary buddy system of leaves and bubble new values up
251262306a36Sopenharmony_ci	 * the dmapctl tree as required by the new leaf value.
251362306a36Sopenharmony_ci	 */
251462306a36Sopenharmony_ci	if (alloc) {
251562306a36Sopenharmony_ci		/* check if we are in the middle of a binary buddy
251662306a36Sopenharmony_ci		 * system.  this happens when we are performing the
251762306a36Sopenharmony_ci		 * first allocation out of an allocation group that
251862306a36Sopenharmony_ci		 * is part (not the first part) of a larger binary
251962306a36Sopenharmony_ci		 * buddy system.  if we are in the middle, back split
252062306a36Sopenharmony_ci		 * the system prior to calling dbSplit() which assumes
252162306a36Sopenharmony_ci		 * that it is at the front of a binary buddy system.
252262306a36Sopenharmony_ci		 */
252362306a36Sopenharmony_ci		if (oldval == NOFREE) {
252462306a36Sopenharmony_ci			rc = dbBackSplit((dmtree_t *)dcp, leafno, true);
252562306a36Sopenharmony_ci			if (rc) {
252662306a36Sopenharmony_ci				release_metapage(mp);
252762306a36Sopenharmony_ci				return rc;
252862306a36Sopenharmony_ci			}
252962306a36Sopenharmony_ci			oldval = dcp->stree[ti];
253062306a36Sopenharmony_ci		}
253162306a36Sopenharmony_ci		dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval, true);
253262306a36Sopenharmony_ci	} else {
253362306a36Sopenharmony_ci		rc = dbJoin((dmtree_t *) dcp, leafno, newval, true);
253462306a36Sopenharmony_ci		if (rc) {
253562306a36Sopenharmony_ci			release_metapage(mp);
253662306a36Sopenharmony_ci			return rc;
253762306a36Sopenharmony_ci		}
253862306a36Sopenharmony_ci	}
253962306a36Sopenharmony_ci
254062306a36Sopenharmony_ci	/* check if the root of the current dmap control page changed due
254162306a36Sopenharmony_ci	 * to the update and if the current dmap control page is not at
254262306a36Sopenharmony_ci	 * the current top level (i.e. L0, L1, L2) of the map.  if so (i.e.
254362306a36Sopenharmony_ci	 * root changed and this is not the top level), call this routine
254462306a36Sopenharmony_ci	 * again (recursion) for the next higher level of the mapping to
254562306a36Sopenharmony_ci	 * reflect the change in root for the current dmap control page.
254662306a36Sopenharmony_ci	 */
254762306a36Sopenharmony_ci	if (dcp->stree[ROOT] != oldroot) {
254862306a36Sopenharmony_ci		/* are we below the top level of the map.  if so,
254962306a36Sopenharmony_ci		 * bubble the root up to the next higher level.
255062306a36Sopenharmony_ci		 */
255162306a36Sopenharmony_ci		if (level < bmp->db_maxlevel) {
255262306a36Sopenharmony_ci			/* bubble up the new root of this dmap control page to
255362306a36Sopenharmony_ci			 * the next level.
255462306a36Sopenharmony_ci			 */
255562306a36Sopenharmony_ci			if ((rc =
255662306a36Sopenharmony_ci			     dbAdjCtl(bmp, blkno, dcp->stree[ROOT], alloc,
255762306a36Sopenharmony_ci				      level + 1))) {
255862306a36Sopenharmony_ci				/* something went wrong in bubbling up the new
255962306a36Sopenharmony_ci				 * root value, so backout the changes to the
256062306a36Sopenharmony_ci				 * current dmap control page.
256162306a36Sopenharmony_ci				 */
256262306a36Sopenharmony_ci				if (alloc) {
256362306a36Sopenharmony_ci					dbJoin((dmtree_t *) dcp, leafno,
256462306a36Sopenharmony_ci					       oldval, true);
256562306a36Sopenharmony_ci				} else {
256662306a36Sopenharmony_ci					/* the dbJoin() above might have
256762306a36Sopenharmony_ci					 * caused a larger binary buddy system
256862306a36Sopenharmony_ci					 * to form and we may now be in the
256962306a36Sopenharmony_ci					 * middle of it.  if this is the case,
257062306a36Sopenharmony_ci					 * back split the buddies.
257162306a36Sopenharmony_ci					 */
257262306a36Sopenharmony_ci					if (dcp->stree[ti] == NOFREE)
257362306a36Sopenharmony_ci						dbBackSplit((dmtree_t *)
257462306a36Sopenharmony_ci							    dcp, leafno, true);
257562306a36Sopenharmony_ci					dbSplit((dmtree_t *) dcp, leafno,
257662306a36Sopenharmony_ci						dcp->budmin, oldval, true);
257762306a36Sopenharmony_ci				}
257862306a36Sopenharmony_ci
257962306a36Sopenharmony_ci				/* release the buffer and return the error.
258062306a36Sopenharmony_ci				 */
258162306a36Sopenharmony_ci				release_metapage(mp);
258262306a36Sopenharmony_ci				return (rc);
258362306a36Sopenharmony_ci			}
258462306a36Sopenharmony_ci		} else {
258562306a36Sopenharmony_ci			/* we're at the top level of the map. update
258662306a36Sopenharmony_ci			 * the bmap control page to reflect the size
258762306a36Sopenharmony_ci			 * of the maximum free buddy system.
258862306a36Sopenharmony_ci			 */
258962306a36Sopenharmony_ci			assert(level == bmp->db_maxlevel);
259062306a36Sopenharmony_ci			if (bmp->db_maxfreebud != oldroot) {
259162306a36Sopenharmony_ci				jfs_error(bmp->db_ipbmap->i_sb,
259262306a36Sopenharmony_ci					  "the maximum free buddy is not the old root\n");
259362306a36Sopenharmony_ci			}
259462306a36Sopenharmony_ci			bmp->db_maxfreebud = dcp->stree[ROOT];
259562306a36Sopenharmony_ci		}
259662306a36Sopenharmony_ci	}
259762306a36Sopenharmony_ci
259862306a36Sopenharmony_ci	/* write the buffer.
259962306a36Sopenharmony_ci	 */
260062306a36Sopenharmony_ci	write_metapage(mp);
260162306a36Sopenharmony_ci
260262306a36Sopenharmony_ci	return (0);
260362306a36Sopenharmony_ci}
260462306a36Sopenharmony_ci
260562306a36Sopenharmony_ci
260662306a36Sopenharmony_ci/*
260762306a36Sopenharmony_ci * NAME:	dbSplit()
260862306a36Sopenharmony_ci *
260962306a36Sopenharmony_ci * FUNCTION:	update the leaf of a dmtree with a new value, splitting
261062306a36Sopenharmony_ci *		the leaf from the binary buddy system of the dmtree's
261162306a36Sopenharmony_ci *		leaves, as required.
261262306a36Sopenharmony_ci *
261362306a36Sopenharmony_ci * PARAMETERS:
261462306a36Sopenharmony_ci *	tp	- pointer to the tree containing the leaf.
261562306a36Sopenharmony_ci *	leafno	- the number of the leaf to be updated.
261662306a36Sopenharmony_ci *	splitsz	- the size the binary buddy system starting at the leaf
261762306a36Sopenharmony_ci *		  must be split to, specified as the log2 number of blocks.
261862306a36Sopenharmony_ci *	newval	- the new value for the leaf.
261962306a36Sopenharmony_ci *
262062306a36Sopenharmony_ci * RETURN VALUES: none
262162306a36Sopenharmony_ci *
262262306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
262362306a36Sopenharmony_ci */
262462306a36Sopenharmony_cistatic void dbSplit(dmtree_t *tp, int leafno, int splitsz, int newval, bool is_ctl)
262562306a36Sopenharmony_ci{
262662306a36Sopenharmony_ci	int budsz;
262762306a36Sopenharmony_ci	int cursz;
262862306a36Sopenharmony_ci	s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
262962306a36Sopenharmony_ci
263062306a36Sopenharmony_ci	/* check if the leaf needs to be split.
263162306a36Sopenharmony_ci	 */
263262306a36Sopenharmony_ci	if (leaf[leafno] > tp->dmt_budmin) {
263362306a36Sopenharmony_ci		/* the split occurs by cutting the buddy system in half
263462306a36Sopenharmony_ci		 * at the specified leaf until we reach the specified
263562306a36Sopenharmony_ci		 * size.  pick up the starting split size (current size
263662306a36Sopenharmony_ci		 * - 1 in l2) and the corresponding buddy size.
263762306a36Sopenharmony_ci		 */
263862306a36Sopenharmony_ci		cursz = leaf[leafno] - 1;
263962306a36Sopenharmony_ci		budsz = BUDSIZE(cursz, tp->dmt_budmin);
264062306a36Sopenharmony_ci
264162306a36Sopenharmony_ci		/* split until we reach the specified size.
264262306a36Sopenharmony_ci		 */
264362306a36Sopenharmony_ci		while (cursz >= splitsz) {
264462306a36Sopenharmony_ci			/* update the buddy's leaf with its new value.
264562306a36Sopenharmony_ci			 */
264662306a36Sopenharmony_ci			dbAdjTree(tp, leafno ^ budsz, cursz, is_ctl);
264762306a36Sopenharmony_ci
264862306a36Sopenharmony_ci			/* on to the next size and buddy.
264962306a36Sopenharmony_ci			 */
265062306a36Sopenharmony_ci			cursz -= 1;
265162306a36Sopenharmony_ci			budsz >>= 1;
265262306a36Sopenharmony_ci		}
265362306a36Sopenharmony_ci	}
265462306a36Sopenharmony_ci
265562306a36Sopenharmony_ci	/* adjust the dmap tree to reflect the specified leaf's new
265662306a36Sopenharmony_ci	 * value.
265762306a36Sopenharmony_ci	 */
265862306a36Sopenharmony_ci	dbAdjTree(tp, leafno, newval, is_ctl);
265962306a36Sopenharmony_ci}
266062306a36Sopenharmony_ci
266162306a36Sopenharmony_ci
266262306a36Sopenharmony_ci/*
266362306a36Sopenharmony_ci * NAME:	dbBackSplit()
266462306a36Sopenharmony_ci *
266562306a36Sopenharmony_ci * FUNCTION:	back split the binary buddy system of dmtree leaves
266662306a36Sopenharmony_ci *		that hold a specified leaf until the specified leaf
266762306a36Sopenharmony_ci *		starts its own binary buddy system.
266862306a36Sopenharmony_ci *
266962306a36Sopenharmony_ci *		the allocators typically perform allocations at the start
267062306a36Sopenharmony_ci *		of binary buddy systems and dbSplit() is used to accomplish
267162306a36Sopenharmony_ci *		any required splits.  in some cases, however, allocation
267262306a36Sopenharmony_ci *		may occur in the middle of a binary system and requires a
267362306a36Sopenharmony_ci *		back split, with the split proceeding out from the middle of
267462306a36Sopenharmony_ci *		the system (less efficient) rather than the start of the
267562306a36Sopenharmony_ci *		system (more efficient).  the cases in which a back split
267662306a36Sopenharmony_ci *		is required are rare and are limited to the first allocation
267762306a36Sopenharmony_ci *		within an allocation group which is a part (not first part)
267862306a36Sopenharmony_ci *		of a larger binary buddy system and a few exception cases
267962306a36Sopenharmony_ci *		in which a previous join operation must be backed out.
268062306a36Sopenharmony_ci *
268162306a36Sopenharmony_ci * PARAMETERS:
268262306a36Sopenharmony_ci *	tp	- pointer to the tree containing the leaf.
268362306a36Sopenharmony_ci *	leafno	- the number of the leaf to be updated.
268462306a36Sopenharmony_ci *
268562306a36Sopenharmony_ci * RETURN VALUES: none
268662306a36Sopenharmony_ci *
268762306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
268862306a36Sopenharmony_ci */
268962306a36Sopenharmony_cistatic int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl)
269062306a36Sopenharmony_ci{
269162306a36Sopenharmony_ci	int budsz, bud, w, bsz, size;
269262306a36Sopenharmony_ci	int cursz;
269362306a36Sopenharmony_ci	s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
269462306a36Sopenharmony_ci
269562306a36Sopenharmony_ci	/* leaf should be part (not first part) of a binary
269662306a36Sopenharmony_ci	 * buddy system.
269762306a36Sopenharmony_ci	 */
269862306a36Sopenharmony_ci	assert(leaf[leafno] == NOFREE);
269962306a36Sopenharmony_ci
270062306a36Sopenharmony_ci	/* the back split is accomplished by iteratively finding the leaf
270162306a36Sopenharmony_ci	 * that starts the buddy system that contains the specified leaf and
270262306a36Sopenharmony_ci	 * splitting that system in two.  this iteration continues until
270362306a36Sopenharmony_ci	 * the specified leaf becomes the start of a buddy system.
270462306a36Sopenharmony_ci	 *
270562306a36Sopenharmony_ci	 * determine maximum possible l2 size for the specified leaf.
270662306a36Sopenharmony_ci	 */
270762306a36Sopenharmony_ci	size =
270862306a36Sopenharmony_ci	    LITOL2BSZ(leafno, le32_to_cpu(tp->dmt_l2nleafs),
270962306a36Sopenharmony_ci		      tp->dmt_budmin);
271062306a36Sopenharmony_ci
271162306a36Sopenharmony_ci	/* determine the number of leaves covered by this size.  this
271262306a36Sopenharmony_ci	 * is the buddy size that we will start with as we search for
271362306a36Sopenharmony_ci	 * the buddy system that contains the specified leaf.
271462306a36Sopenharmony_ci	 */
271562306a36Sopenharmony_ci	budsz = BUDSIZE(size, tp->dmt_budmin);
271662306a36Sopenharmony_ci
271762306a36Sopenharmony_ci	/* back split.
271862306a36Sopenharmony_ci	 */
271962306a36Sopenharmony_ci	while (leaf[leafno] == NOFREE) {
272062306a36Sopenharmony_ci		/* find the leftmost buddy leaf.
272162306a36Sopenharmony_ci		 */
272262306a36Sopenharmony_ci		for (w = leafno, bsz = budsz;; bsz <<= 1,
272362306a36Sopenharmony_ci		     w = (w < bud) ? w : bud) {
272462306a36Sopenharmony_ci			if (bsz >= le32_to_cpu(tp->dmt_nleafs)) {
272562306a36Sopenharmony_ci				jfs_err("JFS: block map error in dbBackSplit");
272662306a36Sopenharmony_ci				return -EIO;
272762306a36Sopenharmony_ci			}
272862306a36Sopenharmony_ci
272962306a36Sopenharmony_ci			/* determine the buddy.
273062306a36Sopenharmony_ci			 */
273162306a36Sopenharmony_ci			bud = w ^ bsz;
273262306a36Sopenharmony_ci
273362306a36Sopenharmony_ci			/* check if this buddy is the start of the system.
273462306a36Sopenharmony_ci			 */
273562306a36Sopenharmony_ci			if (leaf[bud] != NOFREE) {
273662306a36Sopenharmony_ci				/* split the leaf at the start of the
273762306a36Sopenharmony_ci				 * system in two.
273862306a36Sopenharmony_ci				 */
273962306a36Sopenharmony_ci				cursz = leaf[bud] - 1;
274062306a36Sopenharmony_ci				dbSplit(tp, bud, cursz, cursz, is_ctl);
274162306a36Sopenharmony_ci				break;
274262306a36Sopenharmony_ci			}
274362306a36Sopenharmony_ci		}
274462306a36Sopenharmony_ci	}
274562306a36Sopenharmony_ci
274662306a36Sopenharmony_ci	if (leaf[leafno] != size) {
274762306a36Sopenharmony_ci		jfs_err("JFS: wrong leaf value in dbBackSplit");
274862306a36Sopenharmony_ci		return -EIO;
274962306a36Sopenharmony_ci	}
275062306a36Sopenharmony_ci	return 0;
275162306a36Sopenharmony_ci}
275262306a36Sopenharmony_ci
275362306a36Sopenharmony_ci
275462306a36Sopenharmony_ci/*
275562306a36Sopenharmony_ci * NAME:	dbJoin()
275662306a36Sopenharmony_ci *
275762306a36Sopenharmony_ci * FUNCTION:	update the leaf of a dmtree with a new value, joining
275862306a36Sopenharmony_ci *		the leaf with other leaves of the dmtree into a multi-leaf
275962306a36Sopenharmony_ci *		binary buddy system, as required.
276062306a36Sopenharmony_ci *
276162306a36Sopenharmony_ci * PARAMETERS:
276262306a36Sopenharmony_ci *	tp	- pointer to the tree containing the leaf.
276362306a36Sopenharmony_ci *	leafno	- the number of the leaf to be updated.
276462306a36Sopenharmony_ci *	newval	- the new value for the leaf.
276562306a36Sopenharmony_ci *
276662306a36Sopenharmony_ci * RETURN VALUES: none
276762306a36Sopenharmony_ci */
276862306a36Sopenharmony_cistatic int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl)
276962306a36Sopenharmony_ci{
277062306a36Sopenharmony_ci	int budsz, buddy;
277162306a36Sopenharmony_ci	s8 *leaf;
277262306a36Sopenharmony_ci
277362306a36Sopenharmony_ci	/* can the new leaf value require a join with other leaves ?
277462306a36Sopenharmony_ci	 */
277562306a36Sopenharmony_ci	if (newval >= tp->dmt_budmin) {
277662306a36Sopenharmony_ci		/* pickup a pointer to the leaves of the tree.
277762306a36Sopenharmony_ci		 */
277862306a36Sopenharmony_ci		leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
277962306a36Sopenharmony_ci
278062306a36Sopenharmony_ci		/* try to join the specified leaf into a large binary
278162306a36Sopenharmony_ci		 * buddy system.  the join proceeds by attempting to join
278262306a36Sopenharmony_ci		 * the specified leafno with its buddy (leaf) at new value.
278362306a36Sopenharmony_ci		 * if the join occurs, we attempt to join the left leaf
278462306a36Sopenharmony_ci		 * of the joined buddies with its buddy at new value + 1.
278562306a36Sopenharmony_ci		 * we continue to join until we find a buddy that cannot be
278662306a36Sopenharmony_ci		 * joined (does not have a value equal to the size of the
278762306a36Sopenharmony_ci		 * last join) or until all leaves have been joined into a
278862306a36Sopenharmony_ci		 * single system.
278962306a36Sopenharmony_ci		 *
279062306a36Sopenharmony_ci		 * get the buddy size (number of words covered) of
279162306a36Sopenharmony_ci		 * the new value.
279262306a36Sopenharmony_ci		 */
279362306a36Sopenharmony_ci		budsz = BUDSIZE(newval, tp->dmt_budmin);
279462306a36Sopenharmony_ci
279562306a36Sopenharmony_ci		/* try to join.
279662306a36Sopenharmony_ci		 */
279762306a36Sopenharmony_ci		while (budsz < le32_to_cpu(tp->dmt_nleafs)) {
279862306a36Sopenharmony_ci			/* get the buddy leaf.
279962306a36Sopenharmony_ci			 */
280062306a36Sopenharmony_ci			buddy = leafno ^ budsz;
280162306a36Sopenharmony_ci
280262306a36Sopenharmony_ci			/* if the leaf's new value is greater than its
280362306a36Sopenharmony_ci			 * buddy's value, we join no more.
280462306a36Sopenharmony_ci			 */
280562306a36Sopenharmony_ci			if (newval > leaf[buddy])
280662306a36Sopenharmony_ci				break;
280762306a36Sopenharmony_ci
280862306a36Sopenharmony_ci			/* It shouldn't be less */
280962306a36Sopenharmony_ci			if (newval < leaf[buddy])
281062306a36Sopenharmony_ci				return -EIO;
281162306a36Sopenharmony_ci
281262306a36Sopenharmony_ci			/* check which (leafno or buddy) is the left buddy.
281362306a36Sopenharmony_ci			 * the left buddy gets to claim the blocks resulting
281462306a36Sopenharmony_ci			 * from the join while the right gets to claim none.
281562306a36Sopenharmony_ci			 * the left buddy is also eligible to participate in
281662306a36Sopenharmony_ci			 * a join at the next higher level while the right
281762306a36Sopenharmony_ci			 * is not.
281862306a36Sopenharmony_ci			 *
281962306a36Sopenharmony_ci			 */
282062306a36Sopenharmony_ci			if (leafno < buddy) {
282162306a36Sopenharmony_ci				/* leafno is the left buddy.
282262306a36Sopenharmony_ci				 */
282362306a36Sopenharmony_ci				dbAdjTree(tp, buddy, NOFREE, is_ctl);
282462306a36Sopenharmony_ci			} else {
282562306a36Sopenharmony_ci				/* buddy is the left buddy and becomes
282662306a36Sopenharmony_ci				 * leafno.
282762306a36Sopenharmony_ci				 */
282862306a36Sopenharmony_ci				dbAdjTree(tp, leafno, NOFREE, is_ctl);
282962306a36Sopenharmony_ci				leafno = buddy;
283062306a36Sopenharmony_ci			}
283162306a36Sopenharmony_ci
283262306a36Sopenharmony_ci			/* on to try the next join.
283362306a36Sopenharmony_ci			 */
283462306a36Sopenharmony_ci			newval += 1;
283562306a36Sopenharmony_ci			budsz <<= 1;
283662306a36Sopenharmony_ci		}
283762306a36Sopenharmony_ci	}
283862306a36Sopenharmony_ci
283962306a36Sopenharmony_ci	/* update the leaf value.
284062306a36Sopenharmony_ci	 */
284162306a36Sopenharmony_ci	dbAdjTree(tp, leafno, newval, is_ctl);
284262306a36Sopenharmony_ci
284362306a36Sopenharmony_ci	return 0;
284462306a36Sopenharmony_ci}
284562306a36Sopenharmony_ci
284662306a36Sopenharmony_ci
284762306a36Sopenharmony_ci/*
284862306a36Sopenharmony_ci * NAME:	dbAdjTree()
284962306a36Sopenharmony_ci *
285062306a36Sopenharmony_ci * FUNCTION:	update a leaf of a dmtree with a new value, adjusting
285162306a36Sopenharmony_ci *		the dmtree, as required, to reflect the new leaf value.
285262306a36Sopenharmony_ci *		the combination of any buddies must already be done before
285362306a36Sopenharmony_ci *		this is called.
285462306a36Sopenharmony_ci *
285562306a36Sopenharmony_ci * PARAMETERS:
285662306a36Sopenharmony_ci *	tp	- pointer to the tree to be adjusted.
285762306a36Sopenharmony_ci *	leafno	- the number of the leaf to be updated.
285862306a36Sopenharmony_ci *	newval	- the new value for the leaf.
285962306a36Sopenharmony_ci *
286062306a36Sopenharmony_ci * RETURN VALUES: none
286162306a36Sopenharmony_ci */
286262306a36Sopenharmony_cistatic void dbAdjTree(dmtree_t *tp, int leafno, int newval, bool is_ctl)
286362306a36Sopenharmony_ci{
286462306a36Sopenharmony_ci	int lp, pp, k;
286562306a36Sopenharmony_ci	int max, size;
286662306a36Sopenharmony_ci
286762306a36Sopenharmony_ci	size = is_ctl ? CTLTREESIZE : TREESIZE;
286862306a36Sopenharmony_ci
286962306a36Sopenharmony_ci	/* pick up the index of the leaf for this leafno.
287062306a36Sopenharmony_ci	 */
287162306a36Sopenharmony_ci	lp = leafno + le32_to_cpu(tp->dmt_leafidx);
287262306a36Sopenharmony_ci
287362306a36Sopenharmony_ci	if (WARN_ON_ONCE(lp >= size || lp < 0))
287462306a36Sopenharmony_ci		return;
287562306a36Sopenharmony_ci
287662306a36Sopenharmony_ci	/* is the current value the same as the old value ?  if so,
287762306a36Sopenharmony_ci	 * there is nothing to do.
287862306a36Sopenharmony_ci	 */
287962306a36Sopenharmony_ci	if (tp->dmt_stree[lp] == newval)
288062306a36Sopenharmony_ci		return;
288162306a36Sopenharmony_ci
288262306a36Sopenharmony_ci	/* set the new value.
288362306a36Sopenharmony_ci	 */
288462306a36Sopenharmony_ci	tp->dmt_stree[lp] = newval;
288562306a36Sopenharmony_ci
288662306a36Sopenharmony_ci	/* bubble the new value up the tree as required.
288762306a36Sopenharmony_ci	 */
288862306a36Sopenharmony_ci	for (k = 0; k < le32_to_cpu(tp->dmt_height); k++) {
288962306a36Sopenharmony_ci		/* get the index of the first leaf of the 4 leaf
289062306a36Sopenharmony_ci		 * group containing the specified leaf (leafno).
289162306a36Sopenharmony_ci		 */
289262306a36Sopenharmony_ci		lp = ((lp - 1) & ~0x03) + 1;
289362306a36Sopenharmony_ci
289462306a36Sopenharmony_ci		/* get the index of the parent of this 4 leaf group.
289562306a36Sopenharmony_ci		 */
289662306a36Sopenharmony_ci		pp = (lp - 1) >> 2;
289762306a36Sopenharmony_ci
289862306a36Sopenharmony_ci		/* determine the maximum of the 4 leaves.
289962306a36Sopenharmony_ci		 */
290062306a36Sopenharmony_ci		max = TREEMAX(&tp->dmt_stree[lp]);
290162306a36Sopenharmony_ci
290262306a36Sopenharmony_ci		/* if the maximum of the 4 is the same as the
290362306a36Sopenharmony_ci		 * parent's value, we're done.
290462306a36Sopenharmony_ci		 */
290562306a36Sopenharmony_ci		if (tp->dmt_stree[pp] == max)
290662306a36Sopenharmony_ci			break;
290762306a36Sopenharmony_ci
290862306a36Sopenharmony_ci		/* parent gets new value.
290962306a36Sopenharmony_ci		 */
291062306a36Sopenharmony_ci		tp->dmt_stree[pp] = max;
291162306a36Sopenharmony_ci
291262306a36Sopenharmony_ci		/* parent becomes leaf for next go-round.
291362306a36Sopenharmony_ci		 */
291462306a36Sopenharmony_ci		lp = pp;
291562306a36Sopenharmony_ci	}
291662306a36Sopenharmony_ci}
291762306a36Sopenharmony_ci
291862306a36Sopenharmony_ci
291962306a36Sopenharmony_ci/*
292062306a36Sopenharmony_ci * NAME:	dbFindLeaf()
292162306a36Sopenharmony_ci *
292262306a36Sopenharmony_ci * FUNCTION:	search a dmtree_t for sufficient free blocks, returning
292362306a36Sopenharmony_ci *		the index of a leaf describing the free blocks if
292462306a36Sopenharmony_ci *		sufficient free blocks are found.
292562306a36Sopenharmony_ci *
292662306a36Sopenharmony_ci *		the search starts at the top of the dmtree_t tree and
292762306a36Sopenharmony_ci *		proceeds down the tree to the leftmost leaf with sufficient
292862306a36Sopenharmony_ci *		free space.
292962306a36Sopenharmony_ci *
293062306a36Sopenharmony_ci * PARAMETERS:
293162306a36Sopenharmony_ci *	tp	- pointer to the tree to be searched.
293262306a36Sopenharmony_ci *	l2nb	- log2 number of free blocks to search for.
293362306a36Sopenharmony_ci *	leafidx	- return pointer to be set to the index of the leaf
293462306a36Sopenharmony_ci *		  describing at least l2nb free blocks if sufficient
293562306a36Sopenharmony_ci *		  free blocks are found.
293662306a36Sopenharmony_ci *	is_ctl	- determines if the tree is of type ctl
293762306a36Sopenharmony_ci *
293862306a36Sopenharmony_ci * RETURN VALUES:
293962306a36Sopenharmony_ci *	0	- success
294062306a36Sopenharmony_ci *	-ENOSPC	- insufficient free blocks.
294162306a36Sopenharmony_ci */
294262306a36Sopenharmony_cistatic int dbFindLeaf(dmtree_t *tp, int l2nb, int *leafidx, bool is_ctl)
294362306a36Sopenharmony_ci{
294462306a36Sopenharmony_ci	int ti, n = 0, k, x = 0;
294562306a36Sopenharmony_ci	int max_size;
294662306a36Sopenharmony_ci
294762306a36Sopenharmony_ci	max_size = is_ctl ? CTLTREESIZE : TREESIZE;
294862306a36Sopenharmony_ci
294962306a36Sopenharmony_ci	/* first check the root of the tree to see if there is
295062306a36Sopenharmony_ci	 * sufficient free space.
295162306a36Sopenharmony_ci	 */
295262306a36Sopenharmony_ci	if (l2nb > tp->dmt_stree[ROOT])
295362306a36Sopenharmony_ci		return -ENOSPC;
295462306a36Sopenharmony_ci
295562306a36Sopenharmony_ci	/* sufficient free space available. now search down the tree
295662306a36Sopenharmony_ci	 * starting at the next level for the leftmost leaf that
295762306a36Sopenharmony_ci	 * describes sufficient free space.
295862306a36Sopenharmony_ci	 */
295962306a36Sopenharmony_ci	for (k = le32_to_cpu(tp->dmt_height), ti = 1;
296062306a36Sopenharmony_ci	     k > 0; k--, ti = ((ti + n) << 2) + 1) {
296162306a36Sopenharmony_ci		/* search the four nodes at this level, starting from
296262306a36Sopenharmony_ci		 * the left.
296362306a36Sopenharmony_ci		 */
296462306a36Sopenharmony_ci		for (x = ti, n = 0; n < 4; n++) {
296562306a36Sopenharmony_ci			/* sufficient free space found.  move to the next
296662306a36Sopenharmony_ci			 * level (or quit if this is the last level).
296762306a36Sopenharmony_ci			 */
296862306a36Sopenharmony_ci			if (x + n > max_size)
296962306a36Sopenharmony_ci				return -ENOSPC;
297062306a36Sopenharmony_ci			if (l2nb <= tp->dmt_stree[x + n])
297162306a36Sopenharmony_ci				break;
297262306a36Sopenharmony_ci		}
297362306a36Sopenharmony_ci
297462306a36Sopenharmony_ci		/* better have found something since the higher
297562306a36Sopenharmony_ci		 * levels of the tree said it was here.
297662306a36Sopenharmony_ci		 */
297762306a36Sopenharmony_ci		assert(n < 4);
297862306a36Sopenharmony_ci	}
297962306a36Sopenharmony_ci
298062306a36Sopenharmony_ci	/* set the return to the leftmost leaf describing sufficient
298162306a36Sopenharmony_ci	 * free space.
298262306a36Sopenharmony_ci	 */
298362306a36Sopenharmony_ci	*leafidx = x + n - le32_to_cpu(tp->dmt_leafidx);
298462306a36Sopenharmony_ci
298562306a36Sopenharmony_ci	return (0);
298662306a36Sopenharmony_ci}
298762306a36Sopenharmony_ci
298862306a36Sopenharmony_ci
298962306a36Sopenharmony_ci/*
299062306a36Sopenharmony_ci * NAME:	dbFindBits()
299162306a36Sopenharmony_ci *
299262306a36Sopenharmony_ci * FUNCTION:	find a specified number of binary buddy free bits within a
299362306a36Sopenharmony_ci *		dmap bitmap word value.
299462306a36Sopenharmony_ci *
299562306a36Sopenharmony_ci *		this routine searches the bitmap value for (1 << l2nb) free
299662306a36Sopenharmony_ci *		bits at (1 << l2nb) alignments within the value.
299762306a36Sopenharmony_ci *
299862306a36Sopenharmony_ci * PARAMETERS:
299962306a36Sopenharmony_ci *	word	-  dmap bitmap word value.
300062306a36Sopenharmony_ci *	l2nb	-  number of free bits specified as a log2 number.
300162306a36Sopenharmony_ci *
300262306a36Sopenharmony_ci * RETURN VALUES:
300362306a36Sopenharmony_ci *	starting bit number of free bits.
300462306a36Sopenharmony_ci */
300562306a36Sopenharmony_cistatic int dbFindBits(u32 word, int l2nb)
300662306a36Sopenharmony_ci{
300762306a36Sopenharmony_ci	int bitno, nb;
300862306a36Sopenharmony_ci	u32 mask;
300962306a36Sopenharmony_ci
301062306a36Sopenharmony_ci	/* get the number of bits.
301162306a36Sopenharmony_ci	 */
301262306a36Sopenharmony_ci	nb = 1 << l2nb;
301362306a36Sopenharmony_ci	assert(nb <= DBWORD);
301462306a36Sopenharmony_ci
301562306a36Sopenharmony_ci	/* complement the word so we can use a mask (i.e. 0s represent
301662306a36Sopenharmony_ci	 * free bits) and compute the mask.
301762306a36Sopenharmony_ci	 */
301862306a36Sopenharmony_ci	word = ~word;
301962306a36Sopenharmony_ci	mask = ONES << (DBWORD - nb);
302062306a36Sopenharmony_ci
302162306a36Sopenharmony_ci	/* scan the word for nb free bits at nb alignments.
302262306a36Sopenharmony_ci	 */
302362306a36Sopenharmony_ci	for (bitno = 0; mask != 0; bitno += nb, mask >>= nb) {
302462306a36Sopenharmony_ci		if ((mask & word) == mask)
302562306a36Sopenharmony_ci			break;
302662306a36Sopenharmony_ci	}
302762306a36Sopenharmony_ci
302862306a36Sopenharmony_ci	ASSERT(bitno < 32);
302962306a36Sopenharmony_ci
303062306a36Sopenharmony_ci	/* return the bit number.
303162306a36Sopenharmony_ci	 */
303262306a36Sopenharmony_ci	return (bitno);
303362306a36Sopenharmony_ci}
303462306a36Sopenharmony_ci
303562306a36Sopenharmony_ci
303662306a36Sopenharmony_ci/*
303762306a36Sopenharmony_ci * NAME:	dbMaxBud(u8 *cp)
303862306a36Sopenharmony_ci *
303962306a36Sopenharmony_ci * FUNCTION:	determine the largest binary buddy string of free
304062306a36Sopenharmony_ci *		bits within 32-bits of the map.
304162306a36Sopenharmony_ci *
304262306a36Sopenharmony_ci * PARAMETERS:
304362306a36Sopenharmony_ci *	cp	-  pointer to the 32-bit value.
304462306a36Sopenharmony_ci *
304562306a36Sopenharmony_ci * RETURN VALUES:
304662306a36Sopenharmony_ci *	largest binary buddy of free bits within a dmap word.
304762306a36Sopenharmony_ci */
304862306a36Sopenharmony_cistatic int dbMaxBud(u8 * cp)
304962306a36Sopenharmony_ci{
305062306a36Sopenharmony_ci	signed char tmp1, tmp2;
305162306a36Sopenharmony_ci
305262306a36Sopenharmony_ci	/* check if the wmap word is all free. if so, the
305362306a36Sopenharmony_ci	 * free buddy size is BUDMIN.
305462306a36Sopenharmony_ci	 */
305562306a36Sopenharmony_ci	if (*((uint *) cp) == 0)
305662306a36Sopenharmony_ci		return (BUDMIN);
305762306a36Sopenharmony_ci
305862306a36Sopenharmony_ci	/* check if the wmap word is half free. if so, the
305962306a36Sopenharmony_ci	 * free buddy size is BUDMIN-1.
306062306a36Sopenharmony_ci	 */
306162306a36Sopenharmony_ci	if (*((u16 *) cp) == 0 || *((u16 *) cp + 1) == 0)
306262306a36Sopenharmony_ci		return (BUDMIN - 1);
306362306a36Sopenharmony_ci
306462306a36Sopenharmony_ci	/* not all free or half free. determine the free buddy
306562306a36Sopenharmony_ci	 * size thru table lookup using quarters of the wmap word.
306662306a36Sopenharmony_ci	 */
306762306a36Sopenharmony_ci	tmp1 = max(budtab[cp[2]], budtab[cp[3]]);
306862306a36Sopenharmony_ci	tmp2 = max(budtab[cp[0]], budtab[cp[1]]);
306962306a36Sopenharmony_ci	return (max(tmp1, tmp2));
307062306a36Sopenharmony_ci}
307162306a36Sopenharmony_ci
307262306a36Sopenharmony_ci
307362306a36Sopenharmony_ci/*
307462306a36Sopenharmony_ci * NAME:	cnttz(uint word)
307562306a36Sopenharmony_ci *
307662306a36Sopenharmony_ci * FUNCTION:	determine the number of trailing zeros within a 32-bit
307762306a36Sopenharmony_ci *		value.
307862306a36Sopenharmony_ci *
307962306a36Sopenharmony_ci * PARAMETERS:
308062306a36Sopenharmony_ci *	value	-  32-bit value to be examined.
308162306a36Sopenharmony_ci *
308262306a36Sopenharmony_ci * RETURN VALUES:
308362306a36Sopenharmony_ci *	count of trailing zeros
308462306a36Sopenharmony_ci */
308562306a36Sopenharmony_cistatic int cnttz(u32 word)
308662306a36Sopenharmony_ci{
308762306a36Sopenharmony_ci	int n;
308862306a36Sopenharmony_ci
308962306a36Sopenharmony_ci	for (n = 0; n < 32; n++, word >>= 1) {
309062306a36Sopenharmony_ci		if (word & 0x01)
309162306a36Sopenharmony_ci			break;
309262306a36Sopenharmony_ci	}
309362306a36Sopenharmony_ci
309462306a36Sopenharmony_ci	return (n);
309562306a36Sopenharmony_ci}
309662306a36Sopenharmony_ci
309762306a36Sopenharmony_ci
309862306a36Sopenharmony_ci/*
309962306a36Sopenharmony_ci * NAME:	cntlz(u32 value)
310062306a36Sopenharmony_ci *
310162306a36Sopenharmony_ci * FUNCTION:	determine the number of leading zeros within a 32-bit
310262306a36Sopenharmony_ci *		value.
310362306a36Sopenharmony_ci *
310462306a36Sopenharmony_ci * PARAMETERS:
310562306a36Sopenharmony_ci *	value	-  32-bit value to be examined.
310662306a36Sopenharmony_ci *
310762306a36Sopenharmony_ci * RETURN VALUES:
310862306a36Sopenharmony_ci *	count of leading zeros
310962306a36Sopenharmony_ci */
311062306a36Sopenharmony_cistatic int cntlz(u32 value)
311162306a36Sopenharmony_ci{
311262306a36Sopenharmony_ci	int n;
311362306a36Sopenharmony_ci
311462306a36Sopenharmony_ci	for (n = 0; n < 32; n++, value <<= 1) {
311562306a36Sopenharmony_ci		if (value & HIGHORDER)
311662306a36Sopenharmony_ci			break;
311762306a36Sopenharmony_ci	}
311862306a36Sopenharmony_ci	return (n);
311962306a36Sopenharmony_ci}
312062306a36Sopenharmony_ci
312162306a36Sopenharmony_ci
312262306a36Sopenharmony_ci/*
312362306a36Sopenharmony_ci * NAME:	blkstol2(s64 nb)
312462306a36Sopenharmony_ci *
312562306a36Sopenharmony_ci * FUNCTION:	convert a block count to its log2 value. if the block
312662306a36Sopenharmony_ci *		count is not a l2 multiple, it is rounded up to the next
312762306a36Sopenharmony_ci *		larger l2 multiple.
312862306a36Sopenharmony_ci *
312962306a36Sopenharmony_ci * PARAMETERS:
313062306a36Sopenharmony_ci *	nb	-  number of blocks
313162306a36Sopenharmony_ci *
313262306a36Sopenharmony_ci * RETURN VALUES:
313362306a36Sopenharmony_ci *	log2 number of blocks
313462306a36Sopenharmony_ci */
313562306a36Sopenharmony_cistatic int blkstol2(s64 nb)
313662306a36Sopenharmony_ci{
313762306a36Sopenharmony_ci	int l2nb;
313862306a36Sopenharmony_ci	s64 mask;		/* meant to be signed */
313962306a36Sopenharmony_ci
314062306a36Sopenharmony_ci	mask = (s64) 1 << (64 - 1);
314162306a36Sopenharmony_ci
314262306a36Sopenharmony_ci	/* count the leading bits.
314362306a36Sopenharmony_ci	 */
314462306a36Sopenharmony_ci	for (l2nb = 0; l2nb < 64; l2nb++, mask >>= 1) {
314562306a36Sopenharmony_ci		/* leading bit found.
314662306a36Sopenharmony_ci		 */
314762306a36Sopenharmony_ci		if (nb & mask) {
314862306a36Sopenharmony_ci			/* determine the l2 value.
314962306a36Sopenharmony_ci			 */
315062306a36Sopenharmony_ci			l2nb = (64 - 1) - l2nb;
315162306a36Sopenharmony_ci
315262306a36Sopenharmony_ci			/* check if we need to round up.
315362306a36Sopenharmony_ci			 */
315462306a36Sopenharmony_ci			if (~mask & nb)
315562306a36Sopenharmony_ci				l2nb++;
315662306a36Sopenharmony_ci
315762306a36Sopenharmony_ci			return (l2nb);
315862306a36Sopenharmony_ci		}
315962306a36Sopenharmony_ci	}
316062306a36Sopenharmony_ci	assert(0);
316162306a36Sopenharmony_ci	return 0;		/* fix compiler warning */
316262306a36Sopenharmony_ci}
316362306a36Sopenharmony_ci
316462306a36Sopenharmony_ci
316562306a36Sopenharmony_ci/*
316662306a36Sopenharmony_ci * NAME:	dbAllocBottomUp()
316762306a36Sopenharmony_ci *
316862306a36Sopenharmony_ci * FUNCTION:	alloc the specified block range from the working block
316962306a36Sopenharmony_ci *		allocation map.
317062306a36Sopenharmony_ci *
317162306a36Sopenharmony_ci *		the blocks will be alloc from the working map one dmap
317262306a36Sopenharmony_ci *		at a time.
317362306a36Sopenharmony_ci *
317462306a36Sopenharmony_ci * PARAMETERS:
317562306a36Sopenharmony_ci *	ip	-  pointer to in-core inode;
317662306a36Sopenharmony_ci *	blkno	-  starting block number to be freed.
317762306a36Sopenharmony_ci *	nblocks	-  number of blocks to be freed.
317862306a36Sopenharmony_ci *
317962306a36Sopenharmony_ci * RETURN VALUES:
318062306a36Sopenharmony_ci *	0	- success
318162306a36Sopenharmony_ci *	-EIO	- i/o error
318262306a36Sopenharmony_ci */
318362306a36Sopenharmony_ciint dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
318462306a36Sopenharmony_ci{
318562306a36Sopenharmony_ci	struct metapage *mp;
318662306a36Sopenharmony_ci	struct dmap *dp;
318762306a36Sopenharmony_ci	int nb, rc;
318862306a36Sopenharmony_ci	s64 lblkno, rem;
318962306a36Sopenharmony_ci	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
319062306a36Sopenharmony_ci	struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
319162306a36Sopenharmony_ci
319262306a36Sopenharmony_ci	IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
319362306a36Sopenharmony_ci
319462306a36Sopenharmony_ci	/* block to be allocated better be within the mapsize. */
319562306a36Sopenharmony_ci	ASSERT(nblocks <= bmp->db_mapsize - blkno);
319662306a36Sopenharmony_ci
319762306a36Sopenharmony_ci	/*
319862306a36Sopenharmony_ci	 * allocate the blocks a dmap at a time.
319962306a36Sopenharmony_ci	 */
320062306a36Sopenharmony_ci	mp = NULL;
320162306a36Sopenharmony_ci	for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) {
320262306a36Sopenharmony_ci		/* release previous dmap if any */
320362306a36Sopenharmony_ci		if (mp) {
320462306a36Sopenharmony_ci			write_metapage(mp);
320562306a36Sopenharmony_ci		}
320662306a36Sopenharmony_ci
320762306a36Sopenharmony_ci		/* get the buffer for the current dmap. */
320862306a36Sopenharmony_ci		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
320962306a36Sopenharmony_ci		mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
321062306a36Sopenharmony_ci		if (mp == NULL) {
321162306a36Sopenharmony_ci			IREAD_UNLOCK(ipbmap);
321262306a36Sopenharmony_ci			return -EIO;
321362306a36Sopenharmony_ci		}
321462306a36Sopenharmony_ci		dp = (struct dmap *) mp->data;
321562306a36Sopenharmony_ci
321662306a36Sopenharmony_ci		/* determine the number of blocks to be allocated from
321762306a36Sopenharmony_ci		 * this dmap.
321862306a36Sopenharmony_ci		 */
321962306a36Sopenharmony_ci		nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
322062306a36Sopenharmony_ci
322162306a36Sopenharmony_ci		/* allocate the blocks. */
322262306a36Sopenharmony_ci		if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) {
322362306a36Sopenharmony_ci			release_metapage(mp);
322462306a36Sopenharmony_ci			IREAD_UNLOCK(ipbmap);
322562306a36Sopenharmony_ci			return (rc);
322662306a36Sopenharmony_ci		}
322762306a36Sopenharmony_ci	}
322862306a36Sopenharmony_ci
322962306a36Sopenharmony_ci	/* write the last buffer. */
323062306a36Sopenharmony_ci	write_metapage(mp);
323162306a36Sopenharmony_ci
323262306a36Sopenharmony_ci	IREAD_UNLOCK(ipbmap);
323362306a36Sopenharmony_ci
323462306a36Sopenharmony_ci	return (0);
323562306a36Sopenharmony_ci}
323662306a36Sopenharmony_ci
323762306a36Sopenharmony_ci
323862306a36Sopenharmony_cistatic int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
323962306a36Sopenharmony_ci			 int nblocks)
324062306a36Sopenharmony_ci{
324162306a36Sopenharmony_ci	int rc;
324262306a36Sopenharmony_ci	int dbitno, word, rembits, nb, nwords, wbitno, agno;
324362306a36Sopenharmony_ci	s8 oldroot;
324462306a36Sopenharmony_ci	struct dmaptree *tp = (struct dmaptree *) & dp->tree;
324562306a36Sopenharmony_ci
324662306a36Sopenharmony_ci	/* save the current value of the root (i.e. maximum free string)
324762306a36Sopenharmony_ci	 * of the dmap tree.
324862306a36Sopenharmony_ci	 */
324962306a36Sopenharmony_ci	oldroot = tp->stree[ROOT];
325062306a36Sopenharmony_ci
325162306a36Sopenharmony_ci	/* determine the bit number and word within the dmap of the
325262306a36Sopenharmony_ci	 * starting block.
325362306a36Sopenharmony_ci	 */
325462306a36Sopenharmony_ci	dbitno = blkno & (BPERDMAP - 1);
325562306a36Sopenharmony_ci	word = dbitno >> L2DBWORD;
325662306a36Sopenharmony_ci
325762306a36Sopenharmony_ci	/* block range better be within the dmap */
325862306a36Sopenharmony_ci	assert(dbitno + nblocks <= BPERDMAP);
325962306a36Sopenharmony_ci
326062306a36Sopenharmony_ci	/* allocate the bits of the dmap's words corresponding to the block
326162306a36Sopenharmony_ci	 * range. not all bits of the first and last words may be contained
326262306a36Sopenharmony_ci	 * within the block range.  if this is the case, we'll work against
326362306a36Sopenharmony_ci	 * those words (i.e. partial first and/or last) on an individual basis
326462306a36Sopenharmony_ci	 * (a single pass), allocating the bits of interest by hand and
326562306a36Sopenharmony_ci	 * updating the leaf corresponding to the dmap word. a single pass
326662306a36Sopenharmony_ci	 * will be used for all dmap words fully contained within the
326762306a36Sopenharmony_ci	 * specified range.  within this pass, the bits of all fully contained
326862306a36Sopenharmony_ci	 * dmap words will be marked as free in a single shot and the leaves
326962306a36Sopenharmony_ci	 * will be updated. a single leaf may describe the free space of
327062306a36Sopenharmony_ci	 * multiple dmap words, so we may update only a subset of the actual
327162306a36Sopenharmony_ci	 * leaves corresponding to the dmap words of the block range.
327262306a36Sopenharmony_ci	 */
327362306a36Sopenharmony_ci	for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
327462306a36Sopenharmony_ci		/* determine the bit number within the word and
327562306a36Sopenharmony_ci		 * the number of bits within the word.
327662306a36Sopenharmony_ci		 */
327762306a36Sopenharmony_ci		wbitno = dbitno & (DBWORD - 1);
327862306a36Sopenharmony_ci		nb = min(rembits, DBWORD - wbitno);
327962306a36Sopenharmony_ci
328062306a36Sopenharmony_ci		/* check if only part of a word is to be allocated.
328162306a36Sopenharmony_ci		 */
328262306a36Sopenharmony_ci		if (nb < DBWORD) {
328362306a36Sopenharmony_ci			/* allocate (set to 1) the appropriate bits within
328462306a36Sopenharmony_ci			 * this dmap word.
328562306a36Sopenharmony_ci			 */
328662306a36Sopenharmony_ci			dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb)
328762306a36Sopenharmony_ci						      >> wbitno);
328862306a36Sopenharmony_ci
328962306a36Sopenharmony_ci			word++;
329062306a36Sopenharmony_ci		} else {
329162306a36Sopenharmony_ci			/* one or more dmap words are fully contained
329262306a36Sopenharmony_ci			 * within the block range.  determine how many
329362306a36Sopenharmony_ci			 * words and allocate (set to 1) the bits of these
329462306a36Sopenharmony_ci			 * words.
329562306a36Sopenharmony_ci			 */
329662306a36Sopenharmony_ci			nwords = rembits >> L2DBWORD;
329762306a36Sopenharmony_ci			memset(&dp->wmap[word], (int) ONES, nwords * 4);
329862306a36Sopenharmony_ci
329962306a36Sopenharmony_ci			/* determine how many bits */
330062306a36Sopenharmony_ci			nb = nwords << L2DBWORD;
330162306a36Sopenharmony_ci			word += nwords;
330262306a36Sopenharmony_ci		}
330362306a36Sopenharmony_ci	}
330462306a36Sopenharmony_ci
330562306a36Sopenharmony_ci	/* update the free count for this dmap */
330662306a36Sopenharmony_ci	le32_add_cpu(&dp->nfree, -nblocks);
330762306a36Sopenharmony_ci
330862306a36Sopenharmony_ci	/* reconstruct summary tree */
330962306a36Sopenharmony_ci	dbInitDmapTree(dp);
331062306a36Sopenharmony_ci
331162306a36Sopenharmony_ci	BMAP_LOCK(bmp);
331262306a36Sopenharmony_ci
331362306a36Sopenharmony_ci	/* if this allocation group is completely free,
331462306a36Sopenharmony_ci	 * update the highest active allocation group number
331562306a36Sopenharmony_ci	 * if this allocation group is the new max.
331662306a36Sopenharmony_ci	 */
331762306a36Sopenharmony_ci	agno = blkno >> bmp->db_agl2size;
331862306a36Sopenharmony_ci	if (agno > bmp->db_maxag)
331962306a36Sopenharmony_ci		bmp->db_maxag = agno;
332062306a36Sopenharmony_ci
332162306a36Sopenharmony_ci	/* update the free count for the allocation group and map */
332262306a36Sopenharmony_ci	bmp->db_agfree[agno] -= nblocks;
332362306a36Sopenharmony_ci	bmp->db_nfree -= nblocks;
332462306a36Sopenharmony_ci
332562306a36Sopenharmony_ci	BMAP_UNLOCK(bmp);
332662306a36Sopenharmony_ci
332762306a36Sopenharmony_ci	/* if the root has not changed, done. */
332862306a36Sopenharmony_ci	if (tp->stree[ROOT] == oldroot)
332962306a36Sopenharmony_ci		return (0);
333062306a36Sopenharmony_ci
333162306a36Sopenharmony_ci	/* root changed. bubble the change up to the dmap control pages.
333262306a36Sopenharmony_ci	 * if the adjustment of the upper level control pages fails,
333362306a36Sopenharmony_ci	 * backout the bit allocation (thus making everything consistent).
333462306a36Sopenharmony_ci	 */
333562306a36Sopenharmony_ci	if ((rc = dbAdjCtl(bmp, blkno, tp->stree[ROOT], 1, 0)))
333662306a36Sopenharmony_ci		dbFreeBits(bmp, dp, blkno, nblocks);
333762306a36Sopenharmony_ci
333862306a36Sopenharmony_ci	return (rc);
333962306a36Sopenharmony_ci}
334062306a36Sopenharmony_ci
334162306a36Sopenharmony_ci
334262306a36Sopenharmony_ci/*
334362306a36Sopenharmony_ci * NAME:	dbExtendFS()
334462306a36Sopenharmony_ci *
334562306a36Sopenharmony_ci * FUNCTION:	extend bmap from blkno for nblocks;
334662306a36Sopenharmony_ci *		dbExtendFS() updates bmap ready for dbAllocBottomUp();
334762306a36Sopenharmony_ci *
334862306a36Sopenharmony_ci * L2
334962306a36Sopenharmony_ci *  |
335062306a36Sopenharmony_ci *   L1---------------------------------L1
335162306a36Sopenharmony_ci *    |					 |
335262306a36Sopenharmony_ci *     L0---------L0---------L0		  L0---------L0---------L0
335362306a36Sopenharmony_ci *      |	   |	      |		   |	      |		 |
335462306a36Sopenharmony_ci *	 d0,...,dn  d0,...,dn  d0,...,dn    d0,...,dn  d0,...,dn  d0,.,dm;
335562306a36Sopenharmony_ci * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm
335662306a36Sopenharmony_ci *
335762306a36Sopenharmony_ci * <---old---><----------------------------extend----------------------->
335862306a36Sopenharmony_ci */
335962306a36Sopenharmony_ciint dbExtendFS(struct inode *ipbmap, s64 blkno,	s64 nblocks)
336062306a36Sopenharmony_ci{
336162306a36Sopenharmony_ci	struct jfs_sb_info *sbi = JFS_SBI(ipbmap->i_sb);
336262306a36Sopenharmony_ci	int nbperpage = sbi->nbperpage;
336362306a36Sopenharmony_ci	int i, i0 = true, j, j0 = true, k, n;
336462306a36Sopenharmony_ci	s64 newsize;
336562306a36Sopenharmony_ci	s64 p;
336662306a36Sopenharmony_ci	struct metapage *mp, *l2mp, *l1mp = NULL, *l0mp = NULL;
336762306a36Sopenharmony_ci	struct dmapctl *l2dcp, *l1dcp, *l0dcp;
336862306a36Sopenharmony_ci	struct dmap *dp;
336962306a36Sopenharmony_ci	s8 *l0leaf, *l1leaf, *l2leaf;
337062306a36Sopenharmony_ci	struct bmap *bmp = sbi->bmap;
337162306a36Sopenharmony_ci	int agno, l2agsize, oldl2agsize;
337262306a36Sopenharmony_ci	s64 ag_rem;
337362306a36Sopenharmony_ci
337462306a36Sopenharmony_ci	newsize = blkno + nblocks;
337562306a36Sopenharmony_ci
337662306a36Sopenharmony_ci	jfs_info("dbExtendFS: blkno:%Ld nblocks:%Ld newsize:%Ld",
337762306a36Sopenharmony_ci		 (long long) blkno, (long long) nblocks, (long long) newsize);
337862306a36Sopenharmony_ci
337962306a36Sopenharmony_ci	/*
338062306a36Sopenharmony_ci	 *	initialize bmap control page.
338162306a36Sopenharmony_ci	 *
338262306a36Sopenharmony_ci	 * all the data in bmap control page should exclude
338362306a36Sopenharmony_ci	 * the mkfs hidden dmap page.
338462306a36Sopenharmony_ci	 */
338562306a36Sopenharmony_ci
338662306a36Sopenharmony_ci	/* update mapsize */
338762306a36Sopenharmony_ci	bmp->db_mapsize = newsize;
338862306a36Sopenharmony_ci	bmp->db_maxlevel = BMAPSZTOLEV(bmp->db_mapsize);
338962306a36Sopenharmony_ci
339062306a36Sopenharmony_ci	/* compute new AG size */
339162306a36Sopenharmony_ci	l2agsize = dbGetL2AGSize(newsize);
339262306a36Sopenharmony_ci	oldl2agsize = bmp->db_agl2size;
339362306a36Sopenharmony_ci
339462306a36Sopenharmony_ci	bmp->db_agl2size = l2agsize;
339562306a36Sopenharmony_ci	bmp->db_agsize = 1 << l2agsize;
339662306a36Sopenharmony_ci
339762306a36Sopenharmony_ci	/* compute new number of AG */
339862306a36Sopenharmony_ci	agno = bmp->db_numag;
339962306a36Sopenharmony_ci	bmp->db_numag = newsize >> l2agsize;
340062306a36Sopenharmony_ci	bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0;
340162306a36Sopenharmony_ci
340262306a36Sopenharmony_ci	/*
340362306a36Sopenharmony_ci	 *	reconfigure db_agfree[]
340462306a36Sopenharmony_ci	 * from old AG configuration to new AG configuration;
340562306a36Sopenharmony_ci	 *
340662306a36Sopenharmony_ci	 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
340762306a36Sopenharmony_ci	 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
340862306a36Sopenharmony_ci	 * note: new AG size = old AG size * (2**x).
340962306a36Sopenharmony_ci	 */
341062306a36Sopenharmony_ci	if (l2agsize == oldl2agsize)
341162306a36Sopenharmony_ci		goto extend;
341262306a36Sopenharmony_ci	k = 1 << (l2agsize - oldl2agsize);
341362306a36Sopenharmony_ci	ag_rem = bmp->db_agfree[0];	/* save agfree[0] */
341462306a36Sopenharmony_ci	for (i = 0, n = 0; i < agno; n++) {
341562306a36Sopenharmony_ci		bmp->db_agfree[n] = 0;	/* init collection point */
341662306a36Sopenharmony_ci
341762306a36Sopenharmony_ci		/* coalesce contiguous k AGs; */
341862306a36Sopenharmony_ci		for (j = 0; j < k && i < agno; j++, i++) {
341962306a36Sopenharmony_ci			/* merge AGi to AGn */
342062306a36Sopenharmony_ci			bmp->db_agfree[n] += bmp->db_agfree[i];
342162306a36Sopenharmony_ci		}
342262306a36Sopenharmony_ci	}
342362306a36Sopenharmony_ci	bmp->db_agfree[0] += ag_rem;	/* restore agfree[0] */
342462306a36Sopenharmony_ci
342562306a36Sopenharmony_ci	for (; n < MAXAG; n++)
342662306a36Sopenharmony_ci		bmp->db_agfree[n] = 0;
342762306a36Sopenharmony_ci
342862306a36Sopenharmony_ci	/*
342962306a36Sopenharmony_ci	 * update highest active ag number
343062306a36Sopenharmony_ci	 */
343162306a36Sopenharmony_ci
343262306a36Sopenharmony_ci	bmp->db_maxag = bmp->db_maxag / k;
343362306a36Sopenharmony_ci
343462306a36Sopenharmony_ci	/*
343562306a36Sopenharmony_ci	 *	extend bmap
343662306a36Sopenharmony_ci	 *
343762306a36Sopenharmony_ci	 * update bit maps and corresponding level control pages;
343862306a36Sopenharmony_ci	 * global control page db_nfree, db_agfree[agno], db_maxfreebud;
343962306a36Sopenharmony_ci	 */
344062306a36Sopenharmony_ci      extend:
344162306a36Sopenharmony_ci	/* get L2 page */
344262306a36Sopenharmony_ci	p = BMAPBLKNO + nbperpage;	/* L2 page */
344362306a36Sopenharmony_ci	l2mp = read_metapage(ipbmap, p, PSIZE, 0);
344462306a36Sopenharmony_ci	if (!l2mp) {
344562306a36Sopenharmony_ci		jfs_error(ipbmap->i_sb, "L2 page could not be read\n");
344662306a36Sopenharmony_ci		return -EIO;
344762306a36Sopenharmony_ci	}
344862306a36Sopenharmony_ci	l2dcp = (struct dmapctl *) l2mp->data;
344962306a36Sopenharmony_ci
345062306a36Sopenharmony_ci	/* compute start L1 */
345162306a36Sopenharmony_ci	k = blkno >> L2MAXL1SIZE;
345262306a36Sopenharmony_ci	l2leaf = l2dcp->stree + CTLLEAFIND + k;
345362306a36Sopenharmony_ci	p = BLKTOL1(blkno, sbi->l2nbperpage);	/* L1 page */
345462306a36Sopenharmony_ci
345562306a36Sopenharmony_ci	/*
345662306a36Sopenharmony_ci	 * extend each L1 in L2
345762306a36Sopenharmony_ci	 */
345862306a36Sopenharmony_ci	for (; k < LPERCTL; k++, p += nbperpage) {
345962306a36Sopenharmony_ci		/* get L1 page */
346062306a36Sopenharmony_ci		if (j0) {
346162306a36Sopenharmony_ci			/* read in L1 page: (blkno & (MAXL1SIZE - 1)) */
346262306a36Sopenharmony_ci			l1mp = read_metapage(ipbmap, p, PSIZE, 0);
346362306a36Sopenharmony_ci			if (l1mp == NULL)
346462306a36Sopenharmony_ci				goto errout;
346562306a36Sopenharmony_ci			l1dcp = (struct dmapctl *) l1mp->data;
346662306a36Sopenharmony_ci
346762306a36Sopenharmony_ci			/* compute start L0 */
346862306a36Sopenharmony_ci			j = (blkno & (MAXL1SIZE - 1)) >> L2MAXL0SIZE;
346962306a36Sopenharmony_ci			l1leaf = l1dcp->stree + CTLLEAFIND + j;
347062306a36Sopenharmony_ci			p = BLKTOL0(blkno, sbi->l2nbperpage);
347162306a36Sopenharmony_ci			j0 = false;
347262306a36Sopenharmony_ci		} else {
347362306a36Sopenharmony_ci			/* assign/init L1 page */
347462306a36Sopenharmony_ci			l1mp = get_metapage(ipbmap, p, PSIZE, 0);
347562306a36Sopenharmony_ci			if (l1mp == NULL)
347662306a36Sopenharmony_ci				goto errout;
347762306a36Sopenharmony_ci
347862306a36Sopenharmony_ci			l1dcp = (struct dmapctl *) l1mp->data;
347962306a36Sopenharmony_ci
348062306a36Sopenharmony_ci			/* compute start L0 */
348162306a36Sopenharmony_ci			j = 0;
348262306a36Sopenharmony_ci			l1leaf = l1dcp->stree + CTLLEAFIND;
348362306a36Sopenharmony_ci			p += nbperpage;	/* 1st L0 of L1.k */
348462306a36Sopenharmony_ci		}
348562306a36Sopenharmony_ci
348662306a36Sopenharmony_ci		/*
348762306a36Sopenharmony_ci		 * extend each L0 in L1
348862306a36Sopenharmony_ci		 */
348962306a36Sopenharmony_ci		for (; j < LPERCTL; j++) {
349062306a36Sopenharmony_ci			/* get L0 page */
349162306a36Sopenharmony_ci			if (i0) {
349262306a36Sopenharmony_ci				/* read in L0 page: (blkno & (MAXL0SIZE - 1)) */
349362306a36Sopenharmony_ci
349462306a36Sopenharmony_ci				l0mp = read_metapage(ipbmap, p, PSIZE, 0);
349562306a36Sopenharmony_ci				if (l0mp == NULL)
349662306a36Sopenharmony_ci					goto errout;
349762306a36Sopenharmony_ci				l0dcp = (struct dmapctl *) l0mp->data;
349862306a36Sopenharmony_ci
349962306a36Sopenharmony_ci				/* compute start dmap */
350062306a36Sopenharmony_ci				i = (blkno & (MAXL0SIZE - 1)) >>
350162306a36Sopenharmony_ci				    L2BPERDMAP;
350262306a36Sopenharmony_ci				l0leaf = l0dcp->stree + CTLLEAFIND + i;
350362306a36Sopenharmony_ci				p = BLKTODMAP(blkno,
350462306a36Sopenharmony_ci					      sbi->l2nbperpage);
350562306a36Sopenharmony_ci				i0 = false;
350662306a36Sopenharmony_ci			} else {
350762306a36Sopenharmony_ci				/* assign/init L0 page */
350862306a36Sopenharmony_ci				l0mp = get_metapage(ipbmap, p, PSIZE, 0);
350962306a36Sopenharmony_ci				if (l0mp == NULL)
351062306a36Sopenharmony_ci					goto errout;
351162306a36Sopenharmony_ci
351262306a36Sopenharmony_ci				l0dcp = (struct dmapctl *) l0mp->data;
351362306a36Sopenharmony_ci
351462306a36Sopenharmony_ci				/* compute start dmap */
351562306a36Sopenharmony_ci				i = 0;
351662306a36Sopenharmony_ci				l0leaf = l0dcp->stree + CTLLEAFIND;
351762306a36Sopenharmony_ci				p += nbperpage;	/* 1st dmap of L0.j */
351862306a36Sopenharmony_ci			}
351962306a36Sopenharmony_ci
352062306a36Sopenharmony_ci			/*
352162306a36Sopenharmony_ci			 * extend each dmap in L0
352262306a36Sopenharmony_ci			 */
352362306a36Sopenharmony_ci			for (; i < LPERCTL; i++) {
352462306a36Sopenharmony_ci				/*
352562306a36Sopenharmony_ci				 * reconstruct the dmap page, and
352662306a36Sopenharmony_ci				 * initialize corresponding parent L0 leaf
352762306a36Sopenharmony_ci				 */
352862306a36Sopenharmony_ci				if ((n = blkno & (BPERDMAP - 1))) {
352962306a36Sopenharmony_ci					/* read in dmap page: */
353062306a36Sopenharmony_ci					mp = read_metapage(ipbmap, p,
353162306a36Sopenharmony_ci							   PSIZE, 0);
353262306a36Sopenharmony_ci					if (mp == NULL)
353362306a36Sopenharmony_ci						goto errout;
353462306a36Sopenharmony_ci					n = min(nblocks, (s64)BPERDMAP - n);
353562306a36Sopenharmony_ci				} else {
353662306a36Sopenharmony_ci					/* assign/init dmap page */
353762306a36Sopenharmony_ci					mp = read_metapage(ipbmap, p,
353862306a36Sopenharmony_ci							   PSIZE, 0);
353962306a36Sopenharmony_ci					if (mp == NULL)
354062306a36Sopenharmony_ci						goto errout;
354162306a36Sopenharmony_ci
354262306a36Sopenharmony_ci					n = min_t(s64, nblocks, BPERDMAP);
354362306a36Sopenharmony_ci				}
354462306a36Sopenharmony_ci
354562306a36Sopenharmony_ci				dp = (struct dmap *) mp->data;
354662306a36Sopenharmony_ci				*l0leaf = dbInitDmap(dp, blkno, n);
354762306a36Sopenharmony_ci
354862306a36Sopenharmony_ci				bmp->db_nfree += n;
354962306a36Sopenharmony_ci				agno = le64_to_cpu(dp->start) >> l2agsize;
355062306a36Sopenharmony_ci				bmp->db_agfree[agno] += n;
355162306a36Sopenharmony_ci
355262306a36Sopenharmony_ci				write_metapage(mp);
355362306a36Sopenharmony_ci
355462306a36Sopenharmony_ci				l0leaf++;
355562306a36Sopenharmony_ci				p += nbperpage;
355662306a36Sopenharmony_ci
355762306a36Sopenharmony_ci				blkno += n;
355862306a36Sopenharmony_ci				nblocks -= n;
355962306a36Sopenharmony_ci				if (nblocks == 0)
356062306a36Sopenharmony_ci					break;
356162306a36Sopenharmony_ci			}	/* for each dmap in a L0 */
356262306a36Sopenharmony_ci
356362306a36Sopenharmony_ci			/*
356462306a36Sopenharmony_ci			 * build current L0 page from its leaves, and
356562306a36Sopenharmony_ci			 * initialize corresponding parent L1 leaf
356662306a36Sopenharmony_ci			 */
356762306a36Sopenharmony_ci			*l1leaf = dbInitDmapCtl(l0dcp, 0, ++i);
356862306a36Sopenharmony_ci			write_metapage(l0mp);
356962306a36Sopenharmony_ci			l0mp = NULL;
357062306a36Sopenharmony_ci
357162306a36Sopenharmony_ci			if (nblocks)
357262306a36Sopenharmony_ci				l1leaf++;	/* continue for next L0 */
357362306a36Sopenharmony_ci			else {
357462306a36Sopenharmony_ci				/* more than 1 L0 ? */
357562306a36Sopenharmony_ci				if (j > 0)
357662306a36Sopenharmony_ci					break;	/* build L1 page */
357762306a36Sopenharmony_ci				else {
357862306a36Sopenharmony_ci					/* summarize in global bmap page */
357962306a36Sopenharmony_ci					bmp->db_maxfreebud = *l1leaf;
358062306a36Sopenharmony_ci					release_metapage(l1mp);
358162306a36Sopenharmony_ci					release_metapage(l2mp);
358262306a36Sopenharmony_ci					goto finalize;
358362306a36Sopenharmony_ci				}
358462306a36Sopenharmony_ci			}
358562306a36Sopenharmony_ci		}		/* for each L0 in a L1 */
358662306a36Sopenharmony_ci
358762306a36Sopenharmony_ci		/*
358862306a36Sopenharmony_ci		 * build current L1 page from its leaves, and
358962306a36Sopenharmony_ci		 * initialize corresponding parent L2 leaf
359062306a36Sopenharmony_ci		 */
359162306a36Sopenharmony_ci		*l2leaf = dbInitDmapCtl(l1dcp, 1, ++j);
359262306a36Sopenharmony_ci		write_metapage(l1mp);
359362306a36Sopenharmony_ci		l1mp = NULL;
359462306a36Sopenharmony_ci
359562306a36Sopenharmony_ci		if (nblocks)
359662306a36Sopenharmony_ci			l2leaf++;	/* continue for next L1 */
359762306a36Sopenharmony_ci		else {
359862306a36Sopenharmony_ci			/* more than 1 L1 ? */
359962306a36Sopenharmony_ci			if (k > 0)
360062306a36Sopenharmony_ci				break;	/* build L2 page */
360162306a36Sopenharmony_ci			else {
360262306a36Sopenharmony_ci				/* summarize in global bmap page */
360362306a36Sopenharmony_ci				bmp->db_maxfreebud = *l2leaf;
360462306a36Sopenharmony_ci				release_metapage(l2mp);
360562306a36Sopenharmony_ci				goto finalize;
360662306a36Sopenharmony_ci			}
360762306a36Sopenharmony_ci		}
360862306a36Sopenharmony_ci	}			/* for each L1 in a L2 */
360962306a36Sopenharmony_ci
361062306a36Sopenharmony_ci	jfs_error(ipbmap->i_sb, "function has not returned as expected\n");
361162306a36Sopenharmony_cierrout:
361262306a36Sopenharmony_ci	if (l0mp)
361362306a36Sopenharmony_ci		release_metapage(l0mp);
361462306a36Sopenharmony_ci	if (l1mp)
361562306a36Sopenharmony_ci		release_metapage(l1mp);
361662306a36Sopenharmony_ci	release_metapage(l2mp);
361762306a36Sopenharmony_ci	return -EIO;
361862306a36Sopenharmony_ci
361962306a36Sopenharmony_ci	/*
362062306a36Sopenharmony_ci	 *	finalize bmap control page
362162306a36Sopenharmony_ci	 */
362262306a36Sopenharmony_cifinalize:
362362306a36Sopenharmony_ci
362462306a36Sopenharmony_ci	return 0;
362562306a36Sopenharmony_ci}
362662306a36Sopenharmony_ci
362762306a36Sopenharmony_ci
362862306a36Sopenharmony_ci/*
362962306a36Sopenharmony_ci *	dbFinalizeBmap()
363062306a36Sopenharmony_ci */
363162306a36Sopenharmony_civoid dbFinalizeBmap(struct inode *ipbmap)
363262306a36Sopenharmony_ci{
363362306a36Sopenharmony_ci	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
363462306a36Sopenharmony_ci	int actags, inactags, l2nl;
363562306a36Sopenharmony_ci	s64 ag_rem, actfree, inactfree, avgfree;
363662306a36Sopenharmony_ci	int i, n;
363762306a36Sopenharmony_ci
363862306a36Sopenharmony_ci	/*
363962306a36Sopenharmony_ci	 *	finalize bmap control page
364062306a36Sopenharmony_ci	 */
364162306a36Sopenharmony_ci//finalize:
364262306a36Sopenharmony_ci	/*
364362306a36Sopenharmony_ci	 * compute db_agpref: preferred ag to allocate from
364462306a36Sopenharmony_ci	 * (the leftmost ag with average free space in it);
364562306a36Sopenharmony_ci	 */
364662306a36Sopenharmony_ci//agpref:
364762306a36Sopenharmony_ci	/* get the number of active ags and inactive ags */
364862306a36Sopenharmony_ci	actags = bmp->db_maxag + 1;
364962306a36Sopenharmony_ci	inactags = bmp->db_numag - actags;
365062306a36Sopenharmony_ci	ag_rem = bmp->db_mapsize & (bmp->db_agsize - 1);	/* ??? */
365162306a36Sopenharmony_ci
365262306a36Sopenharmony_ci	/* determine how many blocks are in the inactive allocation
365362306a36Sopenharmony_ci	 * groups. in doing this, we must account for the fact that
365462306a36Sopenharmony_ci	 * the rightmost group might be a partial group (i.e. file
365562306a36Sopenharmony_ci	 * system size is not a multiple of the group size).
365662306a36Sopenharmony_ci	 */
365762306a36Sopenharmony_ci	inactfree = (inactags && ag_rem) ?
365862306a36Sopenharmony_ci	    ((inactags - 1) << bmp->db_agl2size) + ag_rem
365962306a36Sopenharmony_ci	    : inactags << bmp->db_agl2size;
366062306a36Sopenharmony_ci
366162306a36Sopenharmony_ci	/* determine how many free blocks are in the active
366262306a36Sopenharmony_ci	 * allocation groups plus the average number of free blocks
366362306a36Sopenharmony_ci	 * within the active ags.
366462306a36Sopenharmony_ci	 */
366562306a36Sopenharmony_ci	actfree = bmp->db_nfree - inactfree;
366662306a36Sopenharmony_ci	avgfree = (u32) actfree / (u32) actags;
366762306a36Sopenharmony_ci
366862306a36Sopenharmony_ci	/* if the preferred allocation group has not average free space.
366962306a36Sopenharmony_ci	 * re-establish the preferred group as the leftmost
367062306a36Sopenharmony_ci	 * group with average free space.
367162306a36Sopenharmony_ci	 */
367262306a36Sopenharmony_ci	if (bmp->db_agfree[bmp->db_agpref] < avgfree) {
367362306a36Sopenharmony_ci		for (bmp->db_agpref = 0; bmp->db_agpref < actags;
367462306a36Sopenharmony_ci		     bmp->db_agpref++) {
367562306a36Sopenharmony_ci			if (bmp->db_agfree[bmp->db_agpref] >= avgfree)
367662306a36Sopenharmony_ci				break;
367762306a36Sopenharmony_ci		}
367862306a36Sopenharmony_ci		if (bmp->db_agpref >= bmp->db_numag) {
367962306a36Sopenharmony_ci			jfs_error(ipbmap->i_sb,
368062306a36Sopenharmony_ci				  "cannot find ag with average freespace\n");
368162306a36Sopenharmony_ci		}
368262306a36Sopenharmony_ci	}
368362306a36Sopenharmony_ci
368462306a36Sopenharmony_ci	/*
368562306a36Sopenharmony_ci	 * compute db_aglevel, db_agheight, db_width, db_agstart:
368662306a36Sopenharmony_ci	 * an ag is covered in aglevel dmapctl summary tree,
368762306a36Sopenharmony_ci	 * at agheight level height (from leaf) with agwidth number of nodes
368862306a36Sopenharmony_ci	 * each, which starts at agstart index node of the smmary tree node
368962306a36Sopenharmony_ci	 * array;
369062306a36Sopenharmony_ci	 */
369162306a36Sopenharmony_ci	bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize);
369262306a36Sopenharmony_ci	l2nl =
369362306a36Sopenharmony_ci	    bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL);
369462306a36Sopenharmony_ci	bmp->db_agheight = l2nl >> 1;
369562306a36Sopenharmony_ci	bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheight << 1));
369662306a36Sopenharmony_ci	for (i = 5 - bmp->db_agheight, bmp->db_agstart = 0, n = 1; i > 0;
369762306a36Sopenharmony_ci	     i--) {
369862306a36Sopenharmony_ci		bmp->db_agstart += n;
369962306a36Sopenharmony_ci		n <<= 2;
370062306a36Sopenharmony_ci	}
370162306a36Sopenharmony_ci
370262306a36Sopenharmony_ci}
370362306a36Sopenharmony_ci
370462306a36Sopenharmony_ci
370562306a36Sopenharmony_ci/*
370662306a36Sopenharmony_ci * NAME:	dbInitDmap()/ujfs_idmap_page()
370762306a36Sopenharmony_ci *
370862306a36Sopenharmony_ci * FUNCTION:	initialize working/persistent bitmap of the dmap page
370962306a36Sopenharmony_ci *		for the specified number of blocks:
371062306a36Sopenharmony_ci *
371162306a36Sopenharmony_ci *		at entry, the bitmaps had been initialized as free (ZEROS);
371262306a36Sopenharmony_ci *		The number of blocks will only account for the actually
371362306a36Sopenharmony_ci *		existing blocks. Blocks which don't actually exist in
371462306a36Sopenharmony_ci *		the aggregate will be marked as allocated (ONES);
371562306a36Sopenharmony_ci *
371662306a36Sopenharmony_ci * PARAMETERS:
371762306a36Sopenharmony_ci *	dp	- pointer to page of map
371862306a36Sopenharmony_ci *	nblocks	- number of blocks this page
371962306a36Sopenharmony_ci *
372062306a36Sopenharmony_ci * RETURNS: NONE
372162306a36Sopenharmony_ci */
372262306a36Sopenharmony_cistatic int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks)
372362306a36Sopenharmony_ci{
372462306a36Sopenharmony_ci	int blkno, w, b, r, nw, nb, i;
372562306a36Sopenharmony_ci
372662306a36Sopenharmony_ci	/* starting block number within the dmap */
372762306a36Sopenharmony_ci	blkno = Blkno & (BPERDMAP - 1);
372862306a36Sopenharmony_ci
372962306a36Sopenharmony_ci	if (blkno == 0) {
373062306a36Sopenharmony_ci		dp->nblocks = dp->nfree = cpu_to_le32(nblocks);
373162306a36Sopenharmony_ci		dp->start = cpu_to_le64(Blkno);
373262306a36Sopenharmony_ci
373362306a36Sopenharmony_ci		if (nblocks == BPERDMAP) {
373462306a36Sopenharmony_ci			memset(&dp->wmap[0], 0, LPERDMAP * 4);
373562306a36Sopenharmony_ci			memset(&dp->pmap[0], 0, LPERDMAP * 4);
373662306a36Sopenharmony_ci			goto initTree;
373762306a36Sopenharmony_ci		}
373862306a36Sopenharmony_ci	} else {
373962306a36Sopenharmony_ci		le32_add_cpu(&dp->nblocks, nblocks);
374062306a36Sopenharmony_ci		le32_add_cpu(&dp->nfree, nblocks);
374162306a36Sopenharmony_ci	}
374262306a36Sopenharmony_ci
374362306a36Sopenharmony_ci	/* word number containing start block number */
374462306a36Sopenharmony_ci	w = blkno >> L2DBWORD;
374562306a36Sopenharmony_ci
374662306a36Sopenharmony_ci	/*
374762306a36Sopenharmony_ci	 * free the bits corresponding to the block range (ZEROS):
374862306a36Sopenharmony_ci	 * note: not all bits of the first and last words may be contained
374962306a36Sopenharmony_ci	 * within the block range.
375062306a36Sopenharmony_ci	 */
375162306a36Sopenharmony_ci	for (r = nblocks; r > 0; r -= nb, blkno += nb) {
375262306a36Sopenharmony_ci		/* number of bits preceding range to be freed in the word */
375362306a36Sopenharmony_ci		b = blkno & (DBWORD - 1);
375462306a36Sopenharmony_ci		/* number of bits to free in the word */
375562306a36Sopenharmony_ci		nb = min(r, DBWORD - b);
375662306a36Sopenharmony_ci
375762306a36Sopenharmony_ci		/* is partial word to be freed ? */
375862306a36Sopenharmony_ci		if (nb < DBWORD) {
375962306a36Sopenharmony_ci			/* free (set to 0) from the bitmap word */
376062306a36Sopenharmony_ci			dp->wmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb)
376162306a36Sopenharmony_ci						     >> b));
376262306a36Sopenharmony_ci			dp->pmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb)
376362306a36Sopenharmony_ci						     >> b));
376462306a36Sopenharmony_ci
376562306a36Sopenharmony_ci			/* skip the word freed */
376662306a36Sopenharmony_ci			w++;
376762306a36Sopenharmony_ci		} else {
376862306a36Sopenharmony_ci			/* free (set to 0) contiguous bitmap words */
376962306a36Sopenharmony_ci			nw = r >> L2DBWORD;
377062306a36Sopenharmony_ci			memset(&dp->wmap[w], 0, nw * 4);
377162306a36Sopenharmony_ci			memset(&dp->pmap[w], 0, nw * 4);
377262306a36Sopenharmony_ci
377362306a36Sopenharmony_ci			/* skip the words freed */
377462306a36Sopenharmony_ci			nb = nw << L2DBWORD;
377562306a36Sopenharmony_ci			w += nw;
377662306a36Sopenharmony_ci		}
377762306a36Sopenharmony_ci	}
377862306a36Sopenharmony_ci
377962306a36Sopenharmony_ci	/*
378062306a36Sopenharmony_ci	 * mark bits following the range to be freed (non-existing
378162306a36Sopenharmony_ci	 * blocks) as allocated (ONES)
378262306a36Sopenharmony_ci	 */
378362306a36Sopenharmony_ci
378462306a36Sopenharmony_ci	if (blkno == BPERDMAP)
378562306a36Sopenharmony_ci		goto initTree;
378662306a36Sopenharmony_ci
378762306a36Sopenharmony_ci	/* the first word beyond the end of existing blocks */
378862306a36Sopenharmony_ci	w = blkno >> L2DBWORD;
378962306a36Sopenharmony_ci
379062306a36Sopenharmony_ci	/* does nblocks fall on a 32-bit boundary ? */
379162306a36Sopenharmony_ci	b = blkno & (DBWORD - 1);
379262306a36Sopenharmony_ci	if (b) {
379362306a36Sopenharmony_ci		/* mark a partial word allocated */
379462306a36Sopenharmony_ci		dp->wmap[w] = dp->pmap[w] = cpu_to_le32(ONES >> b);
379562306a36Sopenharmony_ci		w++;
379662306a36Sopenharmony_ci	}
379762306a36Sopenharmony_ci
379862306a36Sopenharmony_ci	/* set the rest of the words in the page to allocated (ONES) */
379962306a36Sopenharmony_ci	for (i = w; i < LPERDMAP; i++)
380062306a36Sopenharmony_ci		dp->pmap[i] = dp->wmap[i] = cpu_to_le32(ONES);
380162306a36Sopenharmony_ci
380262306a36Sopenharmony_ci	/*
380362306a36Sopenharmony_ci	 * init tree
380462306a36Sopenharmony_ci	 */
380562306a36Sopenharmony_ci      initTree:
380662306a36Sopenharmony_ci	return (dbInitDmapTree(dp));
380762306a36Sopenharmony_ci}
380862306a36Sopenharmony_ci
380962306a36Sopenharmony_ci
381062306a36Sopenharmony_ci/*
381162306a36Sopenharmony_ci * NAME:	dbInitDmapTree()/ujfs_complete_dmap()
381262306a36Sopenharmony_ci *
381362306a36Sopenharmony_ci * FUNCTION:	initialize summary tree of the specified dmap:
381462306a36Sopenharmony_ci *
381562306a36Sopenharmony_ci *		at entry, bitmap of the dmap has been initialized;
381662306a36Sopenharmony_ci *
381762306a36Sopenharmony_ci * PARAMETERS:
381862306a36Sopenharmony_ci *	dp	- dmap to complete
381962306a36Sopenharmony_ci *	blkno	- starting block number for this dmap
382062306a36Sopenharmony_ci *	treemax	- will be filled in with max free for this dmap
382162306a36Sopenharmony_ci *
382262306a36Sopenharmony_ci * RETURNS:	max free string at the root of the tree
382362306a36Sopenharmony_ci */
382462306a36Sopenharmony_cistatic int dbInitDmapTree(struct dmap * dp)
382562306a36Sopenharmony_ci{
382662306a36Sopenharmony_ci	struct dmaptree *tp;
382762306a36Sopenharmony_ci	s8 *cp;
382862306a36Sopenharmony_ci	int i;
382962306a36Sopenharmony_ci
383062306a36Sopenharmony_ci	/* init fixed info of tree */
383162306a36Sopenharmony_ci	tp = &dp->tree;
383262306a36Sopenharmony_ci	tp->nleafs = cpu_to_le32(LPERDMAP);
383362306a36Sopenharmony_ci	tp->l2nleafs = cpu_to_le32(L2LPERDMAP);
383462306a36Sopenharmony_ci	tp->leafidx = cpu_to_le32(LEAFIND);
383562306a36Sopenharmony_ci	tp->height = cpu_to_le32(4);
383662306a36Sopenharmony_ci	tp->budmin = BUDMIN;
383762306a36Sopenharmony_ci
383862306a36Sopenharmony_ci	/* init each leaf from corresponding wmap word:
383962306a36Sopenharmony_ci	 * note: leaf is set to NOFREE(-1) if all blocks of corresponding
384062306a36Sopenharmony_ci	 * bitmap word are allocated.
384162306a36Sopenharmony_ci	 */
384262306a36Sopenharmony_ci	cp = tp->stree + le32_to_cpu(tp->leafidx);
384362306a36Sopenharmony_ci	for (i = 0; i < LPERDMAP; i++)
384462306a36Sopenharmony_ci		*cp++ = dbMaxBud((u8 *) & dp->wmap[i]);
384562306a36Sopenharmony_ci
384662306a36Sopenharmony_ci	/* build the dmap's binary buddy summary tree */
384762306a36Sopenharmony_ci	return (dbInitTree(tp));
384862306a36Sopenharmony_ci}
384962306a36Sopenharmony_ci
385062306a36Sopenharmony_ci
385162306a36Sopenharmony_ci/*
385262306a36Sopenharmony_ci * NAME:	dbInitTree()/ujfs_adjtree()
385362306a36Sopenharmony_ci *
385462306a36Sopenharmony_ci * FUNCTION:	initialize binary buddy summary tree of a dmap or dmapctl.
385562306a36Sopenharmony_ci *
385662306a36Sopenharmony_ci *		at entry, the leaves of the tree has been initialized
385762306a36Sopenharmony_ci *		from corresponding bitmap word or root of summary tree
385862306a36Sopenharmony_ci *		of the child control page;
385962306a36Sopenharmony_ci *		configure binary buddy system at the leaf level, then
386062306a36Sopenharmony_ci *		bubble up the values of the leaf nodes up the tree.
386162306a36Sopenharmony_ci *
386262306a36Sopenharmony_ci * PARAMETERS:
386362306a36Sopenharmony_ci *	cp	- Pointer to the root of the tree
386462306a36Sopenharmony_ci *	l2leaves- Number of leaf nodes as a power of 2
386562306a36Sopenharmony_ci *	l2min	- Number of blocks that can be covered by a leaf
386662306a36Sopenharmony_ci *		  as a power of 2
386762306a36Sopenharmony_ci *
386862306a36Sopenharmony_ci * RETURNS: max free string at the root of the tree
386962306a36Sopenharmony_ci */
387062306a36Sopenharmony_cistatic int dbInitTree(struct dmaptree * dtp)
387162306a36Sopenharmony_ci{
387262306a36Sopenharmony_ci	int l2max, l2free, bsize, nextb, i;
387362306a36Sopenharmony_ci	int child, parent, nparent;
387462306a36Sopenharmony_ci	s8 *tp, *cp, *cp1;
387562306a36Sopenharmony_ci
387662306a36Sopenharmony_ci	tp = dtp->stree;
387762306a36Sopenharmony_ci
387862306a36Sopenharmony_ci	/* Determine the maximum free string possible for the leaves */
387962306a36Sopenharmony_ci	l2max = le32_to_cpu(dtp->l2nleafs) + dtp->budmin;
388062306a36Sopenharmony_ci
388162306a36Sopenharmony_ci	/*
388262306a36Sopenharmony_ci	 * configure the leaf level into binary buddy system
388362306a36Sopenharmony_ci	 *
388462306a36Sopenharmony_ci	 * Try to combine buddies starting with a buddy size of 1
388562306a36Sopenharmony_ci	 * (i.e. two leaves). At a buddy size of 1 two buddy leaves
388662306a36Sopenharmony_ci	 * can be combined if both buddies have a maximum free of l2min;
388762306a36Sopenharmony_ci	 * the combination will result in the left-most buddy leaf having
388862306a36Sopenharmony_ci	 * a maximum free of l2min+1.
388962306a36Sopenharmony_ci	 * After processing all buddies for a given size, process buddies
389062306a36Sopenharmony_ci	 * at the next higher buddy size (i.e. current size * 2) and
389162306a36Sopenharmony_ci	 * the next maximum free (current free + 1).
389262306a36Sopenharmony_ci	 * This continues until the maximum possible buddy combination
389362306a36Sopenharmony_ci	 * yields maximum free.
389462306a36Sopenharmony_ci	 */
389562306a36Sopenharmony_ci	for (l2free = dtp->budmin, bsize = 1; l2free < l2max;
389662306a36Sopenharmony_ci	     l2free++, bsize = nextb) {
389762306a36Sopenharmony_ci		/* get next buddy size == current buddy pair size */
389862306a36Sopenharmony_ci		nextb = bsize << 1;
389962306a36Sopenharmony_ci
390062306a36Sopenharmony_ci		/* scan each adjacent buddy pair at current buddy size */
390162306a36Sopenharmony_ci		for (i = 0, cp = tp + le32_to_cpu(dtp->leafidx);
390262306a36Sopenharmony_ci		     i < le32_to_cpu(dtp->nleafs);
390362306a36Sopenharmony_ci		     i += nextb, cp += nextb) {
390462306a36Sopenharmony_ci			/* coalesce if both adjacent buddies are max free */
390562306a36Sopenharmony_ci			if (*cp == l2free && *(cp + bsize) == l2free) {
390662306a36Sopenharmony_ci				*cp = l2free + 1;	/* left take right */
390762306a36Sopenharmony_ci				*(cp + bsize) = -1;	/* right give left */
390862306a36Sopenharmony_ci			}
390962306a36Sopenharmony_ci		}
391062306a36Sopenharmony_ci	}
391162306a36Sopenharmony_ci
391262306a36Sopenharmony_ci	/*
391362306a36Sopenharmony_ci	 * bubble summary information of leaves up the tree.
391462306a36Sopenharmony_ci	 *
391562306a36Sopenharmony_ci	 * Starting at the leaf node level, the four nodes described by
391662306a36Sopenharmony_ci	 * the higher level parent node are compared for a maximum free and
391762306a36Sopenharmony_ci	 * this maximum becomes the value of the parent node.
391862306a36Sopenharmony_ci	 * when all lower level nodes are processed in this fashion then
391962306a36Sopenharmony_ci	 * move up to the next level (parent becomes a lower level node) and
392062306a36Sopenharmony_ci	 * continue the process for that level.
392162306a36Sopenharmony_ci	 */
392262306a36Sopenharmony_ci	for (child = le32_to_cpu(dtp->leafidx),
392362306a36Sopenharmony_ci	     nparent = le32_to_cpu(dtp->nleafs) >> 2;
392462306a36Sopenharmony_ci	     nparent > 0; nparent >>= 2, child = parent) {
392562306a36Sopenharmony_ci		/* get index of 1st node of parent level */
392662306a36Sopenharmony_ci		parent = (child - 1) >> 2;
392762306a36Sopenharmony_ci
392862306a36Sopenharmony_ci		/* set the value of the parent node as the maximum
392962306a36Sopenharmony_ci		 * of the four nodes of the current level.
393062306a36Sopenharmony_ci		 */
393162306a36Sopenharmony_ci		for (i = 0, cp = tp + child, cp1 = tp + parent;
393262306a36Sopenharmony_ci		     i < nparent; i++, cp += 4, cp1++)
393362306a36Sopenharmony_ci			*cp1 = TREEMAX(cp);
393462306a36Sopenharmony_ci	}
393562306a36Sopenharmony_ci
393662306a36Sopenharmony_ci	return (*tp);
393762306a36Sopenharmony_ci}
393862306a36Sopenharmony_ci
393962306a36Sopenharmony_ci
394062306a36Sopenharmony_ci/*
394162306a36Sopenharmony_ci *	dbInitDmapCtl()
394262306a36Sopenharmony_ci *
394362306a36Sopenharmony_ci * function: initialize dmapctl page
394462306a36Sopenharmony_ci */
394562306a36Sopenharmony_cistatic int dbInitDmapCtl(struct dmapctl * dcp, int level, int i)
394662306a36Sopenharmony_ci{				/* start leaf index not covered by range */
394762306a36Sopenharmony_ci	s8 *cp;
394862306a36Sopenharmony_ci
394962306a36Sopenharmony_ci	dcp->nleafs = cpu_to_le32(LPERCTL);
395062306a36Sopenharmony_ci	dcp->l2nleafs = cpu_to_le32(L2LPERCTL);
395162306a36Sopenharmony_ci	dcp->leafidx = cpu_to_le32(CTLLEAFIND);
395262306a36Sopenharmony_ci	dcp->height = cpu_to_le32(5);
395362306a36Sopenharmony_ci	dcp->budmin = L2BPERDMAP + L2LPERCTL * level;
395462306a36Sopenharmony_ci
395562306a36Sopenharmony_ci	/*
395662306a36Sopenharmony_ci	 * initialize the leaves of current level that were not covered
395762306a36Sopenharmony_ci	 * by the specified input block range (i.e. the leaves have no
395862306a36Sopenharmony_ci	 * low level dmapctl or dmap).
395962306a36Sopenharmony_ci	 */
396062306a36Sopenharmony_ci	cp = &dcp->stree[CTLLEAFIND + i];
396162306a36Sopenharmony_ci	for (; i < LPERCTL; i++)
396262306a36Sopenharmony_ci		*cp++ = NOFREE;
396362306a36Sopenharmony_ci
396462306a36Sopenharmony_ci	/* build the dmap's binary buddy summary tree */
396562306a36Sopenharmony_ci	return (dbInitTree((struct dmaptree *) dcp));
396662306a36Sopenharmony_ci}
396762306a36Sopenharmony_ci
396862306a36Sopenharmony_ci
396962306a36Sopenharmony_ci/*
397062306a36Sopenharmony_ci * NAME:	dbGetL2AGSize()/ujfs_getagl2size()
397162306a36Sopenharmony_ci *
397262306a36Sopenharmony_ci * FUNCTION:	Determine log2(allocation group size) from aggregate size
397362306a36Sopenharmony_ci *
397462306a36Sopenharmony_ci * PARAMETERS:
397562306a36Sopenharmony_ci *	nblocks	- Number of blocks in aggregate
397662306a36Sopenharmony_ci *
397762306a36Sopenharmony_ci * RETURNS: log2(allocation group size) in aggregate blocks
397862306a36Sopenharmony_ci */
397962306a36Sopenharmony_cistatic int dbGetL2AGSize(s64 nblocks)
398062306a36Sopenharmony_ci{
398162306a36Sopenharmony_ci	s64 sz;
398262306a36Sopenharmony_ci	s64 m;
398362306a36Sopenharmony_ci	int l2sz;
398462306a36Sopenharmony_ci
398562306a36Sopenharmony_ci	if (nblocks < BPERDMAP * MAXAG)
398662306a36Sopenharmony_ci		return (L2BPERDMAP);
398762306a36Sopenharmony_ci
398862306a36Sopenharmony_ci	/* round up aggregate size to power of 2 */
398962306a36Sopenharmony_ci	m = ((u64) 1 << (64 - 1));
399062306a36Sopenharmony_ci	for (l2sz = 64; l2sz >= 0; l2sz--, m >>= 1) {
399162306a36Sopenharmony_ci		if (m & nblocks)
399262306a36Sopenharmony_ci			break;
399362306a36Sopenharmony_ci	}
399462306a36Sopenharmony_ci
399562306a36Sopenharmony_ci	sz = (s64) 1 << l2sz;
399662306a36Sopenharmony_ci	if (sz < nblocks)
399762306a36Sopenharmony_ci		l2sz += 1;
399862306a36Sopenharmony_ci
399962306a36Sopenharmony_ci	/* agsize = roundupSize/max_number_of_ag */
400062306a36Sopenharmony_ci	return (l2sz - L2MAXAG);
400162306a36Sopenharmony_ci}
400262306a36Sopenharmony_ci
400362306a36Sopenharmony_ci
400462306a36Sopenharmony_ci/*
400562306a36Sopenharmony_ci * NAME:	dbMapFileSizeToMapSize()
400662306a36Sopenharmony_ci *
400762306a36Sopenharmony_ci * FUNCTION:	compute number of blocks the block allocation map file
400862306a36Sopenharmony_ci *		can cover from the map file size;
400962306a36Sopenharmony_ci *
401062306a36Sopenharmony_ci * RETURNS:	Number of blocks which can be covered by this block map file;
401162306a36Sopenharmony_ci */
401262306a36Sopenharmony_ci
401362306a36Sopenharmony_ci/*
401462306a36Sopenharmony_ci * maximum number of map pages at each level including control pages
401562306a36Sopenharmony_ci */
401662306a36Sopenharmony_ci#define MAXL0PAGES	(1 + LPERCTL)
401762306a36Sopenharmony_ci#define MAXL1PAGES	(1 + LPERCTL * MAXL0PAGES)
401862306a36Sopenharmony_ci
401962306a36Sopenharmony_ci/*
402062306a36Sopenharmony_ci * convert number of map pages to the zero origin top dmapctl level
402162306a36Sopenharmony_ci */
402262306a36Sopenharmony_ci#define BMAPPGTOLEV(npages)	\
402362306a36Sopenharmony_ci	(((npages) <= 3 + MAXL0PAGES) ? 0 : \
402462306a36Sopenharmony_ci	 ((npages) <= 2 + MAXL1PAGES) ? 1 : 2)
402562306a36Sopenharmony_ci
402662306a36Sopenharmony_cis64 dbMapFileSizeToMapSize(struct inode * ipbmap)
402762306a36Sopenharmony_ci{
402862306a36Sopenharmony_ci	struct super_block *sb = ipbmap->i_sb;
402962306a36Sopenharmony_ci	s64 nblocks;
403062306a36Sopenharmony_ci	s64 npages, ndmaps;
403162306a36Sopenharmony_ci	int level, i;
403262306a36Sopenharmony_ci	int complete, factor;
403362306a36Sopenharmony_ci
403462306a36Sopenharmony_ci	nblocks = ipbmap->i_size >> JFS_SBI(sb)->l2bsize;
403562306a36Sopenharmony_ci	npages = nblocks >> JFS_SBI(sb)->l2nbperpage;
403662306a36Sopenharmony_ci	level = BMAPPGTOLEV(npages);
403762306a36Sopenharmony_ci
403862306a36Sopenharmony_ci	/* At each level, accumulate the number of dmap pages covered by
403962306a36Sopenharmony_ci	 * the number of full child levels below it;
404062306a36Sopenharmony_ci	 * repeat for the last incomplete child level.
404162306a36Sopenharmony_ci	 */
404262306a36Sopenharmony_ci	ndmaps = 0;
404362306a36Sopenharmony_ci	npages--;		/* skip the first global control page */
404462306a36Sopenharmony_ci	/* skip higher level control pages above top level covered by map */
404562306a36Sopenharmony_ci	npages -= (2 - level);
404662306a36Sopenharmony_ci	npages--;		/* skip top level's control page */
404762306a36Sopenharmony_ci	for (i = level; i >= 0; i--) {
404862306a36Sopenharmony_ci		factor =
404962306a36Sopenharmony_ci		    (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1);
405062306a36Sopenharmony_ci		complete = (u32) npages / factor;
405162306a36Sopenharmony_ci		ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL :
405262306a36Sopenharmony_ci				      ((i == 1) ? LPERCTL : 1));
405362306a36Sopenharmony_ci
405462306a36Sopenharmony_ci		/* pages in last/incomplete child */
405562306a36Sopenharmony_ci		npages = (u32) npages % factor;
405662306a36Sopenharmony_ci		/* skip incomplete child's level control page */
405762306a36Sopenharmony_ci		npages--;
405862306a36Sopenharmony_ci	}
405962306a36Sopenharmony_ci
406062306a36Sopenharmony_ci	/* convert the number of dmaps into the number of blocks
406162306a36Sopenharmony_ci	 * which can be covered by the dmaps;
406262306a36Sopenharmony_ci	 */
406362306a36Sopenharmony_ci	nblocks = ndmaps << L2BPERDMAP;
406462306a36Sopenharmony_ci
406562306a36Sopenharmony_ci	return (nblocks);
406662306a36Sopenharmony_ci}
4067