162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) International Business Machines Corp., 2000-2004 462306a36Sopenharmony_ci * Portions Copyright (C) Tino Reichardt, 2012 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci 762306a36Sopenharmony_ci#include <linux/fs.h> 862306a36Sopenharmony_ci#include <linux/slab.h> 962306a36Sopenharmony_ci#include "jfs_incore.h" 1062306a36Sopenharmony_ci#include "jfs_superblock.h" 1162306a36Sopenharmony_ci#include "jfs_dmap.h" 1262306a36Sopenharmony_ci#include "jfs_imap.h" 1362306a36Sopenharmony_ci#include "jfs_lock.h" 1462306a36Sopenharmony_ci#include "jfs_metapage.h" 1562306a36Sopenharmony_ci#include "jfs_debug.h" 1662306a36Sopenharmony_ci#include "jfs_discard.h" 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci/* 1962306a36Sopenharmony_ci * SERIALIZATION of the Block Allocation Map. 2062306a36Sopenharmony_ci * 2162306a36Sopenharmony_ci * the working state of the block allocation map is accessed in 2262306a36Sopenharmony_ci * two directions: 2362306a36Sopenharmony_ci * 2462306a36Sopenharmony_ci * 1) allocation and free requests that start at the dmap 2562306a36Sopenharmony_ci * level and move up through the dmap control pages (i.e. 2662306a36Sopenharmony_ci * the vast majority of requests). 2762306a36Sopenharmony_ci * 2862306a36Sopenharmony_ci * 2) allocation requests that start at dmap control page 2962306a36Sopenharmony_ci * level and work down towards the dmaps. 3062306a36Sopenharmony_ci * 3162306a36Sopenharmony_ci * the serialization scheme used here is as follows. 3262306a36Sopenharmony_ci * 3362306a36Sopenharmony_ci * requests which start at the bottom are serialized against each 3462306a36Sopenharmony_ci * other through buffers and each requests holds onto its buffers 3562306a36Sopenharmony_ci * as it works it way up from a single dmap to the required level 3662306a36Sopenharmony_ci * of dmap control page. 3762306a36Sopenharmony_ci * requests that start at the top are serialized against each other 3862306a36Sopenharmony_ci * and request that start from the bottom by the multiple read/single 3962306a36Sopenharmony_ci * write inode lock of the bmap inode. requests starting at the top 4062306a36Sopenharmony_ci * take this lock in write mode while request starting at the bottom 4162306a36Sopenharmony_ci * take the lock in read mode. a single top-down request may proceed 4262306a36Sopenharmony_ci * exclusively while multiple bottoms-up requests may proceed 4362306a36Sopenharmony_ci * simultaneously (under the protection of busy buffers). 4462306a36Sopenharmony_ci * 4562306a36Sopenharmony_ci * in addition to information found in dmaps and dmap control pages, 4662306a36Sopenharmony_ci * the working state of the block allocation map also includes read/ 4762306a36Sopenharmony_ci * write information maintained in the bmap descriptor (i.e. total 4862306a36Sopenharmony_ci * free block count, allocation group level free block counts). 4962306a36Sopenharmony_ci * a single exclusive lock (BMAP_LOCK) is used to guard this information 5062306a36Sopenharmony_ci * in the face of multiple-bottoms up requests. 5162306a36Sopenharmony_ci * (lock ordering: IREAD_LOCK, BMAP_LOCK); 5262306a36Sopenharmony_ci * 5362306a36Sopenharmony_ci * accesses to the persistent state of the block allocation map (limited 5462306a36Sopenharmony_ci * to the persistent bitmaps in dmaps) is guarded by (busy) buffers. 5562306a36Sopenharmony_ci */ 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ci#define BMAP_LOCK_INIT(bmp) mutex_init(&bmp->db_bmaplock) 5862306a36Sopenharmony_ci#define BMAP_LOCK(bmp) mutex_lock(&bmp->db_bmaplock) 5962306a36Sopenharmony_ci#define BMAP_UNLOCK(bmp) mutex_unlock(&bmp->db_bmaplock) 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci/* 6262306a36Sopenharmony_ci * forward references 6362306a36Sopenharmony_ci */ 6462306a36Sopenharmony_cistatic void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, 6562306a36Sopenharmony_ci int nblocks); 6662306a36Sopenharmony_cistatic void dbSplit(dmtree_t *tp, int leafno, int splitsz, int newval, bool is_ctl); 6762306a36Sopenharmony_cistatic int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl); 6862306a36Sopenharmony_cistatic int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl); 6962306a36Sopenharmony_cistatic void dbAdjTree(dmtree_t *tp, int leafno, int newval, bool is_ctl); 7062306a36Sopenharmony_cistatic int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, 7162306a36Sopenharmony_ci int level); 7262306a36Sopenharmony_cistatic int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results); 7362306a36Sopenharmony_cistatic int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, 7462306a36Sopenharmony_ci int nblocks); 7562306a36Sopenharmony_cistatic int dbAllocNear(struct bmap * bmp, struct dmap * dp, s64 blkno, 7662306a36Sopenharmony_ci int nblocks, 7762306a36Sopenharmony_ci int l2nb, s64 * results); 7862306a36Sopenharmony_cistatic int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, 7962306a36Sopenharmony_ci int nblocks); 8062306a36Sopenharmony_cistatic int dbAllocDmapLev(struct bmap * bmp, struct dmap * dp, int nblocks, 8162306a36Sopenharmony_ci int l2nb, 8262306a36Sopenharmony_ci s64 * results); 8362306a36Sopenharmony_cistatic int dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, 8462306a36Sopenharmony_ci s64 * results); 8562306a36Sopenharmony_cistatic int dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, 8662306a36Sopenharmony_ci s64 * results); 8762306a36Sopenharmony_cistatic int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks); 8862306a36Sopenharmony_cistatic int dbFindBits(u32 word, int l2nb); 8962306a36Sopenharmony_cistatic int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno); 9062306a36Sopenharmony_cistatic int dbFindLeaf(dmtree_t *tp, int l2nb, int *leafidx, bool is_ctl); 9162306a36Sopenharmony_cistatic int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, 9262306a36Sopenharmony_ci int nblocks); 9362306a36Sopenharmony_cistatic int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, 9462306a36Sopenharmony_ci int nblocks); 9562306a36Sopenharmony_cistatic int dbMaxBud(u8 * cp); 9662306a36Sopenharmony_cistatic int blkstol2(s64 nb); 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_cistatic int cntlz(u32 value); 9962306a36Sopenharmony_cistatic int cnttz(u32 word); 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_cistatic int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, 10262306a36Sopenharmony_ci int nblocks); 10362306a36Sopenharmony_cistatic int dbInitDmap(struct dmap * dp, s64 blkno, int nblocks); 10462306a36Sopenharmony_cistatic int dbInitDmapTree(struct dmap * dp); 10562306a36Sopenharmony_cistatic int dbInitTree(struct dmaptree * dtp); 10662306a36Sopenharmony_cistatic int dbInitDmapCtl(struct dmapctl * dcp, int level, int i); 10762306a36Sopenharmony_cistatic int dbGetL2AGSize(s64 nblocks); 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci/* 11062306a36Sopenharmony_ci * buddy table 11162306a36Sopenharmony_ci * 11262306a36Sopenharmony_ci * table used for determining buddy sizes within characters of 11362306a36Sopenharmony_ci * dmap bitmap words. the characters themselves serve as indexes 11462306a36Sopenharmony_ci * into the table, with the table elements yielding the maximum 11562306a36Sopenharmony_ci * binary buddy of free bits within the character. 11662306a36Sopenharmony_ci */ 11762306a36Sopenharmony_cistatic const s8 budtab[256] = { 11862306a36Sopenharmony_ci 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 11962306a36Sopenharmony_ci 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 12062306a36Sopenharmony_ci 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 12162306a36Sopenharmony_ci 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 12262306a36Sopenharmony_ci 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 12362306a36Sopenharmony_ci 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 12462306a36Sopenharmony_ci 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 12562306a36Sopenharmony_ci 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 12662306a36Sopenharmony_ci 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 12762306a36Sopenharmony_ci 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 12862306a36Sopenharmony_ci 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 12962306a36Sopenharmony_ci 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 13062306a36Sopenharmony_ci 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 13162306a36Sopenharmony_ci 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 13262306a36Sopenharmony_ci 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 13362306a36Sopenharmony_ci 2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1 13462306a36Sopenharmony_ci}; 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_ci/* 13762306a36Sopenharmony_ci * NAME: dbMount() 13862306a36Sopenharmony_ci * 13962306a36Sopenharmony_ci * FUNCTION: initializate the block allocation map. 14062306a36Sopenharmony_ci * 14162306a36Sopenharmony_ci * memory is allocated for the in-core bmap descriptor and 14262306a36Sopenharmony_ci * the in-core descriptor is initialized from disk. 14362306a36Sopenharmony_ci * 14462306a36Sopenharmony_ci * PARAMETERS: 14562306a36Sopenharmony_ci * ipbmap - pointer to in-core inode for the block map. 14662306a36Sopenharmony_ci * 14762306a36Sopenharmony_ci * RETURN VALUES: 14862306a36Sopenharmony_ci * 0 - success 14962306a36Sopenharmony_ci * -ENOMEM - insufficient memory 15062306a36Sopenharmony_ci * -EIO - i/o error 15162306a36Sopenharmony_ci * -EINVAL - wrong bmap data 15262306a36Sopenharmony_ci */ 15362306a36Sopenharmony_ciint dbMount(struct inode *ipbmap) 15462306a36Sopenharmony_ci{ 15562306a36Sopenharmony_ci struct bmap *bmp; 15662306a36Sopenharmony_ci struct dbmap_disk *dbmp_le; 15762306a36Sopenharmony_ci struct metapage *mp; 15862306a36Sopenharmony_ci int i, err; 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_ci /* 16162306a36Sopenharmony_ci * allocate/initialize the in-memory bmap descriptor 16262306a36Sopenharmony_ci */ 16362306a36Sopenharmony_ci /* allocate memory for the in-memory bmap descriptor */ 16462306a36Sopenharmony_ci bmp = kmalloc(sizeof(struct bmap), GFP_KERNEL); 16562306a36Sopenharmony_ci if (bmp == NULL) 16662306a36Sopenharmony_ci return -ENOMEM; 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci /* read the on-disk bmap descriptor. */ 16962306a36Sopenharmony_ci mp = read_metapage(ipbmap, 17062306a36Sopenharmony_ci BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, 17162306a36Sopenharmony_ci PSIZE, 0); 17262306a36Sopenharmony_ci if (mp == NULL) { 17362306a36Sopenharmony_ci err = -EIO; 17462306a36Sopenharmony_ci goto err_kfree_bmp; 17562306a36Sopenharmony_ci } 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci /* copy the on-disk bmap descriptor to its in-memory version. */ 17862306a36Sopenharmony_ci dbmp_le = (struct dbmap_disk *) mp->data; 17962306a36Sopenharmony_ci bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize); 18062306a36Sopenharmony_ci bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree); 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); 18362306a36Sopenharmony_ci if (bmp->db_l2nbperpage > L2PSIZE - L2MINBLOCKSIZE || 18462306a36Sopenharmony_ci bmp->db_l2nbperpage < 0) { 18562306a36Sopenharmony_ci err = -EINVAL; 18662306a36Sopenharmony_ci goto err_release_metapage; 18762306a36Sopenharmony_ci } 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); 19062306a36Sopenharmony_ci if (!bmp->db_numag) { 19162306a36Sopenharmony_ci err = -EINVAL; 19262306a36Sopenharmony_ci goto err_release_metapage; 19362306a36Sopenharmony_ci } 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel); 19662306a36Sopenharmony_ci bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag); 19762306a36Sopenharmony_ci bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref); 19862306a36Sopenharmony_ci if (bmp->db_maxag >= MAXAG || bmp->db_maxag < 0 || 19962306a36Sopenharmony_ci bmp->db_agpref >= MAXAG || bmp->db_agpref < 0) { 20062306a36Sopenharmony_ci err = -EINVAL; 20162306a36Sopenharmony_ci goto err_release_metapage; 20262306a36Sopenharmony_ci } 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel); 20562306a36Sopenharmony_ci bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight); 20662306a36Sopenharmony_ci bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); 20762306a36Sopenharmony_ci bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); 20862306a36Sopenharmony_ci bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); 20962306a36Sopenharmony_ci if (bmp->db_agl2size > L2MAXL2SIZE - L2MAXAG || 21062306a36Sopenharmony_ci bmp->db_agl2size < 0) { 21162306a36Sopenharmony_ci err = -EINVAL; 21262306a36Sopenharmony_ci goto err_release_metapage; 21362306a36Sopenharmony_ci } 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci if (((bmp->db_mapsize - 1) >> bmp->db_agl2size) > MAXAG) { 21662306a36Sopenharmony_ci err = -EINVAL; 21762306a36Sopenharmony_ci goto err_release_metapage; 21862306a36Sopenharmony_ci } 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci for (i = 0; i < MAXAG; i++) 22162306a36Sopenharmony_ci bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]); 22262306a36Sopenharmony_ci bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize); 22362306a36Sopenharmony_ci bmp->db_maxfreebud = dbmp_le->dn_maxfreebud; 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci /* release the buffer. */ 22662306a36Sopenharmony_ci release_metapage(mp); 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci /* bind the bmap inode and the bmap descriptor to each other. */ 22962306a36Sopenharmony_ci bmp->db_ipbmap = ipbmap; 23062306a36Sopenharmony_ci JFS_SBI(ipbmap->i_sb)->bmap = bmp; 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci memset(bmp->db_active, 0, sizeof(bmp->db_active)); 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_ci /* 23562306a36Sopenharmony_ci * allocate/initialize the bmap lock 23662306a36Sopenharmony_ci */ 23762306a36Sopenharmony_ci BMAP_LOCK_INIT(bmp); 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci return (0); 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_cierr_release_metapage: 24262306a36Sopenharmony_ci release_metapage(mp); 24362306a36Sopenharmony_cierr_kfree_bmp: 24462306a36Sopenharmony_ci kfree(bmp); 24562306a36Sopenharmony_ci return err; 24662306a36Sopenharmony_ci} 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci/* 25062306a36Sopenharmony_ci * NAME: dbUnmount() 25162306a36Sopenharmony_ci * 25262306a36Sopenharmony_ci * FUNCTION: terminate the block allocation map in preparation for 25362306a36Sopenharmony_ci * file system unmount. 25462306a36Sopenharmony_ci * 25562306a36Sopenharmony_ci * the in-core bmap descriptor is written to disk and 25662306a36Sopenharmony_ci * the memory for this descriptor is freed. 25762306a36Sopenharmony_ci * 25862306a36Sopenharmony_ci * PARAMETERS: 25962306a36Sopenharmony_ci * ipbmap - pointer to in-core inode for the block map. 26062306a36Sopenharmony_ci * 26162306a36Sopenharmony_ci * RETURN VALUES: 26262306a36Sopenharmony_ci * 0 - success 26362306a36Sopenharmony_ci * -EIO - i/o error 26462306a36Sopenharmony_ci */ 26562306a36Sopenharmony_ciint dbUnmount(struct inode *ipbmap, int mounterror) 26662306a36Sopenharmony_ci{ 26762306a36Sopenharmony_ci struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci if (!(mounterror || isReadOnly(ipbmap))) 27062306a36Sopenharmony_ci dbSync(ipbmap); 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci /* 27362306a36Sopenharmony_ci * Invalidate the page cache buffers 27462306a36Sopenharmony_ci */ 27562306a36Sopenharmony_ci truncate_inode_pages(ipbmap->i_mapping, 0); 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci /* free the memory for the in-memory bmap. */ 27862306a36Sopenharmony_ci kfree(bmp); 27962306a36Sopenharmony_ci JFS_SBI(ipbmap->i_sb)->bmap = NULL; 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci return (0); 28262306a36Sopenharmony_ci} 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci/* 28562306a36Sopenharmony_ci * dbSync() 28662306a36Sopenharmony_ci */ 28762306a36Sopenharmony_ciint dbSync(struct inode *ipbmap) 28862306a36Sopenharmony_ci{ 28962306a36Sopenharmony_ci struct dbmap_disk *dbmp_le; 29062306a36Sopenharmony_ci struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; 29162306a36Sopenharmony_ci struct metapage *mp; 29262306a36Sopenharmony_ci int i; 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_ci /* 29562306a36Sopenharmony_ci * write bmap global control page 29662306a36Sopenharmony_ci */ 29762306a36Sopenharmony_ci /* get the buffer for the on-disk bmap descriptor. */ 29862306a36Sopenharmony_ci mp = read_metapage(ipbmap, 29962306a36Sopenharmony_ci BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, 30062306a36Sopenharmony_ci PSIZE, 0); 30162306a36Sopenharmony_ci if (mp == NULL) { 30262306a36Sopenharmony_ci jfs_err("dbSync: read_metapage failed!"); 30362306a36Sopenharmony_ci return -EIO; 30462306a36Sopenharmony_ci } 30562306a36Sopenharmony_ci /* copy the in-memory version of the bmap to the on-disk version */ 30662306a36Sopenharmony_ci dbmp_le = (struct dbmap_disk *) mp->data; 30762306a36Sopenharmony_ci dbmp_le->dn_mapsize = cpu_to_le64(bmp->db_mapsize); 30862306a36Sopenharmony_ci dbmp_le->dn_nfree = cpu_to_le64(bmp->db_nfree); 30962306a36Sopenharmony_ci dbmp_le->dn_l2nbperpage = cpu_to_le32(bmp->db_l2nbperpage); 31062306a36Sopenharmony_ci dbmp_le->dn_numag = cpu_to_le32(bmp->db_numag); 31162306a36Sopenharmony_ci dbmp_le->dn_maxlevel = cpu_to_le32(bmp->db_maxlevel); 31262306a36Sopenharmony_ci dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag); 31362306a36Sopenharmony_ci dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref); 31462306a36Sopenharmony_ci dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel); 31562306a36Sopenharmony_ci dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight); 31662306a36Sopenharmony_ci dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth); 31762306a36Sopenharmony_ci dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart); 31862306a36Sopenharmony_ci dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size); 31962306a36Sopenharmony_ci for (i = 0; i < MAXAG; i++) 32062306a36Sopenharmony_ci dbmp_le->dn_agfree[i] = cpu_to_le64(bmp->db_agfree[i]); 32162306a36Sopenharmony_ci dbmp_le->dn_agsize = cpu_to_le64(bmp->db_agsize); 32262306a36Sopenharmony_ci dbmp_le->dn_maxfreebud = bmp->db_maxfreebud; 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci /* write the buffer */ 32562306a36Sopenharmony_ci write_metapage(mp); 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_ci /* 32862306a36Sopenharmony_ci * write out dirty pages of bmap 32962306a36Sopenharmony_ci */ 33062306a36Sopenharmony_ci filemap_write_and_wait(ipbmap->i_mapping); 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci diWriteSpecial(ipbmap, 0); 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci return (0); 33562306a36Sopenharmony_ci} 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci/* 33862306a36Sopenharmony_ci * NAME: dbFree() 33962306a36Sopenharmony_ci * 34062306a36Sopenharmony_ci * FUNCTION: free the specified block range from the working block 34162306a36Sopenharmony_ci * allocation map. 34262306a36Sopenharmony_ci * 34362306a36Sopenharmony_ci * the blocks will be free from the working map one dmap 34462306a36Sopenharmony_ci * at a time. 34562306a36Sopenharmony_ci * 34662306a36Sopenharmony_ci * PARAMETERS: 34762306a36Sopenharmony_ci * ip - pointer to in-core inode; 34862306a36Sopenharmony_ci * blkno - starting block number to be freed. 34962306a36Sopenharmony_ci * nblocks - number of blocks to be freed. 35062306a36Sopenharmony_ci * 35162306a36Sopenharmony_ci * RETURN VALUES: 35262306a36Sopenharmony_ci * 0 - success 35362306a36Sopenharmony_ci * -EIO - i/o error 35462306a36Sopenharmony_ci */ 35562306a36Sopenharmony_ciint dbFree(struct inode *ip, s64 blkno, s64 nblocks) 35662306a36Sopenharmony_ci{ 35762306a36Sopenharmony_ci struct metapage *mp; 35862306a36Sopenharmony_ci struct dmap *dp; 35962306a36Sopenharmony_ci int nb, rc; 36062306a36Sopenharmony_ci s64 lblkno, rem; 36162306a36Sopenharmony_ci struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; 36262306a36Sopenharmony_ci struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; 36362306a36Sopenharmony_ci struct super_block *sb = ipbmap->i_sb; 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ci IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci /* block to be freed better be within the mapsize. */ 36862306a36Sopenharmony_ci if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) { 36962306a36Sopenharmony_ci IREAD_UNLOCK(ipbmap); 37062306a36Sopenharmony_ci printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", 37162306a36Sopenharmony_ci (unsigned long long) blkno, 37262306a36Sopenharmony_ci (unsigned long long) nblocks); 37362306a36Sopenharmony_ci jfs_error(ip->i_sb, "block to be freed is outside the map\n"); 37462306a36Sopenharmony_ci return -EIO; 37562306a36Sopenharmony_ci } 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_ci /** 37862306a36Sopenharmony_ci * TRIM the blocks, when mounted with discard option 37962306a36Sopenharmony_ci */ 38062306a36Sopenharmony_ci if (JFS_SBI(sb)->flag & JFS_DISCARD) 38162306a36Sopenharmony_ci if (JFS_SBI(sb)->minblks_trim <= nblocks) 38262306a36Sopenharmony_ci jfs_issue_discard(ipbmap, blkno, nblocks); 38362306a36Sopenharmony_ci 38462306a36Sopenharmony_ci /* 38562306a36Sopenharmony_ci * free the blocks a dmap at a time. 38662306a36Sopenharmony_ci */ 38762306a36Sopenharmony_ci mp = NULL; 38862306a36Sopenharmony_ci for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) { 38962306a36Sopenharmony_ci /* release previous dmap if any */ 39062306a36Sopenharmony_ci if (mp) { 39162306a36Sopenharmony_ci write_metapage(mp); 39262306a36Sopenharmony_ci } 39362306a36Sopenharmony_ci 39462306a36Sopenharmony_ci /* get the buffer for the current dmap. */ 39562306a36Sopenharmony_ci lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); 39662306a36Sopenharmony_ci mp = read_metapage(ipbmap, lblkno, PSIZE, 0); 39762306a36Sopenharmony_ci if (mp == NULL) { 39862306a36Sopenharmony_ci IREAD_UNLOCK(ipbmap); 39962306a36Sopenharmony_ci return -EIO; 40062306a36Sopenharmony_ci } 40162306a36Sopenharmony_ci dp = (struct dmap *) mp->data; 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci /* determine the number of blocks to be freed from 40462306a36Sopenharmony_ci * this dmap. 40562306a36Sopenharmony_ci */ 40662306a36Sopenharmony_ci nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1))); 40762306a36Sopenharmony_ci 40862306a36Sopenharmony_ci /* free the blocks. */ 40962306a36Sopenharmony_ci if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) { 41062306a36Sopenharmony_ci jfs_error(ip->i_sb, "error in block map\n"); 41162306a36Sopenharmony_ci release_metapage(mp); 41262306a36Sopenharmony_ci IREAD_UNLOCK(ipbmap); 41362306a36Sopenharmony_ci return (rc); 41462306a36Sopenharmony_ci } 41562306a36Sopenharmony_ci } 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci /* write the last buffer. */ 41862306a36Sopenharmony_ci if (mp) 41962306a36Sopenharmony_ci write_metapage(mp); 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci IREAD_UNLOCK(ipbmap); 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_ci return (0); 42462306a36Sopenharmony_ci} 42562306a36Sopenharmony_ci 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci/* 42862306a36Sopenharmony_ci * NAME: dbUpdatePMap() 42962306a36Sopenharmony_ci * 43062306a36Sopenharmony_ci * FUNCTION: update the allocation state (free or allocate) of the 43162306a36Sopenharmony_ci * specified block range in the persistent block allocation map. 43262306a36Sopenharmony_ci * 43362306a36Sopenharmony_ci * the blocks will be updated in the persistent map one 43462306a36Sopenharmony_ci * dmap at a time. 43562306a36Sopenharmony_ci * 43662306a36Sopenharmony_ci * PARAMETERS: 43762306a36Sopenharmony_ci * ipbmap - pointer to in-core inode for the block map. 43862306a36Sopenharmony_ci * free - 'true' if block range is to be freed from the persistent 43962306a36Sopenharmony_ci * map; 'false' if it is to be allocated. 44062306a36Sopenharmony_ci * blkno - starting block number of the range. 44162306a36Sopenharmony_ci * nblocks - number of contiguous blocks in the range. 44262306a36Sopenharmony_ci * tblk - transaction block; 44362306a36Sopenharmony_ci * 44462306a36Sopenharmony_ci * RETURN VALUES: 44562306a36Sopenharmony_ci * 0 - success 44662306a36Sopenharmony_ci * -EIO - i/o error 44762306a36Sopenharmony_ci */ 44862306a36Sopenharmony_ciint 44962306a36Sopenharmony_cidbUpdatePMap(struct inode *ipbmap, 45062306a36Sopenharmony_ci int free, s64 blkno, s64 nblocks, struct tblock * tblk) 45162306a36Sopenharmony_ci{ 45262306a36Sopenharmony_ci int nblks, dbitno, wbitno, rbits; 45362306a36Sopenharmony_ci int word, nbits, nwords; 45462306a36Sopenharmony_ci struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; 45562306a36Sopenharmony_ci s64 lblkno, rem, lastlblkno; 45662306a36Sopenharmony_ci u32 mask; 45762306a36Sopenharmony_ci struct dmap *dp; 45862306a36Sopenharmony_ci struct metapage *mp; 45962306a36Sopenharmony_ci struct jfs_log *log; 46062306a36Sopenharmony_ci int lsn, difft, diffp; 46162306a36Sopenharmony_ci unsigned long flags; 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci /* the blocks better be within the mapsize. */ 46462306a36Sopenharmony_ci if (blkno + nblocks > bmp->db_mapsize) { 46562306a36Sopenharmony_ci printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", 46662306a36Sopenharmony_ci (unsigned long long) blkno, 46762306a36Sopenharmony_ci (unsigned long long) nblocks); 46862306a36Sopenharmony_ci jfs_error(ipbmap->i_sb, "blocks are outside the map\n"); 46962306a36Sopenharmony_ci return -EIO; 47062306a36Sopenharmony_ci } 47162306a36Sopenharmony_ci 47262306a36Sopenharmony_ci /* compute delta of transaction lsn from log syncpt */ 47362306a36Sopenharmony_ci lsn = tblk->lsn; 47462306a36Sopenharmony_ci log = (struct jfs_log *) JFS_SBI(tblk->sb)->log; 47562306a36Sopenharmony_ci logdiff(difft, lsn, log); 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci /* 47862306a36Sopenharmony_ci * update the block state a dmap at a time. 47962306a36Sopenharmony_ci */ 48062306a36Sopenharmony_ci mp = NULL; 48162306a36Sopenharmony_ci lastlblkno = 0; 48262306a36Sopenharmony_ci for (rem = nblocks; rem > 0; rem -= nblks, blkno += nblks) { 48362306a36Sopenharmony_ci /* get the buffer for the current dmap. */ 48462306a36Sopenharmony_ci lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); 48562306a36Sopenharmony_ci if (lblkno != lastlblkno) { 48662306a36Sopenharmony_ci if (mp) { 48762306a36Sopenharmony_ci write_metapage(mp); 48862306a36Sopenharmony_ci } 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_ci mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 49162306a36Sopenharmony_ci 0); 49262306a36Sopenharmony_ci if (mp == NULL) 49362306a36Sopenharmony_ci return -EIO; 49462306a36Sopenharmony_ci metapage_wait_for_io(mp); 49562306a36Sopenharmony_ci } 49662306a36Sopenharmony_ci dp = (struct dmap *) mp->data; 49762306a36Sopenharmony_ci 49862306a36Sopenharmony_ci /* determine the bit number and word within the dmap of 49962306a36Sopenharmony_ci * the starting block. also determine how many blocks 50062306a36Sopenharmony_ci * are to be updated within this dmap. 50162306a36Sopenharmony_ci */ 50262306a36Sopenharmony_ci dbitno = blkno & (BPERDMAP - 1); 50362306a36Sopenharmony_ci word = dbitno >> L2DBWORD; 50462306a36Sopenharmony_ci nblks = min(rem, (s64)BPERDMAP - dbitno); 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_ci /* update the bits of the dmap words. the first and last 50762306a36Sopenharmony_ci * words may only have a subset of their bits updated. if 50862306a36Sopenharmony_ci * this is the case, we'll work against that word (i.e. 50962306a36Sopenharmony_ci * partial first and/or last) only in a single pass. a 51062306a36Sopenharmony_ci * single pass will also be used to update all words that 51162306a36Sopenharmony_ci * are to have all their bits updated. 51262306a36Sopenharmony_ci */ 51362306a36Sopenharmony_ci for (rbits = nblks; rbits > 0; 51462306a36Sopenharmony_ci rbits -= nbits, dbitno += nbits) { 51562306a36Sopenharmony_ci /* determine the bit number within the word and 51662306a36Sopenharmony_ci * the number of bits within the word. 51762306a36Sopenharmony_ci */ 51862306a36Sopenharmony_ci wbitno = dbitno & (DBWORD - 1); 51962306a36Sopenharmony_ci nbits = min(rbits, DBWORD - wbitno); 52062306a36Sopenharmony_ci 52162306a36Sopenharmony_ci /* check if only part of the word is to be updated. */ 52262306a36Sopenharmony_ci if (nbits < DBWORD) { 52362306a36Sopenharmony_ci /* update (free or allocate) the bits 52462306a36Sopenharmony_ci * in this word. 52562306a36Sopenharmony_ci */ 52662306a36Sopenharmony_ci mask = 52762306a36Sopenharmony_ci (ONES << (DBWORD - nbits) >> wbitno); 52862306a36Sopenharmony_ci if (free) 52962306a36Sopenharmony_ci dp->pmap[word] &= 53062306a36Sopenharmony_ci cpu_to_le32(~mask); 53162306a36Sopenharmony_ci else 53262306a36Sopenharmony_ci dp->pmap[word] |= 53362306a36Sopenharmony_ci cpu_to_le32(mask); 53462306a36Sopenharmony_ci 53562306a36Sopenharmony_ci word += 1; 53662306a36Sopenharmony_ci } else { 53762306a36Sopenharmony_ci /* one or more words are to have all 53862306a36Sopenharmony_ci * their bits updated. determine how 53962306a36Sopenharmony_ci * many words and how many bits. 54062306a36Sopenharmony_ci */ 54162306a36Sopenharmony_ci nwords = rbits >> L2DBWORD; 54262306a36Sopenharmony_ci nbits = nwords << L2DBWORD; 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_ci /* update (free or allocate) the bits 54562306a36Sopenharmony_ci * in these words. 54662306a36Sopenharmony_ci */ 54762306a36Sopenharmony_ci if (free) 54862306a36Sopenharmony_ci memset(&dp->pmap[word], 0, 54962306a36Sopenharmony_ci nwords * 4); 55062306a36Sopenharmony_ci else 55162306a36Sopenharmony_ci memset(&dp->pmap[word], (int) ONES, 55262306a36Sopenharmony_ci nwords * 4); 55362306a36Sopenharmony_ci 55462306a36Sopenharmony_ci word += nwords; 55562306a36Sopenharmony_ci } 55662306a36Sopenharmony_ci } 55762306a36Sopenharmony_ci 55862306a36Sopenharmony_ci /* 55962306a36Sopenharmony_ci * update dmap lsn 56062306a36Sopenharmony_ci */ 56162306a36Sopenharmony_ci if (lblkno == lastlblkno) 56262306a36Sopenharmony_ci continue; 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci lastlblkno = lblkno; 56562306a36Sopenharmony_ci 56662306a36Sopenharmony_ci LOGSYNC_LOCK(log, flags); 56762306a36Sopenharmony_ci if (mp->lsn != 0) { 56862306a36Sopenharmony_ci /* inherit older/smaller lsn */ 56962306a36Sopenharmony_ci logdiff(diffp, mp->lsn, log); 57062306a36Sopenharmony_ci if (difft < diffp) { 57162306a36Sopenharmony_ci mp->lsn = lsn; 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci /* move bp after tblock in logsync list */ 57462306a36Sopenharmony_ci list_move(&mp->synclist, &tblk->synclist); 57562306a36Sopenharmony_ci } 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_ci /* inherit younger/larger clsn */ 57862306a36Sopenharmony_ci logdiff(difft, tblk->clsn, log); 57962306a36Sopenharmony_ci logdiff(diffp, mp->clsn, log); 58062306a36Sopenharmony_ci if (difft > diffp) 58162306a36Sopenharmony_ci mp->clsn = tblk->clsn; 58262306a36Sopenharmony_ci } else { 58362306a36Sopenharmony_ci mp->log = log; 58462306a36Sopenharmony_ci mp->lsn = lsn; 58562306a36Sopenharmony_ci 58662306a36Sopenharmony_ci /* insert bp after tblock in logsync list */ 58762306a36Sopenharmony_ci log->count++; 58862306a36Sopenharmony_ci list_add(&mp->synclist, &tblk->synclist); 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_ci mp->clsn = tblk->clsn; 59162306a36Sopenharmony_ci } 59262306a36Sopenharmony_ci LOGSYNC_UNLOCK(log, flags); 59362306a36Sopenharmony_ci } 59462306a36Sopenharmony_ci 59562306a36Sopenharmony_ci /* write the last buffer. */ 59662306a36Sopenharmony_ci if (mp) { 59762306a36Sopenharmony_ci write_metapage(mp); 59862306a36Sopenharmony_ci } 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_ci return (0); 60162306a36Sopenharmony_ci} 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_ci/* 60562306a36Sopenharmony_ci * NAME: dbNextAG() 60662306a36Sopenharmony_ci * 60762306a36Sopenharmony_ci * FUNCTION: find the preferred allocation group for new allocations. 60862306a36Sopenharmony_ci * 60962306a36Sopenharmony_ci * Within the allocation groups, we maintain a preferred 61062306a36Sopenharmony_ci * allocation group which consists of a group with at least 61162306a36Sopenharmony_ci * average free space. It is the preferred group that we target 61262306a36Sopenharmony_ci * new inode allocation towards. The tie-in between inode 61362306a36Sopenharmony_ci * allocation and block allocation occurs as we allocate the 61462306a36Sopenharmony_ci * first (data) block of an inode and specify the inode (block) 61562306a36Sopenharmony_ci * as the allocation hint for this block. 61662306a36Sopenharmony_ci * 61762306a36Sopenharmony_ci * We try to avoid having more than one open file growing in 61862306a36Sopenharmony_ci * an allocation group, as this will lead to fragmentation. 61962306a36Sopenharmony_ci * This differs from the old OS/2 method of trying to keep 62062306a36Sopenharmony_ci * empty ags around for large allocations. 62162306a36Sopenharmony_ci * 62262306a36Sopenharmony_ci * PARAMETERS: 62362306a36Sopenharmony_ci * ipbmap - pointer to in-core inode for the block map. 62462306a36Sopenharmony_ci * 62562306a36Sopenharmony_ci * RETURN VALUES: 62662306a36Sopenharmony_ci * the preferred allocation group number. 62762306a36Sopenharmony_ci */ 62862306a36Sopenharmony_ciint dbNextAG(struct inode *ipbmap) 62962306a36Sopenharmony_ci{ 63062306a36Sopenharmony_ci s64 avgfree; 63162306a36Sopenharmony_ci int agpref; 63262306a36Sopenharmony_ci s64 hwm = 0; 63362306a36Sopenharmony_ci int i; 63462306a36Sopenharmony_ci int next_best = -1; 63562306a36Sopenharmony_ci struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; 63662306a36Sopenharmony_ci 63762306a36Sopenharmony_ci BMAP_LOCK(bmp); 63862306a36Sopenharmony_ci 63962306a36Sopenharmony_ci /* determine the average number of free blocks within the ags. */ 64062306a36Sopenharmony_ci avgfree = (u32)bmp->db_nfree / bmp->db_numag; 64162306a36Sopenharmony_ci 64262306a36Sopenharmony_ci /* 64362306a36Sopenharmony_ci * if the current preferred ag does not have an active allocator 64462306a36Sopenharmony_ci * and has at least average freespace, return it 64562306a36Sopenharmony_ci */ 64662306a36Sopenharmony_ci agpref = bmp->db_agpref; 64762306a36Sopenharmony_ci if ((atomic_read(&bmp->db_active[agpref]) == 0) && 64862306a36Sopenharmony_ci (bmp->db_agfree[agpref] >= avgfree)) 64962306a36Sopenharmony_ci goto unlock; 65062306a36Sopenharmony_ci 65162306a36Sopenharmony_ci /* From the last preferred ag, find the next one with at least 65262306a36Sopenharmony_ci * average free space. 65362306a36Sopenharmony_ci */ 65462306a36Sopenharmony_ci for (i = 0 ; i < bmp->db_numag; i++, agpref++) { 65562306a36Sopenharmony_ci if (agpref == bmp->db_numag) 65662306a36Sopenharmony_ci agpref = 0; 65762306a36Sopenharmony_ci 65862306a36Sopenharmony_ci if (atomic_read(&bmp->db_active[agpref])) 65962306a36Sopenharmony_ci /* open file is currently growing in this ag */ 66062306a36Sopenharmony_ci continue; 66162306a36Sopenharmony_ci if (bmp->db_agfree[agpref] >= avgfree) { 66262306a36Sopenharmony_ci /* Return this one */ 66362306a36Sopenharmony_ci bmp->db_agpref = agpref; 66462306a36Sopenharmony_ci goto unlock; 66562306a36Sopenharmony_ci } else if (bmp->db_agfree[agpref] > hwm) { 66662306a36Sopenharmony_ci /* Less than avg. freespace, but best so far */ 66762306a36Sopenharmony_ci hwm = bmp->db_agfree[agpref]; 66862306a36Sopenharmony_ci next_best = agpref; 66962306a36Sopenharmony_ci } 67062306a36Sopenharmony_ci } 67162306a36Sopenharmony_ci 67262306a36Sopenharmony_ci /* 67362306a36Sopenharmony_ci * If no inactive ag was found with average freespace, use the 67462306a36Sopenharmony_ci * next best 67562306a36Sopenharmony_ci */ 67662306a36Sopenharmony_ci if (next_best != -1) 67762306a36Sopenharmony_ci bmp->db_agpref = next_best; 67862306a36Sopenharmony_ci /* else leave db_agpref unchanged */ 67962306a36Sopenharmony_ciunlock: 68062306a36Sopenharmony_ci BMAP_UNLOCK(bmp); 68162306a36Sopenharmony_ci 68262306a36Sopenharmony_ci /* return the preferred group. 68362306a36Sopenharmony_ci */ 68462306a36Sopenharmony_ci return (bmp->db_agpref); 68562306a36Sopenharmony_ci} 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_ci/* 68862306a36Sopenharmony_ci * NAME: dbAlloc() 68962306a36Sopenharmony_ci * 69062306a36Sopenharmony_ci * FUNCTION: attempt to allocate a specified number of contiguous free 69162306a36Sopenharmony_ci * blocks from the working allocation block map. 69262306a36Sopenharmony_ci * 69362306a36Sopenharmony_ci * the block allocation policy uses hints and a multi-step 69462306a36Sopenharmony_ci * approach. 69562306a36Sopenharmony_ci * 69662306a36Sopenharmony_ci * for allocation requests smaller than the number of blocks 69762306a36Sopenharmony_ci * per dmap, we first try to allocate the new blocks 69862306a36Sopenharmony_ci * immediately following the hint. if these blocks are not 69962306a36Sopenharmony_ci * available, we try to allocate blocks near the hint. if 70062306a36Sopenharmony_ci * no blocks near the hint are available, we next try to 70162306a36Sopenharmony_ci * allocate within the same dmap as contains the hint. 70262306a36Sopenharmony_ci * 70362306a36Sopenharmony_ci * if no blocks are available in the dmap or the allocation 70462306a36Sopenharmony_ci * request is larger than the dmap size, we try to allocate 70562306a36Sopenharmony_ci * within the same allocation group as contains the hint. if 70662306a36Sopenharmony_ci * this does not succeed, we finally try to allocate anywhere 70762306a36Sopenharmony_ci * within the aggregate. 70862306a36Sopenharmony_ci * 70962306a36Sopenharmony_ci * we also try to allocate anywhere within the aggregate 71062306a36Sopenharmony_ci * for allocation requests larger than the allocation group 71162306a36Sopenharmony_ci * size or requests that specify no hint value. 71262306a36Sopenharmony_ci * 71362306a36Sopenharmony_ci * PARAMETERS: 71462306a36Sopenharmony_ci * ip - pointer to in-core inode; 71562306a36Sopenharmony_ci * hint - allocation hint. 71662306a36Sopenharmony_ci * nblocks - number of contiguous blocks in the range. 71762306a36Sopenharmony_ci * results - on successful return, set to the starting block number 71862306a36Sopenharmony_ci * of the newly allocated contiguous range. 71962306a36Sopenharmony_ci * 72062306a36Sopenharmony_ci * RETURN VALUES: 72162306a36Sopenharmony_ci * 0 - success 72262306a36Sopenharmony_ci * -ENOSPC - insufficient disk resources 72362306a36Sopenharmony_ci * -EIO - i/o error 72462306a36Sopenharmony_ci */ 72562306a36Sopenharmony_ciint dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) 72662306a36Sopenharmony_ci{ 72762306a36Sopenharmony_ci int rc, agno; 72862306a36Sopenharmony_ci struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; 72962306a36Sopenharmony_ci struct bmap *bmp; 73062306a36Sopenharmony_ci struct metapage *mp; 73162306a36Sopenharmony_ci s64 lblkno, blkno; 73262306a36Sopenharmony_ci struct dmap *dp; 73362306a36Sopenharmony_ci int l2nb; 73462306a36Sopenharmony_ci s64 mapSize; 73562306a36Sopenharmony_ci int writers; 73662306a36Sopenharmony_ci 73762306a36Sopenharmony_ci /* assert that nblocks is valid */ 73862306a36Sopenharmony_ci assert(nblocks > 0); 73962306a36Sopenharmony_ci 74062306a36Sopenharmony_ci /* get the log2 number of blocks to be allocated. 74162306a36Sopenharmony_ci * if the number of blocks is not a log2 multiple, 74262306a36Sopenharmony_ci * it will be rounded up to the next log2 multiple. 74362306a36Sopenharmony_ci */ 74462306a36Sopenharmony_ci l2nb = BLKSTOL2(nblocks); 74562306a36Sopenharmony_ci 74662306a36Sopenharmony_ci bmp = JFS_SBI(ip->i_sb)->bmap; 74762306a36Sopenharmony_ci 74862306a36Sopenharmony_ci mapSize = bmp->db_mapsize; 74962306a36Sopenharmony_ci 75062306a36Sopenharmony_ci /* the hint should be within the map */ 75162306a36Sopenharmony_ci if (hint >= mapSize) { 75262306a36Sopenharmony_ci jfs_error(ip->i_sb, "the hint is outside the map\n"); 75362306a36Sopenharmony_ci return -EIO; 75462306a36Sopenharmony_ci } 75562306a36Sopenharmony_ci 75662306a36Sopenharmony_ci /* if the number of blocks to be allocated is greater than the 75762306a36Sopenharmony_ci * allocation group size, try to allocate anywhere. 75862306a36Sopenharmony_ci */ 75962306a36Sopenharmony_ci if (l2nb > bmp->db_agl2size) { 76062306a36Sopenharmony_ci IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); 76162306a36Sopenharmony_ci 76262306a36Sopenharmony_ci rc = dbAllocAny(bmp, nblocks, l2nb, results); 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_ci goto write_unlock; 76562306a36Sopenharmony_ci } 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_ci /* 76862306a36Sopenharmony_ci * If no hint, let dbNextAG recommend an allocation group 76962306a36Sopenharmony_ci */ 77062306a36Sopenharmony_ci if (hint == 0) 77162306a36Sopenharmony_ci goto pref_ag; 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_ci /* we would like to allocate close to the hint. adjust the 77462306a36Sopenharmony_ci * hint to the block following the hint since the allocators 77562306a36Sopenharmony_ci * will start looking for free space starting at this point. 77662306a36Sopenharmony_ci */ 77762306a36Sopenharmony_ci blkno = hint + 1; 77862306a36Sopenharmony_ci 77962306a36Sopenharmony_ci if (blkno >= bmp->db_mapsize) 78062306a36Sopenharmony_ci goto pref_ag; 78162306a36Sopenharmony_ci 78262306a36Sopenharmony_ci agno = blkno >> bmp->db_agl2size; 78362306a36Sopenharmony_ci 78462306a36Sopenharmony_ci /* check if blkno crosses over into a new allocation group. 78562306a36Sopenharmony_ci * if so, check if we should allow allocations within this 78662306a36Sopenharmony_ci * allocation group. 78762306a36Sopenharmony_ci */ 78862306a36Sopenharmony_ci if ((blkno & (bmp->db_agsize - 1)) == 0) 78962306a36Sopenharmony_ci /* check if the AG is currently being written to. 79062306a36Sopenharmony_ci * if so, call dbNextAG() to find a non-busy 79162306a36Sopenharmony_ci * AG with sufficient free space. 79262306a36Sopenharmony_ci */ 79362306a36Sopenharmony_ci if (atomic_read(&bmp->db_active[agno])) 79462306a36Sopenharmony_ci goto pref_ag; 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci /* check if the allocation request size can be satisfied from a 79762306a36Sopenharmony_ci * single dmap. if so, try to allocate from the dmap containing 79862306a36Sopenharmony_ci * the hint using a tiered strategy. 79962306a36Sopenharmony_ci */ 80062306a36Sopenharmony_ci if (nblocks <= BPERDMAP) { 80162306a36Sopenharmony_ci IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); 80262306a36Sopenharmony_ci 80362306a36Sopenharmony_ci /* get the buffer for the dmap containing the hint. 80462306a36Sopenharmony_ci */ 80562306a36Sopenharmony_ci rc = -EIO; 80662306a36Sopenharmony_ci lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); 80762306a36Sopenharmony_ci mp = read_metapage(ipbmap, lblkno, PSIZE, 0); 80862306a36Sopenharmony_ci if (mp == NULL) 80962306a36Sopenharmony_ci goto read_unlock; 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci dp = (struct dmap *) mp->data; 81262306a36Sopenharmony_ci 81362306a36Sopenharmony_ci /* first, try to satisfy the allocation request with the 81462306a36Sopenharmony_ci * blocks beginning at the hint. 81562306a36Sopenharmony_ci */ 81662306a36Sopenharmony_ci if ((rc = dbAllocNext(bmp, dp, blkno, (int) nblocks)) 81762306a36Sopenharmony_ci != -ENOSPC) { 81862306a36Sopenharmony_ci if (rc == 0) { 81962306a36Sopenharmony_ci *results = blkno; 82062306a36Sopenharmony_ci mark_metapage_dirty(mp); 82162306a36Sopenharmony_ci } 82262306a36Sopenharmony_ci 82362306a36Sopenharmony_ci release_metapage(mp); 82462306a36Sopenharmony_ci goto read_unlock; 82562306a36Sopenharmony_ci } 82662306a36Sopenharmony_ci 82762306a36Sopenharmony_ci writers = atomic_read(&bmp->db_active[agno]); 82862306a36Sopenharmony_ci if ((writers > 1) || 82962306a36Sopenharmony_ci ((writers == 1) && (JFS_IP(ip)->active_ag != agno))) { 83062306a36Sopenharmony_ci /* 83162306a36Sopenharmony_ci * Someone else is writing in this allocation 83262306a36Sopenharmony_ci * group. To avoid fragmenting, try another ag 83362306a36Sopenharmony_ci */ 83462306a36Sopenharmony_ci release_metapage(mp); 83562306a36Sopenharmony_ci IREAD_UNLOCK(ipbmap); 83662306a36Sopenharmony_ci goto pref_ag; 83762306a36Sopenharmony_ci } 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_ci /* next, try to satisfy the allocation request with blocks 84062306a36Sopenharmony_ci * near the hint. 84162306a36Sopenharmony_ci */ 84262306a36Sopenharmony_ci if ((rc = 84362306a36Sopenharmony_ci dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results)) 84462306a36Sopenharmony_ci != -ENOSPC) { 84562306a36Sopenharmony_ci if (rc == 0) 84662306a36Sopenharmony_ci mark_metapage_dirty(mp); 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_ci release_metapage(mp); 84962306a36Sopenharmony_ci goto read_unlock; 85062306a36Sopenharmony_ci } 85162306a36Sopenharmony_ci 85262306a36Sopenharmony_ci /* try to satisfy the allocation request with blocks within 85362306a36Sopenharmony_ci * the same dmap as the hint. 85462306a36Sopenharmony_ci */ 85562306a36Sopenharmony_ci if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results)) 85662306a36Sopenharmony_ci != -ENOSPC) { 85762306a36Sopenharmony_ci if (rc == 0) 85862306a36Sopenharmony_ci mark_metapage_dirty(mp); 85962306a36Sopenharmony_ci 86062306a36Sopenharmony_ci release_metapage(mp); 86162306a36Sopenharmony_ci goto read_unlock; 86262306a36Sopenharmony_ci } 86362306a36Sopenharmony_ci 86462306a36Sopenharmony_ci release_metapage(mp); 86562306a36Sopenharmony_ci IREAD_UNLOCK(ipbmap); 86662306a36Sopenharmony_ci } 86762306a36Sopenharmony_ci 86862306a36Sopenharmony_ci /* try to satisfy the allocation request with blocks within 86962306a36Sopenharmony_ci * the same allocation group as the hint. 87062306a36Sopenharmony_ci */ 87162306a36Sopenharmony_ci IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); 87262306a36Sopenharmony_ci if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC) 87362306a36Sopenharmony_ci goto write_unlock; 87462306a36Sopenharmony_ci 87562306a36Sopenharmony_ci IWRITE_UNLOCK(ipbmap); 87662306a36Sopenharmony_ci 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_ci pref_ag: 87962306a36Sopenharmony_ci /* 88062306a36Sopenharmony_ci * Let dbNextAG recommend a preferred allocation group 88162306a36Sopenharmony_ci */ 88262306a36Sopenharmony_ci agno = dbNextAG(ipbmap); 88362306a36Sopenharmony_ci IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); 88462306a36Sopenharmony_ci 88562306a36Sopenharmony_ci /* Try to allocate within this allocation group. if that fails, try to 88662306a36Sopenharmony_ci * allocate anywhere in the map. 88762306a36Sopenharmony_ci */ 88862306a36Sopenharmony_ci if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == -ENOSPC) 88962306a36Sopenharmony_ci rc = dbAllocAny(bmp, nblocks, l2nb, results); 89062306a36Sopenharmony_ci 89162306a36Sopenharmony_ci write_unlock: 89262306a36Sopenharmony_ci IWRITE_UNLOCK(ipbmap); 89362306a36Sopenharmony_ci 89462306a36Sopenharmony_ci return (rc); 89562306a36Sopenharmony_ci 89662306a36Sopenharmony_ci read_unlock: 89762306a36Sopenharmony_ci IREAD_UNLOCK(ipbmap); 89862306a36Sopenharmony_ci 89962306a36Sopenharmony_ci return (rc); 90062306a36Sopenharmony_ci} 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_ci/* 90362306a36Sopenharmony_ci * NAME: dbReAlloc() 90462306a36Sopenharmony_ci * 90562306a36Sopenharmony_ci * FUNCTION: attempt to extend a current allocation by a specified 90662306a36Sopenharmony_ci * number of blocks. 90762306a36Sopenharmony_ci * 90862306a36Sopenharmony_ci * this routine attempts to satisfy the allocation request 90962306a36Sopenharmony_ci * by first trying to extend the existing allocation in 91062306a36Sopenharmony_ci * place by allocating the additional blocks as the blocks 91162306a36Sopenharmony_ci * immediately following the current allocation. if these 91262306a36Sopenharmony_ci * blocks are not available, this routine will attempt to 91362306a36Sopenharmony_ci * allocate a new set of contiguous blocks large enough 91462306a36Sopenharmony_ci * to cover the existing allocation plus the additional 91562306a36Sopenharmony_ci * number of blocks required. 91662306a36Sopenharmony_ci * 91762306a36Sopenharmony_ci * PARAMETERS: 91862306a36Sopenharmony_ci * ip - pointer to in-core inode requiring allocation. 91962306a36Sopenharmony_ci * blkno - starting block of the current allocation. 92062306a36Sopenharmony_ci * nblocks - number of contiguous blocks within the current 92162306a36Sopenharmony_ci * allocation. 92262306a36Sopenharmony_ci * addnblocks - number of blocks to add to the allocation. 92362306a36Sopenharmony_ci * results - on successful return, set to the starting block number 92462306a36Sopenharmony_ci * of the existing allocation if the existing allocation 92562306a36Sopenharmony_ci * was extended in place or to a newly allocated contiguous 92662306a36Sopenharmony_ci * range if the existing allocation could not be extended 92762306a36Sopenharmony_ci * in place. 92862306a36Sopenharmony_ci * 92962306a36Sopenharmony_ci * RETURN VALUES: 93062306a36Sopenharmony_ci * 0 - success 93162306a36Sopenharmony_ci * -ENOSPC - insufficient disk resources 93262306a36Sopenharmony_ci * -EIO - i/o error 93362306a36Sopenharmony_ci */ 93462306a36Sopenharmony_ciint 93562306a36Sopenharmony_cidbReAlloc(struct inode *ip, 93662306a36Sopenharmony_ci s64 blkno, s64 nblocks, s64 addnblocks, s64 * results) 93762306a36Sopenharmony_ci{ 93862306a36Sopenharmony_ci int rc; 93962306a36Sopenharmony_ci 94062306a36Sopenharmony_ci /* try to extend the allocation in place. 94162306a36Sopenharmony_ci */ 94262306a36Sopenharmony_ci if ((rc = dbExtend(ip, blkno, nblocks, addnblocks)) == 0) { 94362306a36Sopenharmony_ci *results = blkno; 94462306a36Sopenharmony_ci return (0); 94562306a36Sopenharmony_ci } else { 94662306a36Sopenharmony_ci if (rc != -ENOSPC) 94762306a36Sopenharmony_ci return (rc); 94862306a36Sopenharmony_ci } 94962306a36Sopenharmony_ci 95062306a36Sopenharmony_ci /* could not extend the allocation in place, so allocate a 95162306a36Sopenharmony_ci * new set of blocks for the entire request (i.e. try to get 95262306a36Sopenharmony_ci * a range of contiguous blocks large enough to cover the 95362306a36Sopenharmony_ci * existing allocation plus the additional blocks.) 95462306a36Sopenharmony_ci */ 95562306a36Sopenharmony_ci return (dbAlloc 95662306a36Sopenharmony_ci (ip, blkno + nblocks - 1, addnblocks + nblocks, results)); 95762306a36Sopenharmony_ci} 95862306a36Sopenharmony_ci 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ci/* 96162306a36Sopenharmony_ci * NAME: dbExtend() 96262306a36Sopenharmony_ci * 96362306a36Sopenharmony_ci * FUNCTION: attempt to extend a current allocation by a specified 96462306a36Sopenharmony_ci * number of blocks. 96562306a36Sopenharmony_ci * 96662306a36Sopenharmony_ci * this routine attempts to satisfy the allocation request 96762306a36Sopenharmony_ci * by first trying to extend the existing allocation in 96862306a36Sopenharmony_ci * place by allocating the additional blocks as the blocks 96962306a36Sopenharmony_ci * immediately following the current allocation. 97062306a36Sopenharmony_ci * 97162306a36Sopenharmony_ci * PARAMETERS: 97262306a36Sopenharmony_ci * ip - pointer to in-core inode requiring allocation. 97362306a36Sopenharmony_ci * blkno - starting block of the current allocation. 97462306a36Sopenharmony_ci * nblocks - number of contiguous blocks within the current 97562306a36Sopenharmony_ci * allocation. 97662306a36Sopenharmony_ci * addnblocks - number of blocks to add to the allocation. 97762306a36Sopenharmony_ci * 97862306a36Sopenharmony_ci * RETURN VALUES: 97962306a36Sopenharmony_ci * 0 - success 98062306a36Sopenharmony_ci * -ENOSPC - insufficient disk resources 98162306a36Sopenharmony_ci * -EIO - i/o error 98262306a36Sopenharmony_ci */ 98362306a36Sopenharmony_cistatic int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) 98462306a36Sopenharmony_ci{ 98562306a36Sopenharmony_ci struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb); 98662306a36Sopenharmony_ci s64 lblkno, lastblkno, extblkno; 98762306a36Sopenharmony_ci uint rel_block; 98862306a36Sopenharmony_ci struct metapage *mp; 98962306a36Sopenharmony_ci struct dmap *dp; 99062306a36Sopenharmony_ci int rc; 99162306a36Sopenharmony_ci struct inode *ipbmap = sbi->ipbmap; 99262306a36Sopenharmony_ci struct bmap *bmp; 99362306a36Sopenharmony_ci 99462306a36Sopenharmony_ci /* 99562306a36Sopenharmony_ci * We don't want a non-aligned extent to cross a page boundary 99662306a36Sopenharmony_ci */ 99762306a36Sopenharmony_ci if (((rel_block = blkno & (sbi->nbperpage - 1))) && 99862306a36Sopenharmony_ci (rel_block + nblocks + addnblocks > sbi->nbperpage)) 99962306a36Sopenharmony_ci return -ENOSPC; 100062306a36Sopenharmony_ci 100162306a36Sopenharmony_ci /* get the last block of the current allocation */ 100262306a36Sopenharmony_ci lastblkno = blkno + nblocks - 1; 100362306a36Sopenharmony_ci 100462306a36Sopenharmony_ci /* determine the block number of the block following 100562306a36Sopenharmony_ci * the existing allocation. 100662306a36Sopenharmony_ci */ 100762306a36Sopenharmony_ci extblkno = lastblkno + 1; 100862306a36Sopenharmony_ci 100962306a36Sopenharmony_ci IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); 101062306a36Sopenharmony_ci 101162306a36Sopenharmony_ci /* better be within the file system */ 101262306a36Sopenharmony_ci bmp = sbi->bmap; 101362306a36Sopenharmony_ci if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) { 101462306a36Sopenharmony_ci IREAD_UNLOCK(ipbmap); 101562306a36Sopenharmony_ci jfs_error(ip->i_sb, "the block is outside the filesystem\n"); 101662306a36Sopenharmony_ci return -EIO; 101762306a36Sopenharmony_ci } 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci /* we'll attempt to extend the current allocation in place by 102062306a36Sopenharmony_ci * allocating the additional blocks as the blocks immediately 102162306a36Sopenharmony_ci * following the current allocation. we only try to extend the 102262306a36Sopenharmony_ci * current allocation in place if the number of additional blocks 102362306a36Sopenharmony_ci * can fit into a dmap, the last block of the current allocation 102462306a36Sopenharmony_ci * is not the last block of the file system, and the start of the 102562306a36Sopenharmony_ci * inplace extension is not on an allocation group boundary. 102662306a36Sopenharmony_ci */ 102762306a36Sopenharmony_ci if (addnblocks > BPERDMAP || extblkno >= bmp->db_mapsize || 102862306a36Sopenharmony_ci (extblkno & (bmp->db_agsize - 1)) == 0) { 102962306a36Sopenharmony_ci IREAD_UNLOCK(ipbmap); 103062306a36Sopenharmony_ci return -ENOSPC; 103162306a36Sopenharmony_ci } 103262306a36Sopenharmony_ci 103362306a36Sopenharmony_ci /* get the buffer for the dmap containing the first block 103462306a36Sopenharmony_ci * of the extension. 103562306a36Sopenharmony_ci */ 103662306a36Sopenharmony_ci lblkno = BLKTODMAP(extblkno, bmp->db_l2nbperpage); 103762306a36Sopenharmony_ci mp = read_metapage(ipbmap, lblkno, PSIZE, 0); 103862306a36Sopenharmony_ci if (mp == NULL) { 103962306a36Sopenharmony_ci IREAD_UNLOCK(ipbmap); 104062306a36Sopenharmony_ci return -EIO; 104162306a36Sopenharmony_ci } 104262306a36Sopenharmony_ci 104362306a36Sopenharmony_ci dp = (struct dmap *) mp->data; 104462306a36Sopenharmony_ci 104562306a36Sopenharmony_ci /* try to allocate the blocks immediately following the 104662306a36Sopenharmony_ci * current allocation. 104762306a36Sopenharmony_ci */ 104862306a36Sopenharmony_ci rc = dbAllocNext(bmp, dp, extblkno, (int) addnblocks); 104962306a36Sopenharmony_ci 105062306a36Sopenharmony_ci IREAD_UNLOCK(ipbmap); 105162306a36Sopenharmony_ci 105262306a36Sopenharmony_ci /* were we successful ? */ 105362306a36Sopenharmony_ci if (rc == 0) 105462306a36Sopenharmony_ci write_metapage(mp); 105562306a36Sopenharmony_ci else 105662306a36Sopenharmony_ci /* we were not successful */ 105762306a36Sopenharmony_ci release_metapage(mp); 105862306a36Sopenharmony_ci 105962306a36Sopenharmony_ci return (rc); 106062306a36Sopenharmony_ci} 106162306a36Sopenharmony_ci 106262306a36Sopenharmony_ci 106362306a36Sopenharmony_ci/* 106462306a36Sopenharmony_ci * NAME: dbAllocNext() 106562306a36Sopenharmony_ci * 106662306a36Sopenharmony_ci * FUNCTION: attempt to allocate the blocks of the specified block 106762306a36Sopenharmony_ci * range within a dmap. 106862306a36Sopenharmony_ci * 106962306a36Sopenharmony_ci * PARAMETERS: 107062306a36Sopenharmony_ci * bmp - pointer to bmap descriptor 107162306a36Sopenharmony_ci * dp - pointer to dmap. 107262306a36Sopenharmony_ci * blkno - starting block number of the range. 107362306a36Sopenharmony_ci * nblocks - number of contiguous free blocks of the range. 107462306a36Sopenharmony_ci * 107562306a36Sopenharmony_ci * RETURN VALUES: 107662306a36Sopenharmony_ci * 0 - success 107762306a36Sopenharmony_ci * -ENOSPC - insufficient disk resources 107862306a36Sopenharmony_ci * -EIO - i/o error 107962306a36Sopenharmony_ci * 108062306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) held on entry/exit; 108162306a36Sopenharmony_ci */ 108262306a36Sopenharmony_cistatic int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, 108362306a36Sopenharmony_ci int nblocks) 108462306a36Sopenharmony_ci{ 108562306a36Sopenharmony_ci int dbitno, word, rembits, nb, nwords, wbitno, nw; 108662306a36Sopenharmony_ci int l2size; 108762306a36Sopenharmony_ci s8 *leaf; 108862306a36Sopenharmony_ci u32 mask; 108962306a36Sopenharmony_ci 109062306a36Sopenharmony_ci if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { 109162306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n"); 109262306a36Sopenharmony_ci return -EIO; 109362306a36Sopenharmony_ci } 109462306a36Sopenharmony_ci 109562306a36Sopenharmony_ci /* pick up a pointer to the leaves of the dmap tree. 109662306a36Sopenharmony_ci */ 109762306a36Sopenharmony_ci leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx); 109862306a36Sopenharmony_ci 109962306a36Sopenharmony_ci /* determine the bit number and word within the dmap of the 110062306a36Sopenharmony_ci * starting block. 110162306a36Sopenharmony_ci */ 110262306a36Sopenharmony_ci dbitno = blkno & (BPERDMAP - 1); 110362306a36Sopenharmony_ci word = dbitno >> L2DBWORD; 110462306a36Sopenharmony_ci 110562306a36Sopenharmony_ci /* check if the specified block range is contained within 110662306a36Sopenharmony_ci * this dmap. 110762306a36Sopenharmony_ci */ 110862306a36Sopenharmony_ci if (dbitno + nblocks > BPERDMAP) 110962306a36Sopenharmony_ci return -ENOSPC; 111062306a36Sopenharmony_ci 111162306a36Sopenharmony_ci /* check if the starting leaf indicates that anything 111262306a36Sopenharmony_ci * is free. 111362306a36Sopenharmony_ci */ 111462306a36Sopenharmony_ci if (leaf[word] == NOFREE) 111562306a36Sopenharmony_ci return -ENOSPC; 111662306a36Sopenharmony_ci 111762306a36Sopenharmony_ci /* check the dmaps words corresponding to block range to see 111862306a36Sopenharmony_ci * if the block range is free. not all bits of the first and 111962306a36Sopenharmony_ci * last words may be contained within the block range. if this 112062306a36Sopenharmony_ci * is the case, we'll work against those words (i.e. partial first 112162306a36Sopenharmony_ci * and/or last) on an individual basis (a single pass) and examine 112262306a36Sopenharmony_ci * the actual bits to determine if they are free. a single pass 112362306a36Sopenharmony_ci * will be used for all dmap words fully contained within the 112462306a36Sopenharmony_ci * specified range. within this pass, the leaves of the dmap 112562306a36Sopenharmony_ci * tree will be examined to determine if the blocks are free. a 112662306a36Sopenharmony_ci * single leaf may describe the free space of multiple dmap 112762306a36Sopenharmony_ci * words, so we may visit only a subset of the actual leaves 112862306a36Sopenharmony_ci * corresponding to the dmap words of the block range. 112962306a36Sopenharmony_ci */ 113062306a36Sopenharmony_ci for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { 113162306a36Sopenharmony_ci /* determine the bit number within the word and 113262306a36Sopenharmony_ci * the number of bits within the word. 113362306a36Sopenharmony_ci */ 113462306a36Sopenharmony_ci wbitno = dbitno & (DBWORD - 1); 113562306a36Sopenharmony_ci nb = min(rembits, DBWORD - wbitno); 113662306a36Sopenharmony_ci 113762306a36Sopenharmony_ci /* check if only part of the word is to be examined. 113862306a36Sopenharmony_ci */ 113962306a36Sopenharmony_ci if (nb < DBWORD) { 114062306a36Sopenharmony_ci /* check if the bits are free. 114162306a36Sopenharmony_ci */ 114262306a36Sopenharmony_ci mask = (ONES << (DBWORD - nb) >> wbitno); 114362306a36Sopenharmony_ci if ((mask & ~le32_to_cpu(dp->wmap[word])) != mask) 114462306a36Sopenharmony_ci return -ENOSPC; 114562306a36Sopenharmony_ci 114662306a36Sopenharmony_ci word += 1; 114762306a36Sopenharmony_ci } else { 114862306a36Sopenharmony_ci /* one or more dmap words are fully contained 114962306a36Sopenharmony_ci * within the block range. determine how many 115062306a36Sopenharmony_ci * words and how many bits. 115162306a36Sopenharmony_ci */ 115262306a36Sopenharmony_ci nwords = rembits >> L2DBWORD; 115362306a36Sopenharmony_ci nb = nwords << L2DBWORD; 115462306a36Sopenharmony_ci 115562306a36Sopenharmony_ci /* now examine the appropriate leaves to determine 115662306a36Sopenharmony_ci * if the blocks are free. 115762306a36Sopenharmony_ci */ 115862306a36Sopenharmony_ci while (nwords > 0) { 115962306a36Sopenharmony_ci /* does the leaf describe any free space ? 116062306a36Sopenharmony_ci */ 116162306a36Sopenharmony_ci if (leaf[word] < BUDMIN) 116262306a36Sopenharmony_ci return -ENOSPC; 116362306a36Sopenharmony_ci 116462306a36Sopenharmony_ci /* determine the l2 number of bits provided 116562306a36Sopenharmony_ci * by this leaf. 116662306a36Sopenharmony_ci */ 116762306a36Sopenharmony_ci l2size = 116862306a36Sopenharmony_ci min_t(int, leaf[word], NLSTOL2BSZ(nwords)); 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci /* determine how many words were handled. 117162306a36Sopenharmony_ci */ 117262306a36Sopenharmony_ci nw = BUDSIZE(l2size, BUDMIN); 117362306a36Sopenharmony_ci 117462306a36Sopenharmony_ci nwords -= nw; 117562306a36Sopenharmony_ci word += nw; 117662306a36Sopenharmony_ci } 117762306a36Sopenharmony_ci } 117862306a36Sopenharmony_ci } 117962306a36Sopenharmony_ci 118062306a36Sopenharmony_ci /* allocate the blocks. 118162306a36Sopenharmony_ci */ 118262306a36Sopenharmony_ci return (dbAllocDmap(bmp, dp, blkno, nblocks)); 118362306a36Sopenharmony_ci} 118462306a36Sopenharmony_ci 118562306a36Sopenharmony_ci 118662306a36Sopenharmony_ci/* 118762306a36Sopenharmony_ci * NAME: dbAllocNear() 118862306a36Sopenharmony_ci * 118962306a36Sopenharmony_ci * FUNCTION: attempt to allocate a number of contiguous free blocks near 119062306a36Sopenharmony_ci * a specified block (hint) within a dmap. 119162306a36Sopenharmony_ci * 119262306a36Sopenharmony_ci * starting with the dmap leaf that covers the hint, we'll 119362306a36Sopenharmony_ci * check the next four contiguous leaves for sufficient free 119462306a36Sopenharmony_ci * space. if sufficient free space is found, we'll allocate 119562306a36Sopenharmony_ci * the desired free space. 119662306a36Sopenharmony_ci * 119762306a36Sopenharmony_ci * PARAMETERS: 119862306a36Sopenharmony_ci * bmp - pointer to bmap descriptor 119962306a36Sopenharmony_ci * dp - pointer to dmap. 120062306a36Sopenharmony_ci * blkno - block number to allocate near. 120162306a36Sopenharmony_ci * nblocks - actual number of contiguous free blocks desired. 120262306a36Sopenharmony_ci * l2nb - log2 number of contiguous free blocks desired. 120362306a36Sopenharmony_ci * results - on successful return, set to the starting block number 120462306a36Sopenharmony_ci * of the newly allocated range. 120562306a36Sopenharmony_ci * 120662306a36Sopenharmony_ci * RETURN VALUES: 120762306a36Sopenharmony_ci * 0 - success 120862306a36Sopenharmony_ci * -ENOSPC - insufficient disk resources 120962306a36Sopenharmony_ci * -EIO - i/o error 121062306a36Sopenharmony_ci * 121162306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) held on entry/exit; 121262306a36Sopenharmony_ci */ 121362306a36Sopenharmony_cistatic int 121462306a36Sopenharmony_cidbAllocNear(struct bmap * bmp, 121562306a36Sopenharmony_ci struct dmap * dp, s64 blkno, int nblocks, int l2nb, s64 * results) 121662306a36Sopenharmony_ci{ 121762306a36Sopenharmony_ci int word, lword, rc; 121862306a36Sopenharmony_ci s8 *leaf; 121962306a36Sopenharmony_ci 122062306a36Sopenharmony_ci if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { 122162306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n"); 122262306a36Sopenharmony_ci return -EIO; 122362306a36Sopenharmony_ci } 122462306a36Sopenharmony_ci 122562306a36Sopenharmony_ci leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx); 122662306a36Sopenharmony_ci 122762306a36Sopenharmony_ci /* determine the word within the dmap that holds the hint 122862306a36Sopenharmony_ci * (i.e. blkno). also, determine the last word in the dmap 122962306a36Sopenharmony_ci * that we'll include in our examination. 123062306a36Sopenharmony_ci */ 123162306a36Sopenharmony_ci word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; 123262306a36Sopenharmony_ci lword = min(word + 4, LPERDMAP); 123362306a36Sopenharmony_ci 123462306a36Sopenharmony_ci /* examine the leaves for sufficient free space. 123562306a36Sopenharmony_ci */ 123662306a36Sopenharmony_ci for (; word < lword; word++) { 123762306a36Sopenharmony_ci /* does the leaf describe sufficient free space ? 123862306a36Sopenharmony_ci */ 123962306a36Sopenharmony_ci if (leaf[word] < l2nb) 124062306a36Sopenharmony_ci continue; 124162306a36Sopenharmony_ci 124262306a36Sopenharmony_ci /* determine the block number within the file system 124362306a36Sopenharmony_ci * of the first block described by this dmap word. 124462306a36Sopenharmony_ci */ 124562306a36Sopenharmony_ci blkno = le64_to_cpu(dp->start) + (word << L2DBWORD); 124662306a36Sopenharmony_ci 124762306a36Sopenharmony_ci /* if not all bits of the dmap word are free, get the 124862306a36Sopenharmony_ci * starting bit number within the dmap word of the required 124962306a36Sopenharmony_ci * string of free bits and adjust the block number with the 125062306a36Sopenharmony_ci * value. 125162306a36Sopenharmony_ci */ 125262306a36Sopenharmony_ci if (leaf[word] < BUDMIN) 125362306a36Sopenharmony_ci blkno += 125462306a36Sopenharmony_ci dbFindBits(le32_to_cpu(dp->wmap[word]), l2nb); 125562306a36Sopenharmony_ci 125662306a36Sopenharmony_ci /* allocate the blocks. 125762306a36Sopenharmony_ci */ 125862306a36Sopenharmony_ci if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0) 125962306a36Sopenharmony_ci *results = blkno; 126062306a36Sopenharmony_ci 126162306a36Sopenharmony_ci return (rc); 126262306a36Sopenharmony_ci } 126362306a36Sopenharmony_ci 126462306a36Sopenharmony_ci return -ENOSPC; 126562306a36Sopenharmony_ci} 126662306a36Sopenharmony_ci 126762306a36Sopenharmony_ci 126862306a36Sopenharmony_ci/* 126962306a36Sopenharmony_ci * NAME: dbAllocAG() 127062306a36Sopenharmony_ci * 127162306a36Sopenharmony_ci * FUNCTION: attempt to allocate the specified number of contiguous 127262306a36Sopenharmony_ci * free blocks within the specified allocation group. 127362306a36Sopenharmony_ci * 127462306a36Sopenharmony_ci * unless the allocation group size is equal to the number 127562306a36Sopenharmony_ci * of blocks per dmap, the dmap control pages will be used to 127662306a36Sopenharmony_ci * find the required free space, if available. we start the 127762306a36Sopenharmony_ci * search at the highest dmap control page level which 127862306a36Sopenharmony_ci * distinctly describes the allocation group's free space 127962306a36Sopenharmony_ci * (i.e. the highest level at which the allocation group's 128062306a36Sopenharmony_ci * free space is not mixed in with that of any other group). 128162306a36Sopenharmony_ci * in addition, we start the search within this level at a 128262306a36Sopenharmony_ci * height of the dmapctl dmtree at which the nodes distinctly 128362306a36Sopenharmony_ci * describe the allocation group's free space. at this height, 128462306a36Sopenharmony_ci * the allocation group's free space may be represented by 1 128562306a36Sopenharmony_ci * or two sub-trees, depending on the allocation group size. 128662306a36Sopenharmony_ci * we search the top nodes of these subtrees left to right for 128762306a36Sopenharmony_ci * sufficient free space. if sufficient free space is found, 128862306a36Sopenharmony_ci * the subtree is searched to find the leftmost leaf that 128962306a36Sopenharmony_ci * has free space. once we have made it to the leaf, we 129062306a36Sopenharmony_ci * move the search to the next lower level dmap control page 129162306a36Sopenharmony_ci * corresponding to this leaf. we continue down the dmap control 129262306a36Sopenharmony_ci * pages until we find the dmap that contains or starts the 129362306a36Sopenharmony_ci * sufficient free space and we allocate at this dmap. 129462306a36Sopenharmony_ci * 129562306a36Sopenharmony_ci * if the allocation group size is equal to the dmap size, 129662306a36Sopenharmony_ci * we'll start at the dmap corresponding to the allocation 129762306a36Sopenharmony_ci * group and attempt the allocation at this level. 129862306a36Sopenharmony_ci * 129962306a36Sopenharmony_ci * the dmap control page search is also not performed if the 130062306a36Sopenharmony_ci * allocation group is completely free and we go to the first 130162306a36Sopenharmony_ci * dmap of the allocation group to do the allocation. this is 130262306a36Sopenharmony_ci * done because the allocation group may be part (not the first 130362306a36Sopenharmony_ci * part) of a larger binary buddy system, causing the dmap 130462306a36Sopenharmony_ci * control pages to indicate no free space (NOFREE) within 130562306a36Sopenharmony_ci * the allocation group. 130662306a36Sopenharmony_ci * 130762306a36Sopenharmony_ci * PARAMETERS: 130862306a36Sopenharmony_ci * bmp - pointer to bmap descriptor 130962306a36Sopenharmony_ci * agno - allocation group number. 131062306a36Sopenharmony_ci * nblocks - actual number of contiguous free blocks desired. 131162306a36Sopenharmony_ci * l2nb - log2 number of contiguous free blocks desired. 131262306a36Sopenharmony_ci * results - on successful return, set to the starting block number 131362306a36Sopenharmony_ci * of the newly allocated range. 131462306a36Sopenharmony_ci * 131562306a36Sopenharmony_ci * RETURN VALUES: 131662306a36Sopenharmony_ci * 0 - success 131762306a36Sopenharmony_ci * -ENOSPC - insufficient disk resources 131862306a36Sopenharmony_ci * -EIO - i/o error 131962306a36Sopenharmony_ci * 132062306a36Sopenharmony_ci * note: IWRITE_LOCK(ipmap) held on entry/exit; 132162306a36Sopenharmony_ci */ 132262306a36Sopenharmony_cistatic int 132362306a36Sopenharmony_cidbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) 132462306a36Sopenharmony_ci{ 132562306a36Sopenharmony_ci struct metapage *mp; 132662306a36Sopenharmony_ci struct dmapctl *dcp; 132762306a36Sopenharmony_ci int rc, ti, i, k, m, n, agperlev; 132862306a36Sopenharmony_ci s64 blkno, lblkno; 132962306a36Sopenharmony_ci int budmin; 133062306a36Sopenharmony_ci 133162306a36Sopenharmony_ci /* allocation request should not be for more than the 133262306a36Sopenharmony_ci * allocation group size. 133362306a36Sopenharmony_ci */ 133462306a36Sopenharmony_ci if (l2nb > bmp->db_agl2size) { 133562306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, 133662306a36Sopenharmony_ci "allocation request is larger than the allocation group size\n"); 133762306a36Sopenharmony_ci return -EIO; 133862306a36Sopenharmony_ci } 133962306a36Sopenharmony_ci 134062306a36Sopenharmony_ci /* determine the starting block number of the allocation 134162306a36Sopenharmony_ci * group. 134262306a36Sopenharmony_ci */ 134362306a36Sopenharmony_ci blkno = (s64) agno << bmp->db_agl2size; 134462306a36Sopenharmony_ci 134562306a36Sopenharmony_ci /* check if the allocation group size is the minimum allocation 134662306a36Sopenharmony_ci * group size or if the allocation group is completely free. if 134762306a36Sopenharmony_ci * the allocation group size is the minimum size of BPERDMAP (i.e. 134862306a36Sopenharmony_ci * 1 dmap), there is no need to search the dmap control page (below) 134962306a36Sopenharmony_ci * that fully describes the allocation group since the allocation 135062306a36Sopenharmony_ci * group is already fully described by a dmap. in this case, we 135162306a36Sopenharmony_ci * just call dbAllocCtl() to search the dmap tree and allocate the 135262306a36Sopenharmony_ci * required space if available. 135362306a36Sopenharmony_ci * 135462306a36Sopenharmony_ci * if the allocation group is completely free, dbAllocCtl() is 135562306a36Sopenharmony_ci * also called to allocate the required space. this is done for 135662306a36Sopenharmony_ci * two reasons. first, it makes no sense searching the dmap control 135762306a36Sopenharmony_ci * pages for free space when we know that free space exists. second, 135862306a36Sopenharmony_ci * the dmap control pages may indicate that the allocation group 135962306a36Sopenharmony_ci * has no free space if the allocation group is part (not the first 136062306a36Sopenharmony_ci * part) of a larger binary buddy system. 136162306a36Sopenharmony_ci */ 136262306a36Sopenharmony_ci if (bmp->db_agsize == BPERDMAP 136362306a36Sopenharmony_ci || bmp->db_agfree[agno] == bmp->db_agsize) { 136462306a36Sopenharmony_ci rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); 136562306a36Sopenharmony_ci if ((rc == -ENOSPC) && 136662306a36Sopenharmony_ci (bmp->db_agfree[agno] == bmp->db_agsize)) { 136762306a36Sopenharmony_ci printk(KERN_ERR "blkno = %Lx, blocks = %Lx\n", 136862306a36Sopenharmony_ci (unsigned long long) blkno, 136962306a36Sopenharmony_ci (unsigned long long) nblocks); 137062306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, 137162306a36Sopenharmony_ci "dbAllocCtl failed in free AG\n"); 137262306a36Sopenharmony_ci } 137362306a36Sopenharmony_ci return (rc); 137462306a36Sopenharmony_ci } 137562306a36Sopenharmony_ci 137662306a36Sopenharmony_ci /* the buffer for the dmap control page that fully describes the 137762306a36Sopenharmony_ci * allocation group. 137862306a36Sopenharmony_ci */ 137962306a36Sopenharmony_ci lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, bmp->db_aglevel); 138062306a36Sopenharmony_ci mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); 138162306a36Sopenharmony_ci if (mp == NULL) 138262306a36Sopenharmony_ci return -EIO; 138362306a36Sopenharmony_ci dcp = (struct dmapctl *) mp->data; 138462306a36Sopenharmony_ci budmin = dcp->budmin; 138562306a36Sopenharmony_ci 138662306a36Sopenharmony_ci if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { 138762306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n"); 138862306a36Sopenharmony_ci release_metapage(mp); 138962306a36Sopenharmony_ci return -EIO; 139062306a36Sopenharmony_ci } 139162306a36Sopenharmony_ci 139262306a36Sopenharmony_ci /* search the subtree(s) of the dmap control page that describes 139362306a36Sopenharmony_ci * the allocation group, looking for sufficient free space. to begin, 139462306a36Sopenharmony_ci * determine how many allocation groups are represented in a dmap 139562306a36Sopenharmony_ci * control page at the control page level (i.e. L0, L1, L2) that 139662306a36Sopenharmony_ci * fully describes an allocation group. next, determine the starting 139762306a36Sopenharmony_ci * tree index of this allocation group within the control page. 139862306a36Sopenharmony_ci */ 139962306a36Sopenharmony_ci agperlev = 140062306a36Sopenharmony_ci (1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth; 140162306a36Sopenharmony_ci ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1)); 140262306a36Sopenharmony_ci 140362306a36Sopenharmony_ci /* dmap control page trees fan-out by 4 and a single allocation 140462306a36Sopenharmony_ci * group may be described by 1 or 2 subtrees within the ag level 140562306a36Sopenharmony_ci * dmap control page, depending upon the ag size. examine the ag's 140662306a36Sopenharmony_ci * subtrees for sufficient free space, starting with the leftmost 140762306a36Sopenharmony_ci * subtree. 140862306a36Sopenharmony_ci */ 140962306a36Sopenharmony_ci for (i = 0; i < bmp->db_agwidth; i++, ti++) { 141062306a36Sopenharmony_ci /* is there sufficient free space ? 141162306a36Sopenharmony_ci */ 141262306a36Sopenharmony_ci if (l2nb > dcp->stree[ti]) 141362306a36Sopenharmony_ci continue; 141462306a36Sopenharmony_ci 141562306a36Sopenharmony_ci /* sufficient free space found in a subtree. now search down 141662306a36Sopenharmony_ci * the subtree to find the leftmost leaf that describes this 141762306a36Sopenharmony_ci * free space. 141862306a36Sopenharmony_ci */ 141962306a36Sopenharmony_ci for (k = bmp->db_agheight; k > 0; k--) { 142062306a36Sopenharmony_ci for (n = 0, m = (ti << 2) + 1; n < 4; n++) { 142162306a36Sopenharmony_ci if (l2nb <= dcp->stree[m + n]) { 142262306a36Sopenharmony_ci ti = m + n; 142362306a36Sopenharmony_ci break; 142462306a36Sopenharmony_ci } 142562306a36Sopenharmony_ci } 142662306a36Sopenharmony_ci if (n == 4) { 142762306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, 142862306a36Sopenharmony_ci "failed descending stree\n"); 142962306a36Sopenharmony_ci release_metapage(mp); 143062306a36Sopenharmony_ci return -EIO; 143162306a36Sopenharmony_ci } 143262306a36Sopenharmony_ci } 143362306a36Sopenharmony_ci 143462306a36Sopenharmony_ci /* determine the block number within the file system 143562306a36Sopenharmony_ci * that corresponds to this leaf. 143662306a36Sopenharmony_ci */ 143762306a36Sopenharmony_ci if (bmp->db_aglevel == 2) 143862306a36Sopenharmony_ci blkno = 0; 143962306a36Sopenharmony_ci else if (bmp->db_aglevel == 1) 144062306a36Sopenharmony_ci blkno &= ~(MAXL1SIZE - 1); 144162306a36Sopenharmony_ci else /* bmp->db_aglevel == 0 */ 144262306a36Sopenharmony_ci blkno &= ~(MAXL0SIZE - 1); 144362306a36Sopenharmony_ci 144462306a36Sopenharmony_ci blkno += 144562306a36Sopenharmony_ci ((s64) (ti - le32_to_cpu(dcp->leafidx))) << budmin; 144662306a36Sopenharmony_ci 144762306a36Sopenharmony_ci /* release the buffer in preparation for going down 144862306a36Sopenharmony_ci * the next level of dmap control pages. 144962306a36Sopenharmony_ci */ 145062306a36Sopenharmony_ci release_metapage(mp); 145162306a36Sopenharmony_ci 145262306a36Sopenharmony_ci /* check if we need to continue to search down the lower 145362306a36Sopenharmony_ci * level dmap control pages. we need to if the number of 145462306a36Sopenharmony_ci * blocks required is less than maximum number of blocks 145562306a36Sopenharmony_ci * described at the next lower level. 145662306a36Sopenharmony_ci */ 145762306a36Sopenharmony_ci if (l2nb < budmin) { 145862306a36Sopenharmony_ci 145962306a36Sopenharmony_ci /* search the lower level dmap control pages to get 146062306a36Sopenharmony_ci * the starting block number of the dmap that 146162306a36Sopenharmony_ci * contains or starts off the free space. 146262306a36Sopenharmony_ci */ 146362306a36Sopenharmony_ci if ((rc = 146462306a36Sopenharmony_ci dbFindCtl(bmp, l2nb, bmp->db_aglevel - 1, 146562306a36Sopenharmony_ci &blkno))) { 146662306a36Sopenharmony_ci if (rc == -ENOSPC) { 146762306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, 146862306a36Sopenharmony_ci "control page inconsistent\n"); 146962306a36Sopenharmony_ci return -EIO; 147062306a36Sopenharmony_ci } 147162306a36Sopenharmony_ci return (rc); 147262306a36Sopenharmony_ci } 147362306a36Sopenharmony_ci } 147462306a36Sopenharmony_ci 147562306a36Sopenharmony_ci /* allocate the blocks. 147662306a36Sopenharmony_ci */ 147762306a36Sopenharmony_ci rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); 147862306a36Sopenharmony_ci if (rc == -ENOSPC) { 147962306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, 148062306a36Sopenharmony_ci "unable to allocate blocks\n"); 148162306a36Sopenharmony_ci rc = -EIO; 148262306a36Sopenharmony_ci } 148362306a36Sopenharmony_ci return (rc); 148462306a36Sopenharmony_ci } 148562306a36Sopenharmony_ci 148662306a36Sopenharmony_ci /* no space in the allocation group. release the buffer and 148762306a36Sopenharmony_ci * return -ENOSPC. 148862306a36Sopenharmony_ci */ 148962306a36Sopenharmony_ci release_metapage(mp); 149062306a36Sopenharmony_ci 149162306a36Sopenharmony_ci return -ENOSPC; 149262306a36Sopenharmony_ci} 149362306a36Sopenharmony_ci 149462306a36Sopenharmony_ci 149562306a36Sopenharmony_ci/* 149662306a36Sopenharmony_ci * NAME: dbAllocAny() 149762306a36Sopenharmony_ci * 149862306a36Sopenharmony_ci * FUNCTION: attempt to allocate the specified number of contiguous 149962306a36Sopenharmony_ci * free blocks anywhere in the file system. 150062306a36Sopenharmony_ci * 150162306a36Sopenharmony_ci * dbAllocAny() attempts to find the sufficient free space by 150262306a36Sopenharmony_ci * searching down the dmap control pages, starting with the 150362306a36Sopenharmony_ci * highest level (i.e. L0, L1, L2) control page. if free space 150462306a36Sopenharmony_ci * large enough to satisfy the desired free space is found, the 150562306a36Sopenharmony_ci * desired free space is allocated. 150662306a36Sopenharmony_ci * 150762306a36Sopenharmony_ci * PARAMETERS: 150862306a36Sopenharmony_ci * bmp - pointer to bmap descriptor 150962306a36Sopenharmony_ci * nblocks - actual number of contiguous free blocks desired. 151062306a36Sopenharmony_ci * l2nb - log2 number of contiguous free blocks desired. 151162306a36Sopenharmony_ci * results - on successful return, set to the starting block number 151262306a36Sopenharmony_ci * of the newly allocated range. 151362306a36Sopenharmony_ci * 151462306a36Sopenharmony_ci * RETURN VALUES: 151562306a36Sopenharmony_ci * 0 - success 151662306a36Sopenharmony_ci * -ENOSPC - insufficient disk resources 151762306a36Sopenharmony_ci * -EIO - i/o error 151862306a36Sopenharmony_ci * 151962306a36Sopenharmony_ci * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; 152062306a36Sopenharmony_ci */ 152162306a36Sopenharmony_cistatic int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) 152262306a36Sopenharmony_ci{ 152362306a36Sopenharmony_ci int rc; 152462306a36Sopenharmony_ci s64 blkno = 0; 152562306a36Sopenharmony_ci 152662306a36Sopenharmony_ci /* starting with the top level dmap control page, search 152762306a36Sopenharmony_ci * down the dmap control levels for sufficient free space. 152862306a36Sopenharmony_ci * if free space is found, dbFindCtl() returns the starting 152962306a36Sopenharmony_ci * block number of the dmap that contains or starts off the 153062306a36Sopenharmony_ci * range of free space. 153162306a36Sopenharmony_ci */ 153262306a36Sopenharmony_ci if ((rc = dbFindCtl(bmp, l2nb, bmp->db_maxlevel, &blkno))) 153362306a36Sopenharmony_ci return (rc); 153462306a36Sopenharmony_ci 153562306a36Sopenharmony_ci /* allocate the blocks. 153662306a36Sopenharmony_ci */ 153762306a36Sopenharmony_ci rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); 153862306a36Sopenharmony_ci if (rc == -ENOSPC) { 153962306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, "unable to allocate blocks\n"); 154062306a36Sopenharmony_ci return -EIO; 154162306a36Sopenharmony_ci } 154262306a36Sopenharmony_ci return (rc); 154362306a36Sopenharmony_ci} 154462306a36Sopenharmony_ci 154562306a36Sopenharmony_ci 154662306a36Sopenharmony_ci/* 154762306a36Sopenharmony_ci * NAME: dbDiscardAG() 154862306a36Sopenharmony_ci * 154962306a36Sopenharmony_ci * FUNCTION: attempt to discard (TRIM) all free blocks of specific AG 155062306a36Sopenharmony_ci * 155162306a36Sopenharmony_ci * algorithm: 155262306a36Sopenharmony_ci * 1) allocate blocks, as large as possible and save them 155362306a36Sopenharmony_ci * while holding IWRITE_LOCK on ipbmap 155462306a36Sopenharmony_ci * 2) trim all these saved block/length values 155562306a36Sopenharmony_ci * 3) mark the blocks free again 155662306a36Sopenharmony_ci * 155762306a36Sopenharmony_ci * benefit: 155862306a36Sopenharmony_ci * - we work only on one ag at some time, minimizing how long we 155962306a36Sopenharmony_ci * need to lock ipbmap 156062306a36Sopenharmony_ci * - reading / writing the fs is possible most time, even on 156162306a36Sopenharmony_ci * trimming 156262306a36Sopenharmony_ci * 156362306a36Sopenharmony_ci * downside: 156462306a36Sopenharmony_ci * - we write two times to the dmapctl and dmap pages 156562306a36Sopenharmony_ci * - but for me, this seems the best way, better ideas? 156662306a36Sopenharmony_ci * /TR 2012 156762306a36Sopenharmony_ci * 156862306a36Sopenharmony_ci * PARAMETERS: 156962306a36Sopenharmony_ci * ip - pointer to in-core inode 157062306a36Sopenharmony_ci * agno - ag to trim 157162306a36Sopenharmony_ci * minlen - minimum value of contiguous blocks 157262306a36Sopenharmony_ci * 157362306a36Sopenharmony_ci * RETURN VALUES: 157462306a36Sopenharmony_ci * s64 - actual number of blocks trimmed 157562306a36Sopenharmony_ci */ 157662306a36Sopenharmony_cis64 dbDiscardAG(struct inode *ip, int agno, s64 minlen) 157762306a36Sopenharmony_ci{ 157862306a36Sopenharmony_ci struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; 157962306a36Sopenharmony_ci struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; 158062306a36Sopenharmony_ci s64 nblocks, blkno; 158162306a36Sopenharmony_ci u64 trimmed = 0; 158262306a36Sopenharmony_ci int rc, l2nb; 158362306a36Sopenharmony_ci struct super_block *sb = ipbmap->i_sb; 158462306a36Sopenharmony_ci 158562306a36Sopenharmony_ci struct range2trim { 158662306a36Sopenharmony_ci u64 blkno; 158762306a36Sopenharmony_ci u64 nblocks; 158862306a36Sopenharmony_ci } *totrim, *tt; 158962306a36Sopenharmony_ci 159062306a36Sopenharmony_ci /* max blkno / nblocks pairs to trim */ 159162306a36Sopenharmony_ci int count = 0, range_cnt; 159262306a36Sopenharmony_ci u64 max_ranges; 159362306a36Sopenharmony_ci 159462306a36Sopenharmony_ci /* prevent others from writing new stuff here, while trimming */ 159562306a36Sopenharmony_ci IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP); 159662306a36Sopenharmony_ci 159762306a36Sopenharmony_ci nblocks = bmp->db_agfree[agno]; 159862306a36Sopenharmony_ci max_ranges = nblocks; 159962306a36Sopenharmony_ci do_div(max_ranges, minlen); 160062306a36Sopenharmony_ci range_cnt = min_t(u64, max_ranges + 1, 32 * 1024); 160162306a36Sopenharmony_ci totrim = kmalloc_array(range_cnt, sizeof(struct range2trim), GFP_NOFS); 160262306a36Sopenharmony_ci if (totrim == NULL) { 160362306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, "no memory for trim array\n"); 160462306a36Sopenharmony_ci IWRITE_UNLOCK(ipbmap); 160562306a36Sopenharmony_ci return 0; 160662306a36Sopenharmony_ci } 160762306a36Sopenharmony_ci 160862306a36Sopenharmony_ci tt = totrim; 160962306a36Sopenharmony_ci while (nblocks >= minlen) { 161062306a36Sopenharmony_ci l2nb = BLKSTOL2(nblocks); 161162306a36Sopenharmony_ci 161262306a36Sopenharmony_ci /* 0 = okay, -EIO = fatal, -ENOSPC -> try smaller block */ 161362306a36Sopenharmony_ci rc = dbAllocAG(bmp, agno, nblocks, l2nb, &blkno); 161462306a36Sopenharmony_ci if (rc == 0) { 161562306a36Sopenharmony_ci tt->blkno = blkno; 161662306a36Sopenharmony_ci tt->nblocks = nblocks; 161762306a36Sopenharmony_ci tt++; count++; 161862306a36Sopenharmony_ci 161962306a36Sopenharmony_ci /* the whole ag is free, trim now */ 162062306a36Sopenharmony_ci if (bmp->db_agfree[agno] == 0) 162162306a36Sopenharmony_ci break; 162262306a36Sopenharmony_ci 162362306a36Sopenharmony_ci /* give a hint for the next while */ 162462306a36Sopenharmony_ci nblocks = bmp->db_agfree[agno]; 162562306a36Sopenharmony_ci continue; 162662306a36Sopenharmony_ci } else if (rc == -ENOSPC) { 162762306a36Sopenharmony_ci /* search for next smaller log2 block */ 162862306a36Sopenharmony_ci l2nb = BLKSTOL2(nblocks) - 1; 162962306a36Sopenharmony_ci nblocks = 1LL << l2nb; 163062306a36Sopenharmony_ci } else { 163162306a36Sopenharmony_ci /* Trim any already allocated blocks */ 163262306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, "-EIO\n"); 163362306a36Sopenharmony_ci break; 163462306a36Sopenharmony_ci } 163562306a36Sopenharmony_ci 163662306a36Sopenharmony_ci /* check, if our trim array is full */ 163762306a36Sopenharmony_ci if (unlikely(count >= range_cnt - 1)) 163862306a36Sopenharmony_ci break; 163962306a36Sopenharmony_ci } 164062306a36Sopenharmony_ci IWRITE_UNLOCK(ipbmap); 164162306a36Sopenharmony_ci 164262306a36Sopenharmony_ci tt->nblocks = 0; /* mark the current end */ 164362306a36Sopenharmony_ci for (tt = totrim; tt->nblocks != 0; tt++) { 164462306a36Sopenharmony_ci /* when mounted with online discard, dbFree() will 164562306a36Sopenharmony_ci * call jfs_issue_discard() itself */ 164662306a36Sopenharmony_ci if (!(JFS_SBI(sb)->flag & JFS_DISCARD)) 164762306a36Sopenharmony_ci jfs_issue_discard(ip, tt->blkno, tt->nblocks); 164862306a36Sopenharmony_ci dbFree(ip, tt->blkno, tt->nblocks); 164962306a36Sopenharmony_ci trimmed += tt->nblocks; 165062306a36Sopenharmony_ci } 165162306a36Sopenharmony_ci kfree(totrim); 165262306a36Sopenharmony_ci 165362306a36Sopenharmony_ci return trimmed; 165462306a36Sopenharmony_ci} 165562306a36Sopenharmony_ci 165662306a36Sopenharmony_ci/* 165762306a36Sopenharmony_ci * NAME: dbFindCtl() 165862306a36Sopenharmony_ci * 165962306a36Sopenharmony_ci * FUNCTION: starting at a specified dmap control page level and block 166062306a36Sopenharmony_ci * number, search down the dmap control levels for a range of 166162306a36Sopenharmony_ci * contiguous free blocks large enough to satisfy an allocation 166262306a36Sopenharmony_ci * request for the specified number of free blocks. 166362306a36Sopenharmony_ci * 166462306a36Sopenharmony_ci * if sufficient contiguous free blocks are found, this routine 166562306a36Sopenharmony_ci * returns the starting block number within a dmap page that 166662306a36Sopenharmony_ci * contains or starts a range of contiqious free blocks that 166762306a36Sopenharmony_ci * is sufficient in size. 166862306a36Sopenharmony_ci * 166962306a36Sopenharmony_ci * PARAMETERS: 167062306a36Sopenharmony_ci * bmp - pointer to bmap descriptor 167162306a36Sopenharmony_ci * level - starting dmap control page level. 167262306a36Sopenharmony_ci * l2nb - log2 number of contiguous free blocks desired. 167362306a36Sopenharmony_ci * *blkno - on entry, starting block number for conducting the search. 167462306a36Sopenharmony_ci * on successful return, the first block within a dmap page 167562306a36Sopenharmony_ci * that contains or starts a range of contiguous free blocks. 167662306a36Sopenharmony_ci * 167762306a36Sopenharmony_ci * RETURN VALUES: 167862306a36Sopenharmony_ci * 0 - success 167962306a36Sopenharmony_ci * -ENOSPC - insufficient disk resources 168062306a36Sopenharmony_ci * -EIO - i/o error 168162306a36Sopenharmony_ci * 168262306a36Sopenharmony_ci * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; 168362306a36Sopenharmony_ci */ 168462306a36Sopenharmony_cistatic int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) 168562306a36Sopenharmony_ci{ 168662306a36Sopenharmony_ci int rc, leafidx, lev; 168762306a36Sopenharmony_ci s64 b, lblkno; 168862306a36Sopenharmony_ci struct dmapctl *dcp; 168962306a36Sopenharmony_ci int budmin; 169062306a36Sopenharmony_ci struct metapage *mp; 169162306a36Sopenharmony_ci 169262306a36Sopenharmony_ci /* starting at the specified dmap control page level and block 169362306a36Sopenharmony_ci * number, search down the dmap control levels for the starting 169462306a36Sopenharmony_ci * block number of a dmap page that contains or starts off 169562306a36Sopenharmony_ci * sufficient free blocks. 169662306a36Sopenharmony_ci */ 169762306a36Sopenharmony_ci for (lev = level, b = *blkno; lev >= 0; lev--) { 169862306a36Sopenharmony_ci /* get the buffer of the dmap control page for the block 169962306a36Sopenharmony_ci * number and level (i.e. L0, L1, L2). 170062306a36Sopenharmony_ci */ 170162306a36Sopenharmony_ci lblkno = BLKTOCTL(b, bmp->db_l2nbperpage, lev); 170262306a36Sopenharmony_ci mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); 170362306a36Sopenharmony_ci if (mp == NULL) 170462306a36Sopenharmony_ci return -EIO; 170562306a36Sopenharmony_ci dcp = (struct dmapctl *) mp->data; 170662306a36Sopenharmony_ci budmin = dcp->budmin; 170762306a36Sopenharmony_ci 170862306a36Sopenharmony_ci if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { 170962306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, 171062306a36Sopenharmony_ci "Corrupt dmapctl page\n"); 171162306a36Sopenharmony_ci release_metapage(mp); 171262306a36Sopenharmony_ci return -EIO; 171362306a36Sopenharmony_ci } 171462306a36Sopenharmony_ci 171562306a36Sopenharmony_ci /* search the tree within the dmap control page for 171662306a36Sopenharmony_ci * sufficient free space. if sufficient free space is found, 171762306a36Sopenharmony_ci * dbFindLeaf() returns the index of the leaf at which 171862306a36Sopenharmony_ci * free space was found. 171962306a36Sopenharmony_ci */ 172062306a36Sopenharmony_ci rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx, true); 172162306a36Sopenharmony_ci 172262306a36Sopenharmony_ci /* release the buffer. 172362306a36Sopenharmony_ci */ 172462306a36Sopenharmony_ci release_metapage(mp); 172562306a36Sopenharmony_ci 172662306a36Sopenharmony_ci /* space found ? 172762306a36Sopenharmony_ci */ 172862306a36Sopenharmony_ci if (rc) { 172962306a36Sopenharmony_ci if (lev != level) { 173062306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, 173162306a36Sopenharmony_ci "dmap inconsistent\n"); 173262306a36Sopenharmony_ci return -EIO; 173362306a36Sopenharmony_ci } 173462306a36Sopenharmony_ci return -ENOSPC; 173562306a36Sopenharmony_ci } 173662306a36Sopenharmony_ci 173762306a36Sopenharmony_ci /* adjust the block number to reflect the location within 173862306a36Sopenharmony_ci * the dmap control page (i.e. the leaf) at which free 173962306a36Sopenharmony_ci * space was found. 174062306a36Sopenharmony_ci */ 174162306a36Sopenharmony_ci b += (((s64) leafidx) << budmin); 174262306a36Sopenharmony_ci 174362306a36Sopenharmony_ci /* we stop the search at this dmap control page level if 174462306a36Sopenharmony_ci * the number of blocks required is greater than or equal 174562306a36Sopenharmony_ci * to the maximum number of blocks described at the next 174662306a36Sopenharmony_ci * (lower) level. 174762306a36Sopenharmony_ci */ 174862306a36Sopenharmony_ci if (l2nb >= budmin) 174962306a36Sopenharmony_ci break; 175062306a36Sopenharmony_ci } 175162306a36Sopenharmony_ci 175262306a36Sopenharmony_ci *blkno = b; 175362306a36Sopenharmony_ci return (0); 175462306a36Sopenharmony_ci} 175562306a36Sopenharmony_ci 175662306a36Sopenharmony_ci 175762306a36Sopenharmony_ci/* 175862306a36Sopenharmony_ci * NAME: dbAllocCtl() 175962306a36Sopenharmony_ci * 176062306a36Sopenharmony_ci * FUNCTION: attempt to allocate a specified number of contiguous 176162306a36Sopenharmony_ci * blocks starting within a specific dmap. 176262306a36Sopenharmony_ci * 176362306a36Sopenharmony_ci * this routine is called by higher level routines that search 176462306a36Sopenharmony_ci * the dmap control pages above the actual dmaps for contiguous 176562306a36Sopenharmony_ci * free space. the result of successful searches by these 176662306a36Sopenharmony_ci * routines are the starting block numbers within dmaps, with 176762306a36Sopenharmony_ci * the dmaps themselves containing the desired contiguous free 176862306a36Sopenharmony_ci * space or starting a contiguous free space of desired size 176962306a36Sopenharmony_ci * that is made up of the blocks of one or more dmaps. these 177062306a36Sopenharmony_ci * calls should not fail due to insufficent resources. 177162306a36Sopenharmony_ci * 177262306a36Sopenharmony_ci * this routine is called in some cases where it is not known 177362306a36Sopenharmony_ci * whether it will fail due to insufficient resources. more 177462306a36Sopenharmony_ci * specifically, this occurs when allocating from an allocation 177562306a36Sopenharmony_ci * group whose size is equal to the number of blocks per dmap. 177662306a36Sopenharmony_ci * in this case, the dmap control pages are not examined prior 177762306a36Sopenharmony_ci * to calling this routine (to save pathlength) and the call 177862306a36Sopenharmony_ci * might fail. 177962306a36Sopenharmony_ci * 178062306a36Sopenharmony_ci * for a request size that fits within a dmap, this routine relies 178162306a36Sopenharmony_ci * upon the dmap's dmtree to find the requested contiguous free 178262306a36Sopenharmony_ci * space. for request sizes that are larger than a dmap, the 178362306a36Sopenharmony_ci * requested free space will start at the first block of the 178462306a36Sopenharmony_ci * first dmap (i.e. blkno). 178562306a36Sopenharmony_ci * 178662306a36Sopenharmony_ci * PARAMETERS: 178762306a36Sopenharmony_ci * bmp - pointer to bmap descriptor 178862306a36Sopenharmony_ci * nblocks - actual number of contiguous free blocks to allocate. 178962306a36Sopenharmony_ci * l2nb - log2 number of contiguous free blocks to allocate. 179062306a36Sopenharmony_ci * blkno - starting block number of the dmap to start the allocation 179162306a36Sopenharmony_ci * from. 179262306a36Sopenharmony_ci * results - on successful return, set to the starting block number 179362306a36Sopenharmony_ci * of the newly allocated range. 179462306a36Sopenharmony_ci * 179562306a36Sopenharmony_ci * RETURN VALUES: 179662306a36Sopenharmony_ci * 0 - success 179762306a36Sopenharmony_ci * -ENOSPC - insufficient disk resources 179862306a36Sopenharmony_ci * -EIO - i/o error 179962306a36Sopenharmony_ci * 180062306a36Sopenharmony_ci * serialization: IWRITE_LOCK(ipbmap) held on entry/exit; 180162306a36Sopenharmony_ci */ 180262306a36Sopenharmony_cistatic int 180362306a36Sopenharmony_cidbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) 180462306a36Sopenharmony_ci{ 180562306a36Sopenharmony_ci int rc, nb; 180662306a36Sopenharmony_ci s64 b, lblkno, n; 180762306a36Sopenharmony_ci struct metapage *mp; 180862306a36Sopenharmony_ci struct dmap *dp; 180962306a36Sopenharmony_ci 181062306a36Sopenharmony_ci /* check if the allocation request is confined to a single dmap. 181162306a36Sopenharmony_ci */ 181262306a36Sopenharmony_ci if (l2nb <= L2BPERDMAP) { 181362306a36Sopenharmony_ci /* get the buffer for the dmap. 181462306a36Sopenharmony_ci */ 181562306a36Sopenharmony_ci lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); 181662306a36Sopenharmony_ci mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); 181762306a36Sopenharmony_ci if (mp == NULL) 181862306a36Sopenharmony_ci return -EIO; 181962306a36Sopenharmony_ci dp = (struct dmap *) mp->data; 182062306a36Sopenharmony_ci 182162306a36Sopenharmony_ci /* try to allocate the blocks. 182262306a36Sopenharmony_ci */ 182362306a36Sopenharmony_ci rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results); 182462306a36Sopenharmony_ci if (rc == 0) 182562306a36Sopenharmony_ci mark_metapage_dirty(mp); 182662306a36Sopenharmony_ci 182762306a36Sopenharmony_ci release_metapage(mp); 182862306a36Sopenharmony_ci 182962306a36Sopenharmony_ci return (rc); 183062306a36Sopenharmony_ci } 183162306a36Sopenharmony_ci 183262306a36Sopenharmony_ci /* allocation request involving multiple dmaps. it must start on 183362306a36Sopenharmony_ci * a dmap boundary. 183462306a36Sopenharmony_ci */ 183562306a36Sopenharmony_ci assert((blkno & (BPERDMAP - 1)) == 0); 183662306a36Sopenharmony_ci 183762306a36Sopenharmony_ci /* allocate the blocks dmap by dmap. 183862306a36Sopenharmony_ci */ 183962306a36Sopenharmony_ci for (n = nblocks, b = blkno; n > 0; n -= nb, b += nb) { 184062306a36Sopenharmony_ci /* get the buffer for the dmap. 184162306a36Sopenharmony_ci */ 184262306a36Sopenharmony_ci lblkno = BLKTODMAP(b, bmp->db_l2nbperpage); 184362306a36Sopenharmony_ci mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); 184462306a36Sopenharmony_ci if (mp == NULL) { 184562306a36Sopenharmony_ci rc = -EIO; 184662306a36Sopenharmony_ci goto backout; 184762306a36Sopenharmony_ci } 184862306a36Sopenharmony_ci dp = (struct dmap *) mp->data; 184962306a36Sopenharmony_ci 185062306a36Sopenharmony_ci /* the dmap better be all free. 185162306a36Sopenharmony_ci */ 185262306a36Sopenharmony_ci if (dp->tree.stree[ROOT] != L2BPERDMAP) { 185362306a36Sopenharmony_ci release_metapage(mp); 185462306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, 185562306a36Sopenharmony_ci "the dmap is not all free\n"); 185662306a36Sopenharmony_ci rc = -EIO; 185762306a36Sopenharmony_ci goto backout; 185862306a36Sopenharmony_ci } 185962306a36Sopenharmony_ci 186062306a36Sopenharmony_ci /* determine how many blocks to allocate from this dmap. 186162306a36Sopenharmony_ci */ 186262306a36Sopenharmony_ci nb = min_t(s64, n, BPERDMAP); 186362306a36Sopenharmony_ci 186462306a36Sopenharmony_ci /* allocate the blocks from the dmap. 186562306a36Sopenharmony_ci */ 186662306a36Sopenharmony_ci if ((rc = dbAllocDmap(bmp, dp, b, nb))) { 186762306a36Sopenharmony_ci release_metapage(mp); 186862306a36Sopenharmony_ci goto backout; 186962306a36Sopenharmony_ci } 187062306a36Sopenharmony_ci 187162306a36Sopenharmony_ci /* write the buffer. 187262306a36Sopenharmony_ci */ 187362306a36Sopenharmony_ci write_metapage(mp); 187462306a36Sopenharmony_ci } 187562306a36Sopenharmony_ci 187662306a36Sopenharmony_ci /* set the results (starting block number) and return. 187762306a36Sopenharmony_ci */ 187862306a36Sopenharmony_ci *results = blkno; 187962306a36Sopenharmony_ci return (0); 188062306a36Sopenharmony_ci 188162306a36Sopenharmony_ci /* something failed in handling an allocation request involving 188262306a36Sopenharmony_ci * multiple dmaps. we'll try to clean up by backing out any 188362306a36Sopenharmony_ci * allocation that has already happened for this request. if 188462306a36Sopenharmony_ci * we fail in backing out the allocation, we'll mark the file 188562306a36Sopenharmony_ci * system to indicate that blocks have been leaked. 188662306a36Sopenharmony_ci */ 188762306a36Sopenharmony_ci backout: 188862306a36Sopenharmony_ci 188962306a36Sopenharmony_ci /* try to backout the allocations dmap by dmap. 189062306a36Sopenharmony_ci */ 189162306a36Sopenharmony_ci for (n = nblocks - n, b = blkno; n > 0; 189262306a36Sopenharmony_ci n -= BPERDMAP, b += BPERDMAP) { 189362306a36Sopenharmony_ci /* get the buffer for this dmap. 189462306a36Sopenharmony_ci */ 189562306a36Sopenharmony_ci lblkno = BLKTODMAP(b, bmp->db_l2nbperpage); 189662306a36Sopenharmony_ci mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); 189762306a36Sopenharmony_ci if (mp == NULL) { 189862306a36Sopenharmony_ci /* could not back out. mark the file system 189962306a36Sopenharmony_ci * to indicate that we have leaked blocks. 190062306a36Sopenharmony_ci */ 190162306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, 190262306a36Sopenharmony_ci "I/O Error: Block Leakage\n"); 190362306a36Sopenharmony_ci continue; 190462306a36Sopenharmony_ci } 190562306a36Sopenharmony_ci dp = (struct dmap *) mp->data; 190662306a36Sopenharmony_ci 190762306a36Sopenharmony_ci /* free the blocks is this dmap. 190862306a36Sopenharmony_ci */ 190962306a36Sopenharmony_ci if (dbFreeDmap(bmp, dp, b, BPERDMAP)) { 191062306a36Sopenharmony_ci /* could not back out. mark the file system 191162306a36Sopenharmony_ci * to indicate that we have leaked blocks. 191262306a36Sopenharmony_ci */ 191362306a36Sopenharmony_ci release_metapage(mp); 191462306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, "Block Leakage\n"); 191562306a36Sopenharmony_ci continue; 191662306a36Sopenharmony_ci } 191762306a36Sopenharmony_ci 191862306a36Sopenharmony_ci /* write the buffer. 191962306a36Sopenharmony_ci */ 192062306a36Sopenharmony_ci write_metapage(mp); 192162306a36Sopenharmony_ci } 192262306a36Sopenharmony_ci 192362306a36Sopenharmony_ci return (rc); 192462306a36Sopenharmony_ci} 192562306a36Sopenharmony_ci 192662306a36Sopenharmony_ci 192762306a36Sopenharmony_ci/* 192862306a36Sopenharmony_ci * NAME: dbAllocDmapLev() 192962306a36Sopenharmony_ci * 193062306a36Sopenharmony_ci * FUNCTION: attempt to allocate a specified number of contiguous blocks 193162306a36Sopenharmony_ci * from a specified dmap. 193262306a36Sopenharmony_ci * 193362306a36Sopenharmony_ci * this routine checks if the contiguous blocks are available. 193462306a36Sopenharmony_ci * if so, nblocks of blocks are allocated; otherwise, ENOSPC is 193562306a36Sopenharmony_ci * returned. 193662306a36Sopenharmony_ci * 193762306a36Sopenharmony_ci * PARAMETERS: 193862306a36Sopenharmony_ci * mp - pointer to bmap descriptor 193962306a36Sopenharmony_ci * dp - pointer to dmap to attempt to allocate blocks from. 194062306a36Sopenharmony_ci * l2nb - log2 number of contiguous block desired. 194162306a36Sopenharmony_ci * nblocks - actual number of contiguous block desired. 194262306a36Sopenharmony_ci * results - on successful return, set to the starting block number 194362306a36Sopenharmony_ci * of the newly allocated range. 194462306a36Sopenharmony_ci * 194562306a36Sopenharmony_ci * RETURN VALUES: 194662306a36Sopenharmony_ci * 0 - success 194762306a36Sopenharmony_ci * -ENOSPC - insufficient disk resources 194862306a36Sopenharmony_ci * -EIO - i/o error 194962306a36Sopenharmony_ci * 195062306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or 195162306a36Sopenharmony_ci * IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit; 195262306a36Sopenharmony_ci */ 195362306a36Sopenharmony_cistatic int 195462306a36Sopenharmony_cidbAllocDmapLev(struct bmap * bmp, 195562306a36Sopenharmony_ci struct dmap * dp, int nblocks, int l2nb, s64 * results) 195662306a36Sopenharmony_ci{ 195762306a36Sopenharmony_ci s64 blkno; 195862306a36Sopenharmony_ci int leafidx, rc; 195962306a36Sopenharmony_ci 196062306a36Sopenharmony_ci /* can't be more than a dmaps worth of blocks */ 196162306a36Sopenharmony_ci assert(l2nb <= L2BPERDMAP); 196262306a36Sopenharmony_ci 196362306a36Sopenharmony_ci /* search the tree within the dmap page for sufficient 196462306a36Sopenharmony_ci * free space. if sufficient free space is found, dbFindLeaf() 196562306a36Sopenharmony_ci * returns the index of the leaf at which free space was found. 196662306a36Sopenharmony_ci */ 196762306a36Sopenharmony_ci if (dbFindLeaf((dmtree_t *) &dp->tree, l2nb, &leafidx, false)) 196862306a36Sopenharmony_ci return -ENOSPC; 196962306a36Sopenharmony_ci 197062306a36Sopenharmony_ci if (leafidx < 0) 197162306a36Sopenharmony_ci return -EIO; 197262306a36Sopenharmony_ci 197362306a36Sopenharmony_ci /* determine the block number within the file system corresponding 197462306a36Sopenharmony_ci * to the leaf at which free space was found. 197562306a36Sopenharmony_ci */ 197662306a36Sopenharmony_ci blkno = le64_to_cpu(dp->start) + (leafidx << L2DBWORD); 197762306a36Sopenharmony_ci 197862306a36Sopenharmony_ci /* if not all bits of the dmap word are free, get the starting 197962306a36Sopenharmony_ci * bit number within the dmap word of the required string of free 198062306a36Sopenharmony_ci * bits and adjust the block number with this value. 198162306a36Sopenharmony_ci */ 198262306a36Sopenharmony_ci if (dp->tree.stree[leafidx + LEAFIND] < BUDMIN) 198362306a36Sopenharmony_ci blkno += dbFindBits(le32_to_cpu(dp->wmap[leafidx]), l2nb); 198462306a36Sopenharmony_ci 198562306a36Sopenharmony_ci /* allocate the blocks */ 198662306a36Sopenharmony_ci if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0) 198762306a36Sopenharmony_ci *results = blkno; 198862306a36Sopenharmony_ci 198962306a36Sopenharmony_ci return (rc); 199062306a36Sopenharmony_ci} 199162306a36Sopenharmony_ci 199262306a36Sopenharmony_ci 199362306a36Sopenharmony_ci/* 199462306a36Sopenharmony_ci * NAME: dbAllocDmap() 199562306a36Sopenharmony_ci * 199662306a36Sopenharmony_ci * FUNCTION: adjust the disk allocation map to reflect the allocation 199762306a36Sopenharmony_ci * of a specified block range within a dmap. 199862306a36Sopenharmony_ci * 199962306a36Sopenharmony_ci * this routine allocates the specified blocks from the dmap 200062306a36Sopenharmony_ci * through a call to dbAllocBits(). if the allocation of the 200162306a36Sopenharmony_ci * block range causes the maximum string of free blocks within 200262306a36Sopenharmony_ci * the dmap to change (i.e. the value of the root of the dmap's 200362306a36Sopenharmony_ci * dmtree), this routine will cause this change to be reflected 200462306a36Sopenharmony_ci * up through the appropriate levels of the dmap control pages 200562306a36Sopenharmony_ci * by a call to dbAdjCtl() for the L0 dmap control page that 200662306a36Sopenharmony_ci * covers this dmap. 200762306a36Sopenharmony_ci * 200862306a36Sopenharmony_ci * PARAMETERS: 200962306a36Sopenharmony_ci * bmp - pointer to bmap descriptor 201062306a36Sopenharmony_ci * dp - pointer to dmap to allocate the block range from. 201162306a36Sopenharmony_ci * blkno - starting block number of the block to be allocated. 201262306a36Sopenharmony_ci * nblocks - number of blocks to be allocated. 201362306a36Sopenharmony_ci * 201462306a36Sopenharmony_ci * RETURN VALUES: 201562306a36Sopenharmony_ci * 0 - success 201662306a36Sopenharmony_ci * -EIO - i/o error 201762306a36Sopenharmony_ci * 201862306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 201962306a36Sopenharmony_ci */ 202062306a36Sopenharmony_cistatic int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, 202162306a36Sopenharmony_ci int nblocks) 202262306a36Sopenharmony_ci{ 202362306a36Sopenharmony_ci s8 oldroot; 202462306a36Sopenharmony_ci int rc; 202562306a36Sopenharmony_ci 202662306a36Sopenharmony_ci /* save the current value of the root (i.e. maximum free string) 202762306a36Sopenharmony_ci * of the dmap tree. 202862306a36Sopenharmony_ci */ 202962306a36Sopenharmony_ci oldroot = dp->tree.stree[ROOT]; 203062306a36Sopenharmony_ci 203162306a36Sopenharmony_ci /* allocate the specified (blocks) bits */ 203262306a36Sopenharmony_ci dbAllocBits(bmp, dp, blkno, nblocks); 203362306a36Sopenharmony_ci 203462306a36Sopenharmony_ci /* if the root has not changed, done. */ 203562306a36Sopenharmony_ci if (dp->tree.stree[ROOT] == oldroot) 203662306a36Sopenharmony_ci return (0); 203762306a36Sopenharmony_ci 203862306a36Sopenharmony_ci /* root changed. bubble the change up to the dmap control pages. 203962306a36Sopenharmony_ci * if the adjustment of the upper level control pages fails, 204062306a36Sopenharmony_ci * backout the bit allocation (thus making everything consistent). 204162306a36Sopenharmony_ci */ 204262306a36Sopenharmony_ci if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 1, 0))) 204362306a36Sopenharmony_ci dbFreeBits(bmp, dp, blkno, nblocks); 204462306a36Sopenharmony_ci 204562306a36Sopenharmony_ci return (rc); 204662306a36Sopenharmony_ci} 204762306a36Sopenharmony_ci 204862306a36Sopenharmony_ci 204962306a36Sopenharmony_ci/* 205062306a36Sopenharmony_ci * NAME: dbFreeDmap() 205162306a36Sopenharmony_ci * 205262306a36Sopenharmony_ci * FUNCTION: adjust the disk allocation map to reflect the allocation 205362306a36Sopenharmony_ci * of a specified block range within a dmap. 205462306a36Sopenharmony_ci * 205562306a36Sopenharmony_ci * this routine frees the specified blocks from the dmap through 205662306a36Sopenharmony_ci * a call to dbFreeBits(). if the deallocation of the block range 205762306a36Sopenharmony_ci * causes the maximum string of free blocks within the dmap to 205862306a36Sopenharmony_ci * change (i.e. the value of the root of the dmap's dmtree), this 205962306a36Sopenharmony_ci * routine will cause this change to be reflected up through the 206062306a36Sopenharmony_ci * appropriate levels of the dmap control pages by a call to 206162306a36Sopenharmony_ci * dbAdjCtl() for the L0 dmap control page that covers this dmap. 206262306a36Sopenharmony_ci * 206362306a36Sopenharmony_ci * PARAMETERS: 206462306a36Sopenharmony_ci * bmp - pointer to bmap descriptor 206562306a36Sopenharmony_ci * dp - pointer to dmap to free the block range from. 206662306a36Sopenharmony_ci * blkno - starting block number of the block to be freed. 206762306a36Sopenharmony_ci * nblocks - number of blocks to be freed. 206862306a36Sopenharmony_ci * 206962306a36Sopenharmony_ci * RETURN VALUES: 207062306a36Sopenharmony_ci * 0 - success 207162306a36Sopenharmony_ci * -EIO - i/o error 207262306a36Sopenharmony_ci * 207362306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 207462306a36Sopenharmony_ci */ 207562306a36Sopenharmony_cistatic int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno, 207662306a36Sopenharmony_ci int nblocks) 207762306a36Sopenharmony_ci{ 207862306a36Sopenharmony_ci s8 oldroot; 207962306a36Sopenharmony_ci int rc = 0, word; 208062306a36Sopenharmony_ci 208162306a36Sopenharmony_ci /* save the current value of the root (i.e. maximum free string) 208262306a36Sopenharmony_ci * of the dmap tree. 208362306a36Sopenharmony_ci */ 208462306a36Sopenharmony_ci oldroot = dp->tree.stree[ROOT]; 208562306a36Sopenharmony_ci 208662306a36Sopenharmony_ci /* free the specified (blocks) bits */ 208762306a36Sopenharmony_ci rc = dbFreeBits(bmp, dp, blkno, nblocks); 208862306a36Sopenharmony_ci 208962306a36Sopenharmony_ci /* if error or the root has not changed, done. */ 209062306a36Sopenharmony_ci if (rc || (dp->tree.stree[ROOT] == oldroot)) 209162306a36Sopenharmony_ci return (rc); 209262306a36Sopenharmony_ci 209362306a36Sopenharmony_ci /* root changed. bubble the change up to the dmap control pages. 209462306a36Sopenharmony_ci * if the adjustment of the upper level control pages fails, 209562306a36Sopenharmony_ci * backout the deallocation. 209662306a36Sopenharmony_ci */ 209762306a36Sopenharmony_ci if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) { 209862306a36Sopenharmony_ci word = (blkno & (BPERDMAP - 1)) >> L2DBWORD; 209962306a36Sopenharmony_ci 210062306a36Sopenharmony_ci /* as part of backing out the deallocation, we will have 210162306a36Sopenharmony_ci * to back split the dmap tree if the deallocation caused 210262306a36Sopenharmony_ci * the freed blocks to become part of a larger binary buddy 210362306a36Sopenharmony_ci * system. 210462306a36Sopenharmony_ci */ 210562306a36Sopenharmony_ci if (dp->tree.stree[word] == NOFREE) 210662306a36Sopenharmony_ci dbBackSplit((dmtree_t *)&dp->tree, word, false); 210762306a36Sopenharmony_ci 210862306a36Sopenharmony_ci dbAllocBits(bmp, dp, blkno, nblocks); 210962306a36Sopenharmony_ci } 211062306a36Sopenharmony_ci 211162306a36Sopenharmony_ci return (rc); 211262306a36Sopenharmony_ci} 211362306a36Sopenharmony_ci 211462306a36Sopenharmony_ci 211562306a36Sopenharmony_ci/* 211662306a36Sopenharmony_ci * NAME: dbAllocBits() 211762306a36Sopenharmony_ci * 211862306a36Sopenharmony_ci * FUNCTION: allocate a specified block range from a dmap. 211962306a36Sopenharmony_ci * 212062306a36Sopenharmony_ci * this routine updates the dmap to reflect the working 212162306a36Sopenharmony_ci * state allocation of the specified block range. it directly 212262306a36Sopenharmony_ci * updates the bits of the working map and causes the adjustment 212362306a36Sopenharmony_ci * of the binary buddy system described by the dmap's dmtree 212462306a36Sopenharmony_ci * leaves to reflect the bits allocated. it also causes the 212562306a36Sopenharmony_ci * dmap's dmtree, as a whole, to reflect the allocated range. 212662306a36Sopenharmony_ci * 212762306a36Sopenharmony_ci * PARAMETERS: 212862306a36Sopenharmony_ci * bmp - pointer to bmap descriptor 212962306a36Sopenharmony_ci * dp - pointer to dmap to allocate bits from. 213062306a36Sopenharmony_ci * blkno - starting block number of the bits to be allocated. 213162306a36Sopenharmony_ci * nblocks - number of bits to be allocated. 213262306a36Sopenharmony_ci * 213362306a36Sopenharmony_ci * RETURN VALUES: none 213462306a36Sopenharmony_ci * 213562306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 213662306a36Sopenharmony_ci */ 213762306a36Sopenharmony_cistatic void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, 213862306a36Sopenharmony_ci int nblocks) 213962306a36Sopenharmony_ci{ 214062306a36Sopenharmony_ci int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; 214162306a36Sopenharmony_ci dmtree_t *tp = (dmtree_t *) & dp->tree; 214262306a36Sopenharmony_ci int size; 214362306a36Sopenharmony_ci s8 *leaf; 214462306a36Sopenharmony_ci 214562306a36Sopenharmony_ci /* pick up a pointer to the leaves of the dmap tree */ 214662306a36Sopenharmony_ci leaf = dp->tree.stree + LEAFIND; 214762306a36Sopenharmony_ci 214862306a36Sopenharmony_ci /* determine the bit number and word within the dmap of the 214962306a36Sopenharmony_ci * starting block. 215062306a36Sopenharmony_ci */ 215162306a36Sopenharmony_ci dbitno = blkno & (BPERDMAP - 1); 215262306a36Sopenharmony_ci word = dbitno >> L2DBWORD; 215362306a36Sopenharmony_ci 215462306a36Sopenharmony_ci /* block range better be within the dmap */ 215562306a36Sopenharmony_ci assert(dbitno + nblocks <= BPERDMAP); 215662306a36Sopenharmony_ci 215762306a36Sopenharmony_ci /* allocate the bits of the dmap's words corresponding to the block 215862306a36Sopenharmony_ci * range. not all bits of the first and last words may be contained 215962306a36Sopenharmony_ci * within the block range. if this is the case, we'll work against 216062306a36Sopenharmony_ci * those words (i.e. partial first and/or last) on an individual basis 216162306a36Sopenharmony_ci * (a single pass), allocating the bits of interest by hand and 216262306a36Sopenharmony_ci * updating the leaf corresponding to the dmap word. a single pass 216362306a36Sopenharmony_ci * will be used for all dmap words fully contained within the 216462306a36Sopenharmony_ci * specified range. within this pass, the bits of all fully contained 216562306a36Sopenharmony_ci * dmap words will be marked as free in a single shot and the leaves 216662306a36Sopenharmony_ci * will be updated. a single leaf may describe the free space of 216762306a36Sopenharmony_ci * multiple dmap words, so we may update only a subset of the actual 216862306a36Sopenharmony_ci * leaves corresponding to the dmap words of the block range. 216962306a36Sopenharmony_ci */ 217062306a36Sopenharmony_ci for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { 217162306a36Sopenharmony_ci /* determine the bit number within the word and 217262306a36Sopenharmony_ci * the number of bits within the word. 217362306a36Sopenharmony_ci */ 217462306a36Sopenharmony_ci wbitno = dbitno & (DBWORD - 1); 217562306a36Sopenharmony_ci nb = min(rembits, DBWORD - wbitno); 217662306a36Sopenharmony_ci 217762306a36Sopenharmony_ci /* check if only part of a word is to be allocated. 217862306a36Sopenharmony_ci */ 217962306a36Sopenharmony_ci if (nb < DBWORD) { 218062306a36Sopenharmony_ci /* allocate (set to 1) the appropriate bits within 218162306a36Sopenharmony_ci * this dmap word. 218262306a36Sopenharmony_ci */ 218362306a36Sopenharmony_ci dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb) 218462306a36Sopenharmony_ci >> wbitno); 218562306a36Sopenharmony_ci 218662306a36Sopenharmony_ci /* update the leaf for this dmap word. in addition 218762306a36Sopenharmony_ci * to setting the leaf value to the binary buddy max 218862306a36Sopenharmony_ci * of the updated dmap word, dbSplit() will split 218962306a36Sopenharmony_ci * the binary system of the leaves if need be. 219062306a36Sopenharmony_ci */ 219162306a36Sopenharmony_ci dbSplit(tp, word, BUDMIN, 219262306a36Sopenharmony_ci dbMaxBud((u8 *)&dp->wmap[word]), false); 219362306a36Sopenharmony_ci 219462306a36Sopenharmony_ci word += 1; 219562306a36Sopenharmony_ci } else { 219662306a36Sopenharmony_ci /* one or more dmap words are fully contained 219762306a36Sopenharmony_ci * within the block range. determine how many 219862306a36Sopenharmony_ci * words and allocate (set to 1) the bits of these 219962306a36Sopenharmony_ci * words. 220062306a36Sopenharmony_ci */ 220162306a36Sopenharmony_ci nwords = rembits >> L2DBWORD; 220262306a36Sopenharmony_ci memset(&dp->wmap[word], (int) ONES, nwords * 4); 220362306a36Sopenharmony_ci 220462306a36Sopenharmony_ci /* determine how many bits. 220562306a36Sopenharmony_ci */ 220662306a36Sopenharmony_ci nb = nwords << L2DBWORD; 220762306a36Sopenharmony_ci 220862306a36Sopenharmony_ci /* now update the appropriate leaves to reflect 220962306a36Sopenharmony_ci * the allocated words. 221062306a36Sopenharmony_ci */ 221162306a36Sopenharmony_ci for (; nwords > 0; nwords -= nw) { 221262306a36Sopenharmony_ci if (leaf[word] < BUDMIN) { 221362306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, 221462306a36Sopenharmony_ci "leaf page corrupt\n"); 221562306a36Sopenharmony_ci break; 221662306a36Sopenharmony_ci } 221762306a36Sopenharmony_ci 221862306a36Sopenharmony_ci /* determine what the leaf value should be 221962306a36Sopenharmony_ci * updated to as the minimum of the l2 number 222062306a36Sopenharmony_ci * of bits being allocated and the l2 number 222162306a36Sopenharmony_ci * of bits currently described by this leaf. 222262306a36Sopenharmony_ci */ 222362306a36Sopenharmony_ci size = min_t(int, leaf[word], 222462306a36Sopenharmony_ci NLSTOL2BSZ(nwords)); 222562306a36Sopenharmony_ci 222662306a36Sopenharmony_ci /* update the leaf to reflect the allocation. 222762306a36Sopenharmony_ci * in addition to setting the leaf value to 222862306a36Sopenharmony_ci * NOFREE, dbSplit() will split the binary 222962306a36Sopenharmony_ci * system of the leaves to reflect the current 223062306a36Sopenharmony_ci * allocation (size). 223162306a36Sopenharmony_ci */ 223262306a36Sopenharmony_ci dbSplit(tp, word, size, NOFREE, false); 223362306a36Sopenharmony_ci 223462306a36Sopenharmony_ci /* get the number of dmap words handled */ 223562306a36Sopenharmony_ci nw = BUDSIZE(size, BUDMIN); 223662306a36Sopenharmony_ci word += nw; 223762306a36Sopenharmony_ci } 223862306a36Sopenharmony_ci } 223962306a36Sopenharmony_ci } 224062306a36Sopenharmony_ci 224162306a36Sopenharmony_ci /* update the free count for this dmap */ 224262306a36Sopenharmony_ci le32_add_cpu(&dp->nfree, -nblocks); 224362306a36Sopenharmony_ci 224462306a36Sopenharmony_ci BMAP_LOCK(bmp); 224562306a36Sopenharmony_ci 224662306a36Sopenharmony_ci /* if this allocation group is completely free, 224762306a36Sopenharmony_ci * update the maximum allocation group number if this allocation 224862306a36Sopenharmony_ci * group is the new max. 224962306a36Sopenharmony_ci */ 225062306a36Sopenharmony_ci agno = blkno >> bmp->db_agl2size; 225162306a36Sopenharmony_ci if (agno > bmp->db_maxag) 225262306a36Sopenharmony_ci bmp->db_maxag = agno; 225362306a36Sopenharmony_ci 225462306a36Sopenharmony_ci /* update the free count for the allocation group and map */ 225562306a36Sopenharmony_ci bmp->db_agfree[agno] -= nblocks; 225662306a36Sopenharmony_ci bmp->db_nfree -= nblocks; 225762306a36Sopenharmony_ci 225862306a36Sopenharmony_ci BMAP_UNLOCK(bmp); 225962306a36Sopenharmony_ci} 226062306a36Sopenharmony_ci 226162306a36Sopenharmony_ci 226262306a36Sopenharmony_ci/* 226362306a36Sopenharmony_ci * NAME: dbFreeBits() 226462306a36Sopenharmony_ci * 226562306a36Sopenharmony_ci * FUNCTION: free a specified block range from a dmap. 226662306a36Sopenharmony_ci * 226762306a36Sopenharmony_ci * this routine updates the dmap to reflect the working 226862306a36Sopenharmony_ci * state allocation of the specified block range. it directly 226962306a36Sopenharmony_ci * updates the bits of the working map and causes the adjustment 227062306a36Sopenharmony_ci * of the binary buddy system described by the dmap's dmtree 227162306a36Sopenharmony_ci * leaves to reflect the bits freed. it also causes the dmap's 227262306a36Sopenharmony_ci * dmtree, as a whole, to reflect the deallocated range. 227362306a36Sopenharmony_ci * 227462306a36Sopenharmony_ci * PARAMETERS: 227562306a36Sopenharmony_ci * bmp - pointer to bmap descriptor 227662306a36Sopenharmony_ci * dp - pointer to dmap to free bits from. 227762306a36Sopenharmony_ci * blkno - starting block number of the bits to be freed. 227862306a36Sopenharmony_ci * nblocks - number of bits to be freed. 227962306a36Sopenharmony_ci * 228062306a36Sopenharmony_ci * RETURN VALUES: 0 for success 228162306a36Sopenharmony_ci * 228262306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 228362306a36Sopenharmony_ci */ 228462306a36Sopenharmony_cistatic int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, 228562306a36Sopenharmony_ci int nblocks) 228662306a36Sopenharmony_ci{ 228762306a36Sopenharmony_ci int dbitno, word, rembits, nb, nwords, wbitno, nw, agno; 228862306a36Sopenharmony_ci dmtree_t *tp = (dmtree_t *) & dp->tree; 228962306a36Sopenharmony_ci int rc = 0; 229062306a36Sopenharmony_ci int size; 229162306a36Sopenharmony_ci 229262306a36Sopenharmony_ci /* determine the bit number and word within the dmap of the 229362306a36Sopenharmony_ci * starting block. 229462306a36Sopenharmony_ci */ 229562306a36Sopenharmony_ci dbitno = blkno & (BPERDMAP - 1); 229662306a36Sopenharmony_ci word = dbitno >> L2DBWORD; 229762306a36Sopenharmony_ci 229862306a36Sopenharmony_ci /* block range better be within the dmap. 229962306a36Sopenharmony_ci */ 230062306a36Sopenharmony_ci assert(dbitno + nblocks <= BPERDMAP); 230162306a36Sopenharmony_ci 230262306a36Sopenharmony_ci /* free the bits of the dmaps words corresponding to the block range. 230362306a36Sopenharmony_ci * not all bits of the first and last words may be contained within 230462306a36Sopenharmony_ci * the block range. if this is the case, we'll work against those 230562306a36Sopenharmony_ci * words (i.e. partial first and/or last) on an individual basis 230662306a36Sopenharmony_ci * (a single pass), freeing the bits of interest by hand and updating 230762306a36Sopenharmony_ci * the leaf corresponding to the dmap word. a single pass will be used 230862306a36Sopenharmony_ci * for all dmap words fully contained within the specified range. 230962306a36Sopenharmony_ci * within this pass, the bits of all fully contained dmap words will 231062306a36Sopenharmony_ci * be marked as free in a single shot and the leaves will be updated. a 231162306a36Sopenharmony_ci * single leaf may describe the free space of multiple dmap words, 231262306a36Sopenharmony_ci * so we may update only a subset of the actual leaves corresponding 231362306a36Sopenharmony_ci * to the dmap words of the block range. 231462306a36Sopenharmony_ci * 231562306a36Sopenharmony_ci * dbJoin() is used to update leaf values and will join the binary 231662306a36Sopenharmony_ci * buddy system of the leaves if the new leaf values indicate this 231762306a36Sopenharmony_ci * should be done. 231862306a36Sopenharmony_ci */ 231962306a36Sopenharmony_ci for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { 232062306a36Sopenharmony_ci /* determine the bit number within the word and 232162306a36Sopenharmony_ci * the number of bits within the word. 232262306a36Sopenharmony_ci */ 232362306a36Sopenharmony_ci wbitno = dbitno & (DBWORD - 1); 232462306a36Sopenharmony_ci nb = min(rembits, DBWORD - wbitno); 232562306a36Sopenharmony_ci 232662306a36Sopenharmony_ci /* check if only part of a word is to be freed. 232762306a36Sopenharmony_ci */ 232862306a36Sopenharmony_ci if (nb < DBWORD) { 232962306a36Sopenharmony_ci /* free (zero) the appropriate bits within this 233062306a36Sopenharmony_ci * dmap word. 233162306a36Sopenharmony_ci */ 233262306a36Sopenharmony_ci dp->wmap[word] &= 233362306a36Sopenharmony_ci cpu_to_le32(~(ONES << (DBWORD - nb) 233462306a36Sopenharmony_ci >> wbitno)); 233562306a36Sopenharmony_ci 233662306a36Sopenharmony_ci /* update the leaf for this dmap word. 233762306a36Sopenharmony_ci */ 233862306a36Sopenharmony_ci rc = dbJoin(tp, word, 233962306a36Sopenharmony_ci dbMaxBud((u8 *)&dp->wmap[word]), false); 234062306a36Sopenharmony_ci if (rc) 234162306a36Sopenharmony_ci return rc; 234262306a36Sopenharmony_ci 234362306a36Sopenharmony_ci word += 1; 234462306a36Sopenharmony_ci } else { 234562306a36Sopenharmony_ci /* one or more dmap words are fully contained 234662306a36Sopenharmony_ci * within the block range. determine how many 234762306a36Sopenharmony_ci * words and free (zero) the bits of these words. 234862306a36Sopenharmony_ci */ 234962306a36Sopenharmony_ci nwords = rembits >> L2DBWORD; 235062306a36Sopenharmony_ci memset(&dp->wmap[word], 0, nwords * 4); 235162306a36Sopenharmony_ci 235262306a36Sopenharmony_ci /* determine how many bits. 235362306a36Sopenharmony_ci */ 235462306a36Sopenharmony_ci nb = nwords << L2DBWORD; 235562306a36Sopenharmony_ci 235662306a36Sopenharmony_ci /* now update the appropriate leaves to reflect 235762306a36Sopenharmony_ci * the freed words. 235862306a36Sopenharmony_ci */ 235962306a36Sopenharmony_ci for (; nwords > 0; nwords -= nw) { 236062306a36Sopenharmony_ci /* determine what the leaf value should be 236162306a36Sopenharmony_ci * updated to as the minimum of the l2 number 236262306a36Sopenharmony_ci * of bits being freed and the l2 (max) number 236362306a36Sopenharmony_ci * of bits that can be described by this leaf. 236462306a36Sopenharmony_ci */ 236562306a36Sopenharmony_ci size = 236662306a36Sopenharmony_ci min(LITOL2BSZ 236762306a36Sopenharmony_ci (word, L2LPERDMAP, BUDMIN), 236862306a36Sopenharmony_ci NLSTOL2BSZ(nwords)); 236962306a36Sopenharmony_ci 237062306a36Sopenharmony_ci /* update the leaf. 237162306a36Sopenharmony_ci */ 237262306a36Sopenharmony_ci rc = dbJoin(tp, word, size, false); 237362306a36Sopenharmony_ci if (rc) 237462306a36Sopenharmony_ci return rc; 237562306a36Sopenharmony_ci 237662306a36Sopenharmony_ci /* get the number of dmap words handled. 237762306a36Sopenharmony_ci */ 237862306a36Sopenharmony_ci nw = BUDSIZE(size, BUDMIN); 237962306a36Sopenharmony_ci word += nw; 238062306a36Sopenharmony_ci } 238162306a36Sopenharmony_ci } 238262306a36Sopenharmony_ci } 238362306a36Sopenharmony_ci 238462306a36Sopenharmony_ci /* update the free count for this dmap. 238562306a36Sopenharmony_ci */ 238662306a36Sopenharmony_ci le32_add_cpu(&dp->nfree, nblocks); 238762306a36Sopenharmony_ci 238862306a36Sopenharmony_ci BMAP_LOCK(bmp); 238962306a36Sopenharmony_ci 239062306a36Sopenharmony_ci /* update the free count for the allocation group and 239162306a36Sopenharmony_ci * map. 239262306a36Sopenharmony_ci */ 239362306a36Sopenharmony_ci agno = blkno >> bmp->db_agl2size; 239462306a36Sopenharmony_ci bmp->db_nfree += nblocks; 239562306a36Sopenharmony_ci bmp->db_agfree[agno] += nblocks; 239662306a36Sopenharmony_ci 239762306a36Sopenharmony_ci /* check if this allocation group is not completely free and 239862306a36Sopenharmony_ci * if it is currently the maximum (rightmost) allocation group. 239962306a36Sopenharmony_ci * if so, establish the new maximum allocation group number by 240062306a36Sopenharmony_ci * searching left for the first allocation group with allocation. 240162306a36Sopenharmony_ci */ 240262306a36Sopenharmony_ci if ((bmp->db_agfree[agno] == bmp->db_agsize && agno == bmp->db_maxag) || 240362306a36Sopenharmony_ci (agno == bmp->db_numag - 1 && 240462306a36Sopenharmony_ci bmp->db_agfree[agno] == (bmp-> db_mapsize & (BPERDMAP - 1)))) { 240562306a36Sopenharmony_ci while (bmp->db_maxag > 0) { 240662306a36Sopenharmony_ci bmp->db_maxag -= 1; 240762306a36Sopenharmony_ci if (bmp->db_agfree[bmp->db_maxag] != 240862306a36Sopenharmony_ci bmp->db_agsize) 240962306a36Sopenharmony_ci break; 241062306a36Sopenharmony_ci } 241162306a36Sopenharmony_ci 241262306a36Sopenharmony_ci /* re-establish the allocation group preference if the 241362306a36Sopenharmony_ci * current preference is right of the maximum allocation 241462306a36Sopenharmony_ci * group. 241562306a36Sopenharmony_ci */ 241662306a36Sopenharmony_ci if (bmp->db_agpref > bmp->db_maxag) 241762306a36Sopenharmony_ci bmp->db_agpref = bmp->db_maxag; 241862306a36Sopenharmony_ci } 241962306a36Sopenharmony_ci 242062306a36Sopenharmony_ci BMAP_UNLOCK(bmp); 242162306a36Sopenharmony_ci 242262306a36Sopenharmony_ci return 0; 242362306a36Sopenharmony_ci} 242462306a36Sopenharmony_ci 242562306a36Sopenharmony_ci 242662306a36Sopenharmony_ci/* 242762306a36Sopenharmony_ci * NAME: dbAdjCtl() 242862306a36Sopenharmony_ci * 242962306a36Sopenharmony_ci * FUNCTION: adjust a dmap control page at a specified level to reflect 243062306a36Sopenharmony_ci * the change in a lower level dmap or dmap control page's 243162306a36Sopenharmony_ci * maximum string of free blocks (i.e. a change in the root 243262306a36Sopenharmony_ci * of the lower level object's dmtree) due to the allocation 243362306a36Sopenharmony_ci * or deallocation of a range of blocks with a single dmap. 243462306a36Sopenharmony_ci * 243562306a36Sopenharmony_ci * on entry, this routine is provided with the new value of 243662306a36Sopenharmony_ci * the lower level dmap or dmap control page root and the 243762306a36Sopenharmony_ci * starting block number of the block range whose allocation 243862306a36Sopenharmony_ci * or deallocation resulted in the root change. this range 243962306a36Sopenharmony_ci * is respresented by a single leaf of the current dmapctl 244062306a36Sopenharmony_ci * and the leaf will be updated with this value, possibly 244162306a36Sopenharmony_ci * causing a binary buddy system within the leaves to be 244262306a36Sopenharmony_ci * split or joined. the update may also cause the dmapctl's 244362306a36Sopenharmony_ci * dmtree to be updated. 244462306a36Sopenharmony_ci * 244562306a36Sopenharmony_ci * if the adjustment of the dmap control page, itself, causes its 244662306a36Sopenharmony_ci * root to change, this change will be bubbled up to the next dmap 244762306a36Sopenharmony_ci * control level by a recursive call to this routine, specifying 244862306a36Sopenharmony_ci * the new root value and the next dmap control page level to 244962306a36Sopenharmony_ci * be adjusted. 245062306a36Sopenharmony_ci * PARAMETERS: 245162306a36Sopenharmony_ci * bmp - pointer to bmap descriptor 245262306a36Sopenharmony_ci * blkno - the first block of a block range within a dmap. it is 245362306a36Sopenharmony_ci * the allocation or deallocation of this block range that 245462306a36Sopenharmony_ci * requires the dmap control page to be adjusted. 245562306a36Sopenharmony_ci * newval - the new value of the lower level dmap or dmap control 245662306a36Sopenharmony_ci * page root. 245762306a36Sopenharmony_ci * alloc - 'true' if adjustment is due to an allocation. 245862306a36Sopenharmony_ci * level - current level of dmap control page (i.e. L0, L1, L2) to 245962306a36Sopenharmony_ci * be adjusted. 246062306a36Sopenharmony_ci * 246162306a36Sopenharmony_ci * RETURN VALUES: 246262306a36Sopenharmony_ci * 0 - success 246362306a36Sopenharmony_ci * -EIO - i/o error 246462306a36Sopenharmony_ci * 246562306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 246662306a36Sopenharmony_ci */ 246762306a36Sopenharmony_cistatic int 246862306a36Sopenharmony_cidbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) 246962306a36Sopenharmony_ci{ 247062306a36Sopenharmony_ci struct metapage *mp; 247162306a36Sopenharmony_ci s8 oldroot; 247262306a36Sopenharmony_ci int oldval; 247362306a36Sopenharmony_ci s64 lblkno; 247462306a36Sopenharmony_ci struct dmapctl *dcp; 247562306a36Sopenharmony_ci int rc, leafno, ti; 247662306a36Sopenharmony_ci 247762306a36Sopenharmony_ci /* get the buffer for the dmap control page for the specified 247862306a36Sopenharmony_ci * block number and control page level. 247962306a36Sopenharmony_ci */ 248062306a36Sopenharmony_ci lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, level); 248162306a36Sopenharmony_ci mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0); 248262306a36Sopenharmony_ci if (mp == NULL) 248362306a36Sopenharmony_ci return -EIO; 248462306a36Sopenharmony_ci dcp = (struct dmapctl *) mp->data; 248562306a36Sopenharmony_ci 248662306a36Sopenharmony_ci if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { 248762306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n"); 248862306a36Sopenharmony_ci release_metapage(mp); 248962306a36Sopenharmony_ci return -EIO; 249062306a36Sopenharmony_ci } 249162306a36Sopenharmony_ci 249262306a36Sopenharmony_ci /* determine the leaf number corresponding to the block and 249362306a36Sopenharmony_ci * the index within the dmap control tree. 249462306a36Sopenharmony_ci */ 249562306a36Sopenharmony_ci leafno = BLKTOCTLLEAF(blkno, dcp->budmin); 249662306a36Sopenharmony_ci ti = leafno + le32_to_cpu(dcp->leafidx); 249762306a36Sopenharmony_ci 249862306a36Sopenharmony_ci /* save the current leaf value and the current root level (i.e. 249962306a36Sopenharmony_ci * maximum l2 free string described by this dmapctl). 250062306a36Sopenharmony_ci */ 250162306a36Sopenharmony_ci oldval = dcp->stree[ti]; 250262306a36Sopenharmony_ci oldroot = dcp->stree[ROOT]; 250362306a36Sopenharmony_ci 250462306a36Sopenharmony_ci /* check if this is a control page update for an allocation. 250562306a36Sopenharmony_ci * if so, update the leaf to reflect the new leaf value using 250662306a36Sopenharmony_ci * dbSplit(); otherwise (deallocation), use dbJoin() to update 250762306a36Sopenharmony_ci * the leaf with the new value. in addition to updating the 250862306a36Sopenharmony_ci * leaf, dbSplit() will also split the binary buddy system of 250962306a36Sopenharmony_ci * the leaves, if required, and bubble new values within the 251062306a36Sopenharmony_ci * dmapctl tree, if required. similarly, dbJoin() will join 251162306a36Sopenharmony_ci * the binary buddy system of leaves and bubble new values up 251262306a36Sopenharmony_ci * the dmapctl tree as required by the new leaf value. 251362306a36Sopenharmony_ci */ 251462306a36Sopenharmony_ci if (alloc) { 251562306a36Sopenharmony_ci /* check if we are in the middle of a binary buddy 251662306a36Sopenharmony_ci * system. this happens when we are performing the 251762306a36Sopenharmony_ci * first allocation out of an allocation group that 251862306a36Sopenharmony_ci * is part (not the first part) of a larger binary 251962306a36Sopenharmony_ci * buddy system. if we are in the middle, back split 252062306a36Sopenharmony_ci * the system prior to calling dbSplit() which assumes 252162306a36Sopenharmony_ci * that it is at the front of a binary buddy system. 252262306a36Sopenharmony_ci */ 252362306a36Sopenharmony_ci if (oldval == NOFREE) { 252462306a36Sopenharmony_ci rc = dbBackSplit((dmtree_t *)dcp, leafno, true); 252562306a36Sopenharmony_ci if (rc) { 252662306a36Sopenharmony_ci release_metapage(mp); 252762306a36Sopenharmony_ci return rc; 252862306a36Sopenharmony_ci } 252962306a36Sopenharmony_ci oldval = dcp->stree[ti]; 253062306a36Sopenharmony_ci } 253162306a36Sopenharmony_ci dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval, true); 253262306a36Sopenharmony_ci } else { 253362306a36Sopenharmony_ci rc = dbJoin((dmtree_t *) dcp, leafno, newval, true); 253462306a36Sopenharmony_ci if (rc) { 253562306a36Sopenharmony_ci release_metapage(mp); 253662306a36Sopenharmony_ci return rc; 253762306a36Sopenharmony_ci } 253862306a36Sopenharmony_ci } 253962306a36Sopenharmony_ci 254062306a36Sopenharmony_ci /* check if the root of the current dmap control page changed due 254162306a36Sopenharmony_ci * to the update and if the current dmap control page is not at 254262306a36Sopenharmony_ci * the current top level (i.e. L0, L1, L2) of the map. if so (i.e. 254362306a36Sopenharmony_ci * root changed and this is not the top level), call this routine 254462306a36Sopenharmony_ci * again (recursion) for the next higher level of the mapping to 254562306a36Sopenharmony_ci * reflect the change in root for the current dmap control page. 254662306a36Sopenharmony_ci */ 254762306a36Sopenharmony_ci if (dcp->stree[ROOT] != oldroot) { 254862306a36Sopenharmony_ci /* are we below the top level of the map. if so, 254962306a36Sopenharmony_ci * bubble the root up to the next higher level. 255062306a36Sopenharmony_ci */ 255162306a36Sopenharmony_ci if (level < bmp->db_maxlevel) { 255262306a36Sopenharmony_ci /* bubble up the new root of this dmap control page to 255362306a36Sopenharmony_ci * the next level. 255462306a36Sopenharmony_ci */ 255562306a36Sopenharmony_ci if ((rc = 255662306a36Sopenharmony_ci dbAdjCtl(bmp, blkno, dcp->stree[ROOT], alloc, 255762306a36Sopenharmony_ci level + 1))) { 255862306a36Sopenharmony_ci /* something went wrong in bubbling up the new 255962306a36Sopenharmony_ci * root value, so backout the changes to the 256062306a36Sopenharmony_ci * current dmap control page. 256162306a36Sopenharmony_ci */ 256262306a36Sopenharmony_ci if (alloc) { 256362306a36Sopenharmony_ci dbJoin((dmtree_t *) dcp, leafno, 256462306a36Sopenharmony_ci oldval, true); 256562306a36Sopenharmony_ci } else { 256662306a36Sopenharmony_ci /* the dbJoin() above might have 256762306a36Sopenharmony_ci * caused a larger binary buddy system 256862306a36Sopenharmony_ci * to form and we may now be in the 256962306a36Sopenharmony_ci * middle of it. if this is the case, 257062306a36Sopenharmony_ci * back split the buddies. 257162306a36Sopenharmony_ci */ 257262306a36Sopenharmony_ci if (dcp->stree[ti] == NOFREE) 257362306a36Sopenharmony_ci dbBackSplit((dmtree_t *) 257462306a36Sopenharmony_ci dcp, leafno, true); 257562306a36Sopenharmony_ci dbSplit((dmtree_t *) dcp, leafno, 257662306a36Sopenharmony_ci dcp->budmin, oldval, true); 257762306a36Sopenharmony_ci } 257862306a36Sopenharmony_ci 257962306a36Sopenharmony_ci /* release the buffer and return the error. 258062306a36Sopenharmony_ci */ 258162306a36Sopenharmony_ci release_metapage(mp); 258262306a36Sopenharmony_ci return (rc); 258362306a36Sopenharmony_ci } 258462306a36Sopenharmony_ci } else { 258562306a36Sopenharmony_ci /* we're at the top level of the map. update 258662306a36Sopenharmony_ci * the bmap control page to reflect the size 258762306a36Sopenharmony_ci * of the maximum free buddy system. 258862306a36Sopenharmony_ci */ 258962306a36Sopenharmony_ci assert(level == bmp->db_maxlevel); 259062306a36Sopenharmony_ci if (bmp->db_maxfreebud != oldroot) { 259162306a36Sopenharmony_ci jfs_error(bmp->db_ipbmap->i_sb, 259262306a36Sopenharmony_ci "the maximum free buddy is not the old root\n"); 259362306a36Sopenharmony_ci } 259462306a36Sopenharmony_ci bmp->db_maxfreebud = dcp->stree[ROOT]; 259562306a36Sopenharmony_ci } 259662306a36Sopenharmony_ci } 259762306a36Sopenharmony_ci 259862306a36Sopenharmony_ci /* write the buffer. 259962306a36Sopenharmony_ci */ 260062306a36Sopenharmony_ci write_metapage(mp); 260162306a36Sopenharmony_ci 260262306a36Sopenharmony_ci return (0); 260362306a36Sopenharmony_ci} 260462306a36Sopenharmony_ci 260562306a36Sopenharmony_ci 260662306a36Sopenharmony_ci/* 260762306a36Sopenharmony_ci * NAME: dbSplit() 260862306a36Sopenharmony_ci * 260962306a36Sopenharmony_ci * FUNCTION: update the leaf of a dmtree with a new value, splitting 261062306a36Sopenharmony_ci * the leaf from the binary buddy system of the dmtree's 261162306a36Sopenharmony_ci * leaves, as required. 261262306a36Sopenharmony_ci * 261362306a36Sopenharmony_ci * PARAMETERS: 261462306a36Sopenharmony_ci * tp - pointer to the tree containing the leaf. 261562306a36Sopenharmony_ci * leafno - the number of the leaf to be updated. 261662306a36Sopenharmony_ci * splitsz - the size the binary buddy system starting at the leaf 261762306a36Sopenharmony_ci * must be split to, specified as the log2 number of blocks. 261862306a36Sopenharmony_ci * newval - the new value for the leaf. 261962306a36Sopenharmony_ci * 262062306a36Sopenharmony_ci * RETURN VALUES: none 262162306a36Sopenharmony_ci * 262262306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 262362306a36Sopenharmony_ci */ 262462306a36Sopenharmony_cistatic void dbSplit(dmtree_t *tp, int leafno, int splitsz, int newval, bool is_ctl) 262562306a36Sopenharmony_ci{ 262662306a36Sopenharmony_ci int budsz; 262762306a36Sopenharmony_ci int cursz; 262862306a36Sopenharmony_ci s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); 262962306a36Sopenharmony_ci 263062306a36Sopenharmony_ci /* check if the leaf needs to be split. 263162306a36Sopenharmony_ci */ 263262306a36Sopenharmony_ci if (leaf[leafno] > tp->dmt_budmin) { 263362306a36Sopenharmony_ci /* the split occurs by cutting the buddy system in half 263462306a36Sopenharmony_ci * at the specified leaf until we reach the specified 263562306a36Sopenharmony_ci * size. pick up the starting split size (current size 263662306a36Sopenharmony_ci * - 1 in l2) and the corresponding buddy size. 263762306a36Sopenharmony_ci */ 263862306a36Sopenharmony_ci cursz = leaf[leafno] - 1; 263962306a36Sopenharmony_ci budsz = BUDSIZE(cursz, tp->dmt_budmin); 264062306a36Sopenharmony_ci 264162306a36Sopenharmony_ci /* split until we reach the specified size. 264262306a36Sopenharmony_ci */ 264362306a36Sopenharmony_ci while (cursz >= splitsz) { 264462306a36Sopenharmony_ci /* update the buddy's leaf with its new value. 264562306a36Sopenharmony_ci */ 264662306a36Sopenharmony_ci dbAdjTree(tp, leafno ^ budsz, cursz, is_ctl); 264762306a36Sopenharmony_ci 264862306a36Sopenharmony_ci /* on to the next size and buddy. 264962306a36Sopenharmony_ci */ 265062306a36Sopenharmony_ci cursz -= 1; 265162306a36Sopenharmony_ci budsz >>= 1; 265262306a36Sopenharmony_ci } 265362306a36Sopenharmony_ci } 265462306a36Sopenharmony_ci 265562306a36Sopenharmony_ci /* adjust the dmap tree to reflect the specified leaf's new 265662306a36Sopenharmony_ci * value. 265762306a36Sopenharmony_ci */ 265862306a36Sopenharmony_ci dbAdjTree(tp, leafno, newval, is_ctl); 265962306a36Sopenharmony_ci} 266062306a36Sopenharmony_ci 266162306a36Sopenharmony_ci 266262306a36Sopenharmony_ci/* 266362306a36Sopenharmony_ci * NAME: dbBackSplit() 266462306a36Sopenharmony_ci * 266562306a36Sopenharmony_ci * FUNCTION: back split the binary buddy system of dmtree leaves 266662306a36Sopenharmony_ci * that hold a specified leaf until the specified leaf 266762306a36Sopenharmony_ci * starts its own binary buddy system. 266862306a36Sopenharmony_ci * 266962306a36Sopenharmony_ci * the allocators typically perform allocations at the start 267062306a36Sopenharmony_ci * of binary buddy systems and dbSplit() is used to accomplish 267162306a36Sopenharmony_ci * any required splits. in some cases, however, allocation 267262306a36Sopenharmony_ci * may occur in the middle of a binary system and requires a 267362306a36Sopenharmony_ci * back split, with the split proceeding out from the middle of 267462306a36Sopenharmony_ci * the system (less efficient) rather than the start of the 267562306a36Sopenharmony_ci * system (more efficient). the cases in which a back split 267662306a36Sopenharmony_ci * is required are rare and are limited to the first allocation 267762306a36Sopenharmony_ci * within an allocation group which is a part (not first part) 267862306a36Sopenharmony_ci * of a larger binary buddy system and a few exception cases 267962306a36Sopenharmony_ci * in which a previous join operation must be backed out. 268062306a36Sopenharmony_ci * 268162306a36Sopenharmony_ci * PARAMETERS: 268262306a36Sopenharmony_ci * tp - pointer to the tree containing the leaf. 268362306a36Sopenharmony_ci * leafno - the number of the leaf to be updated. 268462306a36Sopenharmony_ci * 268562306a36Sopenharmony_ci * RETURN VALUES: none 268662306a36Sopenharmony_ci * 268762306a36Sopenharmony_ci * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit; 268862306a36Sopenharmony_ci */ 268962306a36Sopenharmony_cistatic int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl) 269062306a36Sopenharmony_ci{ 269162306a36Sopenharmony_ci int budsz, bud, w, bsz, size; 269262306a36Sopenharmony_ci int cursz; 269362306a36Sopenharmony_ci s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); 269462306a36Sopenharmony_ci 269562306a36Sopenharmony_ci /* leaf should be part (not first part) of a binary 269662306a36Sopenharmony_ci * buddy system. 269762306a36Sopenharmony_ci */ 269862306a36Sopenharmony_ci assert(leaf[leafno] == NOFREE); 269962306a36Sopenharmony_ci 270062306a36Sopenharmony_ci /* the back split is accomplished by iteratively finding the leaf 270162306a36Sopenharmony_ci * that starts the buddy system that contains the specified leaf and 270262306a36Sopenharmony_ci * splitting that system in two. this iteration continues until 270362306a36Sopenharmony_ci * the specified leaf becomes the start of a buddy system. 270462306a36Sopenharmony_ci * 270562306a36Sopenharmony_ci * determine maximum possible l2 size for the specified leaf. 270662306a36Sopenharmony_ci */ 270762306a36Sopenharmony_ci size = 270862306a36Sopenharmony_ci LITOL2BSZ(leafno, le32_to_cpu(tp->dmt_l2nleafs), 270962306a36Sopenharmony_ci tp->dmt_budmin); 271062306a36Sopenharmony_ci 271162306a36Sopenharmony_ci /* determine the number of leaves covered by this size. this 271262306a36Sopenharmony_ci * is the buddy size that we will start with as we search for 271362306a36Sopenharmony_ci * the buddy system that contains the specified leaf. 271462306a36Sopenharmony_ci */ 271562306a36Sopenharmony_ci budsz = BUDSIZE(size, tp->dmt_budmin); 271662306a36Sopenharmony_ci 271762306a36Sopenharmony_ci /* back split. 271862306a36Sopenharmony_ci */ 271962306a36Sopenharmony_ci while (leaf[leafno] == NOFREE) { 272062306a36Sopenharmony_ci /* find the leftmost buddy leaf. 272162306a36Sopenharmony_ci */ 272262306a36Sopenharmony_ci for (w = leafno, bsz = budsz;; bsz <<= 1, 272362306a36Sopenharmony_ci w = (w < bud) ? w : bud) { 272462306a36Sopenharmony_ci if (bsz >= le32_to_cpu(tp->dmt_nleafs)) { 272562306a36Sopenharmony_ci jfs_err("JFS: block map error in dbBackSplit"); 272662306a36Sopenharmony_ci return -EIO; 272762306a36Sopenharmony_ci } 272862306a36Sopenharmony_ci 272962306a36Sopenharmony_ci /* determine the buddy. 273062306a36Sopenharmony_ci */ 273162306a36Sopenharmony_ci bud = w ^ bsz; 273262306a36Sopenharmony_ci 273362306a36Sopenharmony_ci /* check if this buddy is the start of the system. 273462306a36Sopenharmony_ci */ 273562306a36Sopenharmony_ci if (leaf[bud] != NOFREE) { 273662306a36Sopenharmony_ci /* split the leaf at the start of the 273762306a36Sopenharmony_ci * system in two. 273862306a36Sopenharmony_ci */ 273962306a36Sopenharmony_ci cursz = leaf[bud] - 1; 274062306a36Sopenharmony_ci dbSplit(tp, bud, cursz, cursz, is_ctl); 274162306a36Sopenharmony_ci break; 274262306a36Sopenharmony_ci } 274362306a36Sopenharmony_ci } 274462306a36Sopenharmony_ci } 274562306a36Sopenharmony_ci 274662306a36Sopenharmony_ci if (leaf[leafno] != size) { 274762306a36Sopenharmony_ci jfs_err("JFS: wrong leaf value in dbBackSplit"); 274862306a36Sopenharmony_ci return -EIO; 274962306a36Sopenharmony_ci } 275062306a36Sopenharmony_ci return 0; 275162306a36Sopenharmony_ci} 275262306a36Sopenharmony_ci 275362306a36Sopenharmony_ci 275462306a36Sopenharmony_ci/* 275562306a36Sopenharmony_ci * NAME: dbJoin() 275662306a36Sopenharmony_ci * 275762306a36Sopenharmony_ci * FUNCTION: update the leaf of a dmtree with a new value, joining 275862306a36Sopenharmony_ci * the leaf with other leaves of the dmtree into a multi-leaf 275962306a36Sopenharmony_ci * binary buddy system, as required. 276062306a36Sopenharmony_ci * 276162306a36Sopenharmony_ci * PARAMETERS: 276262306a36Sopenharmony_ci * tp - pointer to the tree containing the leaf. 276362306a36Sopenharmony_ci * leafno - the number of the leaf to be updated. 276462306a36Sopenharmony_ci * newval - the new value for the leaf. 276562306a36Sopenharmony_ci * 276662306a36Sopenharmony_ci * RETURN VALUES: none 276762306a36Sopenharmony_ci */ 276862306a36Sopenharmony_cistatic int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl) 276962306a36Sopenharmony_ci{ 277062306a36Sopenharmony_ci int budsz, buddy; 277162306a36Sopenharmony_ci s8 *leaf; 277262306a36Sopenharmony_ci 277362306a36Sopenharmony_ci /* can the new leaf value require a join with other leaves ? 277462306a36Sopenharmony_ci */ 277562306a36Sopenharmony_ci if (newval >= tp->dmt_budmin) { 277662306a36Sopenharmony_ci /* pickup a pointer to the leaves of the tree. 277762306a36Sopenharmony_ci */ 277862306a36Sopenharmony_ci leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx); 277962306a36Sopenharmony_ci 278062306a36Sopenharmony_ci /* try to join the specified leaf into a large binary 278162306a36Sopenharmony_ci * buddy system. the join proceeds by attempting to join 278262306a36Sopenharmony_ci * the specified leafno with its buddy (leaf) at new value. 278362306a36Sopenharmony_ci * if the join occurs, we attempt to join the left leaf 278462306a36Sopenharmony_ci * of the joined buddies with its buddy at new value + 1. 278562306a36Sopenharmony_ci * we continue to join until we find a buddy that cannot be 278662306a36Sopenharmony_ci * joined (does not have a value equal to the size of the 278762306a36Sopenharmony_ci * last join) or until all leaves have been joined into a 278862306a36Sopenharmony_ci * single system. 278962306a36Sopenharmony_ci * 279062306a36Sopenharmony_ci * get the buddy size (number of words covered) of 279162306a36Sopenharmony_ci * the new value. 279262306a36Sopenharmony_ci */ 279362306a36Sopenharmony_ci budsz = BUDSIZE(newval, tp->dmt_budmin); 279462306a36Sopenharmony_ci 279562306a36Sopenharmony_ci /* try to join. 279662306a36Sopenharmony_ci */ 279762306a36Sopenharmony_ci while (budsz < le32_to_cpu(tp->dmt_nleafs)) { 279862306a36Sopenharmony_ci /* get the buddy leaf. 279962306a36Sopenharmony_ci */ 280062306a36Sopenharmony_ci buddy = leafno ^ budsz; 280162306a36Sopenharmony_ci 280262306a36Sopenharmony_ci /* if the leaf's new value is greater than its 280362306a36Sopenharmony_ci * buddy's value, we join no more. 280462306a36Sopenharmony_ci */ 280562306a36Sopenharmony_ci if (newval > leaf[buddy]) 280662306a36Sopenharmony_ci break; 280762306a36Sopenharmony_ci 280862306a36Sopenharmony_ci /* It shouldn't be less */ 280962306a36Sopenharmony_ci if (newval < leaf[buddy]) 281062306a36Sopenharmony_ci return -EIO; 281162306a36Sopenharmony_ci 281262306a36Sopenharmony_ci /* check which (leafno or buddy) is the left buddy. 281362306a36Sopenharmony_ci * the left buddy gets to claim the blocks resulting 281462306a36Sopenharmony_ci * from the join while the right gets to claim none. 281562306a36Sopenharmony_ci * the left buddy is also eligible to participate in 281662306a36Sopenharmony_ci * a join at the next higher level while the right 281762306a36Sopenharmony_ci * is not. 281862306a36Sopenharmony_ci * 281962306a36Sopenharmony_ci */ 282062306a36Sopenharmony_ci if (leafno < buddy) { 282162306a36Sopenharmony_ci /* leafno is the left buddy. 282262306a36Sopenharmony_ci */ 282362306a36Sopenharmony_ci dbAdjTree(tp, buddy, NOFREE, is_ctl); 282462306a36Sopenharmony_ci } else { 282562306a36Sopenharmony_ci /* buddy is the left buddy and becomes 282662306a36Sopenharmony_ci * leafno. 282762306a36Sopenharmony_ci */ 282862306a36Sopenharmony_ci dbAdjTree(tp, leafno, NOFREE, is_ctl); 282962306a36Sopenharmony_ci leafno = buddy; 283062306a36Sopenharmony_ci } 283162306a36Sopenharmony_ci 283262306a36Sopenharmony_ci /* on to try the next join. 283362306a36Sopenharmony_ci */ 283462306a36Sopenharmony_ci newval += 1; 283562306a36Sopenharmony_ci budsz <<= 1; 283662306a36Sopenharmony_ci } 283762306a36Sopenharmony_ci } 283862306a36Sopenharmony_ci 283962306a36Sopenharmony_ci /* update the leaf value. 284062306a36Sopenharmony_ci */ 284162306a36Sopenharmony_ci dbAdjTree(tp, leafno, newval, is_ctl); 284262306a36Sopenharmony_ci 284362306a36Sopenharmony_ci return 0; 284462306a36Sopenharmony_ci} 284562306a36Sopenharmony_ci 284662306a36Sopenharmony_ci 284762306a36Sopenharmony_ci/* 284862306a36Sopenharmony_ci * NAME: dbAdjTree() 284962306a36Sopenharmony_ci * 285062306a36Sopenharmony_ci * FUNCTION: update a leaf of a dmtree with a new value, adjusting 285162306a36Sopenharmony_ci * the dmtree, as required, to reflect the new leaf value. 285262306a36Sopenharmony_ci * the combination of any buddies must already be done before 285362306a36Sopenharmony_ci * this is called. 285462306a36Sopenharmony_ci * 285562306a36Sopenharmony_ci * PARAMETERS: 285662306a36Sopenharmony_ci * tp - pointer to the tree to be adjusted. 285762306a36Sopenharmony_ci * leafno - the number of the leaf to be updated. 285862306a36Sopenharmony_ci * newval - the new value for the leaf. 285962306a36Sopenharmony_ci * 286062306a36Sopenharmony_ci * RETURN VALUES: none 286162306a36Sopenharmony_ci */ 286262306a36Sopenharmony_cistatic void dbAdjTree(dmtree_t *tp, int leafno, int newval, bool is_ctl) 286362306a36Sopenharmony_ci{ 286462306a36Sopenharmony_ci int lp, pp, k; 286562306a36Sopenharmony_ci int max, size; 286662306a36Sopenharmony_ci 286762306a36Sopenharmony_ci size = is_ctl ? CTLTREESIZE : TREESIZE; 286862306a36Sopenharmony_ci 286962306a36Sopenharmony_ci /* pick up the index of the leaf for this leafno. 287062306a36Sopenharmony_ci */ 287162306a36Sopenharmony_ci lp = leafno + le32_to_cpu(tp->dmt_leafidx); 287262306a36Sopenharmony_ci 287362306a36Sopenharmony_ci if (WARN_ON_ONCE(lp >= size || lp < 0)) 287462306a36Sopenharmony_ci return; 287562306a36Sopenharmony_ci 287662306a36Sopenharmony_ci /* is the current value the same as the old value ? if so, 287762306a36Sopenharmony_ci * there is nothing to do. 287862306a36Sopenharmony_ci */ 287962306a36Sopenharmony_ci if (tp->dmt_stree[lp] == newval) 288062306a36Sopenharmony_ci return; 288162306a36Sopenharmony_ci 288262306a36Sopenharmony_ci /* set the new value. 288362306a36Sopenharmony_ci */ 288462306a36Sopenharmony_ci tp->dmt_stree[lp] = newval; 288562306a36Sopenharmony_ci 288662306a36Sopenharmony_ci /* bubble the new value up the tree as required. 288762306a36Sopenharmony_ci */ 288862306a36Sopenharmony_ci for (k = 0; k < le32_to_cpu(tp->dmt_height); k++) { 288962306a36Sopenharmony_ci /* get the index of the first leaf of the 4 leaf 289062306a36Sopenharmony_ci * group containing the specified leaf (leafno). 289162306a36Sopenharmony_ci */ 289262306a36Sopenharmony_ci lp = ((lp - 1) & ~0x03) + 1; 289362306a36Sopenharmony_ci 289462306a36Sopenharmony_ci /* get the index of the parent of this 4 leaf group. 289562306a36Sopenharmony_ci */ 289662306a36Sopenharmony_ci pp = (lp - 1) >> 2; 289762306a36Sopenharmony_ci 289862306a36Sopenharmony_ci /* determine the maximum of the 4 leaves. 289962306a36Sopenharmony_ci */ 290062306a36Sopenharmony_ci max = TREEMAX(&tp->dmt_stree[lp]); 290162306a36Sopenharmony_ci 290262306a36Sopenharmony_ci /* if the maximum of the 4 is the same as the 290362306a36Sopenharmony_ci * parent's value, we're done. 290462306a36Sopenharmony_ci */ 290562306a36Sopenharmony_ci if (tp->dmt_stree[pp] == max) 290662306a36Sopenharmony_ci break; 290762306a36Sopenharmony_ci 290862306a36Sopenharmony_ci /* parent gets new value. 290962306a36Sopenharmony_ci */ 291062306a36Sopenharmony_ci tp->dmt_stree[pp] = max; 291162306a36Sopenharmony_ci 291262306a36Sopenharmony_ci /* parent becomes leaf for next go-round. 291362306a36Sopenharmony_ci */ 291462306a36Sopenharmony_ci lp = pp; 291562306a36Sopenharmony_ci } 291662306a36Sopenharmony_ci} 291762306a36Sopenharmony_ci 291862306a36Sopenharmony_ci 291962306a36Sopenharmony_ci/* 292062306a36Sopenharmony_ci * NAME: dbFindLeaf() 292162306a36Sopenharmony_ci * 292262306a36Sopenharmony_ci * FUNCTION: search a dmtree_t for sufficient free blocks, returning 292362306a36Sopenharmony_ci * the index of a leaf describing the free blocks if 292462306a36Sopenharmony_ci * sufficient free blocks are found. 292562306a36Sopenharmony_ci * 292662306a36Sopenharmony_ci * the search starts at the top of the dmtree_t tree and 292762306a36Sopenharmony_ci * proceeds down the tree to the leftmost leaf with sufficient 292862306a36Sopenharmony_ci * free space. 292962306a36Sopenharmony_ci * 293062306a36Sopenharmony_ci * PARAMETERS: 293162306a36Sopenharmony_ci * tp - pointer to the tree to be searched. 293262306a36Sopenharmony_ci * l2nb - log2 number of free blocks to search for. 293362306a36Sopenharmony_ci * leafidx - return pointer to be set to the index of the leaf 293462306a36Sopenharmony_ci * describing at least l2nb free blocks if sufficient 293562306a36Sopenharmony_ci * free blocks are found. 293662306a36Sopenharmony_ci * is_ctl - determines if the tree is of type ctl 293762306a36Sopenharmony_ci * 293862306a36Sopenharmony_ci * RETURN VALUES: 293962306a36Sopenharmony_ci * 0 - success 294062306a36Sopenharmony_ci * -ENOSPC - insufficient free blocks. 294162306a36Sopenharmony_ci */ 294262306a36Sopenharmony_cistatic int dbFindLeaf(dmtree_t *tp, int l2nb, int *leafidx, bool is_ctl) 294362306a36Sopenharmony_ci{ 294462306a36Sopenharmony_ci int ti, n = 0, k, x = 0; 294562306a36Sopenharmony_ci int max_size; 294662306a36Sopenharmony_ci 294762306a36Sopenharmony_ci max_size = is_ctl ? CTLTREESIZE : TREESIZE; 294862306a36Sopenharmony_ci 294962306a36Sopenharmony_ci /* first check the root of the tree to see if there is 295062306a36Sopenharmony_ci * sufficient free space. 295162306a36Sopenharmony_ci */ 295262306a36Sopenharmony_ci if (l2nb > tp->dmt_stree[ROOT]) 295362306a36Sopenharmony_ci return -ENOSPC; 295462306a36Sopenharmony_ci 295562306a36Sopenharmony_ci /* sufficient free space available. now search down the tree 295662306a36Sopenharmony_ci * starting at the next level for the leftmost leaf that 295762306a36Sopenharmony_ci * describes sufficient free space. 295862306a36Sopenharmony_ci */ 295962306a36Sopenharmony_ci for (k = le32_to_cpu(tp->dmt_height), ti = 1; 296062306a36Sopenharmony_ci k > 0; k--, ti = ((ti + n) << 2) + 1) { 296162306a36Sopenharmony_ci /* search the four nodes at this level, starting from 296262306a36Sopenharmony_ci * the left. 296362306a36Sopenharmony_ci */ 296462306a36Sopenharmony_ci for (x = ti, n = 0; n < 4; n++) { 296562306a36Sopenharmony_ci /* sufficient free space found. move to the next 296662306a36Sopenharmony_ci * level (or quit if this is the last level). 296762306a36Sopenharmony_ci */ 296862306a36Sopenharmony_ci if (x + n > max_size) 296962306a36Sopenharmony_ci return -ENOSPC; 297062306a36Sopenharmony_ci if (l2nb <= tp->dmt_stree[x + n]) 297162306a36Sopenharmony_ci break; 297262306a36Sopenharmony_ci } 297362306a36Sopenharmony_ci 297462306a36Sopenharmony_ci /* better have found something since the higher 297562306a36Sopenharmony_ci * levels of the tree said it was here. 297662306a36Sopenharmony_ci */ 297762306a36Sopenharmony_ci assert(n < 4); 297862306a36Sopenharmony_ci } 297962306a36Sopenharmony_ci 298062306a36Sopenharmony_ci /* set the return to the leftmost leaf describing sufficient 298162306a36Sopenharmony_ci * free space. 298262306a36Sopenharmony_ci */ 298362306a36Sopenharmony_ci *leafidx = x + n - le32_to_cpu(tp->dmt_leafidx); 298462306a36Sopenharmony_ci 298562306a36Sopenharmony_ci return (0); 298662306a36Sopenharmony_ci} 298762306a36Sopenharmony_ci 298862306a36Sopenharmony_ci 298962306a36Sopenharmony_ci/* 299062306a36Sopenharmony_ci * NAME: dbFindBits() 299162306a36Sopenharmony_ci * 299262306a36Sopenharmony_ci * FUNCTION: find a specified number of binary buddy free bits within a 299362306a36Sopenharmony_ci * dmap bitmap word value. 299462306a36Sopenharmony_ci * 299562306a36Sopenharmony_ci * this routine searches the bitmap value for (1 << l2nb) free 299662306a36Sopenharmony_ci * bits at (1 << l2nb) alignments within the value. 299762306a36Sopenharmony_ci * 299862306a36Sopenharmony_ci * PARAMETERS: 299962306a36Sopenharmony_ci * word - dmap bitmap word value. 300062306a36Sopenharmony_ci * l2nb - number of free bits specified as a log2 number. 300162306a36Sopenharmony_ci * 300262306a36Sopenharmony_ci * RETURN VALUES: 300362306a36Sopenharmony_ci * starting bit number of free bits. 300462306a36Sopenharmony_ci */ 300562306a36Sopenharmony_cistatic int dbFindBits(u32 word, int l2nb) 300662306a36Sopenharmony_ci{ 300762306a36Sopenharmony_ci int bitno, nb; 300862306a36Sopenharmony_ci u32 mask; 300962306a36Sopenharmony_ci 301062306a36Sopenharmony_ci /* get the number of bits. 301162306a36Sopenharmony_ci */ 301262306a36Sopenharmony_ci nb = 1 << l2nb; 301362306a36Sopenharmony_ci assert(nb <= DBWORD); 301462306a36Sopenharmony_ci 301562306a36Sopenharmony_ci /* complement the word so we can use a mask (i.e. 0s represent 301662306a36Sopenharmony_ci * free bits) and compute the mask. 301762306a36Sopenharmony_ci */ 301862306a36Sopenharmony_ci word = ~word; 301962306a36Sopenharmony_ci mask = ONES << (DBWORD - nb); 302062306a36Sopenharmony_ci 302162306a36Sopenharmony_ci /* scan the word for nb free bits at nb alignments. 302262306a36Sopenharmony_ci */ 302362306a36Sopenharmony_ci for (bitno = 0; mask != 0; bitno += nb, mask >>= nb) { 302462306a36Sopenharmony_ci if ((mask & word) == mask) 302562306a36Sopenharmony_ci break; 302662306a36Sopenharmony_ci } 302762306a36Sopenharmony_ci 302862306a36Sopenharmony_ci ASSERT(bitno < 32); 302962306a36Sopenharmony_ci 303062306a36Sopenharmony_ci /* return the bit number. 303162306a36Sopenharmony_ci */ 303262306a36Sopenharmony_ci return (bitno); 303362306a36Sopenharmony_ci} 303462306a36Sopenharmony_ci 303562306a36Sopenharmony_ci 303662306a36Sopenharmony_ci/* 303762306a36Sopenharmony_ci * NAME: dbMaxBud(u8 *cp) 303862306a36Sopenharmony_ci * 303962306a36Sopenharmony_ci * FUNCTION: determine the largest binary buddy string of free 304062306a36Sopenharmony_ci * bits within 32-bits of the map. 304162306a36Sopenharmony_ci * 304262306a36Sopenharmony_ci * PARAMETERS: 304362306a36Sopenharmony_ci * cp - pointer to the 32-bit value. 304462306a36Sopenharmony_ci * 304562306a36Sopenharmony_ci * RETURN VALUES: 304662306a36Sopenharmony_ci * largest binary buddy of free bits within a dmap word. 304762306a36Sopenharmony_ci */ 304862306a36Sopenharmony_cistatic int dbMaxBud(u8 * cp) 304962306a36Sopenharmony_ci{ 305062306a36Sopenharmony_ci signed char tmp1, tmp2; 305162306a36Sopenharmony_ci 305262306a36Sopenharmony_ci /* check if the wmap word is all free. if so, the 305362306a36Sopenharmony_ci * free buddy size is BUDMIN. 305462306a36Sopenharmony_ci */ 305562306a36Sopenharmony_ci if (*((uint *) cp) == 0) 305662306a36Sopenharmony_ci return (BUDMIN); 305762306a36Sopenharmony_ci 305862306a36Sopenharmony_ci /* check if the wmap word is half free. if so, the 305962306a36Sopenharmony_ci * free buddy size is BUDMIN-1. 306062306a36Sopenharmony_ci */ 306162306a36Sopenharmony_ci if (*((u16 *) cp) == 0 || *((u16 *) cp + 1) == 0) 306262306a36Sopenharmony_ci return (BUDMIN - 1); 306362306a36Sopenharmony_ci 306462306a36Sopenharmony_ci /* not all free or half free. determine the free buddy 306562306a36Sopenharmony_ci * size thru table lookup using quarters of the wmap word. 306662306a36Sopenharmony_ci */ 306762306a36Sopenharmony_ci tmp1 = max(budtab[cp[2]], budtab[cp[3]]); 306862306a36Sopenharmony_ci tmp2 = max(budtab[cp[0]], budtab[cp[1]]); 306962306a36Sopenharmony_ci return (max(tmp1, tmp2)); 307062306a36Sopenharmony_ci} 307162306a36Sopenharmony_ci 307262306a36Sopenharmony_ci 307362306a36Sopenharmony_ci/* 307462306a36Sopenharmony_ci * NAME: cnttz(uint word) 307562306a36Sopenharmony_ci * 307662306a36Sopenharmony_ci * FUNCTION: determine the number of trailing zeros within a 32-bit 307762306a36Sopenharmony_ci * value. 307862306a36Sopenharmony_ci * 307962306a36Sopenharmony_ci * PARAMETERS: 308062306a36Sopenharmony_ci * value - 32-bit value to be examined. 308162306a36Sopenharmony_ci * 308262306a36Sopenharmony_ci * RETURN VALUES: 308362306a36Sopenharmony_ci * count of trailing zeros 308462306a36Sopenharmony_ci */ 308562306a36Sopenharmony_cistatic int cnttz(u32 word) 308662306a36Sopenharmony_ci{ 308762306a36Sopenharmony_ci int n; 308862306a36Sopenharmony_ci 308962306a36Sopenharmony_ci for (n = 0; n < 32; n++, word >>= 1) { 309062306a36Sopenharmony_ci if (word & 0x01) 309162306a36Sopenharmony_ci break; 309262306a36Sopenharmony_ci } 309362306a36Sopenharmony_ci 309462306a36Sopenharmony_ci return (n); 309562306a36Sopenharmony_ci} 309662306a36Sopenharmony_ci 309762306a36Sopenharmony_ci 309862306a36Sopenharmony_ci/* 309962306a36Sopenharmony_ci * NAME: cntlz(u32 value) 310062306a36Sopenharmony_ci * 310162306a36Sopenharmony_ci * FUNCTION: determine the number of leading zeros within a 32-bit 310262306a36Sopenharmony_ci * value. 310362306a36Sopenharmony_ci * 310462306a36Sopenharmony_ci * PARAMETERS: 310562306a36Sopenharmony_ci * value - 32-bit value to be examined. 310662306a36Sopenharmony_ci * 310762306a36Sopenharmony_ci * RETURN VALUES: 310862306a36Sopenharmony_ci * count of leading zeros 310962306a36Sopenharmony_ci */ 311062306a36Sopenharmony_cistatic int cntlz(u32 value) 311162306a36Sopenharmony_ci{ 311262306a36Sopenharmony_ci int n; 311362306a36Sopenharmony_ci 311462306a36Sopenharmony_ci for (n = 0; n < 32; n++, value <<= 1) { 311562306a36Sopenharmony_ci if (value & HIGHORDER) 311662306a36Sopenharmony_ci break; 311762306a36Sopenharmony_ci } 311862306a36Sopenharmony_ci return (n); 311962306a36Sopenharmony_ci} 312062306a36Sopenharmony_ci 312162306a36Sopenharmony_ci 312262306a36Sopenharmony_ci/* 312362306a36Sopenharmony_ci * NAME: blkstol2(s64 nb) 312462306a36Sopenharmony_ci * 312562306a36Sopenharmony_ci * FUNCTION: convert a block count to its log2 value. if the block 312662306a36Sopenharmony_ci * count is not a l2 multiple, it is rounded up to the next 312762306a36Sopenharmony_ci * larger l2 multiple. 312862306a36Sopenharmony_ci * 312962306a36Sopenharmony_ci * PARAMETERS: 313062306a36Sopenharmony_ci * nb - number of blocks 313162306a36Sopenharmony_ci * 313262306a36Sopenharmony_ci * RETURN VALUES: 313362306a36Sopenharmony_ci * log2 number of blocks 313462306a36Sopenharmony_ci */ 313562306a36Sopenharmony_cistatic int blkstol2(s64 nb) 313662306a36Sopenharmony_ci{ 313762306a36Sopenharmony_ci int l2nb; 313862306a36Sopenharmony_ci s64 mask; /* meant to be signed */ 313962306a36Sopenharmony_ci 314062306a36Sopenharmony_ci mask = (s64) 1 << (64 - 1); 314162306a36Sopenharmony_ci 314262306a36Sopenharmony_ci /* count the leading bits. 314362306a36Sopenharmony_ci */ 314462306a36Sopenharmony_ci for (l2nb = 0; l2nb < 64; l2nb++, mask >>= 1) { 314562306a36Sopenharmony_ci /* leading bit found. 314662306a36Sopenharmony_ci */ 314762306a36Sopenharmony_ci if (nb & mask) { 314862306a36Sopenharmony_ci /* determine the l2 value. 314962306a36Sopenharmony_ci */ 315062306a36Sopenharmony_ci l2nb = (64 - 1) - l2nb; 315162306a36Sopenharmony_ci 315262306a36Sopenharmony_ci /* check if we need to round up. 315362306a36Sopenharmony_ci */ 315462306a36Sopenharmony_ci if (~mask & nb) 315562306a36Sopenharmony_ci l2nb++; 315662306a36Sopenharmony_ci 315762306a36Sopenharmony_ci return (l2nb); 315862306a36Sopenharmony_ci } 315962306a36Sopenharmony_ci } 316062306a36Sopenharmony_ci assert(0); 316162306a36Sopenharmony_ci return 0; /* fix compiler warning */ 316262306a36Sopenharmony_ci} 316362306a36Sopenharmony_ci 316462306a36Sopenharmony_ci 316562306a36Sopenharmony_ci/* 316662306a36Sopenharmony_ci * NAME: dbAllocBottomUp() 316762306a36Sopenharmony_ci * 316862306a36Sopenharmony_ci * FUNCTION: alloc the specified block range from the working block 316962306a36Sopenharmony_ci * allocation map. 317062306a36Sopenharmony_ci * 317162306a36Sopenharmony_ci * the blocks will be alloc from the working map one dmap 317262306a36Sopenharmony_ci * at a time. 317362306a36Sopenharmony_ci * 317462306a36Sopenharmony_ci * PARAMETERS: 317562306a36Sopenharmony_ci * ip - pointer to in-core inode; 317662306a36Sopenharmony_ci * blkno - starting block number to be freed. 317762306a36Sopenharmony_ci * nblocks - number of blocks to be freed. 317862306a36Sopenharmony_ci * 317962306a36Sopenharmony_ci * RETURN VALUES: 318062306a36Sopenharmony_ci * 0 - success 318162306a36Sopenharmony_ci * -EIO - i/o error 318262306a36Sopenharmony_ci */ 318362306a36Sopenharmony_ciint dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks) 318462306a36Sopenharmony_ci{ 318562306a36Sopenharmony_ci struct metapage *mp; 318662306a36Sopenharmony_ci struct dmap *dp; 318762306a36Sopenharmony_ci int nb, rc; 318862306a36Sopenharmony_ci s64 lblkno, rem; 318962306a36Sopenharmony_ci struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; 319062306a36Sopenharmony_ci struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap; 319162306a36Sopenharmony_ci 319262306a36Sopenharmony_ci IREAD_LOCK(ipbmap, RDWRLOCK_DMAP); 319362306a36Sopenharmony_ci 319462306a36Sopenharmony_ci /* block to be allocated better be within the mapsize. */ 319562306a36Sopenharmony_ci ASSERT(nblocks <= bmp->db_mapsize - blkno); 319662306a36Sopenharmony_ci 319762306a36Sopenharmony_ci /* 319862306a36Sopenharmony_ci * allocate the blocks a dmap at a time. 319962306a36Sopenharmony_ci */ 320062306a36Sopenharmony_ci mp = NULL; 320162306a36Sopenharmony_ci for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) { 320262306a36Sopenharmony_ci /* release previous dmap if any */ 320362306a36Sopenharmony_ci if (mp) { 320462306a36Sopenharmony_ci write_metapage(mp); 320562306a36Sopenharmony_ci } 320662306a36Sopenharmony_ci 320762306a36Sopenharmony_ci /* get the buffer for the current dmap. */ 320862306a36Sopenharmony_ci lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage); 320962306a36Sopenharmony_ci mp = read_metapage(ipbmap, lblkno, PSIZE, 0); 321062306a36Sopenharmony_ci if (mp == NULL) { 321162306a36Sopenharmony_ci IREAD_UNLOCK(ipbmap); 321262306a36Sopenharmony_ci return -EIO; 321362306a36Sopenharmony_ci } 321462306a36Sopenharmony_ci dp = (struct dmap *) mp->data; 321562306a36Sopenharmony_ci 321662306a36Sopenharmony_ci /* determine the number of blocks to be allocated from 321762306a36Sopenharmony_ci * this dmap. 321862306a36Sopenharmony_ci */ 321962306a36Sopenharmony_ci nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1))); 322062306a36Sopenharmony_ci 322162306a36Sopenharmony_ci /* allocate the blocks. */ 322262306a36Sopenharmony_ci if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) { 322362306a36Sopenharmony_ci release_metapage(mp); 322462306a36Sopenharmony_ci IREAD_UNLOCK(ipbmap); 322562306a36Sopenharmony_ci return (rc); 322662306a36Sopenharmony_ci } 322762306a36Sopenharmony_ci } 322862306a36Sopenharmony_ci 322962306a36Sopenharmony_ci /* write the last buffer. */ 323062306a36Sopenharmony_ci write_metapage(mp); 323162306a36Sopenharmony_ci 323262306a36Sopenharmony_ci IREAD_UNLOCK(ipbmap); 323362306a36Sopenharmony_ci 323462306a36Sopenharmony_ci return (0); 323562306a36Sopenharmony_ci} 323662306a36Sopenharmony_ci 323762306a36Sopenharmony_ci 323862306a36Sopenharmony_cistatic int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, 323962306a36Sopenharmony_ci int nblocks) 324062306a36Sopenharmony_ci{ 324162306a36Sopenharmony_ci int rc; 324262306a36Sopenharmony_ci int dbitno, word, rembits, nb, nwords, wbitno, agno; 324362306a36Sopenharmony_ci s8 oldroot; 324462306a36Sopenharmony_ci struct dmaptree *tp = (struct dmaptree *) & dp->tree; 324562306a36Sopenharmony_ci 324662306a36Sopenharmony_ci /* save the current value of the root (i.e. maximum free string) 324762306a36Sopenharmony_ci * of the dmap tree. 324862306a36Sopenharmony_ci */ 324962306a36Sopenharmony_ci oldroot = tp->stree[ROOT]; 325062306a36Sopenharmony_ci 325162306a36Sopenharmony_ci /* determine the bit number and word within the dmap of the 325262306a36Sopenharmony_ci * starting block. 325362306a36Sopenharmony_ci */ 325462306a36Sopenharmony_ci dbitno = blkno & (BPERDMAP - 1); 325562306a36Sopenharmony_ci word = dbitno >> L2DBWORD; 325662306a36Sopenharmony_ci 325762306a36Sopenharmony_ci /* block range better be within the dmap */ 325862306a36Sopenharmony_ci assert(dbitno + nblocks <= BPERDMAP); 325962306a36Sopenharmony_ci 326062306a36Sopenharmony_ci /* allocate the bits of the dmap's words corresponding to the block 326162306a36Sopenharmony_ci * range. not all bits of the first and last words may be contained 326262306a36Sopenharmony_ci * within the block range. if this is the case, we'll work against 326362306a36Sopenharmony_ci * those words (i.e. partial first and/or last) on an individual basis 326462306a36Sopenharmony_ci * (a single pass), allocating the bits of interest by hand and 326562306a36Sopenharmony_ci * updating the leaf corresponding to the dmap word. a single pass 326662306a36Sopenharmony_ci * will be used for all dmap words fully contained within the 326762306a36Sopenharmony_ci * specified range. within this pass, the bits of all fully contained 326862306a36Sopenharmony_ci * dmap words will be marked as free in a single shot and the leaves 326962306a36Sopenharmony_ci * will be updated. a single leaf may describe the free space of 327062306a36Sopenharmony_ci * multiple dmap words, so we may update only a subset of the actual 327162306a36Sopenharmony_ci * leaves corresponding to the dmap words of the block range. 327262306a36Sopenharmony_ci */ 327362306a36Sopenharmony_ci for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) { 327462306a36Sopenharmony_ci /* determine the bit number within the word and 327562306a36Sopenharmony_ci * the number of bits within the word. 327662306a36Sopenharmony_ci */ 327762306a36Sopenharmony_ci wbitno = dbitno & (DBWORD - 1); 327862306a36Sopenharmony_ci nb = min(rembits, DBWORD - wbitno); 327962306a36Sopenharmony_ci 328062306a36Sopenharmony_ci /* check if only part of a word is to be allocated. 328162306a36Sopenharmony_ci */ 328262306a36Sopenharmony_ci if (nb < DBWORD) { 328362306a36Sopenharmony_ci /* allocate (set to 1) the appropriate bits within 328462306a36Sopenharmony_ci * this dmap word. 328562306a36Sopenharmony_ci */ 328662306a36Sopenharmony_ci dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb) 328762306a36Sopenharmony_ci >> wbitno); 328862306a36Sopenharmony_ci 328962306a36Sopenharmony_ci word++; 329062306a36Sopenharmony_ci } else { 329162306a36Sopenharmony_ci /* one or more dmap words are fully contained 329262306a36Sopenharmony_ci * within the block range. determine how many 329362306a36Sopenharmony_ci * words and allocate (set to 1) the bits of these 329462306a36Sopenharmony_ci * words. 329562306a36Sopenharmony_ci */ 329662306a36Sopenharmony_ci nwords = rembits >> L2DBWORD; 329762306a36Sopenharmony_ci memset(&dp->wmap[word], (int) ONES, nwords * 4); 329862306a36Sopenharmony_ci 329962306a36Sopenharmony_ci /* determine how many bits */ 330062306a36Sopenharmony_ci nb = nwords << L2DBWORD; 330162306a36Sopenharmony_ci word += nwords; 330262306a36Sopenharmony_ci } 330362306a36Sopenharmony_ci } 330462306a36Sopenharmony_ci 330562306a36Sopenharmony_ci /* update the free count for this dmap */ 330662306a36Sopenharmony_ci le32_add_cpu(&dp->nfree, -nblocks); 330762306a36Sopenharmony_ci 330862306a36Sopenharmony_ci /* reconstruct summary tree */ 330962306a36Sopenharmony_ci dbInitDmapTree(dp); 331062306a36Sopenharmony_ci 331162306a36Sopenharmony_ci BMAP_LOCK(bmp); 331262306a36Sopenharmony_ci 331362306a36Sopenharmony_ci /* if this allocation group is completely free, 331462306a36Sopenharmony_ci * update the highest active allocation group number 331562306a36Sopenharmony_ci * if this allocation group is the new max. 331662306a36Sopenharmony_ci */ 331762306a36Sopenharmony_ci agno = blkno >> bmp->db_agl2size; 331862306a36Sopenharmony_ci if (agno > bmp->db_maxag) 331962306a36Sopenharmony_ci bmp->db_maxag = agno; 332062306a36Sopenharmony_ci 332162306a36Sopenharmony_ci /* update the free count for the allocation group and map */ 332262306a36Sopenharmony_ci bmp->db_agfree[agno] -= nblocks; 332362306a36Sopenharmony_ci bmp->db_nfree -= nblocks; 332462306a36Sopenharmony_ci 332562306a36Sopenharmony_ci BMAP_UNLOCK(bmp); 332662306a36Sopenharmony_ci 332762306a36Sopenharmony_ci /* if the root has not changed, done. */ 332862306a36Sopenharmony_ci if (tp->stree[ROOT] == oldroot) 332962306a36Sopenharmony_ci return (0); 333062306a36Sopenharmony_ci 333162306a36Sopenharmony_ci /* root changed. bubble the change up to the dmap control pages. 333262306a36Sopenharmony_ci * if the adjustment of the upper level control pages fails, 333362306a36Sopenharmony_ci * backout the bit allocation (thus making everything consistent). 333462306a36Sopenharmony_ci */ 333562306a36Sopenharmony_ci if ((rc = dbAdjCtl(bmp, blkno, tp->stree[ROOT], 1, 0))) 333662306a36Sopenharmony_ci dbFreeBits(bmp, dp, blkno, nblocks); 333762306a36Sopenharmony_ci 333862306a36Sopenharmony_ci return (rc); 333962306a36Sopenharmony_ci} 334062306a36Sopenharmony_ci 334162306a36Sopenharmony_ci 334262306a36Sopenharmony_ci/* 334362306a36Sopenharmony_ci * NAME: dbExtendFS() 334462306a36Sopenharmony_ci * 334562306a36Sopenharmony_ci * FUNCTION: extend bmap from blkno for nblocks; 334662306a36Sopenharmony_ci * dbExtendFS() updates bmap ready for dbAllocBottomUp(); 334762306a36Sopenharmony_ci * 334862306a36Sopenharmony_ci * L2 334962306a36Sopenharmony_ci * | 335062306a36Sopenharmony_ci * L1---------------------------------L1 335162306a36Sopenharmony_ci * | | 335262306a36Sopenharmony_ci * L0---------L0---------L0 L0---------L0---------L0 335362306a36Sopenharmony_ci * | | | | | | 335462306a36Sopenharmony_ci * d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,...,dn d0,.,dm; 335562306a36Sopenharmony_ci * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm 335662306a36Sopenharmony_ci * 335762306a36Sopenharmony_ci * <---old---><----------------------------extend-----------------------> 335862306a36Sopenharmony_ci */ 335962306a36Sopenharmony_ciint dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) 336062306a36Sopenharmony_ci{ 336162306a36Sopenharmony_ci struct jfs_sb_info *sbi = JFS_SBI(ipbmap->i_sb); 336262306a36Sopenharmony_ci int nbperpage = sbi->nbperpage; 336362306a36Sopenharmony_ci int i, i0 = true, j, j0 = true, k, n; 336462306a36Sopenharmony_ci s64 newsize; 336562306a36Sopenharmony_ci s64 p; 336662306a36Sopenharmony_ci struct metapage *mp, *l2mp, *l1mp = NULL, *l0mp = NULL; 336762306a36Sopenharmony_ci struct dmapctl *l2dcp, *l1dcp, *l0dcp; 336862306a36Sopenharmony_ci struct dmap *dp; 336962306a36Sopenharmony_ci s8 *l0leaf, *l1leaf, *l2leaf; 337062306a36Sopenharmony_ci struct bmap *bmp = sbi->bmap; 337162306a36Sopenharmony_ci int agno, l2agsize, oldl2agsize; 337262306a36Sopenharmony_ci s64 ag_rem; 337362306a36Sopenharmony_ci 337462306a36Sopenharmony_ci newsize = blkno + nblocks; 337562306a36Sopenharmony_ci 337662306a36Sopenharmony_ci jfs_info("dbExtendFS: blkno:%Ld nblocks:%Ld newsize:%Ld", 337762306a36Sopenharmony_ci (long long) blkno, (long long) nblocks, (long long) newsize); 337862306a36Sopenharmony_ci 337962306a36Sopenharmony_ci /* 338062306a36Sopenharmony_ci * initialize bmap control page. 338162306a36Sopenharmony_ci * 338262306a36Sopenharmony_ci * all the data in bmap control page should exclude 338362306a36Sopenharmony_ci * the mkfs hidden dmap page. 338462306a36Sopenharmony_ci */ 338562306a36Sopenharmony_ci 338662306a36Sopenharmony_ci /* update mapsize */ 338762306a36Sopenharmony_ci bmp->db_mapsize = newsize; 338862306a36Sopenharmony_ci bmp->db_maxlevel = BMAPSZTOLEV(bmp->db_mapsize); 338962306a36Sopenharmony_ci 339062306a36Sopenharmony_ci /* compute new AG size */ 339162306a36Sopenharmony_ci l2agsize = dbGetL2AGSize(newsize); 339262306a36Sopenharmony_ci oldl2agsize = bmp->db_agl2size; 339362306a36Sopenharmony_ci 339462306a36Sopenharmony_ci bmp->db_agl2size = l2agsize; 339562306a36Sopenharmony_ci bmp->db_agsize = 1 << l2agsize; 339662306a36Sopenharmony_ci 339762306a36Sopenharmony_ci /* compute new number of AG */ 339862306a36Sopenharmony_ci agno = bmp->db_numag; 339962306a36Sopenharmony_ci bmp->db_numag = newsize >> l2agsize; 340062306a36Sopenharmony_ci bmp->db_numag += ((u32) newsize % (u32) bmp->db_agsize) ? 1 : 0; 340162306a36Sopenharmony_ci 340262306a36Sopenharmony_ci /* 340362306a36Sopenharmony_ci * reconfigure db_agfree[] 340462306a36Sopenharmony_ci * from old AG configuration to new AG configuration; 340562306a36Sopenharmony_ci * 340662306a36Sopenharmony_ci * coalesce contiguous k (newAGSize/oldAGSize) AGs; 340762306a36Sopenharmony_ci * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn; 340862306a36Sopenharmony_ci * note: new AG size = old AG size * (2**x). 340962306a36Sopenharmony_ci */ 341062306a36Sopenharmony_ci if (l2agsize == oldl2agsize) 341162306a36Sopenharmony_ci goto extend; 341262306a36Sopenharmony_ci k = 1 << (l2agsize - oldl2agsize); 341362306a36Sopenharmony_ci ag_rem = bmp->db_agfree[0]; /* save agfree[0] */ 341462306a36Sopenharmony_ci for (i = 0, n = 0; i < agno; n++) { 341562306a36Sopenharmony_ci bmp->db_agfree[n] = 0; /* init collection point */ 341662306a36Sopenharmony_ci 341762306a36Sopenharmony_ci /* coalesce contiguous k AGs; */ 341862306a36Sopenharmony_ci for (j = 0; j < k && i < agno; j++, i++) { 341962306a36Sopenharmony_ci /* merge AGi to AGn */ 342062306a36Sopenharmony_ci bmp->db_agfree[n] += bmp->db_agfree[i]; 342162306a36Sopenharmony_ci } 342262306a36Sopenharmony_ci } 342362306a36Sopenharmony_ci bmp->db_agfree[0] += ag_rem; /* restore agfree[0] */ 342462306a36Sopenharmony_ci 342562306a36Sopenharmony_ci for (; n < MAXAG; n++) 342662306a36Sopenharmony_ci bmp->db_agfree[n] = 0; 342762306a36Sopenharmony_ci 342862306a36Sopenharmony_ci /* 342962306a36Sopenharmony_ci * update highest active ag number 343062306a36Sopenharmony_ci */ 343162306a36Sopenharmony_ci 343262306a36Sopenharmony_ci bmp->db_maxag = bmp->db_maxag / k; 343362306a36Sopenharmony_ci 343462306a36Sopenharmony_ci /* 343562306a36Sopenharmony_ci * extend bmap 343662306a36Sopenharmony_ci * 343762306a36Sopenharmony_ci * update bit maps and corresponding level control pages; 343862306a36Sopenharmony_ci * global control page db_nfree, db_agfree[agno], db_maxfreebud; 343962306a36Sopenharmony_ci */ 344062306a36Sopenharmony_ci extend: 344162306a36Sopenharmony_ci /* get L2 page */ 344262306a36Sopenharmony_ci p = BMAPBLKNO + nbperpage; /* L2 page */ 344362306a36Sopenharmony_ci l2mp = read_metapage(ipbmap, p, PSIZE, 0); 344462306a36Sopenharmony_ci if (!l2mp) { 344562306a36Sopenharmony_ci jfs_error(ipbmap->i_sb, "L2 page could not be read\n"); 344662306a36Sopenharmony_ci return -EIO; 344762306a36Sopenharmony_ci } 344862306a36Sopenharmony_ci l2dcp = (struct dmapctl *) l2mp->data; 344962306a36Sopenharmony_ci 345062306a36Sopenharmony_ci /* compute start L1 */ 345162306a36Sopenharmony_ci k = blkno >> L2MAXL1SIZE; 345262306a36Sopenharmony_ci l2leaf = l2dcp->stree + CTLLEAFIND + k; 345362306a36Sopenharmony_ci p = BLKTOL1(blkno, sbi->l2nbperpage); /* L1 page */ 345462306a36Sopenharmony_ci 345562306a36Sopenharmony_ci /* 345662306a36Sopenharmony_ci * extend each L1 in L2 345762306a36Sopenharmony_ci */ 345862306a36Sopenharmony_ci for (; k < LPERCTL; k++, p += nbperpage) { 345962306a36Sopenharmony_ci /* get L1 page */ 346062306a36Sopenharmony_ci if (j0) { 346162306a36Sopenharmony_ci /* read in L1 page: (blkno & (MAXL1SIZE - 1)) */ 346262306a36Sopenharmony_ci l1mp = read_metapage(ipbmap, p, PSIZE, 0); 346362306a36Sopenharmony_ci if (l1mp == NULL) 346462306a36Sopenharmony_ci goto errout; 346562306a36Sopenharmony_ci l1dcp = (struct dmapctl *) l1mp->data; 346662306a36Sopenharmony_ci 346762306a36Sopenharmony_ci /* compute start L0 */ 346862306a36Sopenharmony_ci j = (blkno & (MAXL1SIZE - 1)) >> L2MAXL0SIZE; 346962306a36Sopenharmony_ci l1leaf = l1dcp->stree + CTLLEAFIND + j; 347062306a36Sopenharmony_ci p = BLKTOL0(blkno, sbi->l2nbperpage); 347162306a36Sopenharmony_ci j0 = false; 347262306a36Sopenharmony_ci } else { 347362306a36Sopenharmony_ci /* assign/init L1 page */ 347462306a36Sopenharmony_ci l1mp = get_metapage(ipbmap, p, PSIZE, 0); 347562306a36Sopenharmony_ci if (l1mp == NULL) 347662306a36Sopenharmony_ci goto errout; 347762306a36Sopenharmony_ci 347862306a36Sopenharmony_ci l1dcp = (struct dmapctl *) l1mp->data; 347962306a36Sopenharmony_ci 348062306a36Sopenharmony_ci /* compute start L0 */ 348162306a36Sopenharmony_ci j = 0; 348262306a36Sopenharmony_ci l1leaf = l1dcp->stree + CTLLEAFIND; 348362306a36Sopenharmony_ci p += nbperpage; /* 1st L0 of L1.k */ 348462306a36Sopenharmony_ci } 348562306a36Sopenharmony_ci 348662306a36Sopenharmony_ci /* 348762306a36Sopenharmony_ci * extend each L0 in L1 348862306a36Sopenharmony_ci */ 348962306a36Sopenharmony_ci for (; j < LPERCTL; j++) { 349062306a36Sopenharmony_ci /* get L0 page */ 349162306a36Sopenharmony_ci if (i0) { 349262306a36Sopenharmony_ci /* read in L0 page: (blkno & (MAXL0SIZE - 1)) */ 349362306a36Sopenharmony_ci 349462306a36Sopenharmony_ci l0mp = read_metapage(ipbmap, p, PSIZE, 0); 349562306a36Sopenharmony_ci if (l0mp == NULL) 349662306a36Sopenharmony_ci goto errout; 349762306a36Sopenharmony_ci l0dcp = (struct dmapctl *) l0mp->data; 349862306a36Sopenharmony_ci 349962306a36Sopenharmony_ci /* compute start dmap */ 350062306a36Sopenharmony_ci i = (blkno & (MAXL0SIZE - 1)) >> 350162306a36Sopenharmony_ci L2BPERDMAP; 350262306a36Sopenharmony_ci l0leaf = l0dcp->stree + CTLLEAFIND + i; 350362306a36Sopenharmony_ci p = BLKTODMAP(blkno, 350462306a36Sopenharmony_ci sbi->l2nbperpage); 350562306a36Sopenharmony_ci i0 = false; 350662306a36Sopenharmony_ci } else { 350762306a36Sopenharmony_ci /* assign/init L0 page */ 350862306a36Sopenharmony_ci l0mp = get_metapage(ipbmap, p, PSIZE, 0); 350962306a36Sopenharmony_ci if (l0mp == NULL) 351062306a36Sopenharmony_ci goto errout; 351162306a36Sopenharmony_ci 351262306a36Sopenharmony_ci l0dcp = (struct dmapctl *) l0mp->data; 351362306a36Sopenharmony_ci 351462306a36Sopenharmony_ci /* compute start dmap */ 351562306a36Sopenharmony_ci i = 0; 351662306a36Sopenharmony_ci l0leaf = l0dcp->stree + CTLLEAFIND; 351762306a36Sopenharmony_ci p += nbperpage; /* 1st dmap of L0.j */ 351862306a36Sopenharmony_ci } 351962306a36Sopenharmony_ci 352062306a36Sopenharmony_ci /* 352162306a36Sopenharmony_ci * extend each dmap in L0 352262306a36Sopenharmony_ci */ 352362306a36Sopenharmony_ci for (; i < LPERCTL; i++) { 352462306a36Sopenharmony_ci /* 352562306a36Sopenharmony_ci * reconstruct the dmap page, and 352662306a36Sopenharmony_ci * initialize corresponding parent L0 leaf 352762306a36Sopenharmony_ci */ 352862306a36Sopenharmony_ci if ((n = blkno & (BPERDMAP - 1))) { 352962306a36Sopenharmony_ci /* read in dmap page: */ 353062306a36Sopenharmony_ci mp = read_metapage(ipbmap, p, 353162306a36Sopenharmony_ci PSIZE, 0); 353262306a36Sopenharmony_ci if (mp == NULL) 353362306a36Sopenharmony_ci goto errout; 353462306a36Sopenharmony_ci n = min(nblocks, (s64)BPERDMAP - n); 353562306a36Sopenharmony_ci } else { 353662306a36Sopenharmony_ci /* assign/init dmap page */ 353762306a36Sopenharmony_ci mp = read_metapage(ipbmap, p, 353862306a36Sopenharmony_ci PSIZE, 0); 353962306a36Sopenharmony_ci if (mp == NULL) 354062306a36Sopenharmony_ci goto errout; 354162306a36Sopenharmony_ci 354262306a36Sopenharmony_ci n = min_t(s64, nblocks, BPERDMAP); 354362306a36Sopenharmony_ci } 354462306a36Sopenharmony_ci 354562306a36Sopenharmony_ci dp = (struct dmap *) mp->data; 354662306a36Sopenharmony_ci *l0leaf = dbInitDmap(dp, blkno, n); 354762306a36Sopenharmony_ci 354862306a36Sopenharmony_ci bmp->db_nfree += n; 354962306a36Sopenharmony_ci agno = le64_to_cpu(dp->start) >> l2agsize; 355062306a36Sopenharmony_ci bmp->db_agfree[agno] += n; 355162306a36Sopenharmony_ci 355262306a36Sopenharmony_ci write_metapage(mp); 355362306a36Sopenharmony_ci 355462306a36Sopenharmony_ci l0leaf++; 355562306a36Sopenharmony_ci p += nbperpage; 355662306a36Sopenharmony_ci 355762306a36Sopenharmony_ci blkno += n; 355862306a36Sopenharmony_ci nblocks -= n; 355962306a36Sopenharmony_ci if (nblocks == 0) 356062306a36Sopenharmony_ci break; 356162306a36Sopenharmony_ci } /* for each dmap in a L0 */ 356262306a36Sopenharmony_ci 356362306a36Sopenharmony_ci /* 356462306a36Sopenharmony_ci * build current L0 page from its leaves, and 356562306a36Sopenharmony_ci * initialize corresponding parent L1 leaf 356662306a36Sopenharmony_ci */ 356762306a36Sopenharmony_ci *l1leaf = dbInitDmapCtl(l0dcp, 0, ++i); 356862306a36Sopenharmony_ci write_metapage(l0mp); 356962306a36Sopenharmony_ci l0mp = NULL; 357062306a36Sopenharmony_ci 357162306a36Sopenharmony_ci if (nblocks) 357262306a36Sopenharmony_ci l1leaf++; /* continue for next L0 */ 357362306a36Sopenharmony_ci else { 357462306a36Sopenharmony_ci /* more than 1 L0 ? */ 357562306a36Sopenharmony_ci if (j > 0) 357662306a36Sopenharmony_ci break; /* build L1 page */ 357762306a36Sopenharmony_ci else { 357862306a36Sopenharmony_ci /* summarize in global bmap page */ 357962306a36Sopenharmony_ci bmp->db_maxfreebud = *l1leaf; 358062306a36Sopenharmony_ci release_metapage(l1mp); 358162306a36Sopenharmony_ci release_metapage(l2mp); 358262306a36Sopenharmony_ci goto finalize; 358362306a36Sopenharmony_ci } 358462306a36Sopenharmony_ci } 358562306a36Sopenharmony_ci } /* for each L0 in a L1 */ 358662306a36Sopenharmony_ci 358762306a36Sopenharmony_ci /* 358862306a36Sopenharmony_ci * build current L1 page from its leaves, and 358962306a36Sopenharmony_ci * initialize corresponding parent L2 leaf 359062306a36Sopenharmony_ci */ 359162306a36Sopenharmony_ci *l2leaf = dbInitDmapCtl(l1dcp, 1, ++j); 359262306a36Sopenharmony_ci write_metapage(l1mp); 359362306a36Sopenharmony_ci l1mp = NULL; 359462306a36Sopenharmony_ci 359562306a36Sopenharmony_ci if (nblocks) 359662306a36Sopenharmony_ci l2leaf++; /* continue for next L1 */ 359762306a36Sopenharmony_ci else { 359862306a36Sopenharmony_ci /* more than 1 L1 ? */ 359962306a36Sopenharmony_ci if (k > 0) 360062306a36Sopenharmony_ci break; /* build L2 page */ 360162306a36Sopenharmony_ci else { 360262306a36Sopenharmony_ci /* summarize in global bmap page */ 360362306a36Sopenharmony_ci bmp->db_maxfreebud = *l2leaf; 360462306a36Sopenharmony_ci release_metapage(l2mp); 360562306a36Sopenharmony_ci goto finalize; 360662306a36Sopenharmony_ci } 360762306a36Sopenharmony_ci } 360862306a36Sopenharmony_ci } /* for each L1 in a L2 */ 360962306a36Sopenharmony_ci 361062306a36Sopenharmony_ci jfs_error(ipbmap->i_sb, "function has not returned as expected\n"); 361162306a36Sopenharmony_cierrout: 361262306a36Sopenharmony_ci if (l0mp) 361362306a36Sopenharmony_ci release_metapage(l0mp); 361462306a36Sopenharmony_ci if (l1mp) 361562306a36Sopenharmony_ci release_metapage(l1mp); 361662306a36Sopenharmony_ci release_metapage(l2mp); 361762306a36Sopenharmony_ci return -EIO; 361862306a36Sopenharmony_ci 361962306a36Sopenharmony_ci /* 362062306a36Sopenharmony_ci * finalize bmap control page 362162306a36Sopenharmony_ci */ 362262306a36Sopenharmony_cifinalize: 362362306a36Sopenharmony_ci 362462306a36Sopenharmony_ci return 0; 362562306a36Sopenharmony_ci} 362662306a36Sopenharmony_ci 362762306a36Sopenharmony_ci 362862306a36Sopenharmony_ci/* 362962306a36Sopenharmony_ci * dbFinalizeBmap() 363062306a36Sopenharmony_ci */ 363162306a36Sopenharmony_civoid dbFinalizeBmap(struct inode *ipbmap) 363262306a36Sopenharmony_ci{ 363362306a36Sopenharmony_ci struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; 363462306a36Sopenharmony_ci int actags, inactags, l2nl; 363562306a36Sopenharmony_ci s64 ag_rem, actfree, inactfree, avgfree; 363662306a36Sopenharmony_ci int i, n; 363762306a36Sopenharmony_ci 363862306a36Sopenharmony_ci /* 363962306a36Sopenharmony_ci * finalize bmap control page 364062306a36Sopenharmony_ci */ 364162306a36Sopenharmony_ci//finalize: 364262306a36Sopenharmony_ci /* 364362306a36Sopenharmony_ci * compute db_agpref: preferred ag to allocate from 364462306a36Sopenharmony_ci * (the leftmost ag with average free space in it); 364562306a36Sopenharmony_ci */ 364662306a36Sopenharmony_ci//agpref: 364762306a36Sopenharmony_ci /* get the number of active ags and inactive ags */ 364862306a36Sopenharmony_ci actags = bmp->db_maxag + 1; 364962306a36Sopenharmony_ci inactags = bmp->db_numag - actags; 365062306a36Sopenharmony_ci ag_rem = bmp->db_mapsize & (bmp->db_agsize - 1); /* ??? */ 365162306a36Sopenharmony_ci 365262306a36Sopenharmony_ci /* determine how many blocks are in the inactive allocation 365362306a36Sopenharmony_ci * groups. in doing this, we must account for the fact that 365462306a36Sopenharmony_ci * the rightmost group might be a partial group (i.e. file 365562306a36Sopenharmony_ci * system size is not a multiple of the group size). 365662306a36Sopenharmony_ci */ 365762306a36Sopenharmony_ci inactfree = (inactags && ag_rem) ? 365862306a36Sopenharmony_ci ((inactags - 1) << bmp->db_agl2size) + ag_rem 365962306a36Sopenharmony_ci : inactags << bmp->db_agl2size; 366062306a36Sopenharmony_ci 366162306a36Sopenharmony_ci /* determine how many free blocks are in the active 366262306a36Sopenharmony_ci * allocation groups plus the average number of free blocks 366362306a36Sopenharmony_ci * within the active ags. 366462306a36Sopenharmony_ci */ 366562306a36Sopenharmony_ci actfree = bmp->db_nfree - inactfree; 366662306a36Sopenharmony_ci avgfree = (u32) actfree / (u32) actags; 366762306a36Sopenharmony_ci 366862306a36Sopenharmony_ci /* if the preferred allocation group has not average free space. 366962306a36Sopenharmony_ci * re-establish the preferred group as the leftmost 367062306a36Sopenharmony_ci * group with average free space. 367162306a36Sopenharmony_ci */ 367262306a36Sopenharmony_ci if (bmp->db_agfree[bmp->db_agpref] < avgfree) { 367362306a36Sopenharmony_ci for (bmp->db_agpref = 0; bmp->db_agpref < actags; 367462306a36Sopenharmony_ci bmp->db_agpref++) { 367562306a36Sopenharmony_ci if (bmp->db_agfree[bmp->db_agpref] >= avgfree) 367662306a36Sopenharmony_ci break; 367762306a36Sopenharmony_ci } 367862306a36Sopenharmony_ci if (bmp->db_agpref >= bmp->db_numag) { 367962306a36Sopenharmony_ci jfs_error(ipbmap->i_sb, 368062306a36Sopenharmony_ci "cannot find ag with average freespace\n"); 368162306a36Sopenharmony_ci } 368262306a36Sopenharmony_ci } 368362306a36Sopenharmony_ci 368462306a36Sopenharmony_ci /* 368562306a36Sopenharmony_ci * compute db_aglevel, db_agheight, db_width, db_agstart: 368662306a36Sopenharmony_ci * an ag is covered in aglevel dmapctl summary tree, 368762306a36Sopenharmony_ci * at agheight level height (from leaf) with agwidth number of nodes 368862306a36Sopenharmony_ci * each, which starts at agstart index node of the smmary tree node 368962306a36Sopenharmony_ci * array; 369062306a36Sopenharmony_ci */ 369162306a36Sopenharmony_ci bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize); 369262306a36Sopenharmony_ci l2nl = 369362306a36Sopenharmony_ci bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL); 369462306a36Sopenharmony_ci bmp->db_agheight = l2nl >> 1; 369562306a36Sopenharmony_ci bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheight << 1)); 369662306a36Sopenharmony_ci for (i = 5 - bmp->db_agheight, bmp->db_agstart = 0, n = 1; i > 0; 369762306a36Sopenharmony_ci i--) { 369862306a36Sopenharmony_ci bmp->db_agstart += n; 369962306a36Sopenharmony_ci n <<= 2; 370062306a36Sopenharmony_ci } 370162306a36Sopenharmony_ci 370262306a36Sopenharmony_ci} 370362306a36Sopenharmony_ci 370462306a36Sopenharmony_ci 370562306a36Sopenharmony_ci/* 370662306a36Sopenharmony_ci * NAME: dbInitDmap()/ujfs_idmap_page() 370762306a36Sopenharmony_ci * 370862306a36Sopenharmony_ci * FUNCTION: initialize working/persistent bitmap of the dmap page 370962306a36Sopenharmony_ci * for the specified number of blocks: 371062306a36Sopenharmony_ci * 371162306a36Sopenharmony_ci * at entry, the bitmaps had been initialized as free (ZEROS); 371262306a36Sopenharmony_ci * The number of blocks will only account for the actually 371362306a36Sopenharmony_ci * existing blocks. Blocks which don't actually exist in 371462306a36Sopenharmony_ci * the aggregate will be marked as allocated (ONES); 371562306a36Sopenharmony_ci * 371662306a36Sopenharmony_ci * PARAMETERS: 371762306a36Sopenharmony_ci * dp - pointer to page of map 371862306a36Sopenharmony_ci * nblocks - number of blocks this page 371962306a36Sopenharmony_ci * 372062306a36Sopenharmony_ci * RETURNS: NONE 372162306a36Sopenharmony_ci */ 372262306a36Sopenharmony_cistatic int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks) 372362306a36Sopenharmony_ci{ 372462306a36Sopenharmony_ci int blkno, w, b, r, nw, nb, i; 372562306a36Sopenharmony_ci 372662306a36Sopenharmony_ci /* starting block number within the dmap */ 372762306a36Sopenharmony_ci blkno = Blkno & (BPERDMAP - 1); 372862306a36Sopenharmony_ci 372962306a36Sopenharmony_ci if (blkno == 0) { 373062306a36Sopenharmony_ci dp->nblocks = dp->nfree = cpu_to_le32(nblocks); 373162306a36Sopenharmony_ci dp->start = cpu_to_le64(Blkno); 373262306a36Sopenharmony_ci 373362306a36Sopenharmony_ci if (nblocks == BPERDMAP) { 373462306a36Sopenharmony_ci memset(&dp->wmap[0], 0, LPERDMAP * 4); 373562306a36Sopenharmony_ci memset(&dp->pmap[0], 0, LPERDMAP * 4); 373662306a36Sopenharmony_ci goto initTree; 373762306a36Sopenharmony_ci } 373862306a36Sopenharmony_ci } else { 373962306a36Sopenharmony_ci le32_add_cpu(&dp->nblocks, nblocks); 374062306a36Sopenharmony_ci le32_add_cpu(&dp->nfree, nblocks); 374162306a36Sopenharmony_ci } 374262306a36Sopenharmony_ci 374362306a36Sopenharmony_ci /* word number containing start block number */ 374462306a36Sopenharmony_ci w = blkno >> L2DBWORD; 374562306a36Sopenharmony_ci 374662306a36Sopenharmony_ci /* 374762306a36Sopenharmony_ci * free the bits corresponding to the block range (ZEROS): 374862306a36Sopenharmony_ci * note: not all bits of the first and last words may be contained 374962306a36Sopenharmony_ci * within the block range. 375062306a36Sopenharmony_ci */ 375162306a36Sopenharmony_ci for (r = nblocks; r > 0; r -= nb, blkno += nb) { 375262306a36Sopenharmony_ci /* number of bits preceding range to be freed in the word */ 375362306a36Sopenharmony_ci b = blkno & (DBWORD - 1); 375462306a36Sopenharmony_ci /* number of bits to free in the word */ 375562306a36Sopenharmony_ci nb = min(r, DBWORD - b); 375662306a36Sopenharmony_ci 375762306a36Sopenharmony_ci /* is partial word to be freed ? */ 375862306a36Sopenharmony_ci if (nb < DBWORD) { 375962306a36Sopenharmony_ci /* free (set to 0) from the bitmap word */ 376062306a36Sopenharmony_ci dp->wmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb) 376162306a36Sopenharmony_ci >> b)); 376262306a36Sopenharmony_ci dp->pmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb) 376362306a36Sopenharmony_ci >> b)); 376462306a36Sopenharmony_ci 376562306a36Sopenharmony_ci /* skip the word freed */ 376662306a36Sopenharmony_ci w++; 376762306a36Sopenharmony_ci } else { 376862306a36Sopenharmony_ci /* free (set to 0) contiguous bitmap words */ 376962306a36Sopenharmony_ci nw = r >> L2DBWORD; 377062306a36Sopenharmony_ci memset(&dp->wmap[w], 0, nw * 4); 377162306a36Sopenharmony_ci memset(&dp->pmap[w], 0, nw * 4); 377262306a36Sopenharmony_ci 377362306a36Sopenharmony_ci /* skip the words freed */ 377462306a36Sopenharmony_ci nb = nw << L2DBWORD; 377562306a36Sopenharmony_ci w += nw; 377662306a36Sopenharmony_ci } 377762306a36Sopenharmony_ci } 377862306a36Sopenharmony_ci 377962306a36Sopenharmony_ci /* 378062306a36Sopenharmony_ci * mark bits following the range to be freed (non-existing 378162306a36Sopenharmony_ci * blocks) as allocated (ONES) 378262306a36Sopenharmony_ci */ 378362306a36Sopenharmony_ci 378462306a36Sopenharmony_ci if (blkno == BPERDMAP) 378562306a36Sopenharmony_ci goto initTree; 378662306a36Sopenharmony_ci 378762306a36Sopenharmony_ci /* the first word beyond the end of existing blocks */ 378862306a36Sopenharmony_ci w = blkno >> L2DBWORD; 378962306a36Sopenharmony_ci 379062306a36Sopenharmony_ci /* does nblocks fall on a 32-bit boundary ? */ 379162306a36Sopenharmony_ci b = blkno & (DBWORD - 1); 379262306a36Sopenharmony_ci if (b) { 379362306a36Sopenharmony_ci /* mark a partial word allocated */ 379462306a36Sopenharmony_ci dp->wmap[w] = dp->pmap[w] = cpu_to_le32(ONES >> b); 379562306a36Sopenharmony_ci w++; 379662306a36Sopenharmony_ci } 379762306a36Sopenharmony_ci 379862306a36Sopenharmony_ci /* set the rest of the words in the page to allocated (ONES) */ 379962306a36Sopenharmony_ci for (i = w; i < LPERDMAP; i++) 380062306a36Sopenharmony_ci dp->pmap[i] = dp->wmap[i] = cpu_to_le32(ONES); 380162306a36Sopenharmony_ci 380262306a36Sopenharmony_ci /* 380362306a36Sopenharmony_ci * init tree 380462306a36Sopenharmony_ci */ 380562306a36Sopenharmony_ci initTree: 380662306a36Sopenharmony_ci return (dbInitDmapTree(dp)); 380762306a36Sopenharmony_ci} 380862306a36Sopenharmony_ci 380962306a36Sopenharmony_ci 381062306a36Sopenharmony_ci/* 381162306a36Sopenharmony_ci * NAME: dbInitDmapTree()/ujfs_complete_dmap() 381262306a36Sopenharmony_ci * 381362306a36Sopenharmony_ci * FUNCTION: initialize summary tree of the specified dmap: 381462306a36Sopenharmony_ci * 381562306a36Sopenharmony_ci * at entry, bitmap of the dmap has been initialized; 381662306a36Sopenharmony_ci * 381762306a36Sopenharmony_ci * PARAMETERS: 381862306a36Sopenharmony_ci * dp - dmap to complete 381962306a36Sopenharmony_ci * blkno - starting block number for this dmap 382062306a36Sopenharmony_ci * treemax - will be filled in with max free for this dmap 382162306a36Sopenharmony_ci * 382262306a36Sopenharmony_ci * RETURNS: max free string at the root of the tree 382362306a36Sopenharmony_ci */ 382462306a36Sopenharmony_cistatic int dbInitDmapTree(struct dmap * dp) 382562306a36Sopenharmony_ci{ 382662306a36Sopenharmony_ci struct dmaptree *tp; 382762306a36Sopenharmony_ci s8 *cp; 382862306a36Sopenharmony_ci int i; 382962306a36Sopenharmony_ci 383062306a36Sopenharmony_ci /* init fixed info of tree */ 383162306a36Sopenharmony_ci tp = &dp->tree; 383262306a36Sopenharmony_ci tp->nleafs = cpu_to_le32(LPERDMAP); 383362306a36Sopenharmony_ci tp->l2nleafs = cpu_to_le32(L2LPERDMAP); 383462306a36Sopenharmony_ci tp->leafidx = cpu_to_le32(LEAFIND); 383562306a36Sopenharmony_ci tp->height = cpu_to_le32(4); 383662306a36Sopenharmony_ci tp->budmin = BUDMIN; 383762306a36Sopenharmony_ci 383862306a36Sopenharmony_ci /* init each leaf from corresponding wmap word: 383962306a36Sopenharmony_ci * note: leaf is set to NOFREE(-1) if all blocks of corresponding 384062306a36Sopenharmony_ci * bitmap word are allocated. 384162306a36Sopenharmony_ci */ 384262306a36Sopenharmony_ci cp = tp->stree + le32_to_cpu(tp->leafidx); 384362306a36Sopenharmony_ci for (i = 0; i < LPERDMAP; i++) 384462306a36Sopenharmony_ci *cp++ = dbMaxBud((u8 *) & dp->wmap[i]); 384562306a36Sopenharmony_ci 384662306a36Sopenharmony_ci /* build the dmap's binary buddy summary tree */ 384762306a36Sopenharmony_ci return (dbInitTree(tp)); 384862306a36Sopenharmony_ci} 384962306a36Sopenharmony_ci 385062306a36Sopenharmony_ci 385162306a36Sopenharmony_ci/* 385262306a36Sopenharmony_ci * NAME: dbInitTree()/ujfs_adjtree() 385362306a36Sopenharmony_ci * 385462306a36Sopenharmony_ci * FUNCTION: initialize binary buddy summary tree of a dmap or dmapctl. 385562306a36Sopenharmony_ci * 385662306a36Sopenharmony_ci * at entry, the leaves of the tree has been initialized 385762306a36Sopenharmony_ci * from corresponding bitmap word or root of summary tree 385862306a36Sopenharmony_ci * of the child control page; 385962306a36Sopenharmony_ci * configure binary buddy system at the leaf level, then 386062306a36Sopenharmony_ci * bubble up the values of the leaf nodes up the tree. 386162306a36Sopenharmony_ci * 386262306a36Sopenharmony_ci * PARAMETERS: 386362306a36Sopenharmony_ci * cp - Pointer to the root of the tree 386462306a36Sopenharmony_ci * l2leaves- Number of leaf nodes as a power of 2 386562306a36Sopenharmony_ci * l2min - Number of blocks that can be covered by a leaf 386662306a36Sopenharmony_ci * as a power of 2 386762306a36Sopenharmony_ci * 386862306a36Sopenharmony_ci * RETURNS: max free string at the root of the tree 386962306a36Sopenharmony_ci */ 387062306a36Sopenharmony_cistatic int dbInitTree(struct dmaptree * dtp) 387162306a36Sopenharmony_ci{ 387262306a36Sopenharmony_ci int l2max, l2free, bsize, nextb, i; 387362306a36Sopenharmony_ci int child, parent, nparent; 387462306a36Sopenharmony_ci s8 *tp, *cp, *cp1; 387562306a36Sopenharmony_ci 387662306a36Sopenharmony_ci tp = dtp->stree; 387762306a36Sopenharmony_ci 387862306a36Sopenharmony_ci /* Determine the maximum free string possible for the leaves */ 387962306a36Sopenharmony_ci l2max = le32_to_cpu(dtp->l2nleafs) + dtp->budmin; 388062306a36Sopenharmony_ci 388162306a36Sopenharmony_ci /* 388262306a36Sopenharmony_ci * configure the leaf level into binary buddy system 388362306a36Sopenharmony_ci * 388462306a36Sopenharmony_ci * Try to combine buddies starting with a buddy size of 1 388562306a36Sopenharmony_ci * (i.e. two leaves). At a buddy size of 1 two buddy leaves 388662306a36Sopenharmony_ci * can be combined if both buddies have a maximum free of l2min; 388762306a36Sopenharmony_ci * the combination will result in the left-most buddy leaf having 388862306a36Sopenharmony_ci * a maximum free of l2min+1. 388962306a36Sopenharmony_ci * After processing all buddies for a given size, process buddies 389062306a36Sopenharmony_ci * at the next higher buddy size (i.e. current size * 2) and 389162306a36Sopenharmony_ci * the next maximum free (current free + 1). 389262306a36Sopenharmony_ci * This continues until the maximum possible buddy combination 389362306a36Sopenharmony_ci * yields maximum free. 389462306a36Sopenharmony_ci */ 389562306a36Sopenharmony_ci for (l2free = dtp->budmin, bsize = 1; l2free < l2max; 389662306a36Sopenharmony_ci l2free++, bsize = nextb) { 389762306a36Sopenharmony_ci /* get next buddy size == current buddy pair size */ 389862306a36Sopenharmony_ci nextb = bsize << 1; 389962306a36Sopenharmony_ci 390062306a36Sopenharmony_ci /* scan each adjacent buddy pair at current buddy size */ 390162306a36Sopenharmony_ci for (i = 0, cp = tp + le32_to_cpu(dtp->leafidx); 390262306a36Sopenharmony_ci i < le32_to_cpu(dtp->nleafs); 390362306a36Sopenharmony_ci i += nextb, cp += nextb) { 390462306a36Sopenharmony_ci /* coalesce if both adjacent buddies are max free */ 390562306a36Sopenharmony_ci if (*cp == l2free && *(cp + bsize) == l2free) { 390662306a36Sopenharmony_ci *cp = l2free + 1; /* left take right */ 390762306a36Sopenharmony_ci *(cp + bsize) = -1; /* right give left */ 390862306a36Sopenharmony_ci } 390962306a36Sopenharmony_ci } 391062306a36Sopenharmony_ci } 391162306a36Sopenharmony_ci 391262306a36Sopenharmony_ci /* 391362306a36Sopenharmony_ci * bubble summary information of leaves up the tree. 391462306a36Sopenharmony_ci * 391562306a36Sopenharmony_ci * Starting at the leaf node level, the four nodes described by 391662306a36Sopenharmony_ci * the higher level parent node are compared for a maximum free and 391762306a36Sopenharmony_ci * this maximum becomes the value of the parent node. 391862306a36Sopenharmony_ci * when all lower level nodes are processed in this fashion then 391962306a36Sopenharmony_ci * move up to the next level (parent becomes a lower level node) and 392062306a36Sopenharmony_ci * continue the process for that level. 392162306a36Sopenharmony_ci */ 392262306a36Sopenharmony_ci for (child = le32_to_cpu(dtp->leafidx), 392362306a36Sopenharmony_ci nparent = le32_to_cpu(dtp->nleafs) >> 2; 392462306a36Sopenharmony_ci nparent > 0; nparent >>= 2, child = parent) { 392562306a36Sopenharmony_ci /* get index of 1st node of parent level */ 392662306a36Sopenharmony_ci parent = (child - 1) >> 2; 392762306a36Sopenharmony_ci 392862306a36Sopenharmony_ci /* set the value of the parent node as the maximum 392962306a36Sopenharmony_ci * of the four nodes of the current level. 393062306a36Sopenharmony_ci */ 393162306a36Sopenharmony_ci for (i = 0, cp = tp + child, cp1 = tp + parent; 393262306a36Sopenharmony_ci i < nparent; i++, cp += 4, cp1++) 393362306a36Sopenharmony_ci *cp1 = TREEMAX(cp); 393462306a36Sopenharmony_ci } 393562306a36Sopenharmony_ci 393662306a36Sopenharmony_ci return (*tp); 393762306a36Sopenharmony_ci} 393862306a36Sopenharmony_ci 393962306a36Sopenharmony_ci 394062306a36Sopenharmony_ci/* 394162306a36Sopenharmony_ci * dbInitDmapCtl() 394262306a36Sopenharmony_ci * 394362306a36Sopenharmony_ci * function: initialize dmapctl page 394462306a36Sopenharmony_ci */ 394562306a36Sopenharmony_cistatic int dbInitDmapCtl(struct dmapctl * dcp, int level, int i) 394662306a36Sopenharmony_ci{ /* start leaf index not covered by range */ 394762306a36Sopenharmony_ci s8 *cp; 394862306a36Sopenharmony_ci 394962306a36Sopenharmony_ci dcp->nleafs = cpu_to_le32(LPERCTL); 395062306a36Sopenharmony_ci dcp->l2nleafs = cpu_to_le32(L2LPERCTL); 395162306a36Sopenharmony_ci dcp->leafidx = cpu_to_le32(CTLLEAFIND); 395262306a36Sopenharmony_ci dcp->height = cpu_to_le32(5); 395362306a36Sopenharmony_ci dcp->budmin = L2BPERDMAP + L2LPERCTL * level; 395462306a36Sopenharmony_ci 395562306a36Sopenharmony_ci /* 395662306a36Sopenharmony_ci * initialize the leaves of current level that were not covered 395762306a36Sopenharmony_ci * by the specified input block range (i.e. the leaves have no 395862306a36Sopenharmony_ci * low level dmapctl or dmap). 395962306a36Sopenharmony_ci */ 396062306a36Sopenharmony_ci cp = &dcp->stree[CTLLEAFIND + i]; 396162306a36Sopenharmony_ci for (; i < LPERCTL; i++) 396262306a36Sopenharmony_ci *cp++ = NOFREE; 396362306a36Sopenharmony_ci 396462306a36Sopenharmony_ci /* build the dmap's binary buddy summary tree */ 396562306a36Sopenharmony_ci return (dbInitTree((struct dmaptree *) dcp)); 396662306a36Sopenharmony_ci} 396762306a36Sopenharmony_ci 396862306a36Sopenharmony_ci 396962306a36Sopenharmony_ci/* 397062306a36Sopenharmony_ci * NAME: dbGetL2AGSize()/ujfs_getagl2size() 397162306a36Sopenharmony_ci * 397262306a36Sopenharmony_ci * FUNCTION: Determine log2(allocation group size) from aggregate size 397362306a36Sopenharmony_ci * 397462306a36Sopenharmony_ci * PARAMETERS: 397562306a36Sopenharmony_ci * nblocks - Number of blocks in aggregate 397662306a36Sopenharmony_ci * 397762306a36Sopenharmony_ci * RETURNS: log2(allocation group size) in aggregate blocks 397862306a36Sopenharmony_ci */ 397962306a36Sopenharmony_cistatic int dbGetL2AGSize(s64 nblocks) 398062306a36Sopenharmony_ci{ 398162306a36Sopenharmony_ci s64 sz; 398262306a36Sopenharmony_ci s64 m; 398362306a36Sopenharmony_ci int l2sz; 398462306a36Sopenharmony_ci 398562306a36Sopenharmony_ci if (nblocks < BPERDMAP * MAXAG) 398662306a36Sopenharmony_ci return (L2BPERDMAP); 398762306a36Sopenharmony_ci 398862306a36Sopenharmony_ci /* round up aggregate size to power of 2 */ 398962306a36Sopenharmony_ci m = ((u64) 1 << (64 - 1)); 399062306a36Sopenharmony_ci for (l2sz = 64; l2sz >= 0; l2sz--, m >>= 1) { 399162306a36Sopenharmony_ci if (m & nblocks) 399262306a36Sopenharmony_ci break; 399362306a36Sopenharmony_ci } 399462306a36Sopenharmony_ci 399562306a36Sopenharmony_ci sz = (s64) 1 << l2sz; 399662306a36Sopenharmony_ci if (sz < nblocks) 399762306a36Sopenharmony_ci l2sz += 1; 399862306a36Sopenharmony_ci 399962306a36Sopenharmony_ci /* agsize = roundupSize/max_number_of_ag */ 400062306a36Sopenharmony_ci return (l2sz - L2MAXAG); 400162306a36Sopenharmony_ci} 400262306a36Sopenharmony_ci 400362306a36Sopenharmony_ci 400462306a36Sopenharmony_ci/* 400562306a36Sopenharmony_ci * NAME: dbMapFileSizeToMapSize() 400662306a36Sopenharmony_ci * 400762306a36Sopenharmony_ci * FUNCTION: compute number of blocks the block allocation map file 400862306a36Sopenharmony_ci * can cover from the map file size; 400962306a36Sopenharmony_ci * 401062306a36Sopenharmony_ci * RETURNS: Number of blocks which can be covered by this block map file; 401162306a36Sopenharmony_ci */ 401262306a36Sopenharmony_ci 401362306a36Sopenharmony_ci/* 401462306a36Sopenharmony_ci * maximum number of map pages at each level including control pages 401562306a36Sopenharmony_ci */ 401662306a36Sopenharmony_ci#define MAXL0PAGES (1 + LPERCTL) 401762306a36Sopenharmony_ci#define MAXL1PAGES (1 + LPERCTL * MAXL0PAGES) 401862306a36Sopenharmony_ci 401962306a36Sopenharmony_ci/* 402062306a36Sopenharmony_ci * convert number of map pages to the zero origin top dmapctl level 402162306a36Sopenharmony_ci */ 402262306a36Sopenharmony_ci#define BMAPPGTOLEV(npages) \ 402362306a36Sopenharmony_ci (((npages) <= 3 + MAXL0PAGES) ? 0 : \ 402462306a36Sopenharmony_ci ((npages) <= 2 + MAXL1PAGES) ? 1 : 2) 402562306a36Sopenharmony_ci 402662306a36Sopenharmony_cis64 dbMapFileSizeToMapSize(struct inode * ipbmap) 402762306a36Sopenharmony_ci{ 402862306a36Sopenharmony_ci struct super_block *sb = ipbmap->i_sb; 402962306a36Sopenharmony_ci s64 nblocks; 403062306a36Sopenharmony_ci s64 npages, ndmaps; 403162306a36Sopenharmony_ci int level, i; 403262306a36Sopenharmony_ci int complete, factor; 403362306a36Sopenharmony_ci 403462306a36Sopenharmony_ci nblocks = ipbmap->i_size >> JFS_SBI(sb)->l2bsize; 403562306a36Sopenharmony_ci npages = nblocks >> JFS_SBI(sb)->l2nbperpage; 403662306a36Sopenharmony_ci level = BMAPPGTOLEV(npages); 403762306a36Sopenharmony_ci 403862306a36Sopenharmony_ci /* At each level, accumulate the number of dmap pages covered by 403962306a36Sopenharmony_ci * the number of full child levels below it; 404062306a36Sopenharmony_ci * repeat for the last incomplete child level. 404162306a36Sopenharmony_ci */ 404262306a36Sopenharmony_ci ndmaps = 0; 404362306a36Sopenharmony_ci npages--; /* skip the first global control page */ 404462306a36Sopenharmony_ci /* skip higher level control pages above top level covered by map */ 404562306a36Sopenharmony_ci npages -= (2 - level); 404662306a36Sopenharmony_ci npages--; /* skip top level's control page */ 404762306a36Sopenharmony_ci for (i = level; i >= 0; i--) { 404862306a36Sopenharmony_ci factor = 404962306a36Sopenharmony_ci (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1); 405062306a36Sopenharmony_ci complete = (u32) npages / factor; 405162306a36Sopenharmony_ci ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL : 405262306a36Sopenharmony_ci ((i == 1) ? LPERCTL : 1)); 405362306a36Sopenharmony_ci 405462306a36Sopenharmony_ci /* pages in last/incomplete child */ 405562306a36Sopenharmony_ci npages = (u32) npages % factor; 405662306a36Sopenharmony_ci /* skip incomplete child's level control page */ 405762306a36Sopenharmony_ci npages--; 405862306a36Sopenharmony_ci } 405962306a36Sopenharmony_ci 406062306a36Sopenharmony_ci /* convert the number of dmaps into the number of blocks 406162306a36Sopenharmony_ci * which can be covered by the dmaps; 406262306a36Sopenharmony_ci */ 406362306a36Sopenharmony_ci nblocks = ndmaps << L2BPERDMAP; 406462306a36Sopenharmony_ci 406562306a36Sopenharmony_ci return (nblocks); 406662306a36Sopenharmony_ci} 4067