162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci *   Copyright (C) International Business Machines Corp., 2000-2005
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci/*
662306a36Sopenharmony_ci *	jfs_xtree.c: extent allocation descriptor B+-tree manager
762306a36Sopenharmony_ci */
862306a36Sopenharmony_ci
962306a36Sopenharmony_ci#include <linux/fs.h>
1062306a36Sopenharmony_ci#include <linux/module.h>
1162306a36Sopenharmony_ci#include <linux/quotaops.h>
1262306a36Sopenharmony_ci#include <linux/seq_file.h>
1362306a36Sopenharmony_ci#include "jfs_incore.h"
1462306a36Sopenharmony_ci#include "jfs_filsys.h"
1562306a36Sopenharmony_ci#include "jfs_metapage.h"
1662306a36Sopenharmony_ci#include "jfs_dmap.h"
1762306a36Sopenharmony_ci#include "jfs_dinode.h"
1862306a36Sopenharmony_ci#include "jfs_superblock.h"
1962306a36Sopenharmony_ci#include "jfs_debug.h"
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_ci/*
2262306a36Sopenharmony_ci * xtree local flag
2362306a36Sopenharmony_ci */
2462306a36Sopenharmony_ci#define XT_INSERT	0x00000001
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci/*
2762306a36Sopenharmony_ci *	xtree key/entry comparison: extent offset
2862306a36Sopenharmony_ci *
2962306a36Sopenharmony_ci * return:
3062306a36Sopenharmony_ci *	-1: k < start of extent
3162306a36Sopenharmony_ci *	 0: start_of_extent <= k <= end_of_extent
3262306a36Sopenharmony_ci *	 1: k > end_of_extent
3362306a36Sopenharmony_ci */
3462306a36Sopenharmony_ci#define XT_CMP(CMP, K, X, OFFSET64)\
3562306a36Sopenharmony_ci{\
3662306a36Sopenharmony_ci	OFFSET64 = offsetXAD(X);\
3762306a36Sopenharmony_ci	(CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
3862306a36Sopenharmony_ci		((K) < OFFSET64) ? -1 : 0;\
3962306a36Sopenharmony_ci}
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci/* write a xad entry */
4262306a36Sopenharmony_ci#define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
4362306a36Sopenharmony_ci{\
4462306a36Sopenharmony_ci	(XAD)->flag = (FLAG);\
4562306a36Sopenharmony_ci	XADoffset((XAD), (OFF));\
4662306a36Sopenharmony_ci	XADlength((XAD), (LEN));\
4762306a36Sopenharmony_ci	XADaddress((XAD), (ADDR));\
4862306a36Sopenharmony_ci}
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci#define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ci/* get page buffer for specified block address */
5362306a36Sopenharmony_ci/* ToDo: Replace this ugly macro with a function */
5462306a36Sopenharmony_ci#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)				\
5562306a36Sopenharmony_cido {									\
5662306a36Sopenharmony_ci	BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot);	\
5762306a36Sopenharmony_ci	if (!(RC)) {							\
5862306a36Sopenharmony_ci		if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) || \
5962306a36Sopenharmony_ci		    (le16_to_cpu((P)->header.nextindex) >		\
6062306a36Sopenharmony_ci		     le16_to_cpu((P)->header.maxentry)) ||		\
6162306a36Sopenharmony_ci		    (le16_to_cpu((P)->header.maxentry) >		\
6262306a36Sopenharmony_ci		     (((BN) == 0) ? XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) { \
6362306a36Sopenharmony_ci			jfs_error((IP)->i_sb,				\
6462306a36Sopenharmony_ci				  "XT_GETPAGE: xtree page corrupt\n");	\
6562306a36Sopenharmony_ci			BT_PUTPAGE(MP);					\
6662306a36Sopenharmony_ci			MP = NULL;					\
6762306a36Sopenharmony_ci			RC = -EIO;					\
6862306a36Sopenharmony_ci		}							\
6962306a36Sopenharmony_ci	}								\
7062306a36Sopenharmony_ci} while (0)
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci/* for consistency */
7362306a36Sopenharmony_ci#define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci#define XT_GETSEARCH(IP, LEAF, BN, MP, P, INDEX) \
7662306a36Sopenharmony_ci	BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
7762306a36Sopenharmony_ci/* xtree entry parameter descriptor */
7862306a36Sopenharmony_cistruct xtsplit {
7962306a36Sopenharmony_ci	struct metapage *mp;
8062306a36Sopenharmony_ci	s16 index;
8162306a36Sopenharmony_ci	u8 flag;
8262306a36Sopenharmony_ci	s64 off;
8362306a36Sopenharmony_ci	s64 addr;
8462306a36Sopenharmony_ci	int len;
8562306a36Sopenharmony_ci	struct pxdlist *pxdlist;
8662306a36Sopenharmony_ci};
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci/*
9062306a36Sopenharmony_ci *	statistics
9162306a36Sopenharmony_ci */
9262306a36Sopenharmony_ci#ifdef CONFIG_JFS_STATISTICS
9362306a36Sopenharmony_cistatic struct {
9462306a36Sopenharmony_ci	uint search;
9562306a36Sopenharmony_ci	uint fastSearch;
9662306a36Sopenharmony_ci	uint split;
9762306a36Sopenharmony_ci} xtStat;
9862306a36Sopenharmony_ci#endif
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci/*
10262306a36Sopenharmony_ci * forward references
10362306a36Sopenharmony_ci */
10462306a36Sopenharmony_cistatic int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
10562306a36Sopenharmony_ci		    struct btstack * btstack, int flag);
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_cistatic int xtSplitUp(tid_t tid,
10862306a36Sopenharmony_ci		     struct inode *ip,
10962306a36Sopenharmony_ci		     struct xtsplit * split, struct btstack * btstack);
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_cistatic int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
11262306a36Sopenharmony_ci		       struct metapage ** rmpp, s64 * rbnp);
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_cistatic int xtSplitRoot(tid_t tid, struct inode *ip,
11562306a36Sopenharmony_ci		       struct xtsplit * split, struct metapage ** rmpp);
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci/*
11862306a36Sopenharmony_ci *	xtLookup()
11962306a36Sopenharmony_ci *
12062306a36Sopenharmony_ci * function: map a single page into a physical extent;
12162306a36Sopenharmony_ci */
12262306a36Sopenharmony_ciint xtLookup(struct inode *ip, s64 lstart,
12362306a36Sopenharmony_ci	     s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
12462306a36Sopenharmony_ci{
12562306a36Sopenharmony_ci	int rc = 0;
12662306a36Sopenharmony_ci	struct btstack btstack;
12762306a36Sopenharmony_ci	int cmp;
12862306a36Sopenharmony_ci	s64 bn;
12962306a36Sopenharmony_ci	struct metapage *mp;
13062306a36Sopenharmony_ci	xtpage_t *p;
13162306a36Sopenharmony_ci	int index;
13262306a36Sopenharmony_ci	xad_t *xad;
13362306a36Sopenharmony_ci	s64 next, size, xoff, xend;
13462306a36Sopenharmony_ci	int xlen;
13562306a36Sopenharmony_ci	s64 xaddr;
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_ci	*paddr = 0;
13862306a36Sopenharmony_ci	*plen = llen;
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci	if (!no_check) {
14162306a36Sopenharmony_ci		/* is lookup offset beyond eof ? */
14262306a36Sopenharmony_ci		size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
14362306a36Sopenharmony_ci		    JFS_SBI(ip->i_sb)->l2bsize;
14462306a36Sopenharmony_ci		if (lstart >= size)
14562306a36Sopenharmony_ci			return 0;
14662306a36Sopenharmony_ci	}
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	/*
14962306a36Sopenharmony_ci	 * search for the xad entry covering the logical extent
15062306a36Sopenharmony_ci	 */
15162306a36Sopenharmony_ci//search:
15262306a36Sopenharmony_ci	if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
15362306a36Sopenharmony_ci		jfs_err("xtLookup: xtSearch returned %d", rc);
15462306a36Sopenharmony_ci		return rc;
15562306a36Sopenharmony_ci	}
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_ci	/*
15862306a36Sopenharmony_ci	 *	compute the physical extent covering logical extent
15962306a36Sopenharmony_ci	 *
16062306a36Sopenharmony_ci	 * N.B. search may have failed (e.g., hole in sparse file),
16162306a36Sopenharmony_ci	 * and returned the index of the next entry.
16262306a36Sopenharmony_ci	 */
16362306a36Sopenharmony_ci	/* retrieve search result */
16462306a36Sopenharmony_ci	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci	/* is xad found covering start of logical extent ?
16762306a36Sopenharmony_ci	 * lstart is a page start address,
16862306a36Sopenharmony_ci	 * i.e., lstart cannot start in a hole;
16962306a36Sopenharmony_ci	 */
17062306a36Sopenharmony_ci	if (cmp) {
17162306a36Sopenharmony_ci		if (next)
17262306a36Sopenharmony_ci			*plen = min(next - lstart, llen);
17362306a36Sopenharmony_ci		goto out;
17462306a36Sopenharmony_ci	}
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	/*
17762306a36Sopenharmony_ci	 * lxd covered by xad
17862306a36Sopenharmony_ci	 */
17962306a36Sopenharmony_ci	xad = &p->xad[index];
18062306a36Sopenharmony_ci	xoff = offsetXAD(xad);
18162306a36Sopenharmony_ci	xlen = lengthXAD(xad);
18262306a36Sopenharmony_ci	xend = xoff + xlen;
18362306a36Sopenharmony_ci	xaddr = addressXAD(xad);
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci	/* initialize new pxd */
18662306a36Sopenharmony_ci	*pflag = xad->flag;
18762306a36Sopenharmony_ci	*paddr = xaddr + (lstart - xoff);
18862306a36Sopenharmony_ci	/* a page must be fully covered by an xad */
18962306a36Sopenharmony_ci	*plen = min(xend - lstart, llen);
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ci      out:
19262306a36Sopenharmony_ci	XT_PUTPAGE(mp);
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_ci	return rc;
19562306a36Sopenharmony_ci}
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_ci/*
19862306a36Sopenharmony_ci *	xtSearch()
19962306a36Sopenharmony_ci *
20062306a36Sopenharmony_ci * function:	search for the xad entry covering specified offset.
20162306a36Sopenharmony_ci *
20262306a36Sopenharmony_ci * parameters:
20362306a36Sopenharmony_ci *	ip	- file object;
20462306a36Sopenharmony_ci *	xoff	- extent offset;
20562306a36Sopenharmony_ci *	nextp	- address of next extent (if any) for search miss
20662306a36Sopenharmony_ci *	cmpp	- comparison result:
20762306a36Sopenharmony_ci *	btstack - traverse stack;
20862306a36Sopenharmony_ci *	flag	- search process flag (XT_INSERT);
20962306a36Sopenharmony_ci *
21062306a36Sopenharmony_ci * returns:
21162306a36Sopenharmony_ci *	btstack contains (bn, index) of search path traversed to the entry.
21262306a36Sopenharmony_ci *	*cmpp is set to result of comparison with the entry returned.
21362306a36Sopenharmony_ci *	the page containing the entry is pinned at exit.
21462306a36Sopenharmony_ci */
21562306a36Sopenharmony_cistatic int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
21662306a36Sopenharmony_ci		    int *cmpp, struct btstack * btstack, int flag)
21762306a36Sopenharmony_ci{
21862306a36Sopenharmony_ci	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
21962306a36Sopenharmony_ci	int rc = 0;
22062306a36Sopenharmony_ci	int cmp = 1;		/* init for empty page */
22162306a36Sopenharmony_ci	s64 bn;			/* block number */
22262306a36Sopenharmony_ci	struct metapage *mp;	/* page buffer */
22362306a36Sopenharmony_ci	xtpage_t *p;		/* page */
22462306a36Sopenharmony_ci	xad_t *xad;
22562306a36Sopenharmony_ci	int base, index, lim, btindex;
22662306a36Sopenharmony_ci	struct btframe *btsp;
22762306a36Sopenharmony_ci	int nsplit = 0;		/* number of pages to split */
22862306a36Sopenharmony_ci	s64 t64;
22962306a36Sopenharmony_ci	s64 next = 0;
23062306a36Sopenharmony_ci
23162306a36Sopenharmony_ci	INCREMENT(xtStat.search);
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci	BT_CLR(btstack);
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci	btstack->nsplit = 0;
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ci	/*
23862306a36Sopenharmony_ci	 *	search down tree from root:
23962306a36Sopenharmony_ci	 *
24062306a36Sopenharmony_ci	 * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
24162306a36Sopenharmony_ci	 * internal page, child page Pi contains entry with k, Ki <= K < Kj.
24262306a36Sopenharmony_ci	 *
24362306a36Sopenharmony_ci	 * if entry with search key K is not found
24462306a36Sopenharmony_ci	 * internal page search find the entry with largest key Ki
24562306a36Sopenharmony_ci	 * less than K which point to the child page to search;
24662306a36Sopenharmony_ci	 * leaf page search find the entry with smallest key Kj
24762306a36Sopenharmony_ci	 * greater than K so that the returned index is the position of
24862306a36Sopenharmony_ci	 * the entry to be shifted right for insertion of new entry.
24962306a36Sopenharmony_ci	 * for empty tree, search key is greater than any key of the tree.
25062306a36Sopenharmony_ci	 *
25162306a36Sopenharmony_ci	 * by convention, root bn = 0.
25262306a36Sopenharmony_ci	 */
25362306a36Sopenharmony_ci	for (bn = 0;;) {
25462306a36Sopenharmony_ci		/* get/pin the page to search */
25562306a36Sopenharmony_ci		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
25662306a36Sopenharmony_ci		if (rc)
25762306a36Sopenharmony_ci			return rc;
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_ci		/* try sequential access heuristics with the previous
26062306a36Sopenharmony_ci		 * access entry in target leaf page:
26162306a36Sopenharmony_ci		 * once search narrowed down into the target leaf,
26262306a36Sopenharmony_ci		 * key must either match an entry in the leaf or
26362306a36Sopenharmony_ci		 * key entry does not exist in the tree;
26462306a36Sopenharmony_ci		 */
26562306a36Sopenharmony_ci//fastSearch:
26662306a36Sopenharmony_ci		if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
26762306a36Sopenharmony_ci		    (p->header.flag & BT_LEAF) &&
26862306a36Sopenharmony_ci		    (index = jfs_ip->btindex) <
26962306a36Sopenharmony_ci		    le16_to_cpu(p->header.nextindex)) {
27062306a36Sopenharmony_ci			xad = &p->xad[index];
27162306a36Sopenharmony_ci			t64 = offsetXAD(xad);
27262306a36Sopenharmony_ci			if (xoff < t64 + lengthXAD(xad)) {
27362306a36Sopenharmony_ci				if (xoff >= t64) {
27462306a36Sopenharmony_ci					*cmpp = 0;
27562306a36Sopenharmony_ci					goto out;
27662306a36Sopenharmony_ci				}
27762306a36Sopenharmony_ci
27862306a36Sopenharmony_ci				/* stop sequential access heuristics */
27962306a36Sopenharmony_ci				goto binarySearch;
28062306a36Sopenharmony_ci			} else {	/* (t64 + lengthXAD(xad)) <= xoff */
28162306a36Sopenharmony_ci
28262306a36Sopenharmony_ci				/* try next sequential entry */
28362306a36Sopenharmony_ci				index++;
28462306a36Sopenharmony_ci				if (index <
28562306a36Sopenharmony_ci				    le16_to_cpu(p->header.nextindex)) {
28662306a36Sopenharmony_ci					xad++;
28762306a36Sopenharmony_ci					t64 = offsetXAD(xad);
28862306a36Sopenharmony_ci					if (xoff < t64 + lengthXAD(xad)) {
28962306a36Sopenharmony_ci						if (xoff >= t64) {
29062306a36Sopenharmony_ci							*cmpp = 0;
29162306a36Sopenharmony_ci							goto out;
29262306a36Sopenharmony_ci						}
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_ci						/* miss: key falls between
29562306a36Sopenharmony_ci						 * previous and this entry
29662306a36Sopenharmony_ci						 */
29762306a36Sopenharmony_ci						*cmpp = 1;
29862306a36Sopenharmony_ci						next = t64;
29962306a36Sopenharmony_ci						goto out;
30062306a36Sopenharmony_ci					}
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ci					/* (xoff >= t64 + lengthXAD(xad));
30362306a36Sopenharmony_ci					 * matching entry may be further out:
30462306a36Sopenharmony_ci					 * stop heuristic search
30562306a36Sopenharmony_ci					 */
30662306a36Sopenharmony_ci					/* stop sequential access heuristics */
30762306a36Sopenharmony_ci					goto binarySearch;
30862306a36Sopenharmony_ci				}
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci				/* (index == p->header.nextindex);
31162306a36Sopenharmony_ci				 * miss: key entry does not exist in
31262306a36Sopenharmony_ci				 * the target leaf/tree
31362306a36Sopenharmony_ci				 */
31462306a36Sopenharmony_ci				*cmpp = 1;
31562306a36Sopenharmony_ci				goto out;
31662306a36Sopenharmony_ci			}
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_ci			/*
31962306a36Sopenharmony_ci			 * if hit, return index of the entry found, and
32062306a36Sopenharmony_ci			 * if miss, where new entry with search key is
32162306a36Sopenharmony_ci			 * to be inserted;
32262306a36Sopenharmony_ci			 */
32362306a36Sopenharmony_ci		      out:
32462306a36Sopenharmony_ci			/* compute number of pages to split */
32562306a36Sopenharmony_ci			if (flag & XT_INSERT) {
32662306a36Sopenharmony_ci				if (p->header.nextindex ==	/* little-endian */
32762306a36Sopenharmony_ci				    p->header.maxentry)
32862306a36Sopenharmony_ci					nsplit++;
32962306a36Sopenharmony_ci				else
33062306a36Sopenharmony_ci					nsplit = 0;
33162306a36Sopenharmony_ci				btstack->nsplit = nsplit;
33262306a36Sopenharmony_ci			}
33362306a36Sopenharmony_ci
33462306a36Sopenharmony_ci			/* save search result */
33562306a36Sopenharmony_ci			btsp = btstack->top;
33662306a36Sopenharmony_ci			btsp->bn = bn;
33762306a36Sopenharmony_ci			btsp->index = index;
33862306a36Sopenharmony_ci			btsp->mp = mp;
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci			/* update sequential access heuristics */
34162306a36Sopenharmony_ci			jfs_ip->btindex = index;
34262306a36Sopenharmony_ci
34362306a36Sopenharmony_ci			if (nextp)
34462306a36Sopenharmony_ci				*nextp = next;
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci			INCREMENT(xtStat.fastSearch);
34762306a36Sopenharmony_ci			return 0;
34862306a36Sopenharmony_ci		}
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_ci		/* well, ... full search now */
35162306a36Sopenharmony_ci	      binarySearch:
35262306a36Sopenharmony_ci		lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ci		/*
35562306a36Sopenharmony_ci		 * binary search with search key K on the current page
35662306a36Sopenharmony_ci		 */
35762306a36Sopenharmony_ci		for (base = XTENTRYSTART; lim; lim >>= 1) {
35862306a36Sopenharmony_ci			index = base + (lim >> 1);
35962306a36Sopenharmony_ci
36062306a36Sopenharmony_ci			XT_CMP(cmp, xoff, &p->xad[index], t64);
36162306a36Sopenharmony_ci			if (cmp == 0) {
36262306a36Sopenharmony_ci				/*
36362306a36Sopenharmony_ci				 *	search hit
36462306a36Sopenharmony_ci				 */
36562306a36Sopenharmony_ci				/* search hit - leaf page:
36662306a36Sopenharmony_ci				 * return the entry found
36762306a36Sopenharmony_ci				 */
36862306a36Sopenharmony_ci				if (p->header.flag & BT_LEAF) {
36962306a36Sopenharmony_ci					*cmpp = cmp;
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_ci					/* compute number of pages to split */
37262306a36Sopenharmony_ci					if (flag & XT_INSERT) {
37362306a36Sopenharmony_ci						if (p->header.nextindex ==
37462306a36Sopenharmony_ci						    p->header.maxentry)
37562306a36Sopenharmony_ci							nsplit++;
37662306a36Sopenharmony_ci						else
37762306a36Sopenharmony_ci							nsplit = 0;
37862306a36Sopenharmony_ci						btstack->nsplit = nsplit;
37962306a36Sopenharmony_ci					}
38062306a36Sopenharmony_ci
38162306a36Sopenharmony_ci					/* save search result */
38262306a36Sopenharmony_ci					btsp = btstack->top;
38362306a36Sopenharmony_ci					btsp->bn = bn;
38462306a36Sopenharmony_ci					btsp->index = index;
38562306a36Sopenharmony_ci					btsp->mp = mp;
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_ci					/* init sequential access heuristics */
38862306a36Sopenharmony_ci					btindex = jfs_ip->btindex;
38962306a36Sopenharmony_ci					if (index == btindex ||
39062306a36Sopenharmony_ci					    index == btindex + 1)
39162306a36Sopenharmony_ci						jfs_ip->btorder = BT_SEQUENTIAL;
39262306a36Sopenharmony_ci					else
39362306a36Sopenharmony_ci						jfs_ip->btorder = BT_RANDOM;
39462306a36Sopenharmony_ci					jfs_ip->btindex = index;
39562306a36Sopenharmony_ci
39662306a36Sopenharmony_ci					return 0;
39762306a36Sopenharmony_ci				}
39862306a36Sopenharmony_ci				/* search hit - internal page:
39962306a36Sopenharmony_ci				 * descend/search its child page
40062306a36Sopenharmony_ci				 */
40162306a36Sopenharmony_ci				if (index < le16_to_cpu(p->header.nextindex)-1)
40262306a36Sopenharmony_ci					next = offsetXAD(&p->xad[index + 1]);
40362306a36Sopenharmony_ci				goto next;
40462306a36Sopenharmony_ci			}
40562306a36Sopenharmony_ci
40662306a36Sopenharmony_ci			if (cmp > 0) {
40762306a36Sopenharmony_ci				base = index + 1;
40862306a36Sopenharmony_ci				--lim;
40962306a36Sopenharmony_ci			}
41062306a36Sopenharmony_ci		}
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_ci		/*
41362306a36Sopenharmony_ci		 *	search miss
41462306a36Sopenharmony_ci		 *
41562306a36Sopenharmony_ci		 * base is the smallest index with key (Kj) greater than
41662306a36Sopenharmony_ci		 * search key (K) and may be zero or maxentry index.
41762306a36Sopenharmony_ci		 */
41862306a36Sopenharmony_ci		if (base < le16_to_cpu(p->header.nextindex))
41962306a36Sopenharmony_ci			next = offsetXAD(&p->xad[base]);
42062306a36Sopenharmony_ci		/*
42162306a36Sopenharmony_ci		 * search miss - leaf page:
42262306a36Sopenharmony_ci		 *
42362306a36Sopenharmony_ci		 * return location of entry (base) where new entry with
42462306a36Sopenharmony_ci		 * search key K is to be inserted.
42562306a36Sopenharmony_ci		 */
42662306a36Sopenharmony_ci		if (p->header.flag & BT_LEAF) {
42762306a36Sopenharmony_ci			*cmpp = cmp;
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci			/* compute number of pages to split */
43062306a36Sopenharmony_ci			if (flag & XT_INSERT) {
43162306a36Sopenharmony_ci				if (p->header.nextindex ==
43262306a36Sopenharmony_ci				    p->header.maxentry)
43362306a36Sopenharmony_ci					nsplit++;
43462306a36Sopenharmony_ci				else
43562306a36Sopenharmony_ci					nsplit = 0;
43662306a36Sopenharmony_ci				btstack->nsplit = nsplit;
43762306a36Sopenharmony_ci			}
43862306a36Sopenharmony_ci
43962306a36Sopenharmony_ci			/* save search result */
44062306a36Sopenharmony_ci			btsp = btstack->top;
44162306a36Sopenharmony_ci			btsp->bn = bn;
44262306a36Sopenharmony_ci			btsp->index = base;
44362306a36Sopenharmony_ci			btsp->mp = mp;
44462306a36Sopenharmony_ci
44562306a36Sopenharmony_ci			/* init sequential access heuristics */
44662306a36Sopenharmony_ci			btindex = jfs_ip->btindex;
44762306a36Sopenharmony_ci			if (base == btindex || base == btindex + 1)
44862306a36Sopenharmony_ci				jfs_ip->btorder = BT_SEQUENTIAL;
44962306a36Sopenharmony_ci			else
45062306a36Sopenharmony_ci				jfs_ip->btorder = BT_RANDOM;
45162306a36Sopenharmony_ci			jfs_ip->btindex = base;
45262306a36Sopenharmony_ci
45362306a36Sopenharmony_ci			if (nextp)
45462306a36Sopenharmony_ci				*nextp = next;
45562306a36Sopenharmony_ci
45662306a36Sopenharmony_ci			return 0;
45762306a36Sopenharmony_ci		}
45862306a36Sopenharmony_ci
45962306a36Sopenharmony_ci		/*
46062306a36Sopenharmony_ci		 * search miss - non-leaf page:
46162306a36Sopenharmony_ci		 *
46262306a36Sopenharmony_ci		 * if base is non-zero, decrement base by one to get the parent
46362306a36Sopenharmony_ci		 * entry of the child page to search.
46462306a36Sopenharmony_ci		 */
46562306a36Sopenharmony_ci		index = base ? base - 1 : base;
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_ci		/*
46862306a36Sopenharmony_ci		 * go down to child page
46962306a36Sopenharmony_ci		 */
47062306a36Sopenharmony_ci	      next:
47162306a36Sopenharmony_ci		/* update number of pages to split */
47262306a36Sopenharmony_ci		if (p->header.nextindex == p->header.maxentry)
47362306a36Sopenharmony_ci			nsplit++;
47462306a36Sopenharmony_ci		else
47562306a36Sopenharmony_ci			nsplit = 0;
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci		/* push (bn, index) of the parent page/entry */
47862306a36Sopenharmony_ci		if (BT_STACK_FULL(btstack)) {
47962306a36Sopenharmony_ci			jfs_error(ip->i_sb, "stack overrun!\n");
48062306a36Sopenharmony_ci			XT_PUTPAGE(mp);
48162306a36Sopenharmony_ci			return -EIO;
48262306a36Sopenharmony_ci		}
48362306a36Sopenharmony_ci		BT_PUSH(btstack, bn, index);
48462306a36Sopenharmony_ci
48562306a36Sopenharmony_ci		/* get the child page block number */
48662306a36Sopenharmony_ci		bn = addressXAD(&p->xad[index]);
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci		/* unpin the parent page */
48962306a36Sopenharmony_ci		XT_PUTPAGE(mp);
49062306a36Sopenharmony_ci	}
49162306a36Sopenharmony_ci}
49262306a36Sopenharmony_ci
49362306a36Sopenharmony_ci/*
49462306a36Sopenharmony_ci *	xtInsert()
49562306a36Sopenharmony_ci *
49662306a36Sopenharmony_ci * function:
49762306a36Sopenharmony_ci *
49862306a36Sopenharmony_ci * parameter:
49962306a36Sopenharmony_ci *	tid	- transaction id;
50062306a36Sopenharmony_ci *	ip	- file object;
50162306a36Sopenharmony_ci *	xflag	- extent flag (XAD_NOTRECORDED):
50262306a36Sopenharmony_ci *	xoff	- extent offset;
50362306a36Sopenharmony_ci *	xlen	- extent length;
50462306a36Sopenharmony_ci *	xaddrp	- extent address pointer (in/out):
50562306a36Sopenharmony_ci *		if (*xaddrp)
50662306a36Sopenharmony_ci *			caller allocated data extent at *xaddrp;
50762306a36Sopenharmony_ci *		else
50862306a36Sopenharmony_ci *			allocate data extent and return its xaddr;
50962306a36Sopenharmony_ci *	flag	-
51062306a36Sopenharmony_ci *
51162306a36Sopenharmony_ci * return:
51262306a36Sopenharmony_ci */
51362306a36Sopenharmony_ciint xtInsert(tid_t tid,		/* transaction id */
51462306a36Sopenharmony_ci	     struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
51562306a36Sopenharmony_ci	     int flag)
51662306a36Sopenharmony_ci{
51762306a36Sopenharmony_ci	int rc = 0;
51862306a36Sopenharmony_ci	s64 xaddr, hint;
51962306a36Sopenharmony_ci	struct metapage *mp;	/* meta-page buffer */
52062306a36Sopenharmony_ci	xtpage_t *p;		/* base B+-tree index page */
52162306a36Sopenharmony_ci	s64 bn;
52262306a36Sopenharmony_ci	int index, nextindex;
52362306a36Sopenharmony_ci	struct btstack btstack;	/* traverse stack */
52462306a36Sopenharmony_ci	struct xtsplit split;	/* split information */
52562306a36Sopenharmony_ci	xad_t *xad;
52662306a36Sopenharmony_ci	int cmp;
52762306a36Sopenharmony_ci	s64 next;
52862306a36Sopenharmony_ci	struct tlock *tlck;
52962306a36Sopenharmony_ci	struct xtlock *xtlck;
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_ci	jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
53262306a36Sopenharmony_ci
53362306a36Sopenharmony_ci	/*
53462306a36Sopenharmony_ci	 *	search for the entry location at which to insert:
53562306a36Sopenharmony_ci	 *
53662306a36Sopenharmony_ci	 * xtFastSearch() and xtSearch() both returns (leaf page
53762306a36Sopenharmony_ci	 * pinned, index at which to insert).
53862306a36Sopenharmony_ci	 * n.b. xtSearch() may return index of maxentry of
53962306a36Sopenharmony_ci	 * the full page.
54062306a36Sopenharmony_ci	 */
54162306a36Sopenharmony_ci	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
54262306a36Sopenharmony_ci		return rc;
54362306a36Sopenharmony_ci
54462306a36Sopenharmony_ci	/* retrieve search result */
54562306a36Sopenharmony_ci	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_ci	/* This test must follow XT_GETSEARCH since mp must be valid if
54862306a36Sopenharmony_ci	 * we branch to out: */
54962306a36Sopenharmony_ci	if ((cmp == 0) || (next && (xlen > next - xoff))) {
55062306a36Sopenharmony_ci		rc = -EEXIST;
55162306a36Sopenharmony_ci		goto out;
55262306a36Sopenharmony_ci	}
55362306a36Sopenharmony_ci
55462306a36Sopenharmony_ci	/*
55562306a36Sopenharmony_ci	 * allocate data extent requested
55662306a36Sopenharmony_ci	 *
55762306a36Sopenharmony_ci	 * allocation hint: last xad
55862306a36Sopenharmony_ci	 */
55962306a36Sopenharmony_ci	if ((xaddr = *xaddrp) == 0) {
56062306a36Sopenharmony_ci		if (index > XTENTRYSTART) {
56162306a36Sopenharmony_ci			xad = &p->xad[index - 1];
56262306a36Sopenharmony_ci			hint = addressXAD(xad) + lengthXAD(xad) - 1;
56362306a36Sopenharmony_ci		} else
56462306a36Sopenharmony_ci			hint = 0;
56562306a36Sopenharmony_ci		if ((rc = dquot_alloc_block(ip, xlen)))
56662306a36Sopenharmony_ci			goto out;
56762306a36Sopenharmony_ci		if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr))) {
56862306a36Sopenharmony_ci			dquot_free_block(ip, xlen);
56962306a36Sopenharmony_ci			goto out;
57062306a36Sopenharmony_ci		}
57162306a36Sopenharmony_ci	}
57262306a36Sopenharmony_ci
57362306a36Sopenharmony_ci	/*
57462306a36Sopenharmony_ci	 *	insert entry for new extent
57562306a36Sopenharmony_ci	 */
57662306a36Sopenharmony_ci	xflag |= XAD_NEW;
57762306a36Sopenharmony_ci
57862306a36Sopenharmony_ci	/*
57962306a36Sopenharmony_ci	 *	if the leaf page is full, split the page and
58062306a36Sopenharmony_ci	 *	propagate up the router entry for the new page from split
58162306a36Sopenharmony_ci	 *
58262306a36Sopenharmony_ci	 * The xtSplitUp() will insert the entry and unpin the leaf page.
58362306a36Sopenharmony_ci	 */
58462306a36Sopenharmony_ci	nextindex = le16_to_cpu(p->header.nextindex);
58562306a36Sopenharmony_ci	if (nextindex == le16_to_cpu(p->header.maxentry)) {
58662306a36Sopenharmony_ci		split.mp = mp;
58762306a36Sopenharmony_ci		split.index = index;
58862306a36Sopenharmony_ci		split.flag = xflag;
58962306a36Sopenharmony_ci		split.off = xoff;
59062306a36Sopenharmony_ci		split.len = xlen;
59162306a36Sopenharmony_ci		split.addr = xaddr;
59262306a36Sopenharmony_ci		split.pxdlist = NULL;
59362306a36Sopenharmony_ci		if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
59462306a36Sopenharmony_ci			/* undo data extent allocation */
59562306a36Sopenharmony_ci			if (*xaddrp == 0) {
59662306a36Sopenharmony_ci				dbFree(ip, xaddr, (s64) xlen);
59762306a36Sopenharmony_ci				dquot_free_block(ip, xlen);
59862306a36Sopenharmony_ci			}
59962306a36Sopenharmony_ci			return rc;
60062306a36Sopenharmony_ci		}
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_ci		*xaddrp = xaddr;
60362306a36Sopenharmony_ci		return 0;
60462306a36Sopenharmony_ci	}
60562306a36Sopenharmony_ci
60662306a36Sopenharmony_ci	/*
60762306a36Sopenharmony_ci	 *	insert the new entry into the leaf page
60862306a36Sopenharmony_ci	 */
60962306a36Sopenharmony_ci	/*
61062306a36Sopenharmony_ci	 * acquire a transaction lock on the leaf page;
61162306a36Sopenharmony_ci	 *
61262306a36Sopenharmony_ci	 * action: xad insertion/extension;
61362306a36Sopenharmony_ci	 */
61462306a36Sopenharmony_ci	BT_MARK_DIRTY(mp, ip);
61562306a36Sopenharmony_ci
61662306a36Sopenharmony_ci	/* if insert into middle, shift right remaining entries. */
61762306a36Sopenharmony_ci	if (index < nextindex)
61862306a36Sopenharmony_ci		memmove(&p->xad[index + 1], &p->xad[index],
61962306a36Sopenharmony_ci			(nextindex - index) * sizeof(xad_t));
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci	/* insert the new entry: mark the entry NEW */
62262306a36Sopenharmony_ci	xad = &p->xad[index];
62362306a36Sopenharmony_ci	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
62462306a36Sopenharmony_ci
62562306a36Sopenharmony_ci	/* advance next available entry index */
62662306a36Sopenharmony_ci	le16_add_cpu(&p->header.nextindex, 1);
62762306a36Sopenharmony_ci
62862306a36Sopenharmony_ci	/* Don't log it if there are no links to the file */
62962306a36Sopenharmony_ci	if (!test_cflag(COMMIT_Nolink, ip)) {
63062306a36Sopenharmony_ci		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
63162306a36Sopenharmony_ci		xtlck = (struct xtlock *) & tlck->lock;
63262306a36Sopenharmony_ci		xtlck->lwm.offset =
63362306a36Sopenharmony_ci		    (xtlck->lwm.offset) ? min(index,
63462306a36Sopenharmony_ci					      (int)xtlck->lwm.offset) : index;
63562306a36Sopenharmony_ci		xtlck->lwm.length =
63662306a36Sopenharmony_ci		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
63762306a36Sopenharmony_ci	}
63862306a36Sopenharmony_ci
63962306a36Sopenharmony_ci	*xaddrp = xaddr;
64062306a36Sopenharmony_ci
64162306a36Sopenharmony_ci      out:
64262306a36Sopenharmony_ci	/* unpin the leaf page */
64362306a36Sopenharmony_ci	XT_PUTPAGE(mp);
64462306a36Sopenharmony_ci
64562306a36Sopenharmony_ci	return rc;
64662306a36Sopenharmony_ci}
64762306a36Sopenharmony_ci
64862306a36Sopenharmony_ci
64962306a36Sopenharmony_ci/*
65062306a36Sopenharmony_ci *	xtSplitUp()
65162306a36Sopenharmony_ci *
65262306a36Sopenharmony_ci * function:
65362306a36Sopenharmony_ci *	split full pages as propagating insertion up the tree
65462306a36Sopenharmony_ci *
65562306a36Sopenharmony_ci * parameter:
65662306a36Sopenharmony_ci *	tid	- transaction id;
65762306a36Sopenharmony_ci *	ip	- file object;
65862306a36Sopenharmony_ci *	split	- entry parameter descriptor;
65962306a36Sopenharmony_ci *	btstack - traverse stack from xtSearch()
66062306a36Sopenharmony_ci *
66162306a36Sopenharmony_ci * return:
66262306a36Sopenharmony_ci */
66362306a36Sopenharmony_cistatic int
66462306a36Sopenharmony_cixtSplitUp(tid_t tid,
66562306a36Sopenharmony_ci	  struct inode *ip, struct xtsplit * split, struct btstack * btstack)
66662306a36Sopenharmony_ci{
66762306a36Sopenharmony_ci	int rc = 0;
66862306a36Sopenharmony_ci	struct metapage *smp;
66962306a36Sopenharmony_ci	xtpage_t *sp;		/* split page */
67062306a36Sopenharmony_ci	struct metapage *rmp;
67162306a36Sopenharmony_ci	s64 rbn;		/* new right page block number */
67262306a36Sopenharmony_ci	struct metapage *rcmp;
67362306a36Sopenharmony_ci	xtpage_t *rcp;		/* right child page */
67462306a36Sopenharmony_ci	s64 rcbn;		/* right child page block number */
67562306a36Sopenharmony_ci	int skip;		/* index of entry of insertion */
67662306a36Sopenharmony_ci	int nextindex;		/* next available entry index of p */
67762306a36Sopenharmony_ci	struct btframe *parent;	/* parent page entry on traverse stack */
67862306a36Sopenharmony_ci	xad_t *xad;
67962306a36Sopenharmony_ci	s64 xaddr;
68062306a36Sopenharmony_ci	int xlen;
68162306a36Sopenharmony_ci	int nsplit;		/* number of pages split */
68262306a36Sopenharmony_ci	struct pxdlist pxdlist;
68362306a36Sopenharmony_ci	pxd_t *pxd;
68462306a36Sopenharmony_ci	struct tlock *tlck;
68562306a36Sopenharmony_ci	struct xtlock *xtlck;
68662306a36Sopenharmony_ci
68762306a36Sopenharmony_ci	smp = split->mp;
68862306a36Sopenharmony_ci	sp = XT_PAGE(ip, smp);
68962306a36Sopenharmony_ci
69062306a36Sopenharmony_ci	/* is inode xtree root extension/inline EA area free ? */
69162306a36Sopenharmony_ci	if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
69262306a36Sopenharmony_ci	    (le16_to_cpu(sp->header.maxentry) < XTROOTMAXSLOT) &&
69362306a36Sopenharmony_ci	    (JFS_IP(ip)->mode2 & INLINEEA)) {
69462306a36Sopenharmony_ci		sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
69562306a36Sopenharmony_ci		JFS_IP(ip)->mode2 &= ~INLINEEA;
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ci		BT_MARK_DIRTY(smp, ip);
69862306a36Sopenharmony_ci		/*
69962306a36Sopenharmony_ci		 * acquire a transaction lock on the leaf page;
70062306a36Sopenharmony_ci		 *
70162306a36Sopenharmony_ci		 * action: xad insertion/extension;
70262306a36Sopenharmony_ci		 */
70362306a36Sopenharmony_ci
70462306a36Sopenharmony_ci		/* if insert into middle, shift right remaining entries. */
70562306a36Sopenharmony_ci		skip = split->index;
70662306a36Sopenharmony_ci		nextindex = le16_to_cpu(sp->header.nextindex);
70762306a36Sopenharmony_ci		if (skip < nextindex)
70862306a36Sopenharmony_ci			memmove(&sp->xad[skip + 1], &sp->xad[skip],
70962306a36Sopenharmony_ci				(nextindex - skip) * sizeof(xad_t));
71062306a36Sopenharmony_ci
71162306a36Sopenharmony_ci		/* insert the new entry: mark the entry NEW */
71262306a36Sopenharmony_ci		xad = &sp->xad[skip];
71362306a36Sopenharmony_ci		XT_PUTENTRY(xad, split->flag, split->off, split->len,
71462306a36Sopenharmony_ci			    split->addr);
71562306a36Sopenharmony_ci
71662306a36Sopenharmony_ci		/* advance next available entry index */
71762306a36Sopenharmony_ci		le16_add_cpu(&sp->header.nextindex, 1);
71862306a36Sopenharmony_ci
71962306a36Sopenharmony_ci		/* Don't log it if there are no links to the file */
72062306a36Sopenharmony_ci		if (!test_cflag(COMMIT_Nolink, ip)) {
72162306a36Sopenharmony_ci			tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
72262306a36Sopenharmony_ci			xtlck = (struct xtlock *) & tlck->lock;
72362306a36Sopenharmony_ci			xtlck->lwm.offset = (xtlck->lwm.offset) ?
72462306a36Sopenharmony_ci			    min(skip, (int)xtlck->lwm.offset) : skip;
72562306a36Sopenharmony_ci			xtlck->lwm.length =
72662306a36Sopenharmony_ci			    le16_to_cpu(sp->header.nextindex) -
72762306a36Sopenharmony_ci			    xtlck->lwm.offset;
72862306a36Sopenharmony_ci		}
72962306a36Sopenharmony_ci
73062306a36Sopenharmony_ci		return 0;
73162306a36Sopenharmony_ci	}
73262306a36Sopenharmony_ci
73362306a36Sopenharmony_ci	/*
73462306a36Sopenharmony_ci	 * allocate new index blocks to cover index page split(s)
73562306a36Sopenharmony_ci	 *
73662306a36Sopenharmony_ci	 * allocation hint: ?
73762306a36Sopenharmony_ci	 */
73862306a36Sopenharmony_ci	if (split->pxdlist == NULL) {
73962306a36Sopenharmony_ci		nsplit = btstack->nsplit;
74062306a36Sopenharmony_ci		split->pxdlist = &pxdlist;
74162306a36Sopenharmony_ci		pxdlist.maxnpxd = pxdlist.npxd = 0;
74262306a36Sopenharmony_ci		pxd = &pxdlist.pxd[0];
74362306a36Sopenharmony_ci		xlen = JFS_SBI(ip->i_sb)->nbperpage;
74462306a36Sopenharmony_ci		for (; nsplit > 0; nsplit--, pxd++) {
74562306a36Sopenharmony_ci			if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
74662306a36Sopenharmony_ci			    == 0) {
74762306a36Sopenharmony_ci				PXDaddress(pxd, xaddr);
74862306a36Sopenharmony_ci				PXDlength(pxd, xlen);
74962306a36Sopenharmony_ci
75062306a36Sopenharmony_ci				pxdlist.maxnpxd++;
75162306a36Sopenharmony_ci
75262306a36Sopenharmony_ci				continue;
75362306a36Sopenharmony_ci			}
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_ci			/* undo allocation */
75662306a36Sopenharmony_ci
75762306a36Sopenharmony_ci			XT_PUTPAGE(smp);
75862306a36Sopenharmony_ci			return rc;
75962306a36Sopenharmony_ci		}
76062306a36Sopenharmony_ci	}
76162306a36Sopenharmony_ci
76262306a36Sopenharmony_ci	/*
76362306a36Sopenharmony_ci	 * Split leaf page <sp> into <sp> and a new right page <rp>.
76462306a36Sopenharmony_ci	 *
76562306a36Sopenharmony_ci	 * The split routines insert the new entry into the leaf page,
76662306a36Sopenharmony_ci	 * and acquire txLock as appropriate.
76762306a36Sopenharmony_ci	 * return <rp> pinned and its block number <rpbn>.
76862306a36Sopenharmony_ci	 */
76962306a36Sopenharmony_ci	rc = (sp->header.flag & BT_ROOT) ?
77062306a36Sopenharmony_ci	    xtSplitRoot(tid, ip, split, &rmp) :
77162306a36Sopenharmony_ci	    xtSplitPage(tid, ip, split, &rmp, &rbn);
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_ci	XT_PUTPAGE(smp);
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_ci	if (rc)
77662306a36Sopenharmony_ci		return -EIO;
77762306a36Sopenharmony_ci	/*
77862306a36Sopenharmony_ci	 * propagate up the router entry for the leaf page just split
77962306a36Sopenharmony_ci	 *
78062306a36Sopenharmony_ci	 * insert a router entry for the new page into the parent page,
78162306a36Sopenharmony_ci	 * propagate the insert/split up the tree by walking back the stack
78262306a36Sopenharmony_ci	 * of (bn of parent page, index of child page entry in parent page)
78362306a36Sopenharmony_ci	 * that were traversed during the search for the page that split.
78462306a36Sopenharmony_ci	 *
78562306a36Sopenharmony_ci	 * the propagation of insert/split up the tree stops if the root
78662306a36Sopenharmony_ci	 * splits or the page inserted into doesn't have to split to hold
78762306a36Sopenharmony_ci	 * the new entry.
78862306a36Sopenharmony_ci	 *
78962306a36Sopenharmony_ci	 * the parent entry for the split page remains the same, and
79062306a36Sopenharmony_ci	 * a new entry is inserted at its right with the first key and
79162306a36Sopenharmony_ci	 * block number of the new right page.
79262306a36Sopenharmony_ci	 *
79362306a36Sopenharmony_ci	 * There are a maximum of 3 pages pinned at any time:
79462306a36Sopenharmony_ci	 * right child, left parent and right parent (when the parent splits)
79562306a36Sopenharmony_ci	 * to keep the child page pinned while working on the parent.
79662306a36Sopenharmony_ci	 * make sure that all pins are released at exit.
79762306a36Sopenharmony_ci	 */
79862306a36Sopenharmony_ci	while ((parent = BT_POP(btstack)) != NULL) {
79962306a36Sopenharmony_ci		/* parent page specified by stack frame <parent> */
80062306a36Sopenharmony_ci
80162306a36Sopenharmony_ci		/* keep current child pages <rcp> pinned */
80262306a36Sopenharmony_ci		rcmp = rmp;
80362306a36Sopenharmony_ci		rcbn = rbn;
80462306a36Sopenharmony_ci		rcp = XT_PAGE(ip, rcmp);
80562306a36Sopenharmony_ci
80662306a36Sopenharmony_ci		/*
80762306a36Sopenharmony_ci		 * insert router entry in parent for new right child page <rp>
80862306a36Sopenharmony_ci		 */
80962306a36Sopenharmony_ci		/* get/pin the parent page <sp> */
81062306a36Sopenharmony_ci		XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
81162306a36Sopenharmony_ci		if (rc) {
81262306a36Sopenharmony_ci			XT_PUTPAGE(rcmp);
81362306a36Sopenharmony_ci			return rc;
81462306a36Sopenharmony_ci		}
81562306a36Sopenharmony_ci
81662306a36Sopenharmony_ci		/*
81762306a36Sopenharmony_ci		 * The new key entry goes ONE AFTER the index of parent entry,
81862306a36Sopenharmony_ci		 * because the split was to the right.
81962306a36Sopenharmony_ci		 */
82062306a36Sopenharmony_ci		skip = parent->index + 1;
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_ci		/*
82362306a36Sopenharmony_ci		 * split or shift right remaining entries of the parent page
82462306a36Sopenharmony_ci		 */
82562306a36Sopenharmony_ci		nextindex = le16_to_cpu(sp->header.nextindex);
82662306a36Sopenharmony_ci		/*
82762306a36Sopenharmony_ci		 * parent page is full - split the parent page
82862306a36Sopenharmony_ci		 */
82962306a36Sopenharmony_ci		if (nextindex == le16_to_cpu(sp->header.maxentry)) {
83062306a36Sopenharmony_ci			/* init for parent page split */
83162306a36Sopenharmony_ci			split->mp = smp;
83262306a36Sopenharmony_ci			split->index = skip;	/* index at insert */
83362306a36Sopenharmony_ci			split->flag = XAD_NEW;
83462306a36Sopenharmony_ci			split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
83562306a36Sopenharmony_ci			split->len = JFS_SBI(ip->i_sb)->nbperpage;
83662306a36Sopenharmony_ci			split->addr = rcbn;
83762306a36Sopenharmony_ci
83862306a36Sopenharmony_ci			/* unpin previous right child page */
83962306a36Sopenharmony_ci			XT_PUTPAGE(rcmp);
84062306a36Sopenharmony_ci
84162306a36Sopenharmony_ci			/* The split routines insert the new entry,
84262306a36Sopenharmony_ci			 * and acquire txLock as appropriate.
84362306a36Sopenharmony_ci			 * return <rp> pinned and its block number <rpbn>.
84462306a36Sopenharmony_ci			 */
84562306a36Sopenharmony_ci			rc = (sp->header.flag & BT_ROOT) ?
84662306a36Sopenharmony_ci			    xtSplitRoot(tid, ip, split, &rmp) :
84762306a36Sopenharmony_ci			    xtSplitPage(tid, ip, split, &rmp, &rbn);
84862306a36Sopenharmony_ci			if (rc) {
84962306a36Sopenharmony_ci				XT_PUTPAGE(smp);
85062306a36Sopenharmony_ci				return rc;
85162306a36Sopenharmony_ci			}
85262306a36Sopenharmony_ci
85362306a36Sopenharmony_ci			XT_PUTPAGE(smp);
85462306a36Sopenharmony_ci			/* keep new child page <rp> pinned */
85562306a36Sopenharmony_ci		}
85662306a36Sopenharmony_ci		/*
85762306a36Sopenharmony_ci		 * parent page is not full - insert in parent page
85862306a36Sopenharmony_ci		 */
85962306a36Sopenharmony_ci		else {
86062306a36Sopenharmony_ci			/*
86162306a36Sopenharmony_ci			 * insert router entry in parent for the right child
86262306a36Sopenharmony_ci			 * page from the first entry of the right child page:
86362306a36Sopenharmony_ci			 */
86462306a36Sopenharmony_ci			/*
86562306a36Sopenharmony_ci			 * acquire a transaction lock on the parent page;
86662306a36Sopenharmony_ci			 *
86762306a36Sopenharmony_ci			 * action: router xad insertion;
86862306a36Sopenharmony_ci			 */
86962306a36Sopenharmony_ci			BT_MARK_DIRTY(smp, ip);
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci			/*
87262306a36Sopenharmony_ci			 * if insert into middle, shift right remaining entries
87362306a36Sopenharmony_ci			 */
87462306a36Sopenharmony_ci			if (skip < nextindex)
87562306a36Sopenharmony_ci				memmove(&sp->xad[skip + 1], &sp->xad[skip],
87662306a36Sopenharmony_ci					(nextindex -
87762306a36Sopenharmony_ci					 skip) << L2XTSLOTSIZE);
87862306a36Sopenharmony_ci
87962306a36Sopenharmony_ci			/* insert the router entry */
88062306a36Sopenharmony_ci			xad = &sp->xad[skip];
88162306a36Sopenharmony_ci			XT_PUTENTRY(xad, XAD_NEW,
88262306a36Sopenharmony_ci				    offsetXAD(&rcp->xad[XTENTRYSTART]),
88362306a36Sopenharmony_ci				    JFS_SBI(ip->i_sb)->nbperpage, rcbn);
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci			/* advance next available entry index. */
88662306a36Sopenharmony_ci			le16_add_cpu(&sp->header.nextindex, 1);
88762306a36Sopenharmony_ci
88862306a36Sopenharmony_ci			/* Don't log it if there are no links to the file */
88962306a36Sopenharmony_ci			if (!test_cflag(COMMIT_Nolink, ip)) {
89062306a36Sopenharmony_ci				tlck = txLock(tid, ip, smp,
89162306a36Sopenharmony_ci					      tlckXTREE | tlckGROW);
89262306a36Sopenharmony_ci				xtlck = (struct xtlock *) & tlck->lock;
89362306a36Sopenharmony_ci				xtlck->lwm.offset = (xtlck->lwm.offset) ?
89462306a36Sopenharmony_ci				    min(skip, (int)xtlck->lwm.offset) : skip;
89562306a36Sopenharmony_ci				xtlck->lwm.length =
89662306a36Sopenharmony_ci				    le16_to_cpu(sp->header.nextindex) -
89762306a36Sopenharmony_ci				    xtlck->lwm.offset;
89862306a36Sopenharmony_ci			}
89962306a36Sopenharmony_ci
90062306a36Sopenharmony_ci			/* unpin parent page */
90162306a36Sopenharmony_ci			XT_PUTPAGE(smp);
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_ci			/* exit propagate up */
90462306a36Sopenharmony_ci			break;
90562306a36Sopenharmony_ci		}
90662306a36Sopenharmony_ci	}
90762306a36Sopenharmony_ci
90862306a36Sopenharmony_ci	/* unpin current right page */
90962306a36Sopenharmony_ci	XT_PUTPAGE(rmp);
91062306a36Sopenharmony_ci
91162306a36Sopenharmony_ci	return 0;
91262306a36Sopenharmony_ci}
91362306a36Sopenharmony_ci
91462306a36Sopenharmony_ci
91562306a36Sopenharmony_ci/*
91662306a36Sopenharmony_ci *	xtSplitPage()
91762306a36Sopenharmony_ci *
91862306a36Sopenharmony_ci * function:
91962306a36Sopenharmony_ci *	split a full non-root page into
92062306a36Sopenharmony_ci *	original/split/left page and new right page
92162306a36Sopenharmony_ci *	i.e., the original/split page remains as left page.
92262306a36Sopenharmony_ci *
92362306a36Sopenharmony_ci * parameter:
92462306a36Sopenharmony_ci *	int		tid,
92562306a36Sopenharmony_ci *	struct inode	*ip,
92662306a36Sopenharmony_ci *	struct xtsplit	*split,
92762306a36Sopenharmony_ci *	struct metapage	**rmpp,
92862306a36Sopenharmony_ci *	u64		*rbnp,
92962306a36Sopenharmony_ci *
93062306a36Sopenharmony_ci * return:
93162306a36Sopenharmony_ci *	Pointer to page in which to insert or NULL on error.
93262306a36Sopenharmony_ci */
93362306a36Sopenharmony_cistatic int
93462306a36Sopenharmony_cixtSplitPage(tid_t tid, struct inode *ip,
93562306a36Sopenharmony_ci	    struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
93662306a36Sopenharmony_ci{
93762306a36Sopenharmony_ci	int rc = 0;
93862306a36Sopenharmony_ci	struct metapage *smp;
93962306a36Sopenharmony_ci	xtpage_t *sp;
94062306a36Sopenharmony_ci	struct metapage *rmp;
94162306a36Sopenharmony_ci	xtpage_t *rp;		/* new right page allocated */
94262306a36Sopenharmony_ci	s64 rbn;		/* new right page block number */
94362306a36Sopenharmony_ci	struct metapage *mp;
94462306a36Sopenharmony_ci	xtpage_t *p;
94562306a36Sopenharmony_ci	s64 nextbn;
94662306a36Sopenharmony_ci	int skip, maxentry, middle, righthalf, n;
94762306a36Sopenharmony_ci	xad_t *xad;
94862306a36Sopenharmony_ci	struct pxdlist *pxdlist;
94962306a36Sopenharmony_ci	pxd_t *pxd;
95062306a36Sopenharmony_ci	struct tlock *tlck;
95162306a36Sopenharmony_ci	struct xtlock *sxtlck = NULL, *rxtlck = NULL;
95262306a36Sopenharmony_ci	int quota_allocation = 0;
95362306a36Sopenharmony_ci
95462306a36Sopenharmony_ci	smp = split->mp;
95562306a36Sopenharmony_ci	sp = XT_PAGE(ip, smp);
95662306a36Sopenharmony_ci
95762306a36Sopenharmony_ci	INCREMENT(xtStat.split);
95862306a36Sopenharmony_ci
95962306a36Sopenharmony_ci	pxdlist = split->pxdlist;
96062306a36Sopenharmony_ci	pxd = &pxdlist->pxd[pxdlist->npxd];
96162306a36Sopenharmony_ci	pxdlist->npxd++;
96262306a36Sopenharmony_ci	rbn = addressPXD(pxd);
96362306a36Sopenharmony_ci
96462306a36Sopenharmony_ci	/* Allocate blocks to quota. */
96562306a36Sopenharmony_ci	rc = dquot_alloc_block(ip, lengthPXD(pxd));
96662306a36Sopenharmony_ci	if (rc)
96762306a36Sopenharmony_ci		goto clean_up;
96862306a36Sopenharmony_ci
96962306a36Sopenharmony_ci	quota_allocation += lengthPXD(pxd);
97062306a36Sopenharmony_ci
97162306a36Sopenharmony_ci	/*
97262306a36Sopenharmony_ci	 * allocate the new right page for the split
97362306a36Sopenharmony_ci	 */
97462306a36Sopenharmony_ci	rmp = get_metapage(ip, rbn, PSIZE, 1);
97562306a36Sopenharmony_ci	if (rmp == NULL) {
97662306a36Sopenharmony_ci		rc = -EIO;
97762306a36Sopenharmony_ci		goto clean_up;
97862306a36Sopenharmony_ci	}
97962306a36Sopenharmony_ci
98062306a36Sopenharmony_ci	jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
98162306a36Sopenharmony_ci
98262306a36Sopenharmony_ci	BT_MARK_DIRTY(rmp, ip);
98362306a36Sopenharmony_ci	/*
98462306a36Sopenharmony_ci	 * action: new page;
98562306a36Sopenharmony_ci	 */
98662306a36Sopenharmony_ci
98762306a36Sopenharmony_ci	rp = (xtpage_t *) rmp->data;
98862306a36Sopenharmony_ci	rp->header.self = *pxd;
98962306a36Sopenharmony_ci	rp->header.flag = sp->header.flag & BT_TYPE;
99062306a36Sopenharmony_ci	rp->header.maxentry = sp->header.maxentry;	/* little-endian */
99162306a36Sopenharmony_ci	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
99262306a36Sopenharmony_ci
99362306a36Sopenharmony_ci	BT_MARK_DIRTY(smp, ip);
99462306a36Sopenharmony_ci	/* Don't log it if there are no links to the file */
99562306a36Sopenharmony_ci	if (!test_cflag(COMMIT_Nolink, ip)) {
99662306a36Sopenharmony_ci		/*
99762306a36Sopenharmony_ci		 * acquire a transaction lock on the new right page;
99862306a36Sopenharmony_ci		 */
99962306a36Sopenharmony_ci		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
100062306a36Sopenharmony_ci		rxtlck = (struct xtlock *) & tlck->lock;
100162306a36Sopenharmony_ci		rxtlck->lwm.offset = XTENTRYSTART;
100262306a36Sopenharmony_ci		/*
100362306a36Sopenharmony_ci		 * acquire a transaction lock on the split page
100462306a36Sopenharmony_ci		 */
100562306a36Sopenharmony_ci		tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
100662306a36Sopenharmony_ci		sxtlck = (struct xtlock *) & tlck->lock;
100762306a36Sopenharmony_ci	}
100862306a36Sopenharmony_ci
100962306a36Sopenharmony_ci	/*
101062306a36Sopenharmony_ci	 * initialize/update sibling pointers of <sp> and <rp>
101162306a36Sopenharmony_ci	 */
101262306a36Sopenharmony_ci	nextbn = le64_to_cpu(sp->header.next);
101362306a36Sopenharmony_ci	rp->header.next = cpu_to_le64(nextbn);
101462306a36Sopenharmony_ci	rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
101562306a36Sopenharmony_ci	sp->header.next = cpu_to_le64(rbn);
101662306a36Sopenharmony_ci
101762306a36Sopenharmony_ci	skip = split->index;
101862306a36Sopenharmony_ci
101962306a36Sopenharmony_ci	/*
102062306a36Sopenharmony_ci	 *	sequential append at tail (after last entry of last page)
102162306a36Sopenharmony_ci	 *
102262306a36Sopenharmony_ci	 * if splitting the last page on a level because of appending
102362306a36Sopenharmony_ci	 * a entry to it (skip is maxentry), it's likely that the access is
102462306a36Sopenharmony_ci	 * sequential. adding an empty page on the side of the level is less
102562306a36Sopenharmony_ci	 * work and can push the fill factor much higher than normal.
102662306a36Sopenharmony_ci	 * if we're wrong it's no big deal -  we will do the split the right
102762306a36Sopenharmony_ci	 * way next time.
102862306a36Sopenharmony_ci	 * (it may look like it's equally easy to do a similar hack for
102962306a36Sopenharmony_ci	 * reverse sorted data, that is, split the tree left, but it's not.
103062306a36Sopenharmony_ci	 * Be my guest.)
103162306a36Sopenharmony_ci	 */
103262306a36Sopenharmony_ci	if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
103362306a36Sopenharmony_ci		/*
103462306a36Sopenharmony_ci		 * acquire a transaction lock on the new/right page;
103562306a36Sopenharmony_ci		 *
103662306a36Sopenharmony_ci		 * action: xad insertion;
103762306a36Sopenharmony_ci		 */
103862306a36Sopenharmony_ci		/* insert entry at the first entry of the new right page */
103962306a36Sopenharmony_ci		xad = &rp->xad[XTENTRYSTART];
104062306a36Sopenharmony_ci		XT_PUTENTRY(xad, split->flag, split->off, split->len,
104162306a36Sopenharmony_ci			    split->addr);
104262306a36Sopenharmony_ci
104362306a36Sopenharmony_ci		rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
104462306a36Sopenharmony_ci
104562306a36Sopenharmony_ci		if (!test_cflag(COMMIT_Nolink, ip)) {
104662306a36Sopenharmony_ci			/* rxtlck->lwm.offset = XTENTRYSTART; */
104762306a36Sopenharmony_ci			rxtlck->lwm.length = 1;
104862306a36Sopenharmony_ci		}
104962306a36Sopenharmony_ci
105062306a36Sopenharmony_ci		*rmpp = rmp;
105162306a36Sopenharmony_ci		*rbnp = rbn;
105262306a36Sopenharmony_ci
105362306a36Sopenharmony_ci		jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
105462306a36Sopenharmony_ci		return 0;
105562306a36Sopenharmony_ci	}
105662306a36Sopenharmony_ci
105762306a36Sopenharmony_ci	/*
105862306a36Sopenharmony_ci	 *	non-sequential insert (at possibly middle page)
105962306a36Sopenharmony_ci	 */
106062306a36Sopenharmony_ci
106162306a36Sopenharmony_ci	/*
106262306a36Sopenharmony_ci	 * update previous pointer of old next/right page of <sp>
106362306a36Sopenharmony_ci	 */
106462306a36Sopenharmony_ci	if (nextbn != 0) {
106562306a36Sopenharmony_ci		XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
106662306a36Sopenharmony_ci		if (rc) {
106762306a36Sopenharmony_ci			XT_PUTPAGE(rmp);
106862306a36Sopenharmony_ci			goto clean_up;
106962306a36Sopenharmony_ci		}
107062306a36Sopenharmony_ci
107162306a36Sopenharmony_ci		BT_MARK_DIRTY(mp, ip);
107262306a36Sopenharmony_ci		/*
107362306a36Sopenharmony_ci		 * acquire a transaction lock on the next page;
107462306a36Sopenharmony_ci		 *
107562306a36Sopenharmony_ci		 * action:sibling pointer update;
107662306a36Sopenharmony_ci		 */
107762306a36Sopenharmony_ci		if (!test_cflag(COMMIT_Nolink, ip))
107862306a36Sopenharmony_ci			tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
107962306a36Sopenharmony_ci
108062306a36Sopenharmony_ci		p->header.prev = cpu_to_le64(rbn);
108162306a36Sopenharmony_ci
108262306a36Sopenharmony_ci		/* sibling page may have been updated previously, or
108362306a36Sopenharmony_ci		 * it may be updated later;
108462306a36Sopenharmony_ci		 */
108562306a36Sopenharmony_ci
108662306a36Sopenharmony_ci		XT_PUTPAGE(mp);
108762306a36Sopenharmony_ci	}
108862306a36Sopenharmony_ci
108962306a36Sopenharmony_ci	/*
109062306a36Sopenharmony_ci	 * split the data between the split and new/right pages
109162306a36Sopenharmony_ci	 */
109262306a36Sopenharmony_ci	maxentry = le16_to_cpu(sp->header.maxentry);
109362306a36Sopenharmony_ci	middle = maxentry >> 1;
109462306a36Sopenharmony_ci	righthalf = maxentry - middle;
109562306a36Sopenharmony_ci
109662306a36Sopenharmony_ci	/*
109762306a36Sopenharmony_ci	 * skip index in old split/left page - insert into left page:
109862306a36Sopenharmony_ci	 */
109962306a36Sopenharmony_ci	if (skip <= middle) {
110062306a36Sopenharmony_ci		/* move right half of split page to the new right page */
110162306a36Sopenharmony_ci		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
110262306a36Sopenharmony_ci			righthalf << L2XTSLOTSIZE);
110362306a36Sopenharmony_ci
110462306a36Sopenharmony_ci		/* shift right tail of left half to make room for new entry */
110562306a36Sopenharmony_ci		if (skip < middle)
110662306a36Sopenharmony_ci			memmove(&sp->xad[skip + 1], &sp->xad[skip],
110762306a36Sopenharmony_ci				(middle - skip) << L2XTSLOTSIZE);
110862306a36Sopenharmony_ci
110962306a36Sopenharmony_ci		/* insert new entry */
111062306a36Sopenharmony_ci		xad = &sp->xad[skip];
111162306a36Sopenharmony_ci		XT_PUTENTRY(xad, split->flag, split->off, split->len,
111262306a36Sopenharmony_ci			    split->addr);
111362306a36Sopenharmony_ci
111462306a36Sopenharmony_ci		/* update page header */
111562306a36Sopenharmony_ci		sp->header.nextindex = cpu_to_le16(middle + 1);
111662306a36Sopenharmony_ci		if (!test_cflag(COMMIT_Nolink, ip)) {
111762306a36Sopenharmony_ci			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
111862306a36Sopenharmony_ci			    min(skip, (int)sxtlck->lwm.offset) : skip;
111962306a36Sopenharmony_ci		}
112062306a36Sopenharmony_ci
112162306a36Sopenharmony_ci		rp->header.nextindex =
112262306a36Sopenharmony_ci		    cpu_to_le16(XTENTRYSTART + righthalf);
112362306a36Sopenharmony_ci	}
112462306a36Sopenharmony_ci	/*
112562306a36Sopenharmony_ci	 * skip index in new right page - insert into right page:
112662306a36Sopenharmony_ci	 */
112762306a36Sopenharmony_ci	else {
112862306a36Sopenharmony_ci		/* move left head of right half to right page */
112962306a36Sopenharmony_ci		n = skip - middle;
113062306a36Sopenharmony_ci		memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
113162306a36Sopenharmony_ci			n << L2XTSLOTSIZE);
113262306a36Sopenharmony_ci
113362306a36Sopenharmony_ci		/* insert new entry */
113462306a36Sopenharmony_ci		n += XTENTRYSTART;
113562306a36Sopenharmony_ci		xad = &rp->xad[n];
113662306a36Sopenharmony_ci		XT_PUTENTRY(xad, split->flag, split->off, split->len,
113762306a36Sopenharmony_ci			    split->addr);
113862306a36Sopenharmony_ci
113962306a36Sopenharmony_ci		/* move right tail of right half to right page */
114062306a36Sopenharmony_ci		if (skip < maxentry)
114162306a36Sopenharmony_ci			memmove(&rp->xad[n + 1], &sp->xad[skip],
114262306a36Sopenharmony_ci				(maxentry - skip) << L2XTSLOTSIZE);
114362306a36Sopenharmony_ci
114462306a36Sopenharmony_ci		/* update page header */
114562306a36Sopenharmony_ci		sp->header.nextindex = cpu_to_le16(middle);
114662306a36Sopenharmony_ci		if (!test_cflag(COMMIT_Nolink, ip)) {
114762306a36Sopenharmony_ci			sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
114862306a36Sopenharmony_ci			    min(middle, (int)sxtlck->lwm.offset) : middle;
114962306a36Sopenharmony_ci		}
115062306a36Sopenharmony_ci
115162306a36Sopenharmony_ci		rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
115262306a36Sopenharmony_ci						   righthalf + 1);
115362306a36Sopenharmony_ci	}
115462306a36Sopenharmony_ci
115562306a36Sopenharmony_ci	if (!test_cflag(COMMIT_Nolink, ip)) {
115662306a36Sopenharmony_ci		sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
115762306a36Sopenharmony_ci		    sxtlck->lwm.offset;
115862306a36Sopenharmony_ci
115962306a36Sopenharmony_ci		/* rxtlck->lwm.offset = XTENTRYSTART; */
116062306a36Sopenharmony_ci		rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
116162306a36Sopenharmony_ci		    XTENTRYSTART;
116262306a36Sopenharmony_ci	}
116362306a36Sopenharmony_ci
116462306a36Sopenharmony_ci	*rmpp = rmp;
116562306a36Sopenharmony_ci	*rbnp = rbn;
116662306a36Sopenharmony_ci
116762306a36Sopenharmony_ci	jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
116862306a36Sopenharmony_ci	return rc;
116962306a36Sopenharmony_ci
117062306a36Sopenharmony_ci      clean_up:
117162306a36Sopenharmony_ci
117262306a36Sopenharmony_ci	/* Rollback quota allocation. */
117362306a36Sopenharmony_ci	if (quota_allocation)
117462306a36Sopenharmony_ci		dquot_free_block(ip, quota_allocation);
117562306a36Sopenharmony_ci
117662306a36Sopenharmony_ci	return (rc);
117762306a36Sopenharmony_ci}
117862306a36Sopenharmony_ci
117962306a36Sopenharmony_ci
118062306a36Sopenharmony_ci/*
118162306a36Sopenharmony_ci *	xtSplitRoot()
118262306a36Sopenharmony_ci *
118362306a36Sopenharmony_ci * function:
118462306a36Sopenharmony_ci *	split the full root page into original/root/split page and new
118562306a36Sopenharmony_ci *	right page
118662306a36Sopenharmony_ci *	i.e., root remains fixed in tree anchor (inode) and the root is
118762306a36Sopenharmony_ci *	copied to a single new right child page since root page <<
118862306a36Sopenharmony_ci *	non-root page, and the split root page contains a single entry
118962306a36Sopenharmony_ci *	for the new right child page.
119062306a36Sopenharmony_ci *
119162306a36Sopenharmony_ci * parameter:
119262306a36Sopenharmony_ci *	int		tid,
119362306a36Sopenharmony_ci *	struct inode	*ip,
119462306a36Sopenharmony_ci *	struct xtsplit	*split,
119562306a36Sopenharmony_ci *	struct metapage	**rmpp)
119662306a36Sopenharmony_ci *
119762306a36Sopenharmony_ci * return:
119862306a36Sopenharmony_ci *	Pointer to page in which to insert or NULL on error.
119962306a36Sopenharmony_ci */
120062306a36Sopenharmony_cistatic int
120162306a36Sopenharmony_cixtSplitRoot(tid_t tid,
120262306a36Sopenharmony_ci	    struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
120362306a36Sopenharmony_ci{
120462306a36Sopenharmony_ci	xtpage_t *sp;
120562306a36Sopenharmony_ci	struct metapage *rmp;
120662306a36Sopenharmony_ci	xtpage_t *rp;
120762306a36Sopenharmony_ci	s64 rbn;
120862306a36Sopenharmony_ci	int skip, nextindex;
120962306a36Sopenharmony_ci	xad_t *xad;
121062306a36Sopenharmony_ci	pxd_t *pxd;
121162306a36Sopenharmony_ci	struct pxdlist *pxdlist;
121262306a36Sopenharmony_ci	struct tlock *tlck;
121362306a36Sopenharmony_ci	struct xtlock *xtlck;
121462306a36Sopenharmony_ci	int rc;
121562306a36Sopenharmony_ci
121662306a36Sopenharmony_ci	sp = &JFS_IP(ip)->i_xtroot;
121762306a36Sopenharmony_ci
121862306a36Sopenharmony_ci	INCREMENT(xtStat.split);
121962306a36Sopenharmony_ci
122062306a36Sopenharmony_ci	/*
122162306a36Sopenharmony_ci	 *	allocate a single (right) child page
122262306a36Sopenharmony_ci	 */
122362306a36Sopenharmony_ci	pxdlist = split->pxdlist;
122462306a36Sopenharmony_ci	pxd = &pxdlist->pxd[pxdlist->npxd];
122562306a36Sopenharmony_ci	pxdlist->npxd++;
122662306a36Sopenharmony_ci	rbn = addressPXD(pxd);
122762306a36Sopenharmony_ci	rmp = get_metapage(ip, rbn, PSIZE, 1);
122862306a36Sopenharmony_ci	if (rmp == NULL)
122962306a36Sopenharmony_ci		return -EIO;
123062306a36Sopenharmony_ci
123162306a36Sopenharmony_ci	/* Allocate blocks to quota. */
123262306a36Sopenharmony_ci	rc = dquot_alloc_block(ip, lengthPXD(pxd));
123362306a36Sopenharmony_ci	if (rc) {
123462306a36Sopenharmony_ci		release_metapage(rmp);
123562306a36Sopenharmony_ci		return rc;
123662306a36Sopenharmony_ci	}
123762306a36Sopenharmony_ci
123862306a36Sopenharmony_ci	jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
123962306a36Sopenharmony_ci
124062306a36Sopenharmony_ci	/*
124162306a36Sopenharmony_ci	 * acquire a transaction lock on the new right page;
124262306a36Sopenharmony_ci	 *
124362306a36Sopenharmony_ci	 * action: new page;
124462306a36Sopenharmony_ci	 */
124562306a36Sopenharmony_ci	BT_MARK_DIRTY(rmp, ip);
124662306a36Sopenharmony_ci
124762306a36Sopenharmony_ci	rp = (xtpage_t *) rmp->data;
124862306a36Sopenharmony_ci	rp->header.flag =
124962306a36Sopenharmony_ci	    (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
125062306a36Sopenharmony_ci	rp->header.self = *pxd;
125162306a36Sopenharmony_ci	rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
125262306a36Sopenharmony_ci	rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);
125362306a36Sopenharmony_ci
125462306a36Sopenharmony_ci	/* initialize sibling pointers */
125562306a36Sopenharmony_ci	rp->header.next = 0;
125662306a36Sopenharmony_ci	rp->header.prev = 0;
125762306a36Sopenharmony_ci
125862306a36Sopenharmony_ci	/*
125962306a36Sopenharmony_ci	 * copy the in-line root page into new right page extent
126062306a36Sopenharmony_ci	 */
126162306a36Sopenharmony_ci	nextindex = le16_to_cpu(sp->header.maxentry);
126262306a36Sopenharmony_ci	memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
126362306a36Sopenharmony_ci		(nextindex - XTENTRYSTART) << L2XTSLOTSIZE);
126462306a36Sopenharmony_ci
126562306a36Sopenharmony_ci	/*
126662306a36Sopenharmony_ci	 * insert the new entry into the new right/child page
126762306a36Sopenharmony_ci	 * (skip index in the new right page will not change)
126862306a36Sopenharmony_ci	 */
126962306a36Sopenharmony_ci	skip = split->index;
127062306a36Sopenharmony_ci	/* if insert into middle, shift right remaining entries */
127162306a36Sopenharmony_ci	if (skip != nextindex)
127262306a36Sopenharmony_ci		memmove(&rp->xad[skip + 1], &rp->xad[skip],
127362306a36Sopenharmony_ci			(nextindex - skip) * sizeof(xad_t));
127462306a36Sopenharmony_ci
127562306a36Sopenharmony_ci	xad = &rp->xad[skip];
127662306a36Sopenharmony_ci	XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);
127762306a36Sopenharmony_ci
127862306a36Sopenharmony_ci	/* update page header */
127962306a36Sopenharmony_ci	rp->header.nextindex = cpu_to_le16(nextindex + 1);
128062306a36Sopenharmony_ci
128162306a36Sopenharmony_ci	if (!test_cflag(COMMIT_Nolink, ip)) {
128262306a36Sopenharmony_ci		tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
128362306a36Sopenharmony_ci		xtlck = (struct xtlock *) & tlck->lock;
128462306a36Sopenharmony_ci		xtlck->lwm.offset = XTENTRYSTART;
128562306a36Sopenharmony_ci		xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
128662306a36Sopenharmony_ci		    XTENTRYSTART;
128762306a36Sopenharmony_ci	}
128862306a36Sopenharmony_ci
128962306a36Sopenharmony_ci	/*
129062306a36Sopenharmony_ci	 *	reset the root
129162306a36Sopenharmony_ci	 *
129262306a36Sopenharmony_ci	 * init root with the single entry for the new right page
129362306a36Sopenharmony_ci	 * set the 1st entry offset to 0, which force the left-most key
129462306a36Sopenharmony_ci	 * at any level of the tree to be less than any search key.
129562306a36Sopenharmony_ci	 */
129662306a36Sopenharmony_ci	/*
129762306a36Sopenharmony_ci	 * acquire a transaction lock on the root page (in-memory inode);
129862306a36Sopenharmony_ci	 *
129962306a36Sopenharmony_ci	 * action: root split;
130062306a36Sopenharmony_ci	 */
130162306a36Sopenharmony_ci	BT_MARK_DIRTY(split->mp, ip);
130262306a36Sopenharmony_ci
130362306a36Sopenharmony_ci	xad = &sp->xad[XTENTRYSTART];
130462306a36Sopenharmony_ci	XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);
130562306a36Sopenharmony_ci
130662306a36Sopenharmony_ci	/* update page header of root */
130762306a36Sopenharmony_ci	sp->header.flag &= ~BT_LEAF;
130862306a36Sopenharmony_ci	sp->header.flag |= BT_INTERNAL;
130962306a36Sopenharmony_ci
131062306a36Sopenharmony_ci	sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
131162306a36Sopenharmony_ci
131262306a36Sopenharmony_ci	if (!test_cflag(COMMIT_Nolink, ip)) {
131362306a36Sopenharmony_ci		tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
131462306a36Sopenharmony_ci		xtlck = (struct xtlock *) & tlck->lock;
131562306a36Sopenharmony_ci		xtlck->lwm.offset = XTENTRYSTART;
131662306a36Sopenharmony_ci		xtlck->lwm.length = 1;
131762306a36Sopenharmony_ci	}
131862306a36Sopenharmony_ci
131962306a36Sopenharmony_ci	*rmpp = rmp;
132062306a36Sopenharmony_ci
132162306a36Sopenharmony_ci	jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
132262306a36Sopenharmony_ci	return 0;
132362306a36Sopenharmony_ci}
132462306a36Sopenharmony_ci
132562306a36Sopenharmony_ci
132662306a36Sopenharmony_ci/*
132762306a36Sopenharmony_ci *	xtExtend()
132862306a36Sopenharmony_ci *
132962306a36Sopenharmony_ci * function: extend in-place;
133062306a36Sopenharmony_ci *
133162306a36Sopenharmony_ci * note: existing extent may or may not have been committed.
133262306a36Sopenharmony_ci * caller is responsible for pager buffer cache update, and
133362306a36Sopenharmony_ci * working block allocation map update;
133462306a36Sopenharmony_ci * update pmap: alloc whole extended extent;
133562306a36Sopenharmony_ci */
133662306a36Sopenharmony_ciint xtExtend(tid_t tid,		/* transaction id */
133762306a36Sopenharmony_ci	     struct inode *ip, s64 xoff,	/* delta extent offset */
133862306a36Sopenharmony_ci	     s32 xlen,		/* delta extent length */
133962306a36Sopenharmony_ci	     int flag)
134062306a36Sopenharmony_ci{
134162306a36Sopenharmony_ci	int rc = 0;
134262306a36Sopenharmony_ci	int cmp;
134362306a36Sopenharmony_ci	struct metapage *mp;	/* meta-page buffer */
134462306a36Sopenharmony_ci	xtpage_t *p;		/* base B+-tree index page */
134562306a36Sopenharmony_ci	s64 bn;
134662306a36Sopenharmony_ci	int index, nextindex, len;
134762306a36Sopenharmony_ci	struct btstack btstack;	/* traverse stack */
134862306a36Sopenharmony_ci	struct xtsplit split;	/* split information */
134962306a36Sopenharmony_ci	xad_t *xad;
135062306a36Sopenharmony_ci	s64 xaddr;
135162306a36Sopenharmony_ci	struct tlock *tlck;
135262306a36Sopenharmony_ci	struct xtlock *xtlck = NULL;
135362306a36Sopenharmony_ci
135462306a36Sopenharmony_ci	jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
135562306a36Sopenharmony_ci
135662306a36Sopenharmony_ci	/* there must exist extent to be extended */
135762306a36Sopenharmony_ci	if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
135862306a36Sopenharmony_ci		return rc;
135962306a36Sopenharmony_ci
136062306a36Sopenharmony_ci	/* retrieve search result */
136162306a36Sopenharmony_ci	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
136262306a36Sopenharmony_ci
136362306a36Sopenharmony_ci	if (cmp != 0) {
136462306a36Sopenharmony_ci		XT_PUTPAGE(mp);
136562306a36Sopenharmony_ci		jfs_error(ip->i_sb, "xtSearch did not find extent\n");
136662306a36Sopenharmony_ci		return -EIO;
136762306a36Sopenharmony_ci	}
136862306a36Sopenharmony_ci
136962306a36Sopenharmony_ci	/* extension must be contiguous */
137062306a36Sopenharmony_ci	xad = &p->xad[index];
137162306a36Sopenharmony_ci	if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) {
137262306a36Sopenharmony_ci		XT_PUTPAGE(mp);
137362306a36Sopenharmony_ci		jfs_error(ip->i_sb, "extension is not contiguous\n");
137462306a36Sopenharmony_ci		return -EIO;
137562306a36Sopenharmony_ci	}
137662306a36Sopenharmony_ci
137762306a36Sopenharmony_ci	/*
137862306a36Sopenharmony_ci	 * acquire a transaction lock on the leaf page;
137962306a36Sopenharmony_ci	 *
138062306a36Sopenharmony_ci	 * action: xad insertion/extension;
138162306a36Sopenharmony_ci	 */
138262306a36Sopenharmony_ci	BT_MARK_DIRTY(mp, ip);
138362306a36Sopenharmony_ci	if (!test_cflag(COMMIT_Nolink, ip)) {
138462306a36Sopenharmony_ci		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
138562306a36Sopenharmony_ci		xtlck = (struct xtlock *) & tlck->lock;
138662306a36Sopenharmony_ci	}
138762306a36Sopenharmony_ci
138862306a36Sopenharmony_ci	/* extend will overflow extent ? */
138962306a36Sopenharmony_ci	xlen = lengthXAD(xad) + xlen;
139062306a36Sopenharmony_ci	if ((len = xlen - MAXXLEN) <= 0)
139162306a36Sopenharmony_ci		goto extendOld;
139262306a36Sopenharmony_ci
139362306a36Sopenharmony_ci	/*
139462306a36Sopenharmony_ci	 *	extent overflow: insert entry for new extent
139562306a36Sopenharmony_ci	 */
139662306a36Sopenharmony_ci//insertNew:
139762306a36Sopenharmony_ci	xoff = offsetXAD(xad) + MAXXLEN;
139862306a36Sopenharmony_ci	xaddr = addressXAD(xad) + MAXXLEN;
139962306a36Sopenharmony_ci	nextindex = le16_to_cpu(p->header.nextindex);
140062306a36Sopenharmony_ci
140162306a36Sopenharmony_ci	/*
140262306a36Sopenharmony_ci	 *	if the leaf page is full, insert the new entry and
140362306a36Sopenharmony_ci	 *	propagate up the router entry for the new page from split
140462306a36Sopenharmony_ci	 *
140562306a36Sopenharmony_ci	 * The xtSplitUp() will insert the entry and unpin the leaf page.
140662306a36Sopenharmony_ci	 */
140762306a36Sopenharmony_ci	if (nextindex == le16_to_cpu(p->header.maxentry)) {
140862306a36Sopenharmony_ci		/* xtSpliUp() unpins leaf pages */
140962306a36Sopenharmony_ci		split.mp = mp;
141062306a36Sopenharmony_ci		split.index = index + 1;
141162306a36Sopenharmony_ci		split.flag = XAD_NEW;
141262306a36Sopenharmony_ci		split.off = xoff;	/* split offset */
141362306a36Sopenharmony_ci		split.len = len;
141462306a36Sopenharmony_ci		split.addr = xaddr;
141562306a36Sopenharmony_ci		split.pxdlist = NULL;
141662306a36Sopenharmony_ci		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
141762306a36Sopenharmony_ci			return rc;
141862306a36Sopenharmony_ci
141962306a36Sopenharmony_ci		/* get back old page */
142062306a36Sopenharmony_ci		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
142162306a36Sopenharmony_ci		if (rc)
142262306a36Sopenharmony_ci			return rc;
142362306a36Sopenharmony_ci		/*
142462306a36Sopenharmony_ci		 * if leaf root has been split, original root has been
142562306a36Sopenharmony_ci		 * copied to new child page, i.e., original entry now
142662306a36Sopenharmony_ci		 * resides on the new child page;
142762306a36Sopenharmony_ci		 */
142862306a36Sopenharmony_ci		if (p->header.flag & BT_INTERNAL) {
142962306a36Sopenharmony_ci			ASSERT(p->header.nextindex ==
143062306a36Sopenharmony_ci			       cpu_to_le16(XTENTRYSTART + 1));
143162306a36Sopenharmony_ci			xad = &p->xad[XTENTRYSTART];
143262306a36Sopenharmony_ci			bn = addressXAD(xad);
143362306a36Sopenharmony_ci			XT_PUTPAGE(mp);
143462306a36Sopenharmony_ci
143562306a36Sopenharmony_ci			/* get new child page */
143662306a36Sopenharmony_ci			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
143762306a36Sopenharmony_ci			if (rc)
143862306a36Sopenharmony_ci				return rc;
143962306a36Sopenharmony_ci
144062306a36Sopenharmony_ci			BT_MARK_DIRTY(mp, ip);
144162306a36Sopenharmony_ci			if (!test_cflag(COMMIT_Nolink, ip)) {
144262306a36Sopenharmony_ci				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
144362306a36Sopenharmony_ci				xtlck = (struct xtlock *) & tlck->lock;
144462306a36Sopenharmony_ci			}
144562306a36Sopenharmony_ci		}
144662306a36Sopenharmony_ci	}
144762306a36Sopenharmony_ci	/*
144862306a36Sopenharmony_ci	 *	insert the new entry into the leaf page
144962306a36Sopenharmony_ci	 */
145062306a36Sopenharmony_ci	else {
145162306a36Sopenharmony_ci		/* insert the new entry: mark the entry NEW */
145262306a36Sopenharmony_ci		xad = &p->xad[index + 1];
145362306a36Sopenharmony_ci		XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);
145462306a36Sopenharmony_ci
145562306a36Sopenharmony_ci		/* advance next available entry index */
145662306a36Sopenharmony_ci		le16_add_cpu(&p->header.nextindex, 1);
145762306a36Sopenharmony_ci	}
145862306a36Sopenharmony_ci
145962306a36Sopenharmony_ci	/* get back old entry */
146062306a36Sopenharmony_ci	xad = &p->xad[index];
146162306a36Sopenharmony_ci	xlen = MAXXLEN;
146262306a36Sopenharmony_ci
146362306a36Sopenharmony_ci	/*
146462306a36Sopenharmony_ci	 * extend old extent
146562306a36Sopenharmony_ci	 */
146662306a36Sopenharmony_ci      extendOld:
146762306a36Sopenharmony_ci	XADlength(xad, xlen);
146862306a36Sopenharmony_ci	if (!(xad->flag & XAD_NEW))
146962306a36Sopenharmony_ci		xad->flag |= XAD_EXTENDED;
147062306a36Sopenharmony_ci
147162306a36Sopenharmony_ci	if (!test_cflag(COMMIT_Nolink, ip)) {
147262306a36Sopenharmony_ci		xtlck->lwm.offset =
147362306a36Sopenharmony_ci		    (xtlck->lwm.offset) ? min(index,
147462306a36Sopenharmony_ci					      (int)xtlck->lwm.offset) : index;
147562306a36Sopenharmony_ci		xtlck->lwm.length =
147662306a36Sopenharmony_ci		    le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
147762306a36Sopenharmony_ci	}
147862306a36Sopenharmony_ci
147962306a36Sopenharmony_ci	/* unpin the leaf page */
148062306a36Sopenharmony_ci	XT_PUTPAGE(mp);
148162306a36Sopenharmony_ci
148262306a36Sopenharmony_ci	return rc;
148362306a36Sopenharmony_ci}
148462306a36Sopenharmony_ci
148562306a36Sopenharmony_ci/*
148662306a36Sopenharmony_ci *	xtUpdate()
148762306a36Sopenharmony_ci *
148862306a36Sopenharmony_ci * function: update XAD;
148962306a36Sopenharmony_ci *
149062306a36Sopenharmony_ci *	update extent for allocated_but_not_recorded or
149162306a36Sopenharmony_ci *	compressed extent;
149262306a36Sopenharmony_ci *
149362306a36Sopenharmony_ci * parameter:
149462306a36Sopenharmony_ci *	nxad	- new XAD;
149562306a36Sopenharmony_ci *		logical extent of the specified XAD must be completely
149662306a36Sopenharmony_ci *		contained by an existing XAD;
149762306a36Sopenharmony_ci */
149862306a36Sopenharmony_ciint xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
149962306a36Sopenharmony_ci{				/* new XAD */
150062306a36Sopenharmony_ci	int rc = 0;
150162306a36Sopenharmony_ci	int cmp;
150262306a36Sopenharmony_ci	struct metapage *mp;	/* meta-page buffer */
150362306a36Sopenharmony_ci	xtpage_t *p;		/* base B+-tree index page */
150462306a36Sopenharmony_ci	s64 bn;
150562306a36Sopenharmony_ci	int index0, index, newindex, nextindex;
150662306a36Sopenharmony_ci	struct btstack btstack;	/* traverse stack */
150762306a36Sopenharmony_ci	struct xtsplit split;	/* split information */
150862306a36Sopenharmony_ci	xad_t *xad, *lxad, *rxad;
150962306a36Sopenharmony_ci	int xflag;
151062306a36Sopenharmony_ci	s64 nxoff, xoff;
151162306a36Sopenharmony_ci	int nxlen, xlen, lxlen, rxlen;
151262306a36Sopenharmony_ci	s64 nxaddr, xaddr;
151362306a36Sopenharmony_ci	struct tlock *tlck;
151462306a36Sopenharmony_ci	struct xtlock *xtlck = NULL;
151562306a36Sopenharmony_ci	int newpage = 0;
151662306a36Sopenharmony_ci
151762306a36Sopenharmony_ci	/* there must exist extent to be tailgated */
151862306a36Sopenharmony_ci	nxoff = offsetXAD(nxad);
151962306a36Sopenharmony_ci	nxlen = lengthXAD(nxad);
152062306a36Sopenharmony_ci	nxaddr = addressXAD(nxad);
152162306a36Sopenharmony_ci
152262306a36Sopenharmony_ci	if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
152362306a36Sopenharmony_ci		return rc;
152462306a36Sopenharmony_ci
152562306a36Sopenharmony_ci	/* retrieve search result */
152662306a36Sopenharmony_ci	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
152762306a36Sopenharmony_ci
152862306a36Sopenharmony_ci	if (cmp != 0) {
152962306a36Sopenharmony_ci		XT_PUTPAGE(mp);
153062306a36Sopenharmony_ci		jfs_error(ip->i_sb, "Could not find extent\n");
153162306a36Sopenharmony_ci		return -EIO;
153262306a36Sopenharmony_ci	}
153362306a36Sopenharmony_ci
153462306a36Sopenharmony_ci	BT_MARK_DIRTY(mp, ip);
153562306a36Sopenharmony_ci	/*
153662306a36Sopenharmony_ci	 * acquire tlock of the leaf page containing original entry
153762306a36Sopenharmony_ci	 */
153862306a36Sopenharmony_ci	if (!test_cflag(COMMIT_Nolink, ip)) {
153962306a36Sopenharmony_ci		tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
154062306a36Sopenharmony_ci		xtlck = (struct xtlock *) & tlck->lock;
154162306a36Sopenharmony_ci	}
154262306a36Sopenharmony_ci
154362306a36Sopenharmony_ci	xad = &p->xad[index0];
154462306a36Sopenharmony_ci	xflag = xad->flag;
154562306a36Sopenharmony_ci	xoff = offsetXAD(xad);
154662306a36Sopenharmony_ci	xlen = lengthXAD(xad);
154762306a36Sopenharmony_ci	xaddr = addressXAD(xad);
154862306a36Sopenharmony_ci
154962306a36Sopenharmony_ci	/* nXAD must be completely contained within XAD */
155062306a36Sopenharmony_ci	if ((xoff > nxoff) ||
155162306a36Sopenharmony_ci	    (nxoff + nxlen > xoff + xlen)) {
155262306a36Sopenharmony_ci		XT_PUTPAGE(mp);
155362306a36Sopenharmony_ci		jfs_error(ip->i_sb,
155462306a36Sopenharmony_ci			  "nXAD in not completely contained within XAD\n");
155562306a36Sopenharmony_ci		return -EIO;
155662306a36Sopenharmony_ci	}
155762306a36Sopenharmony_ci
155862306a36Sopenharmony_ci	index = index0;
155962306a36Sopenharmony_ci	newindex = index + 1;
156062306a36Sopenharmony_ci	nextindex = le16_to_cpu(p->header.nextindex);
156162306a36Sopenharmony_ci
156262306a36Sopenharmony_ci	if (xoff < nxoff)
156362306a36Sopenharmony_ci		goto coalesceRight;
156462306a36Sopenharmony_ci
156562306a36Sopenharmony_ci	/*
156662306a36Sopenharmony_ci	 * coalesce with left XAD
156762306a36Sopenharmony_ci	 */
156862306a36Sopenharmony_ci	/* is XAD first entry of page ? */
156962306a36Sopenharmony_ci	if (index == XTENTRYSTART)
157062306a36Sopenharmony_ci		goto replace;
157162306a36Sopenharmony_ci
157262306a36Sopenharmony_ci	/* is nXAD logically and physically contiguous with lXAD ? */
157362306a36Sopenharmony_ci	lxad = &p->xad[index - 1];
157462306a36Sopenharmony_ci	lxlen = lengthXAD(lxad);
157562306a36Sopenharmony_ci	if (!(lxad->flag & XAD_NOTRECORDED) &&
157662306a36Sopenharmony_ci	    (nxoff == offsetXAD(lxad) + lxlen) &&
157762306a36Sopenharmony_ci	    (nxaddr == addressXAD(lxad) + lxlen) &&
157862306a36Sopenharmony_ci	    (lxlen + nxlen < MAXXLEN)) {
157962306a36Sopenharmony_ci		/* extend right lXAD */
158062306a36Sopenharmony_ci		index0 = index - 1;
158162306a36Sopenharmony_ci		XADlength(lxad, lxlen + nxlen);
158262306a36Sopenharmony_ci
158362306a36Sopenharmony_ci		/* If we just merged two extents together, need to make sure the
158462306a36Sopenharmony_ci		 * right extent gets logged.  If the left one is marked XAD_NEW,
158562306a36Sopenharmony_ci		 * then we know it will be logged.  Otherwise, mark as
158662306a36Sopenharmony_ci		 * XAD_EXTENDED
158762306a36Sopenharmony_ci		 */
158862306a36Sopenharmony_ci		if (!(lxad->flag & XAD_NEW))
158962306a36Sopenharmony_ci			lxad->flag |= XAD_EXTENDED;
159062306a36Sopenharmony_ci
159162306a36Sopenharmony_ci		if (xlen > nxlen) {
159262306a36Sopenharmony_ci			/* truncate XAD */
159362306a36Sopenharmony_ci			XADoffset(xad, xoff + nxlen);
159462306a36Sopenharmony_ci			XADlength(xad, xlen - nxlen);
159562306a36Sopenharmony_ci			XADaddress(xad, xaddr + nxlen);
159662306a36Sopenharmony_ci			goto out;
159762306a36Sopenharmony_ci		} else {	/* (xlen == nxlen) */
159862306a36Sopenharmony_ci
159962306a36Sopenharmony_ci			/* remove XAD */
160062306a36Sopenharmony_ci			if (index < nextindex - 1)
160162306a36Sopenharmony_ci				memmove(&p->xad[index], &p->xad[index + 1],
160262306a36Sopenharmony_ci					(nextindex - index -
160362306a36Sopenharmony_ci					 1) << L2XTSLOTSIZE);
160462306a36Sopenharmony_ci
160562306a36Sopenharmony_ci			p->header.nextindex =
160662306a36Sopenharmony_ci			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
160762306a36Sopenharmony_ci					1);
160862306a36Sopenharmony_ci
160962306a36Sopenharmony_ci			index = index0;
161062306a36Sopenharmony_ci			newindex = index + 1;
161162306a36Sopenharmony_ci			nextindex = le16_to_cpu(p->header.nextindex);
161262306a36Sopenharmony_ci			xoff = nxoff = offsetXAD(lxad);
161362306a36Sopenharmony_ci			xlen = nxlen = lxlen + nxlen;
161462306a36Sopenharmony_ci			xaddr = nxaddr = addressXAD(lxad);
161562306a36Sopenharmony_ci			goto coalesceRight;
161662306a36Sopenharmony_ci		}
161762306a36Sopenharmony_ci	}
161862306a36Sopenharmony_ci
161962306a36Sopenharmony_ci	/*
162062306a36Sopenharmony_ci	 * replace XAD with nXAD
162162306a36Sopenharmony_ci	 */
162262306a36Sopenharmony_ci      replace:			/* (nxoff == xoff) */
162362306a36Sopenharmony_ci	if (nxlen == xlen) {
162462306a36Sopenharmony_ci		/* replace XAD with nXAD:recorded */
162562306a36Sopenharmony_ci		*xad = *nxad;
162662306a36Sopenharmony_ci		xad->flag = xflag & ~XAD_NOTRECORDED;
162762306a36Sopenharmony_ci
162862306a36Sopenharmony_ci		goto coalesceRight;
162962306a36Sopenharmony_ci	} else			/* (nxlen < xlen) */
163062306a36Sopenharmony_ci		goto updateLeft;
163162306a36Sopenharmony_ci
163262306a36Sopenharmony_ci	/*
163362306a36Sopenharmony_ci	 * coalesce with right XAD
163462306a36Sopenharmony_ci	 */
163562306a36Sopenharmony_ci      coalesceRight:		/* (xoff <= nxoff) */
163662306a36Sopenharmony_ci	/* is XAD last entry of page ? */
163762306a36Sopenharmony_ci	if (newindex == nextindex) {
163862306a36Sopenharmony_ci		if (xoff == nxoff)
163962306a36Sopenharmony_ci			goto out;
164062306a36Sopenharmony_ci		goto updateRight;
164162306a36Sopenharmony_ci	}
164262306a36Sopenharmony_ci
164362306a36Sopenharmony_ci	/* is nXAD logically and physically contiguous with rXAD ? */
164462306a36Sopenharmony_ci	rxad = &p->xad[index + 1];
164562306a36Sopenharmony_ci	rxlen = lengthXAD(rxad);
164662306a36Sopenharmony_ci	if (!(rxad->flag & XAD_NOTRECORDED) &&
164762306a36Sopenharmony_ci	    (nxoff + nxlen == offsetXAD(rxad)) &&
164862306a36Sopenharmony_ci	    (nxaddr + nxlen == addressXAD(rxad)) &&
164962306a36Sopenharmony_ci	    (rxlen + nxlen < MAXXLEN)) {
165062306a36Sopenharmony_ci		/* extend left rXAD */
165162306a36Sopenharmony_ci		XADoffset(rxad, nxoff);
165262306a36Sopenharmony_ci		XADlength(rxad, rxlen + nxlen);
165362306a36Sopenharmony_ci		XADaddress(rxad, nxaddr);
165462306a36Sopenharmony_ci
165562306a36Sopenharmony_ci		/* If we just merged two extents together, need to make sure
165662306a36Sopenharmony_ci		 * the left extent gets logged.  If the right one is marked
165762306a36Sopenharmony_ci		 * XAD_NEW, then we know it will be logged.  Otherwise, mark as
165862306a36Sopenharmony_ci		 * XAD_EXTENDED
165962306a36Sopenharmony_ci		 */
166062306a36Sopenharmony_ci		if (!(rxad->flag & XAD_NEW))
166162306a36Sopenharmony_ci			rxad->flag |= XAD_EXTENDED;
166262306a36Sopenharmony_ci
166362306a36Sopenharmony_ci		if (xlen > nxlen)
166462306a36Sopenharmony_ci			/* truncate XAD */
166562306a36Sopenharmony_ci			XADlength(xad, xlen - nxlen);
166662306a36Sopenharmony_ci		else {		/* (xlen == nxlen) */
166762306a36Sopenharmony_ci
166862306a36Sopenharmony_ci			/* remove XAD */
166962306a36Sopenharmony_ci			memmove(&p->xad[index], &p->xad[index + 1],
167062306a36Sopenharmony_ci				(nextindex - index - 1) << L2XTSLOTSIZE);
167162306a36Sopenharmony_ci
167262306a36Sopenharmony_ci			p->header.nextindex =
167362306a36Sopenharmony_ci			    cpu_to_le16(le16_to_cpu(p->header.nextindex) -
167462306a36Sopenharmony_ci					1);
167562306a36Sopenharmony_ci		}
167662306a36Sopenharmony_ci
167762306a36Sopenharmony_ci		goto out;
167862306a36Sopenharmony_ci	} else if (xoff == nxoff)
167962306a36Sopenharmony_ci		goto out;
168062306a36Sopenharmony_ci
168162306a36Sopenharmony_ci	if (xoff >= nxoff) {
168262306a36Sopenharmony_ci		XT_PUTPAGE(mp);
168362306a36Sopenharmony_ci		jfs_error(ip->i_sb, "xoff >= nxoff\n");
168462306a36Sopenharmony_ci		return -EIO;
168562306a36Sopenharmony_ci	}
168662306a36Sopenharmony_ci
168762306a36Sopenharmony_ci	/*
168862306a36Sopenharmony_ci	 * split XAD into (lXAD, nXAD):
168962306a36Sopenharmony_ci	 *
169062306a36Sopenharmony_ci	 *          |---nXAD--->
169162306a36Sopenharmony_ci	 * --|----------XAD----------|--
169262306a36Sopenharmony_ci	 *   |-lXAD-|
169362306a36Sopenharmony_ci	 */
169462306a36Sopenharmony_ci      updateRight:		/* (xoff < nxoff) */
169562306a36Sopenharmony_ci	/* truncate old XAD as lXAD:not_recorded */
169662306a36Sopenharmony_ci	xad = &p->xad[index];
169762306a36Sopenharmony_ci	XADlength(xad, nxoff - xoff);
169862306a36Sopenharmony_ci
169962306a36Sopenharmony_ci	/* insert nXAD:recorded */
170062306a36Sopenharmony_ci	if (nextindex == le16_to_cpu(p->header.maxentry)) {
170162306a36Sopenharmony_ci
170262306a36Sopenharmony_ci		/* xtSpliUp() unpins leaf pages */
170362306a36Sopenharmony_ci		split.mp = mp;
170462306a36Sopenharmony_ci		split.index = newindex;
170562306a36Sopenharmony_ci		split.flag = xflag & ~XAD_NOTRECORDED;
170662306a36Sopenharmony_ci		split.off = nxoff;
170762306a36Sopenharmony_ci		split.len = nxlen;
170862306a36Sopenharmony_ci		split.addr = nxaddr;
170962306a36Sopenharmony_ci		split.pxdlist = NULL;
171062306a36Sopenharmony_ci		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
171162306a36Sopenharmony_ci			return rc;
171262306a36Sopenharmony_ci
171362306a36Sopenharmony_ci		/* get back old page */
171462306a36Sopenharmony_ci		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
171562306a36Sopenharmony_ci		if (rc)
171662306a36Sopenharmony_ci			return rc;
171762306a36Sopenharmony_ci		/*
171862306a36Sopenharmony_ci		 * if leaf root has been split, original root has been
171962306a36Sopenharmony_ci		 * copied to new child page, i.e., original entry now
172062306a36Sopenharmony_ci		 * resides on the new child page;
172162306a36Sopenharmony_ci		 */
172262306a36Sopenharmony_ci		if (p->header.flag & BT_INTERNAL) {
172362306a36Sopenharmony_ci			ASSERT(p->header.nextindex ==
172462306a36Sopenharmony_ci			       cpu_to_le16(XTENTRYSTART + 1));
172562306a36Sopenharmony_ci			xad = &p->xad[XTENTRYSTART];
172662306a36Sopenharmony_ci			bn = addressXAD(xad);
172762306a36Sopenharmony_ci			XT_PUTPAGE(mp);
172862306a36Sopenharmony_ci
172962306a36Sopenharmony_ci			/* get new child page */
173062306a36Sopenharmony_ci			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
173162306a36Sopenharmony_ci			if (rc)
173262306a36Sopenharmony_ci				return rc;
173362306a36Sopenharmony_ci
173462306a36Sopenharmony_ci			BT_MARK_DIRTY(mp, ip);
173562306a36Sopenharmony_ci			if (!test_cflag(COMMIT_Nolink, ip)) {
173662306a36Sopenharmony_ci				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
173762306a36Sopenharmony_ci				xtlck = (struct xtlock *) & tlck->lock;
173862306a36Sopenharmony_ci			}
173962306a36Sopenharmony_ci		} else {
174062306a36Sopenharmony_ci			/* is nXAD on new page ? */
174162306a36Sopenharmony_ci			if (newindex >
174262306a36Sopenharmony_ci			    (le16_to_cpu(p->header.maxentry) >> 1)) {
174362306a36Sopenharmony_ci				newindex =
174462306a36Sopenharmony_ci				    newindex -
174562306a36Sopenharmony_ci				    le16_to_cpu(p->header.nextindex) +
174662306a36Sopenharmony_ci				    XTENTRYSTART;
174762306a36Sopenharmony_ci				newpage = 1;
174862306a36Sopenharmony_ci			}
174962306a36Sopenharmony_ci		}
175062306a36Sopenharmony_ci	} else {
175162306a36Sopenharmony_ci		/* if insert into middle, shift right remaining entries */
175262306a36Sopenharmony_ci		if (newindex < nextindex)
175362306a36Sopenharmony_ci			memmove(&p->xad[newindex + 1], &p->xad[newindex],
175462306a36Sopenharmony_ci				(nextindex - newindex) << L2XTSLOTSIZE);
175562306a36Sopenharmony_ci
175662306a36Sopenharmony_ci		/* insert the entry */
175762306a36Sopenharmony_ci		xad = &p->xad[newindex];
175862306a36Sopenharmony_ci		*xad = *nxad;
175962306a36Sopenharmony_ci		xad->flag = xflag & ~XAD_NOTRECORDED;
176062306a36Sopenharmony_ci
176162306a36Sopenharmony_ci		/* advance next available entry index. */
176262306a36Sopenharmony_ci		p->header.nextindex =
176362306a36Sopenharmony_ci		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
176462306a36Sopenharmony_ci	}
176562306a36Sopenharmony_ci
176662306a36Sopenharmony_ci	/*
176762306a36Sopenharmony_ci	 * does nXAD force 3-way split ?
176862306a36Sopenharmony_ci	 *
176962306a36Sopenharmony_ci	 *          |---nXAD--->|
177062306a36Sopenharmony_ci	 * --|----------XAD-------------|--
177162306a36Sopenharmony_ci	 *   |-lXAD-|           |-rXAD -|
177262306a36Sopenharmony_ci	 */
177362306a36Sopenharmony_ci	if (nxoff + nxlen == xoff + xlen)
177462306a36Sopenharmony_ci		goto out;
177562306a36Sopenharmony_ci
177662306a36Sopenharmony_ci	/* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
177762306a36Sopenharmony_ci	if (newpage) {
177862306a36Sopenharmony_ci		/* close out old page */
177962306a36Sopenharmony_ci		if (!test_cflag(COMMIT_Nolink, ip)) {
178062306a36Sopenharmony_ci			xtlck->lwm.offset = (xtlck->lwm.offset) ?
178162306a36Sopenharmony_ci			    min(index0, (int)xtlck->lwm.offset) : index0;
178262306a36Sopenharmony_ci			xtlck->lwm.length =
178362306a36Sopenharmony_ci			    le16_to_cpu(p->header.nextindex) -
178462306a36Sopenharmony_ci			    xtlck->lwm.offset;
178562306a36Sopenharmony_ci		}
178662306a36Sopenharmony_ci
178762306a36Sopenharmony_ci		bn = le64_to_cpu(p->header.next);
178862306a36Sopenharmony_ci		XT_PUTPAGE(mp);
178962306a36Sopenharmony_ci
179062306a36Sopenharmony_ci		/* get new right page */
179162306a36Sopenharmony_ci		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
179262306a36Sopenharmony_ci		if (rc)
179362306a36Sopenharmony_ci			return rc;
179462306a36Sopenharmony_ci
179562306a36Sopenharmony_ci		BT_MARK_DIRTY(mp, ip);
179662306a36Sopenharmony_ci		if (!test_cflag(COMMIT_Nolink, ip)) {
179762306a36Sopenharmony_ci			tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
179862306a36Sopenharmony_ci			xtlck = (struct xtlock *) & tlck->lock;
179962306a36Sopenharmony_ci		}
180062306a36Sopenharmony_ci
180162306a36Sopenharmony_ci		index0 = index = newindex;
180262306a36Sopenharmony_ci	} else
180362306a36Sopenharmony_ci		index++;
180462306a36Sopenharmony_ci
180562306a36Sopenharmony_ci	newindex = index + 1;
180662306a36Sopenharmony_ci	nextindex = le16_to_cpu(p->header.nextindex);
180762306a36Sopenharmony_ci	xlen = xlen - (nxoff - xoff);
180862306a36Sopenharmony_ci	xoff = nxoff;
180962306a36Sopenharmony_ci	xaddr = nxaddr;
181062306a36Sopenharmony_ci
181162306a36Sopenharmony_ci	/* recompute split pages */
181262306a36Sopenharmony_ci	if (nextindex == le16_to_cpu(p->header.maxentry)) {
181362306a36Sopenharmony_ci		XT_PUTPAGE(mp);
181462306a36Sopenharmony_ci
181562306a36Sopenharmony_ci		if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
181662306a36Sopenharmony_ci			return rc;
181762306a36Sopenharmony_ci
181862306a36Sopenharmony_ci		/* retrieve search result */
181962306a36Sopenharmony_ci		XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
182062306a36Sopenharmony_ci
182162306a36Sopenharmony_ci		if (cmp != 0) {
182262306a36Sopenharmony_ci			XT_PUTPAGE(mp);
182362306a36Sopenharmony_ci			jfs_error(ip->i_sb, "xtSearch failed\n");
182462306a36Sopenharmony_ci			return -EIO;
182562306a36Sopenharmony_ci		}
182662306a36Sopenharmony_ci
182762306a36Sopenharmony_ci		if (index0 != index) {
182862306a36Sopenharmony_ci			XT_PUTPAGE(mp);
182962306a36Sopenharmony_ci			jfs_error(ip->i_sb, "unexpected value of index\n");
183062306a36Sopenharmony_ci			return -EIO;
183162306a36Sopenharmony_ci		}
183262306a36Sopenharmony_ci	}
183362306a36Sopenharmony_ci
183462306a36Sopenharmony_ci	/*
183562306a36Sopenharmony_ci	 * split XAD into (nXAD, rXAD)
183662306a36Sopenharmony_ci	 *
183762306a36Sopenharmony_ci	 *          ---nXAD---|
183862306a36Sopenharmony_ci	 * --|----------XAD----------|--
183962306a36Sopenharmony_ci	 *                    |-rXAD-|
184062306a36Sopenharmony_ci	 */
184162306a36Sopenharmony_ci      updateLeft:		/* (nxoff == xoff) && (nxlen < xlen) */
184262306a36Sopenharmony_ci	/* update old XAD with nXAD:recorded */
184362306a36Sopenharmony_ci	xad = &p->xad[index];
184462306a36Sopenharmony_ci	*xad = *nxad;
184562306a36Sopenharmony_ci	xad->flag = xflag & ~XAD_NOTRECORDED;
184662306a36Sopenharmony_ci
184762306a36Sopenharmony_ci	/* insert rXAD:not_recorded */
184862306a36Sopenharmony_ci	xoff = xoff + nxlen;
184962306a36Sopenharmony_ci	xlen = xlen - nxlen;
185062306a36Sopenharmony_ci	xaddr = xaddr + nxlen;
185162306a36Sopenharmony_ci	if (nextindex == le16_to_cpu(p->header.maxentry)) {
185262306a36Sopenharmony_ci/*
185362306a36Sopenharmony_ciprintf("xtUpdate.updateLeft.split p:0x%p\n", p);
185462306a36Sopenharmony_ci*/
185562306a36Sopenharmony_ci		/* xtSpliUp() unpins leaf pages */
185662306a36Sopenharmony_ci		split.mp = mp;
185762306a36Sopenharmony_ci		split.index = newindex;
185862306a36Sopenharmony_ci		split.flag = xflag;
185962306a36Sopenharmony_ci		split.off = xoff;
186062306a36Sopenharmony_ci		split.len = xlen;
186162306a36Sopenharmony_ci		split.addr = xaddr;
186262306a36Sopenharmony_ci		split.pxdlist = NULL;
186362306a36Sopenharmony_ci		if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
186462306a36Sopenharmony_ci			return rc;
186562306a36Sopenharmony_ci
186662306a36Sopenharmony_ci		/* get back old page */
186762306a36Sopenharmony_ci		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
186862306a36Sopenharmony_ci		if (rc)
186962306a36Sopenharmony_ci			return rc;
187062306a36Sopenharmony_ci
187162306a36Sopenharmony_ci		/*
187262306a36Sopenharmony_ci		 * if leaf root has been split, original root has been
187362306a36Sopenharmony_ci		 * copied to new child page, i.e., original entry now
187462306a36Sopenharmony_ci		 * resides on the new child page;
187562306a36Sopenharmony_ci		 */
187662306a36Sopenharmony_ci		if (p->header.flag & BT_INTERNAL) {
187762306a36Sopenharmony_ci			ASSERT(p->header.nextindex ==
187862306a36Sopenharmony_ci			       cpu_to_le16(XTENTRYSTART + 1));
187962306a36Sopenharmony_ci			xad = &p->xad[XTENTRYSTART];
188062306a36Sopenharmony_ci			bn = addressXAD(xad);
188162306a36Sopenharmony_ci			XT_PUTPAGE(mp);
188262306a36Sopenharmony_ci
188362306a36Sopenharmony_ci			/* get new child page */
188462306a36Sopenharmony_ci			XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
188562306a36Sopenharmony_ci			if (rc)
188662306a36Sopenharmony_ci				return rc;
188762306a36Sopenharmony_ci
188862306a36Sopenharmony_ci			BT_MARK_DIRTY(mp, ip);
188962306a36Sopenharmony_ci			if (!test_cflag(COMMIT_Nolink, ip)) {
189062306a36Sopenharmony_ci				tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
189162306a36Sopenharmony_ci				xtlck = (struct xtlock *) & tlck->lock;
189262306a36Sopenharmony_ci			}
189362306a36Sopenharmony_ci		}
189462306a36Sopenharmony_ci	} else {
189562306a36Sopenharmony_ci		/* if insert into middle, shift right remaining entries */
189662306a36Sopenharmony_ci		if (newindex < nextindex)
189762306a36Sopenharmony_ci			memmove(&p->xad[newindex + 1], &p->xad[newindex],
189862306a36Sopenharmony_ci				(nextindex - newindex) << L2XTSLOTSIZE);
189962306a36Sopenharmony_ci
190062306a36Sopenharmony_ci		/* insert the entry */
190162306a36Sopenharmony_ci		xad = &p->xad[newindex];
190262306a36Sopenharmony_ci		XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
190362306a36Sopenharmony_ci
190462306a36Sopenharmony_ci		/* advance next available entry index. */
190562306a36Sopenharmony_ci		p->header.nextindex =
190662306a36Sopenharmony_ci		    cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
190762306a36Sopenharmony_ci	}
190862306a36Sopenharmony_ci
190962306a36Sopenharmony_ci      out:
191062306a36Sopenharmony_ci	if (!test_cflag(COMMIT_Nolink, ip)) {
191162306a36Sopenharmony_ci		xtlck->lwm.offset = (xtlck->lwm.offset) ?
191262306a36Sopenharmony_ci		    min(index0, (int)xtlck->lwm.offset) : index0;
191362306a36Sopenharmony_ci		xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
191462306a36Sopenharmony_ci		    xtlck->lwm.offset;
191562306a36Sopenharmony_ci	}
191662306a36Sopenharmony_ci
191762306a36Sopenharmony_ci	/* unpin the leaf page */
191862306a36Sopenharmony_ci	XT_PUTPAGE(mp);
191962306a36Sopenharmony_ci
192062306a36Sopenharmony_ci	return rc;
192162306a36Sopenharmony_ci}
192262306a36Sopenharmony_ci
192362306a36Sopenharmony_ci
192462306a36Sopenharmony_ci/*
192562306a36Sopenharmony_ci *	xtAppend()
192662306a36Sopenharmony_ci *
192762306a36Sopenharmony_ci * function: grow in append mode from contiguous region specified ;
192862306a36Sopenharmony_ci *
192962306a36Sopenharmony_ci * parameter:
193062306a36Sopenharmony_ci *	tid		- transaction id;
193162306a36Sopenharmony_ci *	ip		- file object;
193262306a36Sopenharmony_ci *	xflag		- extent flag:
193362306a36Sopenharmony_ci *	xoff		- extent offset;
193462306a36Sopenharmony_ci *	maxblocks	- max extent length;
193562306a36Sopenharmony_ci *	xlen		- extent length (in/out);
193662306a36Sopenharmony_ci *	xaddrp		- extent address pointer (in/out):
193762306a36Sopenharmony_ci *	flag		-
193862306a36Sopenharmony_ci *
193962306a36Sopenharmony_ci * return:
194062306a36Sopenharmony_ci */
194162306a36Sopenharmony_ciint xtAppend(tid_t tid,		/* transaction id */
194262306a36Sopenharmony_ci	     struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
194362306a36Sopenharmony_ci	     s32 * xlenp,	/* (in/out) */
194462306a36Sopenharmony_ci	     s64 * xaddrp,	/* (in/out) */
194562306a36Sopenharmony_ci	     int flag)
194662306a36Sopenharmony_ci{
194762306a36Sopenharmony_ci	int rc = 0;
194862306a36Sopenharmony_ci	struct metapage *mp;	/* meta-page buffer */
194962306a36Sopenharmony_ci	xtpage_t *p;		/* base B+-tree index page */
195062306a36Sopenharmony_ci	s64 bn, xaddr;
195162306a36Sopenharmony_ci	int index, nextindex;
195262306a36Sopenharmony_ci	struct btstack btstack;	/* traverse stack */
195362306a36Sopenharmony_ci	struct xtsplit split;	/* split information */
195462306a36Sopenharmony_ci	xad_t *xad;
195562306a36Sopenharmony_ci	int cmp;
195662306a36Sopenharmony_ci	struct tlock *tlck;
195762306a36Sopenharmony_ci	struct xtlock *xtlck;
195862306a36Sopenharmony_ci	int nsplit, nblocks, xlen;
195962306a36Sopenharmony_ci	struct pxdlist pxdlist;
196062306a36Sopenharmony_ci	pxd_t *pxd;
196162306a36Sopenharmony_ci	s64 next;
196262306a36Sopenharmony_ci
196362306a36Sopenharmony_ci	xaddr = *xaddrp;
196462306a36Sopenharmony_ci	xlen = *xlenp;
196562306a36Sopenharmony_ci	jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
196662306a36Sopenharmony_ci		 (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
196762306a36Sopenharmony_ci
196862306a36Sopenharmony_ci	/*
196962306a36Sopenharmony_ci	 *	search for the entry location at which to insert:
197062306a36Sopenharmony_ci	 *
197162306a36Sopenharmony_ci	 * xtFastSearch() and xtSearch() both returns (leaf page
197262306a36Sopenharmony_ci	 * pinned, index at which to insert).
197362306a36Sopenharmony_ci	 * n.b. xtSearch() may return index of maxentry of
197462306a36Sopenharmony_ci	 * the full page.
197562306a36Sopenharmony_ci	 */
197662306a36Sopenharmony_ci	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
197762306a36Sopenharmony_ci		return rc;
197862306a36Sopenharmony_ci
197962306a36Sopenharmony_ci	/* retrieve search result */
198062306a36Sopenharmony_ci	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
198162306a36Sopenharmony_ci
198262306a36Sopenharmony_ci	if (cmp == 0) {
198362306a36Sopenharmony_ci		rc = -EEXIST;
198462306a36Sopenharmony_ci		goto out;
198562306a36Sopenharmony_ci	}
198662306a36Sopenharmony_ci
198762306a36Sopenharmony_ci	if (next)
198862306a36Sopenharmony_ci		xlen = min(xlen, (int)(next - xoff));
198962306a36Sopenharmony_ci//insert:
199062306a36Sopenharmony_ci	/*
199162306a36Sopenharmony_ci	 *	insert entry for new extent
199262306a36Sopenharmony_ci	 */
199362306a36Sopenharmony_ci	xflag |= XAD_NEW;
199462306a36Sopenharmony_ci
199562306a36Sopenharmony_ci	/*
199662306a36Sopenharmony_ci	 *	if the leaf page is full, split the page and
199762306a36Sopenharmony_ci	 *	propagate up the router entry for the new page from split
199862306a36Sopenharmony_ci	 *
199962306a36Sopenharmony_ci	 * The xtSplitUp() will insert the entry and unpin the leaf page.
200062306a36Sopenharmony_ci	 */
200162306a36Sopenharmony_ci	nextindex = le16_to_cpu(p->header.nextindex);
200262306a36Sopenharmony_ci	if (nextindex < le16_to_cpu(p->header.maxentry))
200362306a36Sopenharmony_ci		goto insertLeaf;
200462306a36Sopenharmony_ci
200562306a36Sopenharmony_ci	/*
200662306a36Sopenharmony_ci	 * allocate new index blocks to cover index page split(s)
200762306a36Sopenharmony_ci	 */
200862306a36Sopenharmony_ci	nsplit = btstack.nsplit;
200962306a36Sopenharmony_ci	split.pxdlist = &pxdlist;
201062306a36Sopenharmony_ci	pxdlist.maxnpxd = pxdlist.npxd = 0;
201162306a36Sopenharmony_ci	pxd = &pxdlist.pxd[0];
201262306a36Sopenharmony_ci	nblocks = JFS_SBI(ip->i_sb)->nbperpage;
201362306a36Sopenharmony_ci	for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
201462306a36Sopenharmony_ci		if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
201562306a36Sopenharmony_ci			PXDaddress(pxd, xaddr);
201662306a36Sopenharmony_ci			PXDlength(pxd, nblocks);
201762306a36Sopenharmony_ci
201862306a36Sopenharmony_ci			pxdlist.maxnpxd++;
201962306a36Sopenharmony_ci
202062306a36Sopenharmony_ci			continue;
202162306a36Sopenharmony_ci		}
202262306a36Sopenharmony_ci
202362306a36Sopenharmony_ci		/* undo allocation */
202462306a36Sopenharmony_ci
202562306a36Sopenharmony_ci		goto out;
202662306a36Sopenharmony_ci	}
202762306a36Sopenharmony_ci
202862306a36Sopenharmony_ci	xlen = min(xlen, maxblocks);
202962306a36Sopenharmony_ci
203062306a36Sopenharmony_ci	/*
203162306a36Sopenharmony_ci	 * allocate data extent requested
203262306a36Sopenharmony_ci	 */
203362306a36Sopenharmony_ci	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
203462306a36Sopenharmony_ci		goto out;
203562306a36Sopenharmony_ci
203662306a36Sopenharmony_ci	split.mp = mp;
203762306a36Sopenharmony_ci	split.index = index;
203862306a36Sopenharmony_ci	split.flag = xflag;
203962306a36Sopenharmony_ci	split.off = xoff;
204062306a36Sopenharmony_ci	split.len = xlen;
204162306a36Sopenharmony_ci	split.addr = xaddr;
204262306a36Sopenharmony_ci	if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
204362306a36Sopenharmony_ci		/* undo data extent allocation */
204462306a36Sopenharmony_ci		dbFree(ip, *xaddrp, (s64) * xlenp);
204562306a36Sopenharmony_ci
204662306a36Sopenharmony_ci		return rc;
204762306a36Sopenharmony_ci	}
204862306a36Sopenharmony_ci
204962306a36Sopenharmony_ci	*xaddrp = xaddr;
205062306a36Sopenharmony_ci	*xlenp = xlen;
205162306a36Sopenharmony_ci	return 0;
205262306a36Sopenharmony_ci
205362306a36Sopenharmony_ci	/*
205462306a36Sopenharmony_ci	 *	insert the new entry into the leaf page
205562306a36Sopenharmony_ci	 */
205662306a36Sopenharmony_ci      insertLeaf:
205762306a36Sopenharmony_ci	/*
205862306a36Sopenharmony_ci	 * allocate data extent requested
205962306a36Sopenharmony_ci	 */
206062306a36Sopenharmony_ci	if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
206162306a36Sopenharmony_ci		goto out;
206262306a36Sopenharmony_ci
206362306a36Sopenharmony_ci	BT_MARK_DIRTY(mp, ip);
206462306a36Sopenharmony_ci	/*
206562306a36Sopenharmony_ci	 * acquire a transaction lock on the leaf page;
206662306a36Sopenharmony_ci	 *
206762306a36Sopenharmony_ci	 * action: xad insertion/extension;
206862306a36Sopenharmony_ci	 */
206962306a36Sopenharmony_ci	tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
207062306a36Sopenharmony_ci	xtlck = (struct xtlock *) & tlck->lock;
207162306a36Sopenharmony_ci
207262306a36Sopenharmony_ci	/* insert the new entry: mark the entry NEW */
207362306a36Sopenharmony_ci	xad = &p->xad[index];
207462306a36Sopenharmony_ci	XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
207562306a36Sopenharmony_ci
207662306a36Sopenharmony_ci	/* advance next available entry index */
207762306a36Sopenharmony_ci	le16_add_cpu(&p->header.nextindex, 1);
207862306a36Sopenharmony_ci
207962306a36Sopenharmony_ci	xtlck->lwm.offset =
208062306a36Sopenharmony_ci	    (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
208162306a36Sopenharmony_ci	xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
208262306a36Sopenharmony_ci	    xtlck->lwm.offset;
208362306a36Sopenharmony_ci
208462306a36Sopenharmony_ci	*xaddrp = xaddr;
208562306a36Sopenharmony_ci	*xlenp = xlen;
208662306a36Sopenharmony_ci
208762306a36Sopenharmony_ci      out:
208862306a36Sopenharmony_ci	/* unpin the leaf page */
208962306a36Sopenharmony_ci	XT_PUTPAGE(mp);
209062306a36Sopenharmony_ci
209162306a36Sopenharmony_ci	return rc;
209262306a36Sopenharmony_ci}
209362306a36Sopenharmony_ci
209462306a36Sopenharmony_ci/*
209562306a36Sopenharmony_ci *	xtInitRoot()
209662306a36Sopenharmony_ci *
209762306a36Sopenharmony_ci * initialize file root (inline in inode)
209862306a36Sopenharmony_ci */
209962306a36Sopenharmony_civoid xtInitRoot(tid_t tid, struct inode *ip)
210062306a36Sopenharmony_ci{
210162306a36Sopenharmony_ci	xtpage_t *p;
210262306a36Sopenharmony_ci
210362306a36Sopenharmony_ci	/*
210462306a36Sopenharmony_ci	 * acquire a transaction lock on the root
210562306a36Sopenharmony_ci	 *
210662306a36Sopenharmony_ci	 * action:
210762306a36Sopenharmony_ci	 */
210862306a36Sopenharmony_ci	txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
210962306a36Sopenharmony_ci		      tlckXTREE | tlckNEW);
211062306a36Sopenharmony_ci	p = &JFS_IP(ip)->i_xtroot;
211162306a36Sopenharmony_ci
211262306a36Sopenharmony_ci	p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
211362306a36Sopenharmony_ci	p->header.nextindex = cpu_to_le16(XTENTRYSTART);
211462306a36Sopenharmony_ci
211562306a36Sopenharmony_ci	if (S_ISDIR(ip->i_mode))
211662306a36Sopenharmony_ci		p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
211762306a36Sopenharmony_ci	else {
211862306a36Sopenharmony_ci		p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
211962306a36Sopenharmony_ci		ip->i_size = 0;
212062306a36Sopenharmony_ci	}
212162306a36Sopenharmony_ci
212262306a36Sopenharmony_ci
212362306a36Sopenharmony_ci	return;
212462306a36Sopenharmony_ci}
212562306a36Sopenharmony_ci
212662306a36Sopenharmony_ci
212762306a36Sopenharmony_ci/*
212862306a36Sopenharmony_ci * We can run into a deadlock truncating a file with a large number of
212962306a36Sopenharmony_ci * xtree pages (large fragmented file).  A robust fix would entail a
213062306a36Sopenharmony_ci * reservation system where we would reserve a number of metadata pages
213162306a36Sopenharmony_ci * and tlocks which we would be guaranteed without a deadlock.  Without
213262306a36Sopenharmony_ci * this, a partial fix is to limit number of metadata pages we will lock
213362306a36Sopenharmony_ci * in a single transaction.  Currently we will truncate the file so that
213462306a36Sopenharmony_ci * no more than 50 leaf pages will be locked.  The caller of xtTruncate
213562306a36Sopenharmony_ci * will be responsible for ensuring that the current transaction gets
213662306a36Sopenharmony_ci * committed, and that subsequent transactions are created to truncate
213762306a36Sopenharmony_ci * the file further if needed.
213862306a36Sopenharmony_ci */
213962306a36Sopenharmony_ci#define MAX_TRUNCATE_LEAVES 50
214062306a36Sopenharmony_ci
214162306a36Sopenharmony_ci/*
214262306a36Sopenharmony_ci *	xtTruncate()
214362306a36Sopenharmony_ci *
214462306a36Sopenharmony_ci * function:
214562306a36Sopenharmony_ci *	traverse for truncation logging backward bottom up;
214662306a36Sopenharmony_ci *	terminate at the last extent entry at the current subtree
214762306a36Sopenharmony_ci *	root page covering new down size.
214862306a36Sopenharmony_ci *	truncation may occur within the last extent entry.
214962306a36Sopenharmony_ci *
215062306a36Sopenharmony_ci * parameter:
215162306a36Sopenharmony_ci *	int		tid,
215262306a36Sopenharmony_ci *	struct inode	*ip,
215362306a36Sopenharmony_ci *	s64		newsize,
215462306a36Sopenharmony_ci *	int		type)	{PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
215562306a36Sopenharmony_ci *
215662306a36Sopenharmony_ci * return:
215762306a36Sopenharmony_ci *
215862306a36Sopenharmony_ci * note:
215962306a36Sopenharmony_ci *	PWMAP:
216062306a36Sopenharmony_ci *	 1. truncate (non-COMMIT_NOLINK file)
216162306a36Sopenharmony_ci *	    by jfs_truncate() or jfs_open(O_TRUNC):
216262306a36Sopenharmony_ci *	    xtree is updated;
216362306a36Sopenharmony_ci *	 2. truncate index table of directory when last entry removed
216462306a36Sopenharmony_ci *	map update via tlock at commit time;
216562306a36Sopenharmony_ci *	PMAP:
216662306a36Sopenharmony_ci *	 Call xtTruncate_pmap instead
216762306a36Sopenharmony_ci *	WMAP:
216862306a36Sopenharmony_ci *	 1. remove (free zero link count) on last reference release
216962306a36Sopenharmony_ci *	    (pmap has been freed at commit zero link count);
217062306a36Sopenharmony_ci *	 2. truncate (COMMIT_NOLINK file, i.e., tmp file):
217162306a36Sopenharmony_ci *	    xtree is updated;
217262306a36Sopenharmony_ci *	 map update directly at truncation time;
217362306a36Sopenharmony_ci *
217462306a36Sopenharmony_ci *	if (DELETE)
217562306a36Sopenharmony_ci *		no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
217662306a36Sopenharmony_ci *	else if (TRUNCATE)
217762306a36Sopenharmony_ci *		must write LOG_NOREDOPAGE for deleted index page;
217862306a36Sopenharmony_ci *
217962306a36Sopenharmony_ci * pages may already have been tlocked by anonymous transactions
218062306a36Sopenharmony_ci * during file growth (i.e., write) before truncation;
218162306a36Sopenharmony_ci *
218262306a36Sopenharmony_ci * except last truncated entry, deleted entries remains as is
218362306a36Sopenharmony_ci * in the page (nextindex is updated) for other use
218462306a36Sopenharmony_ci * (e.g., log/update allocation map): this avoid copying the page
218562306a36Sopenharmony_ci * info but delay free of pages;
218662306a36Sopenharmony_ci *
218762306a36Sopenharmony_ci */
218862306a36Sopenharmony_cis64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
218962306a36Sopenharmony_ci{
219062306a36Sopenharmony_ci	int rc = 0;
219162306a36Sopenharmony_ci	s64 teof;
219262306a36Sopenharmony_ci	struct metapage *mp;
219362306a36Sopenharmony_ci	xtpage_t *p;
219462306a36Sopenharmony_ci	s64 bn;
219562306a36Sopenharmony_ci	int index, nextindex;
219662306a36Sopenharmony_ci	xad_t *xad;
219762306a36Sopenharmony_ci	s64 xoff, xaddr;
219862306a36Sopenharmony_ci	int xlen, len, freexlen;
219962306a36Sopenharmony_ci	struct btstack btstack;
220062306a36Sopenharmony_ci	struct btframe *parent;
220162306a36Sopenharmony_ci	struct tblock *tblk = NULL;
220262306a36Sopenharmony_ci	struct tlock *tlck = NULL;
220362306a36Sopenharmony_ci	struct xtlock *xtlck = NULL;
220462306a36Sopenharmony_ci	struct xdlistlock xadlock;	/* maplock for COMMIT_WMAP */
220562306a36Sopenharmony_ci	struct pxd_lock *pxdlock;		/* maplock for COMMIT_WMAP */
220662306a36Sopenharmony_ci	s64 nfreed;
220762306a36Sopenharmony_ci	int freed, log;
220862306a36Sopenharmony_ci	int locked_leaves = 0;
220962306a36Sopenharmony_ci
221062306a36Sopenharmony_ci	/* save object truncation type */
221162306a36Sopenharmony_ci	if (tid) {
221262306a36Sopenharmony_ci		tblk = tid_to_tblock(tid);
221362306a36Sopenharmony_ci		tblk->xflag |= flag;
221462306a36Sopenharmony_ci	}
221562306a36Sopenharmony_ci
221662306a36Sopenharmony_ci	nfreed = 0;
221762306a36Sopenharmony_ci
221862306a36Sopenharmony_ci	flag &= COMMIT_MAP;
221962306a36Sopenharmony_ci	assert(flag != COMMIT_PMAP);
222062306a36Sopenharmony_ci
222162306a36Sopenharmony_ci	if (flag == COMMIT_PWMAP)
222262306a36Sopenharmony_ci		log = 1;
222362306a36Sopenharmony_ci	else {
222462306a36Sopenharmony_ci		log = 0;
222562306a36Sopenharmony_ci		xadlock.flag = mlckFREEXADLIST;
222662306a36Sopenharmony_ci		xadlock.index = 1;
222762306a36Sopenharmony_ci	}
222862306a36Sopenharmony_ci
222962306a36Sopenharmony_ci	/*
223062306a36Sopenharmony_ci	 * if the newsize is not an integral number of pages,
223162306a36Sopenharmony_ci	 * the file between newsize and next page boundary will
223262306a36Sopenharmony_ci	 * be cleared.
223362306a36Sopenharmony_ci	 * if truncating into a file hole, it will cause
223462306a36Sopenharmony_ci	 * a full block to be allocated for the logical block.
223562306a36Sopenharmony_ci	 */
223662306a36Sopenharmony_ci
223762306a36Sopenharmony_ci	/*
223862306a36Sopenharmony_ci	 * release page blocks of truncated region <teof, eof>
223962306a36Sopenharmony_ci	 *
224062306a36Sopenharmony_ci	 * free the data blocks from the leaf index blocks.
224162306a36Sopenharmony_ci	 * delete the parent index entries corresponding to
224262306a36Sopenharmony_ci	 * the freed child data/index blocks.
224362306a36Sopenharmony_ci	 * free the index blocks themselves which aren't needed
224462306a36Sopenharmony_ci	 * in new sized file.
224562306a36Sopenharmony_ci	 *
224662306a36Sopenharmony_ci	 * index blocks are updated only if the blocks are to be
224762306a36Sopenharmony_ci	 * retained in the new sized file.
224862306a36Sopenharmony_ci	 * if type is PMAP, the data and index pages are NOT
224962306a36Sopenharmony_ci	 * freed, and the data and index blocks are NOT freed
225062306a36Sopenharmony_ci	 * from working map.
225162306a36Sopenharmony_ci	 * (this will allow continued access of data/index of
225262306a36Sopenharmony_ci	 * temporary file (zerolink count file truncated to zero-length)).
225362306a36Sopenharmony_ci	 */
225462306a36Sopenharmony_ci	teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
225562306a36Sopenharmony_ci	    JFS_SBI(ip->i_sb)->l2bsize;
225662306a36Sopenharmony_ci
225762306a36Sopenharmony_ci	/* clear stack */
225862306a36Sopenharmony_ci	BT_CLR(&btstack);
225962306a36Sopenharmony_ci
226062306a36Sopenharmony_ci	/*
226162306a36Sopenharmony_ci	 * start with root
226262306a36Sopenharmony_ci	 *
226362306a36Sopenharmony_ci	 * root resides in the inode
226462306a36Sopenharmony_ci	 */
226562306a36Sopenharmony_ci	bn = 0;
226662306a36Sopenharmony_ci
226762306a36Sopenharmony_ci	/*
226862306a36Sopenharmony_ci	 * first access of each page:
226962306a36Sopenharmony_ci	 */
227062306a36Sopenharmony_ci      getPage:
227162306a36Sopenharmony_ci	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
227262306a36Sopenharmony_ci	if (rc)
227362306a36Sopenharmony_ci		return rc;
227462306a36Sopenharmony_ci
227562306a36Sopenharmony_ci	/* process entries backward from last index */
227662306a36Sopenharmony_ci	index = le16_to_cpu(p->header.nextindex) - 1;
227762306a36Sopenharmony_ci
227862306a36Sopenharmony_ci
227962306a36Sopenharmony_ci	/* Since this is the rightmost page at this level, and we may have
228062306a36Sopenharmony_ci	 * already freed a page that was formerly to the right, let's make
228162306a36Sopenharmony_ci	 * sure that the next pointer is zero.
228262306a36Sopenharmony_ci	 */
228362306a36Sopenharmony_ci	if (p->header.next) {
228462306a36Sopenharmony_ci		if (log)
228562306a36Sopenharmony_ci			/*
228662306a36Sopenharmony_ci			 * Make sure this change to the header is logged.
228762306a36Sopenharmony_ci			 * If we really truncate this leaf, the flag
228862306a36Sopenharmony_ci			 * will be changed to tlckTRUNCATE
228962306a36Sopenharmony_ci			 */
229062306a36Sopenharmony_ci			tlck = txLock(tid, ip, mp, tlckXTREE|tlckGROW);
229162306a36Sopenharmony_ci		BT_MARK_DIRTY(mp, ip);
229262306a36Sopenharmony_ci		p->header.next = 0;
229362306a36Sopenharmony_ci	}
229462306a36Sopenharmony_ci
229562306a36Sopenharmony_ci	if (p->header.flag & BT_INTERNAL)
229662306a36Sopenharmony_ci		goto getChild;
229762306a36Sopenharmony_ci
229862306a36Sopenharmony_ci	/*
229962306a36Sopenharmony_ci	 *	leaf page
230062306a36Sopenharmony_ci	 */
230162306a36Sopenharmony_ci	freed = 0;
230262306a36Sopenharmony_ci
230362306a36Sopenharmony_ci	/* does region covered by leaf page precede Teof ? */
230462306a36Sopenharmony_ci	xad = &p->xad[index];
230562306a36Sopenharmony_ci	xoff = offsetXAD(xad);
230662306a36Sopenharmony_ci	xlen = lengthXAD(xad);
230762306a36Sopenharmony_ci	if (teof >= xoff + xlen) {
230862306a36Sopenharmony_ci		XT_PUTPAGE(mp);
230962306a36Sopenharmony_ci		goto getParent;
231062306a36Sopenharmony_ci	}
231162306a36Sopenharmony_ci
231262306a36Sopenharmony_ci	/* (re)acquire tlock of the leaf page */
231362306a36Sopenharmony_ci	if (log) {
231462306a36Sopenharmony_ci		if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
231562306a36Sopenharmony_ci			/*
231662306a36Sopenharmony_ci			 * We need to limit the size of the transaction
231762306a36Sopenharmony_ci			 * to avoid exhausting pagecache & tlocks
231862306a36Sopenharmony_ci			 */
231962306a36Sopenharmony_ci			XT_PUTPAGE(mp);
232062306a36Sopenharmony_ci			newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
232162306a36Sopenharmony_ci			goto getParent;
232262306a36Sopenharmony_ci		}
232362306a36Sopenharmony_ci		tlck = txLock(tid, ip, mp, tlckXTREE);
232462306a36Sopenharmony_ci		tlck->type = tlckXTREE | tlckTRUNCATE;
232562306a36Sopenharmony_ci		xtlck = (struct xtlock *) & tlck->lock;
232662306a36Sopenharmony_ci		xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
232762306a36Sopenharmony_ci	}
232862306a36Sopenharmony_ci	BT_MARK_DIRTY(mp, ip);
232962306a36Sopenharmony_ci
233062306a36Sopenharmony_ci	/*
233162306a36Sopenharmony_ci	 * scan backward leaf page entries
233262306a36Sopenharmony_ci	 */
233362306a36Sopenharmony_ci	for (; index >= XTENTRYSTART; index--) {
233462306a36Sopenharmony_ci		xad = &p->xad[index];
233562306a36Sopenharmony_ci		xoff = offsetXAD(xad);
233662306a36Sopenharmony_ci		xlen = lengthXAD(xad);
233762306a36Sopenharmony_ci		xaddr = addressXAD(xad);
233862306a36Sopenharmony_ci
233962306a36Sopenharmony_ci		/*
234062306a36Sopenharmony_ci		 * The "data" for a directory is indexed by the block
234162306a36Sopenharmony_ci		 * device's address space.  This metadata must be invalidated
234262306a36Sopenharmony_ci		 * here
234362306a36Sopenharmony_ci		 */
234462306a36Sopenharmony_ci		if (S_ISDIR(ip->i_mode) && (teof == 0))
234562306a36Sopenharmony_ci			invalidate_xad_metapages(ip, *xad);
234662306a36Sopenharmony_ci		/*
234762306a36Sopenharmony_ci		 * entry beyond eof: continue scan of current page
234862306a36Sopenharmony_ci		 *          xad
234962306a36Sopenharmony_ci		 * ---|---=======------->
235062306a36Sopenharmony_ci		 *   eof
235162306a36Sopenharmony_ci		 */
235262306a36Sopenharmony_ci		if (teof < xoff) {
235362306a36Sopenharmony_ci			nfreed += xlen;
235462306a36Sopenharmony_ci			continue;
235562306a36Sopenharmony_ci		}
235662306a36Sopenharmony_ci
235762306a36Sopenharmony_ci		/*
235862306a36Sopenharmony_ci		 * (xoff <= teof): last entry to be deleted from page;
235962306a36Sopenharmony_ci		 * If other entries remain in page: keep and update the page.
236062306a36Sopenharmony_ci		 */
236162306a36Sopenharmony_ci
236262306a36Sopenharmony_ci		/*
236362306a36Sopenharmony_ci		 * eof == entry_start: delete the entry
236462306a36Sopenharmony_ci		 *           xad
236562306a36Sopenharmony_ci		 * -------|=======------->
236662306a36Sopenharmony_ci		 *       eof
236762306a36Sopenharmony_ci		 *
236862306a36Sopenharmony_ci		 */
236962306a36Sopenharmony_ci		if (teof == xoff) {
237062306a36Sopenharmony_ci			nfreed += xlen;
237162306a36Sopenharmony_ci
237262306a36Sopenharmony_ci			if (index == XTENTRYSTART)
237362306a36Sopenharmony_ci				break;
237462306a36Sopenharmony_ci
237562306a36Sopenharmony_ci			nextindex = index;
237662306a36Sopenharmony_ci		}
237762306a36Sopenharmony_ci		/*
237862306a36Sopenharmony_ci		 * eof within the entry: truncate the entry.
237962306a36Sopenharmony_ci		 *          xad
238062306a36Sopenharmony_ci		 * -------===|===------->
238162306a36Sopenharmony_ci		 *          eof
238262306a36Sopenharmony_ci		 */
238362306a36Sopenharmony_ci		else if (teof < xoff + xlen) {
238462306a36Sopenharmony_ci			/* update truncated entry */
238562306a36Sopenharmony_ci			len = teof - xoff;
238662306a36Sopenharmony_ci			freexlen = xlen - len;
238762306a36Sopenharmony_ci			XADlength(xad, len);
238862306a36Sopenharmony_ci
238962306a36Sopenharmony_ci			/* save pxd of truncated extent in tlck */
239062306a36Sopenharmony_ci			xaddr += len;
239162306a36Sopenharmony_ci			if (log) {	/* COMMIT_PWMAP */
239262306a36Sopenharmony_ci				xtlck->lwm.offset = (xtlck->lwm.offset) ?
239362306a36Sopenharmony_ci				    min(index, (int)xtlck->lwm.offset) : index;
239462306a36Sopenharmony_ci				xtlck->lwm.length = index + 1 -
239562306a36Sopenharmony_ci				    xtlck->lwm.offset;
239662306a36Sopenharmony_ci				xtlck->twm.offset = index;
239762306a36Sopenharmony_ci				pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
239862306a36Sopenharmony_ci				pxdlock->flag = mlckFREEPXD;
239962306a36Sopenharmony_ci				PXDaddress(&pxdlock->pxd, xaddr);
240062306a36Sopenharmony_ci				PXDlength(&pxdlock->pxd, freexlen);
240162306a36Sopenharmony_ci			}
240262306a36Sopenharmony_ci			/* free truncated extent */
240362306a36Sopenharmony_ci			else {	/* COMMIT_WMAP */
240462306a36Sopenharmony_ci
240562306a36Sopenharmony_ci				pxdlock = (struct pxd_lock *) & xadlock;
240662306a36Sopenharmony_ci				pxdlock->flag = mlckFREEPXD;
240762306a36Sopenharmony_ci				PXDaddress(&pxdlock->pxd, xaddr);
240862306a36Sopenharmony_ci				PXDlength(&pxdlock->pxd, freexlen);
240962306a36Sopenharmony_ci				txFreeMap(ip, pxdlock, NULL, COMMIT_WMAP);
241062306a36Sopenharmony_ci
241162306a36Sopenharmony_ci				/* reset map lock */
241262306a36Sopenharmony_ci				xadlock.flag = mlckFREEXADLIST;
241362306a36Sopenharmony_ci			}
241462306a36Sopenharmony_ci
241562306a36Sopenharmony_ci			/* current entry is new last entry; */
241662306a36Sopenharmony_ci			nextindex = index + 1;
241762306a36Sopenharmony_ci
241862306a36Sopenharmony_ci			nfreed += freexlen;
241962306a36Sopenharmony_ci		}
242062306a36Sopenharmony_ci		/*
242162306a36Sopenharmony_ci		 * eof beyond the entry:
242262306a36Sopenharmony_ci		 *          xad
242362306a36Sopenharmony_ci		 * -------=======---|--->
242462306a36Sopenharmony_ci		 *                 eof
242562306a36Sopenharmony_ci		 */
242662306a36Sopenharmony_ci		else {		/* (xoff + xlen < teof) */
242762306a36Sopenharmony_ci
242862306a36Sopenharmony_ci			nextindex = index + 1;
242962306a36Sopenharmony_ci		}
243062306a36Sopenharmony_ci
243162306a36Sopenharmony_ci		if (nextindex < le16_to_cpu(p->header.nextindex)) {
243262306a36Sopenharmony_ci			if (!log) {	/* COMMIT_WAMP */
243362306a36Sopenharmony_ci				xadlock.xdlist = &p->xad[nextindex];
243462306a36Sopenharmony_ci				xadlock.count =
243562306a36Sopenharmony_ci				    le16_to_cpu(p->header.nextindex) -
243662306a36Sopenharmony_ci				    nextindex;
243762306a36Sopenharmony_ci				txFreeMap(ip, (struct maplock *) & xadlock,
243862306a36Sopenharmony_ci					  NULL, COMMIT_WMAP);
243962306a36Sopenharmony_ci			}
244062306a36Sopenharmony_ci			p->header.nextindex = cpu_to_le16(nextindex);
244162306a36Sopenharmony_ci		}
244262306a36Sopenharmony_ci
244362306a36Sopenharmony_ci		XT_PUTPAGE(mp);
244462306a36Sopenharmony_ci
244562306a36Sopenharmony_ci		/* assert(freed == 0); */
244662306a36Sopenharmony_ci		goto getParent;
244762306a36Sopenharmony_ci	}			/* end scan of leaf page entries */
244862306a36Sopenharmony_ci
244962306a36Sopenharmony_ci	freed = 1;
245062306a36Sopenharmony_ci
245162306a36Sopenharmony_ci	/*
245262306a36Sopenharmony_ci	 * leaf page become empty: free the page if type != PMAP
245362306a36Sopenharmony_ci	 */
245462306a36Sopenharmony_ci	if (log) {		/* COMMIT_PWMAP */
245562306a36Sopenharmony_ci		/* txCommit() with tlckFREE:
245662306a36Sopenharmony_ci		 * free data extents covered by leaf [XTENTRYSTART:hwm);
245762306a36Sopenharmony_ci		 * invalidate leaf if COMMIT_PWMAP;
245862306a36Sopenharmony_ci		 * if (TRUNCATE), will write LOG_NOREDOPAGE;
245962306a36Sopenharmony_ci		 */
246062306a36Sopenharmony_ci		tlck->type = tlckXTREE | tlckFREE;
246162306a36Sopenharmony_ci	} else {		/* COMMIT_WAMP */
246262306a36Sopenharmony_ci
246362306a36Sopenharmony_ci		/* free data extents covered by leaf */
246462306a36Sopenharmony_ci		xadlock.xdlist = &p->xad[XTENTRYSTART];
246562306a36Sopenharmony_ci		xadlock.count =
246662306a36Sopenharmony_ci		    le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
246762306a36Sopenharmony_ci		txFreeMap(ip, (struct maplock *) & xadlock, NULL, COMMIT_WMAP);
246862306a36Sopenharmony_ci	}
246962306a36Sopenharmony_ci
247062306a36Sopenharmony_ci	if (p->header.flag & BT_ROOT) {
247162306a36Sopenharmony_ci		p->header.flag &= ~BT_INTERNAL;
247262306a36Sopenharmony_ci		p->header.flag |= BT_LEAF;
247362306a36Sopenharmony_ci		p->header.nextindex = cpu_to_le16(XTENTRYSTART);
247462306a36Sopenharmony_ci
247562306a36Sopenharmony_ci		XT_PUTPAGE(mp);	/* debug */
247662306a36Sopenharmony_ci		goto out;
247762306a36Sopenharmony_ci	} else {
247862306a36Sopenharmony_ci		if (log) {	/* COMMIT_PWMAP */
247962306a36Sopenharmony_ci			/* page will be invalidated at tx completion
248062306a36Sopenharmony_ci			 */
248162306a36Sopenharmony_ci			XT_PUTPAGE(mp);
248262306a36Sopenharmony_ci		} else {	/* COMMIT_WMAP */
248362306a36Sopenharmony_ci
248462306a36Sopenharmony_ci			if (mp->lid)
248562306a36Sopenharmony_ci				lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;
248662306a36Sopenharmony_ci
248762306a36Sopenharmony_ci			/* invalidate empty leaf page */
248862306a36Sopenharmony_ci			discard_metapage(mp);
248962306a36Sopenharmony_ci		}
249062306a36Sopenharmony_ci	}
249162306a36Sopenharmony_ci
249262306a36Sopenharmony_ci	/*
249362306a36Sopenharmony_ci	 * the leaf page become empty: delete the parent entry
249462306a36Sopenharmony_ci	 * for the leaf page if the parent page is to be kept
249562306a36Sopenharmony_ci	 * in the new sized file.
249662306a36Sopenharmony_ci	 */
249762306a36Sopenharmony_ci
249862306a36Sopenharmony_ci	/*
249962306a36Sopenharmony_ci	 * go back up to the parent page
250062306a36Sopenharmony_ci	 */
250162306a36Sopenharmony_ci      getParent:
250262306a36Sopenharmony_ci	/* pop/restore parent entry for the current child page */
250362306a36Sopenharmony_ci	if ((parent = BT_POP(&btstack)) == NULL)
250462306a36Sopenharmony_ci		/* current page must have been root */
250562306a36Sopenharmony_ci		goto out;
250662306a36Sopenharmony_ci
250762306a36Sopenharmony_ci	/* get back the parent page */
250862306a36Sopenharmony_ci	bn = parent->bn;
250962306a36Sopenharmony_ci	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
251062306a36Sopenharmony_ci	if (rc)
251162306a36Sopenharmony_ci		return rc;
251262306a36Sopenharmony_ci
251362306a36Sopenharmony_ci	index = parent->index;
251462306a36Sopenharmony_ci
251562306a36Sopenharmony_ci	/*
251662306a36Sopenharmony_ci	 * child page was not empty:
251762306a36Sopenharmony_ci	 */
251862306a36Sopenharmony_ci	if (freed == 0) {
251962306a36Sopenharmony_ci		/* has any entry deleted from parent ? */
252062306a36Sopenharmony_ci		if (index < le16_to_cpu(p->header.nextindex) - 1) {
252162306a36Sopenharmony_ci			/* (re)acquire tlock on the parent page */
252262306a36Sopenharmony_ci			if (log) {	/* COMMIT_PWMAP */
252362306a36Sopenharmony_ci				/* txCommit() with tlckTRUNCATE:
252462306a36Sopenharmony_ci				 * free child extents covered by parent [);
252562306a36Sopenharmony_ci				 */
252662306a36Sopenharmony_ci				tlck = txLock(tid, ip, mp, tlckXTREE);
252762306a36Sopenharmony_ci				xtlck = (struct xtlock *) & tlck->lock;
252862306a36Sopenharmony_ci				if (!(tlck->type & tlckTRUNCATE)) {
252962306a36Sopenharmony_ci					xtlck->hwm.offset =
253062306a36Sopenharmony_ci					    le16_to_cpu(p->header.
253162306a36Sopenharmony_ci							nextindex) - 1;
253262306a36Sopenharmony_ci					tlck->type =
253362306a36Sopenharmony_ci					    tlckXTREE | tlckTRUNCATE;
253462306a36Sopenharmony_ci				}
253562306a36Sopenharmony_ci			} else {	/* COMMIT_WMAP */
253662306a36Sopenharmony_ci
253762306a36Sopenharmony_ci				/* free child extents covered by parent */
253862306a36Sopenharmony_ci				xadlock.xdlist = &p->xad[index + 1];
253962306a36Sopenharmony_ci				xadlock.count =
254062306a36Sopenharmony_ci				    le16_to_cpu(p->header.nextindex) -
254162306a36Sopenharmony_ci				    index - 1;
254262306a36Sopenharmony_ci				txFreeMap(ip, (struct maplock *) & xadlock,
254362306a36Sopenharmony_ci					  NULL, COMMIT_WMAP);
254462306a36Sopenharmony_ci			}
254562306a36Sopenharmony_ci			BT_MARK_DIRTY(mp, ip);
254662306a36Sopenharmony_ci
254762306a36Sopenharmony_ci			p->header.nextindex = cpu_to_le16(index + 1);
254862306a36Sopenharmony_ci		}
254962306a36Sopenharmony_ci		XT_PUTPAGE(mp);
255062306a36Sopenharmony_ci		goto getParent;
255162306a36Sopenharmony_ci	}
255262306a36Sopenharmony_ci
255362306a36Sopenharmony_ci	/*
255462306a36Sopenharmony_ci	 * child page was empty:
255562306a36Sopenharmony_ci	 */
255662306a36Sopenharmony_ci	nfreed += lengthXAD(&p->xad[index]);
255762306a36Sopenharmony_ci
255862306a36Sopenharmony_ci	/*
255962306a36Sopenharmony_ci	 * During working map update, child page's tlock must be handled
256062306a36Sopenharmony_ci	 * before parent's.  This is because the parent's tlock will cause
256162306a36Sopenharmony_ci	 * the child's disk space to be marked available in the wmap, so
256262306a36Sopenharmony_ci	 * it's important that the child page be released by that time.
256362306a36Sopenharmony_ci	 *
256462306a36Sopenharmony_ci	 * ToDo:  tlocks should be on doubly-linked list, so we can
256562306a36Sopenharmony_ci	 * quickly remove it and add it to the end.
256662306a36Sopenharmony_ci	 */
256762306a36Sopenharmony_ci
256862306a36Sopenharmony_ci	/*
256962306a36Sopenharmony_ci	 * Move parent page's tlock to the end of the tid's tlock list
257062306a36Sopenharmony_ci	 */
257162306a36Sopenharmony_ci	if (log && mp->lid && (tblk->last != mp->lid) &&
257262306a36Sopenharmony_ci	    lid_to_tlock(mp->lid)->tid) {
257362306a36Sopenharmony_ci		lid_t lid = mp->lid;
257462306a36Sopenharmony_ci		struct tlock *prev;
257562306a36Sopenharmony_ci
257662306a36Sopenharmony_ci		tlck = lid_to_tlock(lid);
257762306a36Sopenharmony_ci
257862306a36Sopenharmony_ci		if (tblk->next == lid)
257962306a36Sopenharmony_ci			tblk->next = tlck->next;
258062306a36Sopenharmony_ci		else {
258162306a36Sopenharmony_ci			for (prev = lid_to_tlock(tblk->next);
258262306a36Sopenharmony_ci			     prev->next != lid;
258362306a36Sopenharmony_ci			     prev = lid_to_tlock(prev->next)) {
258462306a36Sopenharmony_ci				assert(prev->next);
258562306a36Sopenharmony_ci			}
258662306a36Sopenharmony_ci			prev->next = tlck->next;
258762306a36Sopenharmony_ci		}
258862306a36Sopenharmony_ci		lid_to_tlock(tblk->last)->next = lid;
258962306a36Sopenharmony_ci		tlck->next = 0;
259062306a36Sopenharmony_ci		tblk->last = lid;
259162306a36Sopenharmony_ci	}
259262306a36Sopenharmony_ci
259362306a36Sopenharmony_ci	/*
259462306a36Sopenharmony_ci	 * parent page become empty: free the page
259562306a36Sopenharmony_ci	 */
259662306a36Sopenharmony_ci	if (index == XTENTRYSTART) {
259762306a36Sopenharmony_ci		if (log) {	/* COMMIT_PWMAP */
259862306a36Sopenharmony_ci			/* txCommit() with tlckFREE:
259962306a36Sopenharmony_ci			 * free child extents covered by parent;
260062306a36Sopenharmony_ci			 * invalidate parent if COMMIT_PWMAP;
260162306a36Sopenharmony_ci			 */
260262306a36Sopenharmony_ci			tlck = txLock(tid, ip, mp, tlckXTREE);
260362306a36Sopenharmony_ci			xtlck = (struct xtlock *) & tlck->lock;
260462306a36Sopenharmony_ci			xtlck->hwm.offset =
260562306a36Sopenharmony_ci			    le16_to_cpu(p->header.nextindex) - 1;
260662306a36Sopenharmony_ci			tlck->type = tlckXTREE | tlckFREE;
260762306a36Sopenharmony_ci		} else {	/* COMMIT_WMAP */
260862306a36Sopenharmony_ci
260962306a36Sopenharmony_ci			/* free child extents covered by parent */
261062306a36Sopenharmony_ci			xadlock.xdlist = &p->xad[XTENTRYSTART];
261162306a36Sopenharmony_ci			xadlock.count =
261262306a36Sopenharmony_ci			    le16_to_cpu(p->header.nextindex) -
261362306a36Sopenharmony_ci			    XTENTRYSTART;
261462306a36Sopenharmony_ci			txFreeMap(ip, (struct maplock *) & xadlock, NULL,
261562306a36Sopenharmony_ci				  COMMIT_WMAP);
261662306a36Sopenharmony_ci		}
261762306a36Sopenharmony_ci		BT_MARK_DIRTY(mp, ip);
261862306a36Sopenharmony_ci
261962306a36Sopenharmony_ci		if (p->header.flag & BT_ROOT) {
262062306a36Sopenharmony_ci			p->header.flag &= ~BT_INTERNAL;
262162306a36Sopenharmony_ci			p->header.flag |= BT_LEAF;
262262306a36Sopenharmony_ci			p->header.nextindex = cpu_to_le16(XTENTRYSTART);
262362306a36Sopenharmony_ci			if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
262462306a36Sopenharmony_ci				/*
262562306a36Sopenharmony_ci				 * Shrink root down to allow inline
262662306a36Sopenharmony_ci				 * EA (otherwise fsck complains)
262762306a36Sopenharmony_ci				 */
262862306a36Sopenharmony_ci				p->header.maxentry =
262962306a36Sopenharmony_ci				    cpu_to_le16(XTROOTINITSLOT);
263062306a36Sopenharmony_ci				JFS_IP(ip)->mode2 |= INLINEEA;
263162306a36Sopenharmony_ci			}
263262306a36Sopenharmony_ci
263362306a36Sopenharmony_ci			XT_PUTPAGE(mp);	/* debug */
263462306a36Sopenharmony_ci			goto out;
263562306a36Sopenharmony_ci		} else {
263662306a36Sopenharmony_ci			if (log) {	/* COMMIT_PWMAP */
263762306a36Sopenharmony_ci				/* page will be invalidated at tx completion
263862306a36Sopenharmony_ci				 */
263962306a36Sopenharmony_ci				XT_PUTPAGE(mp);
264062306a36Sopenharmony_ci			} else {	/* COMMIT_WMAP */
264162306a36Sopenharmony_ci
264262306a36Sopenharmony_ci				if (mp->lid)
264362306a36Sopenharmony_ci					lid_to_tlock(mp->lid)->flag |=
264462306a36Sopenharmony_ci						tlckFREELOCK;
264562306a36Sopenharmony_ci
264662306a36Sopenharmony_ci				/* invalidate parent page */
264762306a36Sopenharmony_ci				discard_metapage(mp);
264862306a36Sopenharmony_ci			}
264962306a36Sopenharmony_ci
265062306a36Sopenharmony_ci			/* parent has become empty and freed:
265162306a36Sopenharmony_ci			 * go back up to its parent page
265262306a36Sopenharmony_ci			 */
265362306a36Sopenharmony_ci			/* freed = 1; */
265462306a36Sopenharmony_ci			goto getParent;
265562306a36Sopenharmony_ci		}
265662306a36Sopenharmony_ci	}
265762306a36Sopenharmony_ci	/*
265862306a36Sopenharmony_ci	 * parent page still has entries for front region;
265962306a36Sopenharmony_ci	 */
266062306a36Sopenharmony_ci	else {
266162306a36Sopenharmony_ci		/* try truncate region covered by preceding entry
266262306a36Sopenharmony_ci		 * (process backward)
266362306a36Sopenharmony_ci		 */
266462306a36Sopenharmony_ci		index--;
266562306a36Sopenharmony_ci
266662306a36Sopenharmony_ci		/* go back down to the child page corresponding
266762306a36Sopenharmony_ci		 * to the entry
266862306a36Sopenharmony_ci		 */
266962306a36Sopenharmony_ci		goto getChild;
267062306a36Sopenharmony_ci	}
267162306a36Sopenharmony_ci
267262306a36Sopenharmony_ci	/*
267362306a36Sopenharmony_ci	 *	internal page: go down to child page of current entry
267462306a36Sopenharmony_ci	 */
267562306a36Sopenharmony_ci      getChild:
267662306a36Sopenharmony_ci	/* save current parent entry for the child page */
267762306a36Sopenharmony_ci	if (BT_STACK_FULL(&btstack)) {
267862306a36Sopenharmony_ci		jfs_error(ip->i_sb, "stack overrun!\n");
267962306a36Sopenharmony_ci		XT_PUTPAGE(mp);
268062306a36Sopenharmony_ci		return -EIO;
268162306a36Sopenharmony_ci	}
268262306a36Sopenharmony_ci	BT_PUSH(&btstack, bn, index);
268362306a36Sopenharmony_ci
268462306a36Sopenharmony_ci	/* get child page */
268562306a36Sopenharmony_ci	xad = &p->xad[index];
268662306a36Sopenharmony_ci	bn = addressXAD(xad);
268762306a36Sopenharmony_ci
268862306a36Sopenharmony_ci	/*
268962306a36Sopenharmony_ci	 * first access of each internal entry:
269062306a36Sopenharmony_ci	 */
269162306a36Sopenharmony_ci	/* release parent page */
269262306a36Sopenharmony_ci	XT_PUTPAGE(mp);
269362306a36Sopenharmony_ci
269462306a36Sopenharmony_ci	/* process the child page */
269562306a36Sopenharmony_ci	goto getPage;
269662306a36Sopenharmony_ci
269762306a36Sopenharmony_ci      out:
269862306a36Sopenharmony_ci	/*
269962306a36Sopenharmony_ci	 * update file resource stat
270062306a36Sopenharmony_ci	 */
270162306a36Sopenharmony_ci	/* set size
270262306a36Sopenharmony_ci	 */
270362306a36Sopenharmony_ci	if (S_ISDIR(ip->i_mode) && !newsize)
270462306a36Sopenharmony_ci		ip->i_size = 1;	/* fsck hates zero-length directories */
270562306a36Sopenharmony_ci	else
270662306a36Sopenharmony_ci		ip->i_size = newsize;
270762306a36Sopenharmony_ci
270862306a36Sopenharmony_ci	/* update quota allocation to reflect freed blocks */
270962306a36Sopenharmony_ci	dquot_free_block(ip, nfreed);
271062306a36Sopenharmony_ci
271162306a36Sopenharmony_ci	/*
271262306a36Sopenharmony_ci	 * free tlock of invalidated pages
271362306a36Sopenharmony_ci	 */
271462306a36Sopenharmony_ci	if (flag == COMMIT_WMAP)
271562306a36Sopenharmony_ci		txFreelock(ip);
271662306a36Sopenharmony_ci
271762306a36Sopenharmony_ci	return newsize;
271862306a36Sopenharmony_ci}
271962306a36Sopenharmony_ci
272062306a36Sopenharmony_ci
272162306a36Sopenharmony_ci/*
272262306a36Sopenharmony_ci *	xtTruncate_pmap()
272362306a36Sopenharmony_ci *
272462306a36Sopenharmony_ci * function:
272562306a36Sopenharmony_ci *	Perform truncate to zero length for deleted file, leaving the
272662306a36Sopenharmony_ci *	xtree and working map untouched.  This allows the file to
272762306a36Sopenharmony_ci *	be accessed via open file handles, while the delete of the file
272862306a36Sopenharmony_ci *	is committed to disk.
272962306a36Sopenharmony_ci *
273062306a36Sopenharmony_ci * parameter:
273162306a36Sopenharmony_ci *	tid_t		tid,
273262306a36Sopenharmony_ci *	struct inode	*ip,
273362306a36Sopenharmony_ci *	s64		committed_size)
273462306a36Sopenharmony_ci *
273562306a36Sopenharmony_ci * return: new committed size
273662306a36Sopenharmony_ci *
273762306a36Sopenharmony_ci * note:
273862306a36Sopenharmony_ci *
273962306a36Sopenharmony_ci *	To avoid deadlock by holding too many transaction locks, the
274062306a36Sopenharmony_ci *	truncation may be broken up into multiple transactions.
274162306a36Sopenharmony_ci *	The committed_size keeps track of part of the file has been
274262306a36Sopenharmony_ci *	freed from the pmaps.
274362306a36Sopenharmony_ci */
274462306a36Sopenharmony_cis64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
274562306a36Sopenharmony_ci{
274662306a36Sopenharmony_ci	s64 bn;
274762306a36Sopenharmony_ci	struct btstack btstack;
274862306a36Sopenharmony_ci	int cmp;
274962306a36Sopenharmony_ci	int index;
275062306a36Sopenharmony_ci	int locked_leaves = 0;
275162306a36Sopenharmony_ci	struct metapage *mp;
275262306a36Sopenharmony_ci	xtpage_t *p;
275362306a36Sopenharmony_ci	struct btframe *parent;
275462306a36Sopenharmony_ci	int rc;
275562306a36Sopenharmony_ci	struct tblock *tblk;
275662306a36Sopenharmony_ci	struct tlock *tlck = NULL;
275762306a36Sopenharmony_ci	xad_t *xad;
275862306a36Sopenharmony_ci	int xlen;
275962306a36Sopenharmony_ci	s64 xoff;
276062306a36Sopenharmony_ci	struct xtlock *xtlck = NULL;
276162306a36Sopenharmony_ci
276262306a36Sopenharmony_ci	/* save object truncation type */
276362306a36Sopenharmony_ci	tblk = tid_to_tblock(tid);
276462306a36Sopenharmony_ci	tblk->xflag |= COMMIT_PMAP;
276562306a36Sopenharmony_ci
276662306a36Sopenharmony_ci	/* clear stack */
276762306a36Sopenharmony_ci	BT_CLR(&btstack);
276862306a36Sopenharmony_ci
276962306a36Sopenharmony_ci	if (committed_size) {
277062306a36Sopenharmony_ci		xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
277162306a36Sopenharmony_ci		rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
277262306a36Sopenharmony_ci		if (rc)
277362306a36Sopenharmony_ci			return rc;
277462306a36Sopenharmony_ci
277562306a36Sopenharmony_ci		XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
277662306a36Sopenharmony_ci
277762306a36Sopenharmony_ci		if (cmp != 0) {
277862306a36Sopenharmony_ci			XT_PUTPAGE(mp);
277962306a36Sopenharmony_ci			jfs_error(ip->i_sb, "did not find extent\n");
278062306a36Sopenharmony_ci			return -EIO;
278162306a36Sopenharmony_ci		}
278262306a36Sopenharmony_ci	} else {
278362306a36Sopenharmony_ci		/*
278462306a36Sopenharmony_ci		 * start with root
278562306a36Sopenharmony_ci		 *
278662306a36Sopenharmony_ci		 * root resides in the inode
278762306a36Sopenharmony_ci		 */
278862306a36Sopenharmony_ci		bn = 0;
278962306a36Sopenharmony_ci
279062306a36Sopenharmony_ci		/*
279162306a36Sopenharmony_ci		 * first access of each page:
279262306a36Sopenharmony_ci		 */
279362306a36Sopenharmony_ci      getPage:
279462306a36Sopenharmony_ci		XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
279562306a36Sopenharmony_ci		if (rc)
279662306a36Sopenharmony_ci			return rc;
279762306a36Sopenharmony_ci
279862306a36Sopenharmony_ci		/* process entries backward from last index */
279962306a36Sopenharmony_ci		index = le16_to_cpu(p->header.nextindex) - 1;
280062306a36Sopenharmony_ci
280162306a36Sopenharmony_ci		if (p->header.flag & BT_INTERNAL)
280262306a36Sopenharmony_ci			goto getChild;
280362306a36Sopenharmony_ci	}
280462306a36Sopenharmony_ci
280562306a36Sopenharmony_ci	/*
280662306a36Sopenharmony_ci	 *	leaf page
280762306a36Sopenharmony_ci	 */
280862306a36Sopenharmony_ci
280962306a36Sopenharmony_ci	if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
281062306a36Sopenharmony_ci		/*
281162306a36Sopenharmony_ci		 * We need to limit the size of the transaction
281262306a36Sopenharmony_ci		 * to avoid exhausting pagecache & tlocks
281362306a36Sopenharmony_ci		 */
281462306a36Sopenharmony_ci		xad = &p->xad[index];
281562306a36Sopenharmony_ci		xoff = offsetXAD(xad);
281662306a36Sopenharmony_ci		xlen = lengthXAD(xad);
281762306a36Sopenharmony_ci		XT_PUTPAGE(mp);
281862306a36Sopenharmony_ci		return (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
281962306a36Sopenharmony_ci	}
282062306a36Sopenharmony_ci	tlck = txLock(tid, ip, mp, tlckXTREE);
282162306a36Sopenharmony_ci	tlck->type = tlckXTREE | tlckFREE;
282262306a36Sopenharmony_ci	xtlck = (struct xtlock *) & tlck->lock;
282362306a36Sopenharmony_ci	xtlck->hwm.offset = index;
282462306a36Sopenharmony_ci
282562306a36Sopenharmony_ci
282662306a36Sopenharmony_ci	XT_PUTPAGE(mp);
282762306a36Sopenharmony_ci
282862306a36Sopenharmony_ci	/*
282962306a36Sopenharmony_ci	 * go back up to the parent page
283062306a36Sopenharmony_ci	 */
283162306a36Sopenharmony_ci      getParent:
283262306a36Sopenharmony_ci	/* pop/restore parent entry for the current child page */
283362306a36Sopenharmony_ci	if ((parent = BT_POP(&btstack)) == NULL)
283462306a36Sopenharmony_ci		/* current page must have been root */
283562306a36Sopenharmony_ci		goto out;
283662306a36Sopenharmony_ci
283762306a36Sopenharmony_ci	/* get back the parent page */
283862306a36Sopenharmony_ci	bn = parent->bn;
283962306a36Sopenharmony_ci	XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
284062306a36Sopenharmony_ci	if (rc)
284162306a36Sopenharmony_ci		return rc;
284262306a36Sopenharmony_ci
284362306a36Sopenharmony_ci	index = parent->index;
284462306a36Sopenharmony_ci
284562306a36Sopenharmony_ci	/*
284662306a36Sopenharmony_ci	 * parent page become empty: free the page
284762306a36Sopenharmony_ci	 */
284862306a36Sopenharmony_ci	if (index == XTENTRYSTART) {
284962306a36Sopenharmony_ci		/* txCommit() with tlckFREE:
285062306a36Sopenharmony_ci		 * free child extents covered by parent;
285162306a36Sopenharmony_ci		 * invalidate parent if COMMIT_PWMAP;
285262306a36Sopenharmony_ci		 */
285362306a36Sopenharmony_ci		tlck = txLock(tid, ip, mp, tlckXTREE);
285462306a36Sopenharmony_ci		xtlck = (struct xtlock *) & tlck->lock;
285562306a36Sopenharmony_ci		xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
285662306a36Sopenharmony_ci		tlck->type = tlckXTREE | tlckFREE;
285762306a36Sopenharmony_ci
285862306a36Sopenharmony_ci		XT_PUTPAGE(mp);
285962306a36Sopenharmony_ci
286062306a36Sopenharmony_ci		if (p->header.flag & BT_ROOT) {
286162306a36Sopenharmony_ci
286262306a36Sopenharmony_ci			goto out;
286362306a36Sopenharmony_ci		} else {
286462306a36Sopenharmony_ci			goto getParent;
286562306a36Sopenharmony_ci		}
286662306a36Sopenharmony_ci	}
286762306a36Sopenharmony_ci	/*
286862306a36Sopenharmony_ci	 * parent page still has entries for front region;
286962306a36Sopenharmony_ci	 */
287062306a36Sopenharmony_ci	else
287162306a36Sopenharmony_ci		index--;
287262306a36Sopenharmony_ci	/*
287362306a36Sopenharmony_ci	 *	internal page: go down to child page of current entry
287462306a36Sopenharmony_ci	 */
287562306a36Sopenharmony_ci      getChild:
287662306a36Sopenharmony_ci	/* save current parent entry for the child page */
287762306a36Sopenharmony_ci	if (BT_STACK_FULL(&btstack)) {
287862306a36Sopenharmony_ci		jfs_error(ip->i_sb, "stack overrun!\n");
287962306a36Sopenharmony_ci		XT_PUTPAGE(mp);
288062306a36Sopenharmony_ci		return -EIO;
288162306a36Sopenharmony_ci	}
288262306a36Sopenharmony_ci	BT_PUSH(&btstack, bn, index);
288362306a36Sopenharmony_ci
288462306a36Sopenharmony_ci	/* get child page */
288562306a36Sopenharmony_ci	xad = &p->xad[index];
288662306a36Sopenharmony_ci	bn = addressXAD(xad);
288762306a36Sopenharmony_ci
288862306a36Sopenharmony_ci	/*
288962306a36Sopenharmony_ci	 * first access of each internal entry:
289062306a36Sopenharmony_ci	 */
289162306a36Sopenharmony_ci	/* release parent page */
289262306a36Sopenharmony_ci	XT_PUTPAGE(mp);
289362306a36Sopenharmony_ci
289462306a36Sopenharmony_ci	/* process the child page */
289562306a36Sopenharmony_ci	goto getPage;
289662306a36Sopenharmony_ci
289762306a36Sopenharmony_ci      out:
289862306a36Sopenharmony_ci
289962306a36Sopenharmony_ci	return 0;
290062306a36Sopenharmony_ci}
290162306a36Sopenharmony_ci
290262306a36Sopenharmony_ci#ifdef CONFIG_JFS_STATISTICS
290362306a36Sopenharmony_ciint jfs_xtstat_proc_show(struct seq_file *m, void *v)
290462306a36Sopenharmony_ci{
290562306a36Sopenharmony_ci	seq_printf(m,
290662306a36Sopenharmony_ci		       "JFS Xtree statistics\n"
290762306a36Sopenharmony_ci		       "====================\n"
290862306a36Sopenharmony_ci		       "searches = %d\n"
290962306a36Sopenharmony_ci		       "fast searches = %d\n"
291062306a36Sopenharmony_ci		       "splits = %d\n",
291162306a36Sopenharmony_ci		       xtStat.search,
291262306a36Sopenharmony_ci		       xtStat.fastSearch,
291362306a36Sopenharmony_ci		       xtStat.split);
291462306a36Sopenharmony_ci	return 0;
291562306a36Sopenharmony_ci}
291662306a36Sopenharmony_ci#endif
2917