xref: /kernel/linux/linux-5.10/fs/nilfs2/btree.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * btree.c - NILFS B-tree.
4 *
5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
6 *
7 * Written by Koji Sato.
8 */
9
10#include <linux/slab.h>
11#include <linux/string.h>
12#include <linux/errno.h>
13#include <linux/pagevec.h>
14#include "nilfs.h"
15#include "page.h"
16#include "btnode.h"
17#include "btree.h"
18#include "alloc.h"
19#include "dat.h"
20
21static void __nilfs_btree_init(struct nilfs_bmap *bmap);
22
23static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
24{
25	struct nilfs_btree_path *path;
26	int level = NILFS_BTREE_LEVEL_DATA;
27
28	path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
29	if (path == NULL)
30		goto out;
31
32	for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
33		path[level].bp_bh = NULL;
34		path[level].bp_sib_bh = NULL;
35		path[level].bp_index = 0;
36		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
37		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
38		path[level].bp_op = NULL;
39	}
40
41out:
42	return path;
43}
44
45static void nilfs_btree_free_path(struct nilfs_btree_path *path)
46{
47	int level = NILFS_BTREE_LEVEL_DATA;
48
49	for (; level < NILFS_BTREE_LEVEL_MAX; level++)
50		brelse(path[level].bp_bh);
51
52	kmem_cache_free(nilfs_btree_path_cache, path);
53}
54
55/*
56 * B-tree node operations
57 */
58static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
59				     __u64 ptr, struct buffer_head **bhp)
60{
61	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
62	struct address_space *btnc = btnc_inode->i_mapping;
63	struct buffer_head *bh;
64
65	bh = nilfs_btnode_create_block(btnc, ptr);
66	if (!bh)
67		return -ENOMEM;
68
69	set_buffer_nilfs_volatile(bh);
70	*bhp = bh;
71	return 0;
72}
73
74static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
75{
76	return node->bn_flags;
77}
78
79static void
80nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
81{
82	node->bn_flags = flags;
83}
84
85static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
86{
87	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
88}
89
90static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
91{
92	return node->bn_level;
93}
94
95static void
96nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
97{
98	node->bn_level = level;
99}
100
101static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
102{
103	return le16_to_cpu(node->bn_nchildren);
104}
105
106static void
107nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
108{
109	node->bn_nchildren = cpu_to_le16(nchildren);
110}
111
112static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
113{
114	return i_blocksize(btree->b_inode);
115}
116
117static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
118{
119	return btree->b_nchildren_per_block;
120}
121
122static __le64 *
123nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
124{
125	return (__le64 *)((char *)(node + 1) +
126			  (nilfs_btree_node_root(node) ?
127			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
128}
129
130static __le64 *
131nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
132{
133	return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
134}
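/*
 * Node layout implied by the two accessors above: a node block starts
 * with the nilfs_btree_node header, followed (for non-root nodes) by
 * NILFS_BTREE_NODE_EXTRA_PAD_SIZE of padding, then an array of up to
 * ncmax 64-bit keys and an array of up to ncmax 64-bit pointers:
 *
 *   +--------+-------+-----------------+-----------------+
 *   | header | (pad) |  keys[0..ncmax) |  ptrs[0..ncmax) |
 *   +--------+-------+-----------------+-----------------+
 */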
135
136static __u64
137nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
138{
139	return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
140}
141
142static void
143nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
144{
145	*(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
146}
147
148static __u64
149nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
150			 int ncmax)
151{
152	return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
153}
154
155static void
156nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
157			 int ncmax)
158{
159	*(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
160}
161
162static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
163				  int level, int nchildren, int ncmax,
164				  const __u64 *keys, const __u64 *ptrs)
165{
166	__le64 *dkeys;
167	__le64 *dptrs;
168	int i;
169
170	nilfs_btree_node_set_flags(node, flags);
171	nilfs_btree_node_set_level(node, level);
172	nilfs_btree_node_set_nchildren(node, nchildren);
173
174	dkeys = nilfs_btree_node_dkeys(node);
175	dptrs = nilfs_btree_node_dptrs(node, ncmax);
176	for (i = 0; i < nchildren; i++) {
177		dkeys[i] = cpu_to_le64(keys[i]);
178		dptrs[i] = cpu_to_le64(ptrs[i]);
179	}
180}
181
182/* Assume that the buffer heads corresponding to left and right are locked. */
183static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
184				       struct nilfs_btree_node *right,
185				       int n, int lncmax, int rncmax)
186{
187	__le64 *ldkeys, *rdkeys;
188	__le64 *ldptrs, *rdptrs;
189	int lnchildren, rnchildren;
190
191	ldkeys = nilfs_btree_node_dkeys(left);
192	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
193	lnchildren = nilfs_btree_node_get_nchildren(left);
194
195	rdkeys = nilfs_btree_node_dkeys(right);
196	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
197	rnchildren = nilfs_btree_node_get_nchildren(right);
198
199	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
200	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
201	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
202	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
203
204	lnchildren += n;
205	rnchildren -= n;
206	nilfs_btree_node_set_nchildren(left, lnchildren);
207	nilfs_btree_node_set_nchildren(right, rnchildren);
208}
209
210/* Assume that the buffer heads corresponding to left and right are locked. */
211static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
212					struct nilfs_btree_node *right,
213					int n, int lncmax, int rncmax)
214{
215	__le64 *ldkeys, *rdkeys;
216	__le64 *ldptrs, *rdptrs;
217	int lnchildren, rnchildren;
218
219	ldkeys = nilfs_btree_node_dkeys(left);
220	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
221	lnchildren = nilfs_btree_node_get_nchildren(left);
222
223	rdkeys = nilfs_btree_node_dkeys(right);
224	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
225	rnchildren = nilfs_btree_node_get_nchildren(right);
226
227	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
228	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
229	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
230	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
231
232	lnchildren -= n;
233	rnchildren += n;
234	nilfs_btree_node_set_nchildren(left, lnchildren);
235	nilfs_btree_node_set_nchildren(right, rnchildren);
236}
237
238/* Assume that the buffer head corresponding to node is locked. */
239static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
240				    __u64 key, __u64 ptr, int ncmax)
241{
242	__le64 *dkeys;
243	__le64 *dptrs;
244	int nchildren;
245
246	dkeys = nilfs_btree_node_dkeys(node);
247	dptrs = nilfs_btree_node_dptrs(node, ncmax);
248	nchildren = nilfs_btree_node_get_nchildren(node);
249	if (index < nchildren) {
250		memmove(dkeys + index + 1, dkeys + index,
251			(nchildren - index) * sizeof(*dkeys));
252		memmove(dptrs + index + 1, dptrs + index,
253			(nchildren - index) * sizeof(*dptrs));
254	}
255	dkeys[index] = cpu_to_le64(key);
256	dptrs[index] = cpu_to_le64(ptr);
257	nchildren++;
258	nilfs_btree_node_set_nchildren(node, nchildren);
259}
260
261/* Assume that the buffer head corresponding to node is locked. */
262static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
263				    __u64 *keyp, __u64 *ptrp, int ncmax)
264{
265	__u64 key;
266	__u64 ptr;
267	__le64 *dkeys;
268	__le64 *dptrs;
269	int nchildren;
270
271	dkeys = nilfs_btree_node_dkeys(node);
272	dptrs = nilfs_btree_node_dptrs(node, ncmax);
273	key = le64_to_cpu(dkeys[index]);
274	ptr = le64_to_cpu(dptrs[index]);
275	nchildren = nilfs_btree_node_get_nchildren(node);
276	if (keyp != NULL)
277		*keyp = key;
278	if (ptrp != NULL)
279		*ptrp = ptr;
280
281	if (index < nchildren - 1) {
282		memmove(dkeys + index, dkeys + index + 1,
283			(nchildren - index - 1) * sizeof(*dkeys));
284		memmove(dptrs + index, dptrs + index + 1,
285			(nchildren - index - 1) * sizeof(*dptrs));
286	}
287	nchildren--;
288	nilfs_btree_node_set_nchildren(node, nchildren);
289}
290
291static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
292				   __u64 key, int *indexp)
293{
294	__u64 nkey;
295	int index, low, high, s;
296
297	/* binary search */
298	low = 0;
299	high = nilfs_btree_node_get_nchildren(node) - 1;
300	index = 0;
301	s = 0;
302	while (low <= high) {
303		index = (low + high) / 2;
304		nkey = nilfs_btree_node_get_key(node, index);
305		if (nkey == key) {
306			s = 0;
307			goto out;
308		} else if (nkey < key) {
309			low = index + 1;
310			s = -1;
311		} else {
312			high = index - 1;
313			s = 1;
314		}
315	}
316
317	/*
	 * adjust index: interior levels step back to the child covering the
	 * key (s > 0); the bottom level steps forward to the insertion
	 * point (s < 0).
	 */
318	if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
319		if (s > 0 && index > 0)
320			index--;
321	} else if (s < 0)
322		index++;
323
324 out:
325	*indexp = index;
326
327	return s == 0;
328}
329
330/**
331 * nilfs_btree_node_broken - verify consistency of btree node
332 * @node: btree node block to be examined
333 * @size: node size (in bytes)
334 * @inode: host inode of btree
335 * @blocknr: block number
336 *
337 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
338 */
339static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
340				   size_t size, struct inode *inode,
341				   sector_t blocknr)
342{
343	int level, flags, nchildren;
344	int ret = 0;
345
346	level = nilfs_btree_node_get_level(node);
347	flags = nilfs_btree_node_get_flags(node);
348	nchildren = nilfs_btree_node_get_nchildren(node);
349
350	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
351		     level >= NILFS_BTREE_LEVEL_MAX ||
352		     (flags & NILFS_BTREE_NODE_ROOT) ||
353		     nchildren < 0 ||
354		     nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
355		nilfs_crit(inode->i_sb,
356			   "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
357			   inode->i_ino, (unsigned long long)blocknr, level,
358			   flags, nchildren);
359		ret = 1;
360	}
361	return ret;
362}
363
364/**
365 * nilfs_btree_root_broken - verify consistency of btree root node
366 * @node: btree root node to be examined
367 * @inode: host inode of btree
368 *
369 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
370 */
371static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
372				   struct inode *inode)
373{
374	int level, flags, nchildren;
375	int ret = 0;
376
377	level = nilfs_btree_node_get_level(node);
378	flags = nilfs_btree_node_get_flags(node);
379	nchildren = nilfs_btree_node_get_nchildren(node);
380
381	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
382		     level >= NILFS_BTREE_LEVEL_MAX ||
383		     nchildren < 0 ||
384		     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
385		nilfs_crit(inode->i_sb,
386			   "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
387			   inode->i_ino, level, flags, nchildren);
388		ret = 1;
389	}
390	return ret;
391}
392
393int nilfs_btree_broken_node_block(struct buffer_head *bh)
394{
395	struct inode *inode;
396	int ret;
397
398	if (buffer_nilfs_checked(bh))
399		return 0;
400
401	inode = bh->b_page->mapping->host;
402	ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
403				      bh->b_size, inode, bh->b_blocknr);
404	if (likely(!ret))
405		set_buffer_nilfs_checked(bh);
406	return ret;
407}
408
409static struct nilfs_btree_node *
410nilfs_btree_get_root(const struct nilfs_bmap *btree)
411{
412	return (struct nilfs_btree_node *)btree->b_u.u_data;
413}
414
415static struct nilfs_btree_node *
416nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
417{
418	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
419}
420
421static struct nilfs_btree_node *
422nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
423{
424	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
425}
426
427static int nilfs_btree_height(const struct nilfs_bmap *btree)
428{
429	return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
430}
431
432static struct nilfs_btree_node *
433nilfs_btree_get_node(const struct nilfs_bmap *btree,
434		     const struct nilfs_btree_path *path,
435		     int level, int *ncmaxp)
436{
437	struct nilfs_btree_node *node;
438
439	if (level == nilfs_btree_height(btree) - 1) {
440		node = nilfs_btree_get_root(btree);
441		*ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
442	} else {
443		node = nilfs_btree_get_nonroot_node(path, level);
444		*ncmaxp = nilfs_btree_nchildren_per_block(btree);
445	}
446	return node;
447}
448
449static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
450				struct nilfs_btree_node *node, int level)
451{
452	if (unlikely(nilfs_btree_node_get_level(node) != level)) {
453		dump_stack();
454		nilfs_crit(btree->b_inode->i_sb,
455			   "btree level mismatch (ino=%lu): %d != %d",
456			   btree->b_inode->i_ino,
457			   nilfs_btree_node_get_level(node), level);
458		return 1;
459	}
460	return 0;
461}
462
463struct nilfs_btree_readahead_info {
464	struct nilfs_btree_node *node;	/* parent node */
465	int max_ra_blocks;		/* max number of blocks to read ahead */
466	int index;			/* current index on the parent node */
467	int ncmax;			/* number of children in the parent node */
468};
469
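/*
 * __nilfs_btree_get_block() reads the node block at @ptr through the
 * btnode cache.  When readahead information @ra is given, up to
 * ra->max_ra_blocks sibling blocks following @ptr in the parent node are
 * submitted with REQ_RAHEAD while the target buffer is still in flight.
 * The returned buffer is verified with nilfs_btree_broken_node_block()
 * unless it has already been checked.
 */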
470static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
471				   struct buffer_head **bhp,
472				   const struct nilfs_btree_readahead_info *ra)
473{
474	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
475	struct address_space *btnc = btnc_inode->i_mapping;
476	struct buffer_head *bh, *ra_bh;
477	sector_t submit_ptr = 0;
478	int ret;
479
480	ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh,
481					&submit_ptr);
482	if (ret) {
483		if (likely(ret == -EEXIST))
484			goto out_check;
485		if (ret == -ENOENT) {
486			/*
487			 * Block address translation failed due to invalid
488			 * value of 'ptr'.  In this case, return internal code
489			 * -EINVAL (broken bmap) to notify bmap layer of fatal
490			 * metadata corruption.
491			 */
492			ret = -EINVAL;
493		}
494		return ret;
495	}
496
497	if (ra) {
498		int i, n;
499		__u64 ptr2;
500
501		/* read ahead sibling nodes */
502		for (n = ra->max_ra_blocks, i = ra->index + 1;
503		     n > 0 && i < ra->ncmax; n--, i++) {
504			ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
505
506			ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
507							REQ_OP_READ, REQ_RAHEAD,
508							&ra_bh, &submit_ptr);
509			if (likely(!ret || ret == -EEXIST))
510				brelse(ra_bh);
511			else if (ret != -EBUSY)
512				break;
513			if (!buffer_locked(bh))
514				goto out_no_wait;
515		}
516	}
517
518	wait_on_buffer(bh);
519
520 out_no_wait:
521	if (!buffer_uptodate(bh)) {
522		nilfs_err(btree->b_inode->i_sb,
523			  "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
524			  btree->b_inode->i_ino, (unsigned long long)ptr);
525		brelse(bh);
526		return -EIO;
527	}
528
529 out_check:
530	if (nilfs_btree_broken_node_block(bh)) {
531		clear_buffer_uptodate(bh);
532		brelse(bh);
533		return -EINVAL;
534	}
535
536	*bhp = bh;
537	return 0;
538}
539
540static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
541				   struct buffer_head **bhp)
542{
543	return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
544}
545
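/*
 * nilfs_btree_do_lookup() walks the tree from the root down to @minlevel,
 * recording the buffer head and child index visited at each level in
 * @path.  When @readahead is set, neighbouring bottom-level nodes are
 * read ahead during the descent.  On success the pointer found for @key
 * is stored in *@ptrp; -ENOENT is returned if the key is not present.
 */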
546static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
547				 struct nilfs_btree_path *path,
548				 __u64 key, __u64 *ptrp, int minlevel,
549				 int readahead)
550{
551	struct nilfs_btree_node *node;
552	struct nilfs_btree_readahead_info p, *ra;
553	__u64 ptr;
554	int level, index, found, ncmax, ret;
555
556	node = nilfs_btree_get_root(btree);
557	level = nilfs_btree_node_get_level(node);
558	if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
559		return -ENOENT;
560
561	found = nilfs_btree_node_lookup(node, key, &index);
562	ptr = nilfs_btree_node_get_ptr(node, index,
563				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
564	path[level].bp_bh = NULL;
565	path[level].bp_index = index;
566
567	ncmax = nilfs_btree_nchildren_per_block(btree);
568
569	while (--level >= minlevel) {
570		ra = NULL;
571		if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
572			p.node = nilfs_btree_get_node(btree, path, level + 1,
573						      &p.ncmax);
574			p.index = index;
575			p.max_ra_blocks = 7;
576			ra = &p;
577		}
578		ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
579					      ra);
580		if (ret < 0)
581			return ret;
582
583		node = nilfs_btree_get_nonroot_node(path, level);
584		if (nilfs_btree_bad_node(btree, node, level))
585			return -EINVAL;
586		if (!found)
587			found = nilfs_btree_node_lookup(node, key, &index);
588		else
589			index = 0;
590		if (index < ncmax) {
591			ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
592		} else {
593			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
594			/* insert */
595			ptr = NILFS_BMAP_INVALID_PTR;
596		}
597		path[level].bp_index = index;
598	}
599	if (!found)
600		return -ENOENT;
601
602	if (ptrp != NULL)
603		*ptrp = ptr;
604
605	return 0;
606}
607
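/*
 * nilfs_btree_do_lookup_last() descends along the rightmost child at
 * every level and returns the largest key in the tree together with the
 * pointer associated with it.
 */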
608static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
609				      struct nilfs_btree_path *path,
610				      __u64 *keyp, __u64 *ptrp)
611{
612	struct nilfs_btree_node *node;
613	__u64 ptr;
614	int index, level, ncmax, ret;
615
616	node = nilfs_btree_get_root(btree);
617	index = nilfs_btree_node_get_nchildren(node) - 1;
618	if (index < 0)
619		return -ENOENT;
620	level = nilfs_btree_node_get_level(node);
621	ptr = nilfs_btree_node_get_ptr(node, index,
622				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
623	path[level].bp_bh = NULL;
624	path[level].bp_index = index;
625	ncmax = nilfs_btree_nchildren_per_block(btree);
626
627	for (level--; level > 0; level--) {
628		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
629		if (ret < 0)
630			return ret;
631		node = nilfs_btree_get_nonroot_node(path, level);
632		if (nilfs_btree_bad_node(btree, node, level))
633			return -EINVAL;
634		index = nilfs_btree_node_get_nchildren(node) - 1;
635		ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
636		path[level].bp_index = index;
637	}
638
639	if (keyp != NULL)
640		*keyp = nilfs_btree_node_get_key(node, index);
641	if (ptrp != NULL)
642		*ptrp = ptr;
643
644	return 0;
645}
646
647/**
648 * nilfs_btree_get_next_key - get next valid key from btree path array
649 * @btree: bmap struct of btree
650 * @path: array of nilfs_btree_path struct
651 * @minlevel: start level
652 * @nextkey: place to store the next valid key
653 *
654 * Return Value: If a next key was found, 0 is returned. Otherwise,
655 * -ENOENT is returned.
656 */
657static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
658				    const struct nilfs_btree_path *path,
659				    int minlevel, __u64 *nextkey)
660{
661	struct nilfs_btree_node *node;
662	int maxlevel = nilfs_btree_height(btree) - 1;
663	int index, next_adj, level;
664
665	/* Next index is already set to bp_index for leaf nodes. */
666	next_adj = 0;
667	for (level = minlevel; level <= maxlevel; level++) {
668		if (level == maxlevel)
669			node = nilfs_btree_get_root(btree);
670		else
671			node = nilfs_btree_get_nonroot_node(path, level);
672
673		index = path[level].bp_index + next_adj;
674		if (index < nilfs_btree_node_get_nchildren(node)) {
675			/* Next key is in this node */
676			*nextkey = nilfs_btree_node_get_key(node, index);
677			return 0;
678		}
679		/* For non-leaf nodes, next index is stored at bp_index + 1. */
680		next_adj = 1;
681	}
682	return -ENOENT;
683}
684
685static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
686			      __u64 key, int level, __u64 *ptrp)
687{
688	struct nilfs_btree_path *path;
689	int ret;
690
691	path = nilfs_btree_alloc_path();
692	if (path == NULL)
693		return -ENOMEM;
694
695	ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
696
697	nilfs_btree_free_path(path);
698
699	return ret;
700}
701
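/*
 * nilfs_btree_lookup_contig() looks up @key and then scans forward,
 * within the bottom-level node and across its right siblings, counting
 * entries whose keys and (DAT-translated, if virtual block numbers are
 * used) block numbers are both consecutive.  It returns the number of
 * contiguous blocks found, up to @maxblocks, and stores the first block
 * number in *@ptrp, or returns a negative error code.
 */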
702static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
703				     __u64 key, __u64 *ptrp,
704				     unsigned int maxblocks)
705{
706	struct nilfs_btree_path *path;
707	struct nilfs_btree_node *node;
708	struct inode *dat = NULL;
709	__u64 ptr, ptr2;
710	sector_t blocknr;
711	int level = NILFS_BTREE_LEVEL_NODE_MIN;
712	int ret, cnt, index, maxlevel, ncmax;
713	struct nilfs_btree_readahead_info p;
714
715	path = nilfs_btree_alloc_path();
716	if (path == NULL)
717		return -ENOMEM;
718
719	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
720	if (ret < 0)
721		goto out;
722
723	if (NILFS_BMAP_USE_VBN(btree)) {
724		dat = nilfs_bmap_get_dat(btree);
725		ret = nilfs_dat_translate(dat, ptr, &blocknr);
726		if (ret < 0)
727			goto out;
728		ptr = blocknr;
729	}
730	cnt = 1;
731	if (cnt == maxblocks)
732		goto end;
733
734	maxlevel = nilfs_btree_height(btree) - 1;
735	node = nilfs_btree_get_node(btree, path, level, &ncmax);
736	index = path[level].bp_index + 1;
737	for (;;) {
738		while (index < nilfs_btree_node_get_nchildren(node)) {
739			if (nilfs_btree_node_get_key(node, index) !=
740			    key + cnt)
741				goto end;
742			ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
743			if (dat) {
744				ret = nilfs_dat_translate(dat, ptr2, &blocknr);
745				if (ret < 0)
746					goto out;
747				ptr2 = blocknr;
748			}
749			if (ptr2 != ptr + cnt || ++cnt == maxblocks)
750				goto end;
751			index++;
752			continue;
753		}
754		if (level == maxlevel)
755			break;
756
757		/* look-up right sibling node */
758		p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
759		p.index = path[level + 1].bp_index + 1;
760		p.max_ra_blocks = 7;
761		if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
762		    nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
763			break;
764		ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
765		path[level + 1].bp_index = p.index;
766
767		brelse(path[level].bp_bh);
768		path[level].bp_bh = NULL;
769
770		ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
771					      &p);
772		if (ret < 0)
773			goto out;
774		node = nilfs_btree_get_nonroot_node(path, level);
775		ncmax = nilfs_btree_nchildren_per_block(btree);
776		index = 0;
777		path[level].bp_index = index;
778	}
779 end:
780	*ptrp = ptr;
781	ret = cnt;
782 out:
783	nilfs_btree_free_path(path);
784	return ret;
785}
786
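/*
 * When the leftmost entry of a node changes, the key recorded for that
 * node in its parent must be updated as well.  nilfs_btree_promote_key()
 * rewrites the key at bp_index on @level and keeps walking upward while
 * the updated entry remains the leftmost one (bp_index == 0), touching
 * the root if the propagation reaches it.
 */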
787static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
788				    struct nilfs_btree_path *path,
789				    int level, __u64 key)
790{
791	if (level < nilfs_btree_height(btree) - 1) {
792		do {
793			nilfs_btree_node_set_key(
794				nilfs_btree_get_nonroot_node(path, level),
795				path[level].bp_index, key);
796			if (!buffer_dirty(path[level].bp_bh))
797				mark_buffer_dirty(path[level].bp_bh);
798		} while ((path[level].bp_index == 0) &&
799			 (++level < nilfs_btree_height(btree) - 1));
800	}
801
802	/* root */
803	if (level == nilfs_btree_height(btree) - 1) {
804		nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
805					 path[level].bp_index, key);
806	}
807}
808
809static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
810				  struct nilfs_btree_path *path,
811				  int level, __u64 *keyp, __u64 *ptrp)
812{
813	struct nilfs_btree_node *node;
814	int ncblk;
815
816	if (level < nilfs_btree_height(btree) - 1) {
817		node = nilfs_btree_get_nonroot_node(path, level);
818		ncblk = nilfs_btree_nchildren_per_block(btree);
819		nilfs_btree_node_insert(node, path[level].bp_index,
820					*keyp, *ptrp, ncblk);
821		if (!buffer_dirty(path[level].bp_bh))
822			mark_buffer_dirty(path[level].bp_bh);
823
824		if (path[level].bp_index == 0)
825			nilfs_btree_promote_key(btree, path, level + 1,
826						nilfs_btree_node_get_key(node,
827									 0));
828	} else {
829		node = nilfs_btree_get_root(btree);
830		nilfs_btree_node_insert(node, path[level].bp_index,
831					*keyp, *ptrp,
832					NILFS_BTREE_ROOT_NCHILDREN_MAX);
833	}
834}
835
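/*
 * nilfs_btree_carry_left() makes room in a full node by shifting entries
 * into its left sibling so that both nodes end up roughly half full; n is
 * reduced by one when the insert point itself would migrate into the left
 * node.  The path is adjusted so that nilfs_btree_do_insert() then
 * inserts into whichever node holds the insert point.
 */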
836static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
837				   struct nilfs_btree_path *path,
838				   int level, __u64 *keyp, __u64 *ptrp)
839{
840	struct nilfs_btree_node *node, *left;
841	int nchildren, lnchildren, n, move, ncblk;
842
843	node = nilfs_btree_get_nonroot_node(path, level);
844	left = nilfs_btree_get_sib_node(path, level);
845	nchildren = nilfs_btree_node_get_nchildren(node);
846	lnchildren = nilfs_btree_node_get_nchildren(left);
847	ncblk = nilfs_btree_nchildren_per_block(btree);
848	move = 0;
849
850	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
851	if (n > path[level].bp_index) {
852		/* move insert point */
853		n--;
854		move = 1;
855	}
856
857	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
858
859	if (!buffer_dirty(path[level].bp_bh))
860		mark_buffer_dirty(path[level].bp_bh);
861	if (!buffer_dirty(path[level].bp_sib_bh))
862		mark_buffer_dirty(path[level].bp_sib_bh);
863
864	nilfs_btree_promote_key(btree, path, level + 1,
865				nilfs_btree_node_get_key(node, 0));
866
867	if (move) {
868		brelse(path[level].bp_bh);
869		path[level].bp_bh = path[level].bp_sib_bh;
870		path[level].bp_sib_bh = NULL;
871		path[level].bp_index += lnchildren;
872		path[level + 1].bp_index--;
873	} else {
874		brelse(path[level].bp_sib_bh);
875		path[level].bp_sib_bh = NULL;
876		path[level].bp_index -= n;
877	}
878
879	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
880}
881
882static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
883				    struct nilfs_btree_path *path,
884				    int level, __u64 *keyp, __u64 *ptrp)
885{
886	struct nilfs_btree_node *node, *right;
887	int nchildren, rnchildren, n, move, ncblk;
888
889	node = nilfs_btree_get_nonroot_node(path, level);
890	right = nilfs_btree_get_sib_node(path, level);
891	nchildren = nilfs_btree_node_get_nchildren(node);
892	rnchildren = nilfs_btree_node_get_nchildren(right);
893	ncblk = nilfs_btree_nchildren_per_block(btree);
894	move = 0;
895
896	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
897	if (n > nchildren - path[level].bp_index) {
898		/* move insert point */
899		n--;
900		move = 1;
901	}
902
903	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
904
905	if (!buffer_dirty(path[level].bp_bh))
906		mark_buffer_dirty(path[level].bp_bh);
907	if (!buffer_dirty(path[level].bp_sib_bh))
908		mark_buffer_dirty(path[level].bp_sib_bh);
909
910	path[level + 1].bp_index++;
911	nilfs_btree_promote_key(btree, path, level + 1,
912				nilfs_btree_node_get_key(right, 0));
913	path[level + 1].bp_index--;
914
915	if (move) {
916		brelse(path[level].bp_bh);
917		path[level].bp_bh = path[level].bp_sib_bh;
918		path[level].bp_sib_bh = NULL;
919		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
920		path[level + 1].bp_index++;
921	} else {
922		brelse(path[level].bp_sib_bh);
923		path[level].bp_sib_bh = NULL;
924	}
925
926	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
927}
928
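/*
 * nilfs_btree_split() distributes the entries of a full node between the
 * node and the freshly allocated sibling attached at bp_sib_bh, performs
 * the insertion on the proper side, and reports the new node's first key
 * and newly allocated pointer back through *keyp and *ptrp so that the
 * caller inserts them into the parent at the next level.
 */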
929static void nilfs_btree_split(struct nilfs_bmap *btree,
930			      struct nilfs_btree_path *path,
931			      int level, __u64 *keyp, __u64 *ptrp)
932{
933	struct nilfs_btree_node *node, *right;
934	int nchildren, n, move, ncblk;
935
936	node = nilfs_btree_get_nonroot_node(path, level);
937	right = nilfs_btree_get_sib_node(path, level);
938	nchildren = nilfs_btree_node_get_nchildren(node);
939	ncblk = nilfs_btree_nchildren_per_block(btree);
940	move = 0;
941
942	n = (nchildren + 1) / 2;
943	if (n > nchildren - path[level].bp_index) {
944		n--;
945		move = 1;
946	}
947
948	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
949
950	if (!buffer_dirty(path[level].bp_bh))
951		mark_buffer_dirty(path[level].bp_bh);
952	if (!buffer_dirty(path[level].bp_sib_bh))
953		mark_buffer_dirty(path[level].bp_sib_bh);
954
955	if (move) {
956		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
957		nilfs_btree_node_insert(right, path[level].bp_index,
958					*keyp, *ptrp, ncblk);
959
960		*keyp = nilfs_btree_node_get_key(right, 0);
961		*ptrp = path[level].bp_newreq.bpr_ptr;
962
963		brelse(path[level].bp_bh);
964		path[level].bp_bh = path[level].bp_sib_bh;
965		path[level].bp_sib_bh = NULL;
966	} else {
967		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
968
969		*keyp = nilfs_btree_node_get_key(right, 0);
970		*ptrp = path[level].bp_newreq.bpr_ptr;
971
972		brelse(path[level].bp_sib_bh);
973		path[level].bp_sib_bh = NULL;
974	}
975
976	path[level + 1].bp_index++;
977}
978
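/*
 * nilfs_btree_grow() raises the height of the tree: all entries of the
 * root are moved into the new child block and the root's level is
 * bumped, so the pending insertion goes into that child.  The child's
 * first key and its new pointer are handed back through *keyp and *ptrp
 * to become the root's single entry at the next level.
 */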
979static void nilfs_btree_grow(struct nilfs_bmap *btree,
980			     struct nilfs_btree_path *path,
981			     int level, __u64 *keyp, __u64 *ptrp)
982{
983	struct nilfs_btree_node *root, *child;
984	int n, ncblk;
985
986	root = nilfs_btree_get_root(btree);
987	child = nilfs_btree_get_sib_node(path, level);
988	ncblk = nilfs_btree_nchildren_per_block(btree);
989
990	n = nilfs_btree_node_get_nchildren(root);
991
992	nilfs_btree_node_move_right(root, child, n,
993				    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
994	nilfs_btree_node_set_level(root, level + 1);
995
996	if (!buffer_dirty(path[level].bp_sib_bh))
997		mark_buffer_dirty(path[level].bp_sib_bh);
998
999	path[level].bp_bh = path[level].bp_sib_bh;
1000	path[level].bp_sib_bh = NULL;
1001
1002	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
1003
1004	*keyp = nilfs_btree_node_get_key(child, 0);
1005	*ptrp = path[level].bp_newreq.bpr_ptr;
1006}
1007
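/*
 * nilfs_btree_find_near() picks an allocation hint for a new data block:
 * the pointer of the left neighbour in the bottom-level node if there is
 * one, otherwise the pointer of the parent node block, otherwise no hint.
 */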
1008static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
1009				   const struct nilfs_btree_path *path)
1010{
1011	struct nilfs_btree_node *node;
1012	int level, ncmax;
1013
1014	if (path == NULL)
1015		return NILFS_BMAP_INVALID_PTR;
1016
1017	/* left sibling */
1018	level = NILFS_BTREE_LEVEL_NODE_MIN;
1019	if (path[level].bp_index > 0) {
1020		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1021		return nilfs_btree_node_get_ptr(node,
1022						path[level].bp_index - 1,
1023						ncmax);
1024	}
1025
1026	/* parent */
1027	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1028	if (level <= nilfs_btree_height(btree) - 1) {
1029		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1030		return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1031						ncmax);
1032	}
1033
1034	return NILFS_BMAP_INVALID_PTR;
1035}
1036
1037static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1038				       const struct nilfs_btree_path *path,
1039				       __u64 key)
1040{
1041	__u64 ptr;
1042
1043	ptr = nilfs_bmap_find_target_seq(btree, key);
1044	if (ptr != NILFS_BMAP_INVALID_PTR)
1045		/* sequential access */
1046		return ptr;
1047
1048	ptr = nilfs_btree_find_near(btree, path);
1049	if (ptr != NILFS_BMAP_INVALID_PTR)
1050		/* near */
1051		return ptr;
1052
1053	/* block group */
1054	return nilfs_bmap_find_target_in_group(btree);
1055}
1056
1057static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1058				      struct nilfs_btree_path *path,
1059				      int *levelp, __u64 key, __u64 ptr,
1060				      struct nilfs_bmap_stats *stats)
1061{
1062	struct buffer_head *bh;
1063	struct nilfs_btree_node *node, *parent, *sib;
1064	__u64 sibptr;
1065	int pindex, level, ncmax, ncblk, ret;
1066	struct inode *dat = NULL;
1067
1068	stats->bs_nblocks = 0;
1069	level = NILFS_BTREE_LEVEL_DATA;
1070
1071	/* allocate a new ptr for data block */
1072	if (NILFS_BMAP_USE_VBN(btree)) {
1073		path[level].bp_newreq.bpr_ptr =
1074			nilfs_btree_find_target_v(btree, path, key);
1075		dat = nilfs_bmap_get_dat(btree);
1076	}
1077
1078	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1079	if (ret < 0)
1080		goto err_out_data;
1081
1082	ncblk = nilfs_btree_nchildren_per_block(btree);
1083
1084	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1085	     level < nilfs_btree_height(btree) - 1;
1086	     level++) {
1087		node = nilfs_btree_get_nonroot_node(path, level);
1088		if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1089			path[level].bp_op = nilfs_btree_do_insert;
1090			stats->bs_nblocks++;
1091			goto out;
1092		}
1093
1094		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1095		pindex = path[level + 1].bp_index;
1096
1097		/* left sibling */
1098		if (pindex > 0) {
1099			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1100							  ncmax);
1101			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1102			if (ret < 0)
1103				goto err_out_child_node;
1104			sib = (struct nilfs_btree_node *)bh->b_data;
1105			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1106				path[level].bp_sib_bh = bh;
1107				path[level].bp_op = nilfs_btree_carry_left;
1108				stats->bs_nblocks++;
1109				goto out;
1110			} else {
1111				brelse(bh);
1112			}
1113		}
1114
1115		/* right sibling */
1116		if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1117			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1118							  ncmax);
1119			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1120			if (ret < 0)
1121				goto err_out_child_node;
1122			sib = (struct nilfs_btree_node *)bh->b_data;
1123			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1124				path[level].bp_sib_bh = bh;
1125				path[level].bp_op = nilfs_btree_carry_right;
1126				stats->bs_nblocks++;
1127				goto out;
1128			} else {
1129				brelse(bh);
1130			}
1131		}
1132
1133		/* split */
1134		path[level].bp_newreq.bpr_ptr =
1135			path[level - 1].bp_newreq.bpr_ptr + 1;
1136		ret = nilfs_bmap_prepare_alloc_ptr(btree,
1137						   &path[level].bp_newreq, dat);
1138		if (ret < 0)
1139			goto err_out_child_node;
1140		ret = nilfs_btree_get_new_block(btree,
1141						path[level].bp_newreq.bpr_ptr,
1142						&bh);
1143		if (ret < 0)
1144			goto err_out_curr_node;
1145
1146		stats->bs_nblocks++;
1147
1148		sib = (struct nilfs_btree_node *)bh->b_data;
1149		nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1150		path[level].bp_sib_bh = bh;
1151		path[level].bp_op = nilfs_btree_split;
1152	}
1153
1154	/* root */
1155	node = nilfs_btree_get_root(btree);
1156	if (nilfs_btree_node_get_nchildren(node) <
1157	    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1158		path[level].bp_op = nilfs_btree_do_insert;
1159		stats->bs_nblocks++;
1160		goto out;
1161	}
1162
1163	/* grow */
1164	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1165	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1166	if (ret < 0)
1167		goto err_out_child_node;
1168	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1169					&bh);
1170	if (ret < 0)
1171		goto err_out_curr_node;
1172
1173	nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1174			      0, level, 0, ncblk, NULL, NULL);
1175	path[level].bp_sib_bh = bh;
1176	path[level].bp_op = nilfs_btree_grow;
1177
1178	level++;
1179	path[level].bp_op = nilfs_btree_do_insert;
1180
1181	/* a newly-created node block and a data block are added */
1182	stats->bs_nblocks += 2;
1183
1184	/* success */
1185 out:
1186	*levelp = level;
1187	return ret;
1188
1189	/* error */
1190 err_out_curr_node:
1191	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1192 err_out_child_node:
1193	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1194		nilfs_btnode_delete(path[level].bp_sib_bh);
1195		nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1196
1197	}
1198
1199	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1200 err_out_data:
1201	*levelp = level;
1202	stats->bs_nblocks = 0;
1203	return ret;
1204}
1205
1206static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1207				      struct nilfs_btree_path *path,
1208				      int maxlevel, __u64 key, __u64 ptr)
1209{
1210	struct inode *dat = NULL;
1211	int level;
1212
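	/* ptr must be a pointer to a buffer head. */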
1213	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1214	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1215	if (NILFS_BMAP_USE_VBN(btree)) {
1216		nilfs_bmap_set_target_v(btree, key, ptr);
1217		dat = nilfs_bmap_get_dat(btree);
1218	}
1219
1220	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1221		nilfs_bmap_commit_alloc_ptr(btree,
1222					    &path[level - 1].bp_newreq, dat);
1223		path[level].bp_op(btree, path, level, &key, &ptr);
1224	}
1225
1226	if (!nilfs_bmap_dirty(btree))
1227		nilfs_bmap_set_dirty(btree);
1228}
1229
1230static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1231{
1232	struct nilfs_btree_path *path;
1233	struct nilfs_bmap_stats stats;
1234	int level, ret;
1235
1236	path = nilfs_btree_alloc_path();
1237	if (path == NULL)
1238		return -ENOMEM;
1239
1240	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1241				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1242	if (ret != -ENOENT) {
1243		if (ret == 0)
1244			ret = -EEXIST;
1245		goto out;
1246	}
1247
1248	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1249	if (ret < 0)
1250		goto out;
1251	nilfs_btree_commit_insert(btree, path, level, key, ptr);
1252	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1253
1254 out:
1255	nilfs_btree_free_path(path);
1256	return ret;
1257}
1258
1259static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1260				  struct nilfs_btree_path *path,
1261				  int level, __u64 *keyp, __u64 *ptrp)
1262{
1263	struct nilfs_btree_node *node;
1264	int ncblk;
1265
1266	if (level < nilfs_btree_height(btree) - 1) {
1267		node = nilfs_btree_get_nonroot_node(path, level);
1268		ncblk = nilfs_btree_nchildren_per_block(btree);
1269		nilfs_btree_node_delete(node, path[level].bp_index,
1270					keyp, ptrp, ncblk);
1271		if (!buffer_dirty(path[level].bp_bh))
1272			mark_buffer_dirty(path[level].bp_bh);
1273		if (path[level].bp_index == 0)
1274			nilfs_btree_promote_key(btree, path, level + 1,
1275				nilfs_btree_node_get_key(node, 0));
1276	} else {
1277		node = nilfs_btree_get_root(btree);
1278		nilfs_btree_node_delete(node, path[level].bp_index,
1279					keyp, ptrp,
1280					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1281	}
1282}
1283
1284static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1285				    struct nilfs_btree_path *path,
1286				    int level, __u64 *keyp, __u64 *ptrp)
1287{
1288	struct nilfs_btree_node *node, *left;
1289	int nchildren, lnchildren, n, ncblk;
1290
1291	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1292
1293	node = nilfs_btree_get_nonroot_node(path, level);
1294	left = nilfs_btree_get_sib_node(path, level);
1295	nchildren = nilfs_btree_node_get_nchildren(node);
1296	lnchildren = nilfs_btree_node_get_nchildren(left);
1297	ncblk = nilfs_btree_nchildren_per_block(btree);
1298
1299	n = (nchildren + lnchildren) / 2 - nchildren;
1300
1301	nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1302
1303	if (!buffer_dirty(path[level].bp_bh))
1304		mark_buffer_dirty(path[level].bp_bh);
1305	if (!buffer_dirty(path[level].bp_sib_bh))
1306		mark_buffer_dirty(path[level].bp_sib_bh);
1307
1308	nilfs_btree_promote_key(btree, path, level + 1,
1309				nilfs_btree_node_get_key(node, 0));
1310
1311	brelse(path[level].bp_sib_bh);
1312	path[level].bp_sib_bh = NULL;
1313	path[level].bp_index += n;
1314}
1315
1316static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1317				     struct nilfs_btree_path *path,
1318				     int level, __u64 *keyp, __u64 *ptrp)
1319{
1320	struct nilfs_btree_node *node, *right;
1321	int nchildren, rnchildren, n, ncblk;
1322
1323	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1324
1325	node = nilfs_btree_get_nonroot_node(path, level);
1326	right = nilfs_btree_get_sib_node(path, level);
1327	nchildren = nilfs_btree_node_get_nchildren(node);
1328	rnchildren = nilfs_btree_node_get_nchildren(right);
1329	ncblk = nilfs_btree_nchildren_per_block(btree);
1330
1331	n = (nchildren + rnchildren) / 2 - nchildren;
1332
1333	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1334
1335	if (!buffer_dirty(path[level].bp_bh))
1336		mark_buffer_dirty(path[level].bp_bh);
1337	if (!buffer_dirty(path[level].bp_sib_bh))
1338		mark_buffer_dirty(path[level].bp_sib_bh);
1339
1340	path[level + 1].bp_index++;
1341	nilfs_btree_promote_key(btree, path, level + 1,
1342				nilfs_btree_node_get_key(right, 0));
1343	path[level + 1].bp_index--;
1344
1345	brelse(path[level].bp_sib_bh);
1346	path[level].bp_sib_bh = NULL;
1347}
1348
1349static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1350				    struct nilfs_btree_path *path,
1351				    int level, __u64 *keyp, __u64 *ptrp)
1352{
1353	struct nilfs_btree_node *node, *left;
1354	int n, ncblk;
1355
1356	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1357
1358	node = nilfs_btree_get_nonroot_node(path, level);
1359	left = nilfs_btree_get_sib_node(path, level);
1360	ncblk = nilfs_btree_nchildren_per_block(btree);
1361
1362	n = nilfs_btree_node_get_nchildren(node);
1363
1364	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1365
1366	if (!buffer_dirty(path[level].bp_sib_bh))
1367		mark_buffer_dirty(path[level].bp_sib_bh);
1368
1369	nilfs_btnode_delete(path[level].bp_bh);
1370	path[level].bp_bh = path[level].bp_sib_bh;
1371	path[level].bp_sib_bh = NULL;
1372	path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1373}
1374
1375static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1376				     struct nilfs_btree_path *path,
1377				     int level, __u64 *keyp, __u64 *ptrp)
1378{
1379	struct nilfs_btree_node *node, *right;
1380	int n, ncblk;
1381
1382	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1383
1384	node = nilfs_btree_get_nonroot_node(path, level);
1385	right = nilfs_btree_get_sib_node(path, level);
1386	ncblk = nilfs_btree_nchildren_per_block(btree);
1387
1388	n = nilfs_btree_node_get_nchildren(right);
1389
1390	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1391
1392	if (!buffer_dirty(path[level].bp_bh))
1393		mark_buffer_dirty(path[level].bp_bh);
1394
1395	nilfs_btnode_delete(path[level].bp_sib_bh);
1396	path[level].bp_sib_bh = NULL;
1397	path[level + 1].bp_index++;
1398}
1399
1400static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1401			       struct nilfs_btree_path *path,
1402			       int level, __u64 *keyp, __u64 *ptrp)
1403{
1404	struct nilfs_btree_node *root, *child;
1405	int n, ncblk;
1406
1407	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1408
1409	root = nilfs_btree_get_root(btree);
1410	child = nilfs_btree_get_nonroot_node(path, level);
1411	ncblk = nilfs_btree_nchildren_per_block(btree);
1412
1413	nilfs_btree_node_delete(root, 0, NULL, NULL,
1414				NILFS_BTREE_ROOT_NCHILDREN_MAX);
1415	nilfs_btree_node_set_level(root, level);
1416	n = nilfs_btree_node_get_nchildren(child);
1417	nilfs_btree_node_move_left(root, child, n,
1418				   NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1419
1420	nilfs_btnode_delete(path[level].bp_bh);
1421	path[level].bp_bh = NULL;
1422}
1423
1424static void nilfs_btree_nop(struct nilfs_bmap *btree,
1425			    struct nilfs_btree_path *path,
1426			    int level, __u64 *keyp, __u64 *ptrp)
1427{
1428}
1429
1430static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1431				      struct nilfs_btree_path *path,
1432				      int *levelp,
1433				      struct nilfs_bmap_stats *stats,
1434				      struct inode *dat)
1435{
1436	struct buffer_head *bh;
1437	struct nilfs_btree_node *node, *parent, *sib;
1438	__u64 sibptr;
1439	int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1440
1441	ret = 0;
1442	stats->bs_nblocks = 0;
1443	ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1444	ncblk = nilfs_btree_nchildren_per_block(btree);
1445
1446	for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1447	     level < nilfs_btree_height(btree) - 1;
1448	     level++) {
1449		node = nilfs_btree_get_nonroot_node(path, level);
1450		path[level].bp_oldreq.bpr_ptr =
1451			nilfs_btree_node_get_ptr(node, dindex, ncblk);
1452		ret = nilfs_bmap_prepare_end_ptr(btree,
1453						 &path[level].bp_oldreq, dat);
1454		if (ret < 0)
1455			goto err_out_child_node;
1456
1457		if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1458			path[level].bp_op = nilfs_btree_do_delete;
1459			stats->bs_nblocks++;
1460			goto out;
1461		}
1462
1463		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1464		pindex = path[level + 1].bp_index;
1465		dindex = pindex;
1466
1467		if (pindex > 0) {
1468			/* left sibling */
1469			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1470							  ncmax);
1471			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1472			if (ret < 0)
1473				goto err_out_curr_node;
1474			sib = (struct nilfs_btree_node *)bh->b_data;
1475			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1476				path[level].bp_sib_bh = bh;
1477				path[level].bp_op = nilfs_btree_borrow_left;
1478				stats->bs_nblocks++;
1479				goto out;
1480			} else {
1481				path[level].bp_sib_bh = bh;
1482				path[level].bp_op = nilfs_btree_concat_left;
1483				stats->bs_nblocks++;
1484				/* continue; */
1485			}
1486		} else if (pindex <
1487			   nilfs_btree_node_get_nchildren(parent) - 1) {
1488			/* right sibling */
1489			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1490							  ncmax);
1491			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1492			if (ret < 0)
1493				goto err_out_curr_node;
1494			sib = (struct nilfs_btree_node *)bh->b_data;
1495			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1496				path[level].bp_sib_bh = bh;
1497				path[level].bp_op = nilfs_btree_borrow_right;
1498				stats->bs_nblocks++;
1499				goto out;
1500			} else {
1501				path[level].bp_sib_bh = bh;
1502				path[level].bp_op = nilfs_btree_concat_right;
1503				stats->bs_nblocks++;
1504				/*
1505				 * When merging right sibling node
1506				 * into the current node, pointer to
1507				 * the right sibling node must be
1508				 * terminated instead.  The adjustment
1509				 * below is required for that.
1510				 */
1511				dindex = pindex + 1;
1512				/* continue; */
1513			}
1514		} else {
1515			/* no siblings */
1516			/* the only child of the root node */
1517			WARN_ON(level != nilfs_btree_height(btree) - 2);
1518			if (nilfs_btree_node_get_nchildren(node) - 1 <=
1519			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1520				path[level].bp_op = nilfs_btree_shrink;
1521				stats->bs_nblocks += 2;
1522				level++;
1523				path[level].bp_op = nilfs_btree_nop;
1524				goto shrink_root_child;
1525			} else {
1526				path[level].bp_op = nilfs_btree_do_delete;
1527				stats->bs_nblocks++;
1528				goto out;
1529			}
1530		}
1531	}
1532
1533	/* child of the root node is deleted */
1534	path[level].bp_op = nilfs_btree_do_delete;
1535	stats->bs_nblocks++;
1536
1537shrink_root_child:
1538	node = nilfs_btree_get_root(btree);
1539	path[level].bp_oldreq.bpr_ptr =
1540		nilfs_btree_node_get_ptr(node, dindex,
1541					 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1542
1543	ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1544	if (ret < 0)
1545		goto err_out_child_node;
1546
1547	/* success */
1548 out:
1549	*levelp = level;
1550	return ret;
1551
1552	/* error */
1553 err_out_curr_node:
1554	nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1555 err_out_child_node:
1556	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1557		brelse(path[level].bp_sib_bh);
1558		nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1559	}
1560	*levelp = level;
1561	stats->bs_nblocks = 0;
1562	return ret;
1563}
1564
1565static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1566				      struct nilfs_btree_path *path,
1567				      int maxlevel, struct inode *dat)
1568{
1569	int level;
1570
1571	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1572		nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1573		path[level].bp_op(btree, path, level, NULL, NULL);
1574	}
1575
1576	if (!nilfs_bmap_dirty(btree))
1577		nilfs_bmap_set_dirty(btree);
1578}
1579
1580static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1581
1582{
1583	struct nilfs_btree_path *path;
1584	struct nilfs_bmap_stats stats;
1585	struct inode *dat;
1586	int level, ret;
1587
1588	path = nilfs_btree_alloc_path();
1589	if (path == NULL)
1590		return -ENOMEM;
1591
1592	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1593				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1594	if (ret < 0)
1595		goto out;
1596
1597
1598	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1599
1600	ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1601	if (ret < 0)
1602		goto out;
1603	nilfs_btree_commit_delete(btree, path, level, dat);
1604	nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1605
1606out:
1607	nilfs_btree_free_path(path);
1608	return ret;
1609}
1610
1611static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1612				__u64 *keyp)
1613{
1614	struct nilfs_btree_path *path;
1615	const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1616	int ret;
1617
1618	path = nilfs_btree_alloc_path();
1619	if (!path)
1620		return -ENOMEM;
1621
1622	ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1623	if (!ret)
1624		*keyp = start;
1625	else if (ret == -ENOENT)
1626		ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1627
1628	nilfs_btree_free_path(path);
1629	return ret;
1630}
1631
1632static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1633{
1634	struct nilfs_btree_path *path;
1635	int ret;
1636
1637	path = nilfs_btree_alloc_path();
1638	if (path == NULL)
1639		return -ENOMEM;
1640
1641	ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1642
1643	nilfs_btree_free_path(path);
1644
1645	return ret;
1646}
1647
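/*
 * nilfs_btree_check_delete() reports whether @key is the largest key in
 * the tree and whether the next-largest key, if any, is below
 * NILFS_BMAP_LARGE_LOW; only trees of height 2, or of height 3 whose
 * root has a single child, can qualify.  This lets the bmap layer decide
 * whether the B-tree may shrink back to a direct mapping after the
 * deletion.
 */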
1648static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1649{
1650	struct buffer_head *bh;
1651	struct nilfs_btree_node *root, *node;
1652	__u64 maxkey, nextmaxkey;
1653	__u64 ptr;
1654	int nchildren, ret;
1655
1656	root = nilfs_btree_get_root(btree);
1657	switch (nilfs_btree_height(btree)) {
1658	case 2:
1659		bh = NULL;
1660		node = root;
1661		break;
1662	case 3:
1663		nchildren = nilfs_btree_node_get_nchildren(root);
1664		if (nchildren > 1)
1665			return 0;
1666		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1667					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1668		ret = nilfs_btree_get_block(btree, ptr, &bh);
1669		if (ret < 0)
1670			return ret;
1671		node = (struct nilfs_btree_node *)bh->b_data;
1672		break;
1673	default:
1674		return 0;
1675	}
1676
1677	nchildren = nilfs_btree_node_get_nchildren(node);
1678	maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1679	nextmaxkey = (nchildren > 1) ?
1680		nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1681	if (bh != NULL)
1682		brelse(bh);
1683
1684	return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1685}
1686
1687static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1688				   __u64 *keys, __u64 *ptrs, int nitems)
1689{
1690	struct buffer_head *bh;
1691	struct nilfs_btree_node *node, *root;
1692	__le64 *dkeys;
1693	__le64 *dptrs;
1694	__u64 ptr;
1695	int nchildren, ncmax, i, ret;
1696
1697	root = nilfs_btree_get_root(btree);
1698	switch (nilfs_btree_height(btree)) {
1699	case 2:
1700		bh = NULL;
1701		node = root;
1702		ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1703		break;
1704	case 3:
1705		nchildren = nilfs_btree_node_get_nchildren(root);
1706		WARN_ON(nchildren > 1);
1707		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1708					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1709		ret = nilfs_btree_get_block(btree, ptr, &bh);
1710		if (ret < 0)
1711			return ret;
1712		node = (struct nilfs_btree_node *)bh->b_data;
1713		ncmax = nilfs_btree_nchildren_per_block(btree);
1714		break;
1715	default:
1716		node = NULL;
1717		return -EINVAL;
1718	}
1719
1720	nchildren = nilfs_btree_node_get_nchildren(node);
1721	if (nchildren < nitems)
1722		nitems = nchildren;
1723	dkeys = nilfs_btree_node_dkeys(node);
1724	dptrs = nilfs_btree_node_dptrs(node, ncmax);
1725	for (i = 0; i < nitems; i++) {
1726		keys[i] = le64_to_cpu(dkeys[i]);
1727		ptrs[i] = le64_to_cpu(dptrs[i]);
1728	}
1729
1730	if (bh != NULL)
1731		brelse(bh);
1732
1733	return nitems;
1734}
1735
1736static int
1737nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1738				       union nilfs_bmap_ptr_req *dreq,
1739				       union nilfs_bmap_ptr_req *nreq,
1740				       struct buffer_head **bhp,
1741				       struct nilfs_bmap_stats *stats)
1742{
1743	struct buffer_head *bh;
1744	struct inode *dat = NULL;
1745	int ret;
1746
1747	stats->bs_nblocks = 0;
1748
1749	/* for data */
1750	/* cannot find near ptr */
1751	if (NILFS_BMAP_USE_VBN(btree)) {
1752		dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1753		dat = nilfs_bmap_get_dat(btree);
1754	}
1755
1756	ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode);
1757	if (ret < 0)
1758		return ret;
1759
1760	ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1761	if (ret < 0)
1762		return ret;
1763
1764	*bhp = NULL;
1765	stats->bs_nblocks++;
1766	if (nreq != NULL) {
1767		nreq->bpr_ptr = dreq->bpr_ptr + 1;
1768		ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1769		if (ret < 0)
1770			goto err_out_dreq;
1771
1772		ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1773		if (ret < 0)
1774			goto err_out_nreq;
1775
1776		*bhp = bh;
1777		stats->bs_nblocks++;
1778	}
1779
1780	/* success */
1781	return 0;
1782
1783	/* error */
1784 err_out_nreq:
1785	nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1786 err_out_dreq:
1787	nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1788	stats->bs_nblocks = 0;
1789	return ret;
1790
1791}
1792
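/*
 * nilfs_btree_commit_convert_and_insert() finishes the conversion: with
 * @nreq the carried-over entries plus the new one go into a fresh level-1
 * node block and the root becomes a single-entry level-2 node pointing at
 * it; without @nreq all n + 1 entries fit directly in a level-1 root.
 */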
1793static void
1794nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1795				      __u64 key, __u64 ptr,
1796				      const __u64 *keys, const __u64 *ptrs,
1797				      int n,
1798				      union nilfs_bmap_ptr_req *dreq,
1799				      union nilfs_bmap_ptr_req *nreq,
1800				      struct buffer_head *bh)
1801{
1802	struct nilfs_btree_node *node;
1803	struct inode *dat;
1804	__u64 tmpptr;
1805	int ncblk;
1806
1807	/* free resources */
1808	if (btree->b_ops->bop_clear != NULL)
1809		btree->b_ops->bop_clear(btree);
1810
1811	/* ptr must be a pointer to a buffer head. */
1812	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1813
1814	/* convert and insert */
1815	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1816	__nilfs_btree_init(btree);
1817	if (nreq != NULL) {
1818		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1819		nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1820
1821		/* create child node at level 1 */
1822		node = (struct nilfs_btree_node *)bh->b_data;
1823		ncblk = nilfs_btree_nchildren_per_block(btree);
1824		nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1825		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1826		if (!buffer_dirty(bh))
1827			mark_buffer_dirty(bh);
1828		if (!nilfs_bmap_dirty(btree))
1829			nilfs_bmap_set_dirty(btree);
1830
1831		brelse(bh);
1832
1833		/* create root node at level 2 */
1834		node = nilfs_btree_get_root(btree);
1835		tmpptr = nreq->bpr_ptr;
1836		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1837				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1838				      &keys[0], &tmpptr);
1839	} else {
1840		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1841
1842		/* create root node at level 1 */
1843		node = nilfs_btree_get_root(btree);
1844		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1845				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1846				      keys, ptrs);
1847		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1848					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1849		if (!nilfs_bmap_dirty(btree))
1850			nilfs_bmap_set_dirty(btree);
1851	}
1852
1853	if (NILFS_BMAP_USE_VBN(btree))
1854		nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1855}
1856
1857/**
1858 * nilfs_btree_convert_and_insert - convert the bmap to a B-tree and insert an entry
1859 * @btree: bmap structure to be converted into a B-tree
1860 * @key: key of the new entry to be inserted
1861 * @ptr: pointer associated with @key (carries the new data block's buffer head)
1862 * @keys: array of keys of the entries the bmap already holds
1863 * @ptrs: array of pointers associated with @keys
1864 * @n: number of entries in @keys and @ptrs
1865 */
1866int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1867				   __u64 key, __u64 ptr,
1868				   const __u64 *keys, const __u64 *ptrs, int n)
1869{
1870	struct buffer_head *bh = NULL;
1871	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1872	struct nilfs_bmap_stats stats;
1873	int ret;
1874
1875	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1876		di = &dreq;
1877		ni = NULL;
1878	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1879			   nilfs_btree_node_size(btree))) {
1880		di = &dreq;
1881		ni = &nreq;
1882	} else {
1883		di = NULL;
1884		ni = NULL;
1885		BUG();
1886	}
1887
1888	ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1889						     &stats);
1890	if (ret < 0)
1891		return ret;
1892	nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1893					      di, ni, bh);
1894	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1895	return 0;
1896}
1897
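/*
 * nilfs_btree_propagate_p - propagate the dirty state of @bh for a B-tree
 * that uses physical block pointers: simply mark the ancestor node buffers
 * dirty, walking upward until an already-dirty level or the root (which is
 * embedded in the inode rather than held in a buffer) is reached.
 */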
1898static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1899				   struct nilfs_btree_path *path,
1900				   int level,
1901				   struct buffer_head *bh)
1902{
1903	while ((++level < nilfs_btree_height(btree) - 1) &&
1904	       !buffer_dirty(path[level].bp_bh))
1905		mark_buffer_dirty(path[level].bp_bh);
1906
1907	return 0;
1908}
1909
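/*
 * nilfs_btree_prepare_update_v - prepare to move the block at @level to a
 * new virtual block number: read the current pointer from the parent node,
 * prepare a DAT update that retires the old virtual block number and
 * allocates a new one, and, for B-tree node buffers, prepare re-keying the
 * block in the btnode cache.  The DAT request is aborted on failure.
 */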
1910static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1911					struct nilfs_btree_path *path,
1912					int level, struct inode *dat)
1913{
1914	struct nilfs_btree_node *parent;
1915	int ncmax, ret;
1916
1917	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1918	path[level].bp_oldreq.bpr_ptr =
1919		nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1920					 ncmax);
1921	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1922	ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1923				       &path[level].bp_newreq.bpr_req);
1924	if (ret < 0)
1925		return ret;
1926
1927	if (buffer_nilfs_node(path[level].bp_bh)) {
1928		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1929		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1930		path[level].bp_ctxt.bh = path[level].bp_bh;
1931		ret = nilfs_btnode_prepare_change_key(
1932			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1933			&path[level].bp_ctxt);
1934		if (ret < 0) {
1935			nilfs_dat_abort_update(dat,
1936					       &path[level].bp_oldreq.bpr_req,
1937					       &path[level].bp_newreq.bpr_req);
1938			return ret;
1939		}
1940	}
1941
1942	return 0;
1943}
1944
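/*
 * nilfs_btree_commit_update_v - commit the update prepared above: finish the
 * DAT transaction, move node buffers to their new key in the btnode cache,
 * mark the buffer volatile, and write the new virtual block number into the
 * parent node.
 */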
1945static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1946					struct nilfs_btree_path *path,
1947					int level, struct inode *dat)
1948{
1949	struct nilfs_btree_node *parent;
1950	int ncmax;
1951
1952	nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1953				&path[level].bp_newreq.bpr_req,
1954				btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1955
1956	if (buffer_nilfs_node(path[level].bp_bh)) {
1957		nilfs_btnode_commit_change_key(
1958			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1959			&path[level].bp_ctxt);
1960		path[level].bp_bh = path[level].bp_ctxt.bh;
1961	}
1962	set_buffer_nilfs_volatile(path[level].bp_bh);
1963
1964	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1965	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1966				 path[level].bp_newreq.bpr_ptr, ncmax);
1967}
1968
1969static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1970				       struct nilfs_btree_path *path,
1971				       int level, struct inode *dat)
1972{
1973	nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1974			       &path[level].bp_newreq.bpr_req);
1975	if (buffer_nilfs_node(path[level].bp_bh))
1976		nilfs_btnode_abort_change_key(
1977			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1978			&path[level].bp_ctxt);
1979}
1980
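/*
 * nilfs_btree_prepare_propagate_v - starting at @minlevel, prepare virtual
 * block number updates for the buffer at that level (unless it is already
 * volatile) and for every ancestor up to, but not including, the first
 * already-dirty node.  The highest prepared level is returned through
 * *maxlevelp; on error all updates prepared so far are aborted.
 */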
1981static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1982					   struct nilfs_btree_path *path,
1983					   int minlevel, int *maxlevelp,
1984					   struct inode *dat)
1985{
1986	int level, ret;
1987
1988	level = minlevel;
1989	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1990		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1991		if (ret < 0)
1992			return ret;
1993	}
1994	while ((++level < nilfs_btree_height(btree) - 1) &&
1995	       !buffer_dirty(path[level].bp_bh)) {
1996
1997		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1998		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1999		if (ret < 0)
2000			goto out;
2001	}
2002
2003	/* success */
2004	*maxlevelp = level - 1;
2005	return 0;
2006
2007	/* error */
2008 out:
2009	while (--level > minlevel)
2010		nilfs_btree_abort_update_v(btree, path, level, dat);
2011	if (!buffer_nilfs_volatile(path[level].bp_bh))
2012		nilfs_btree_abort_update_v(btree, path, level, dat);
2013	return ret;
2014}
2015
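/*
 * nilfs_btree_commit_propagate_v - commit the updates prepared by
 * nilfs_btree_prepare_propagate_v() for levels @minlevel through @maxlevel.
 */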
2016static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2017					   struct nilfs_btree_path *path,
2018					   int minlevel, int maxlevel,
2019					   struct buffer_head *bh,
2020					   struct inode *dat)
2021{
2022	int level;
2023
2024	if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2025		nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2026
2027	for (level = minlevel + 1; level <= maxlevel; level++)
2028		nilfs_btree_commit_update_v(btree, path, level, dat);
2029}
2030
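/*
 * nilfs_btree_propagate_v - propagate the dirty state of @bh for a B-tree
 * that uses virtual block numbers: renew the virtual pointers of @bh (if it
 * is not yet volatile) and of its clean ancestors; if @bh is already
 * volatile, its existing DAT entry is marked dirty instead.
 */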
2031static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2032				   struct nilfs_btree_path *path,
2033				   int level, struct buffer_head *bh)
2034{
2035	int maxlevel = 0, ret;
2036	struct nilfs_btree_node *parent;
2037	struct inode *dat = nilfs_bmap_get_dat(btree);
2038	__u64 ptr;
2039	int ncmax;
2040
2041	get_bh(bh);
2042	path[level].bp_bh = bh;
2043	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2044					      dat);
2045	if (ret < 0)
2046		goto out;
2047
2048	if (buffer_nilfs_volatile(path[level].bp_bh)) {
2049		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2050		ptr = nilfs_btree_node_get_ptr(parent,
2051					       path[level + 1].bp_index,
2052					       ncmax);
2053		ret = nilfs_dat_mark_dirty(dat, ptr);
2054		if (ret < 0)
2055			goto out;
2056	}
2057
2058	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2059
2060 out:
2061	brelse(path[level].bp_bh);
2062	path[level].bp_bh = NULL;
2063	return ret;
2064}
2065
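/*
 * nilfs_btree_propagate - .bop_propagate operation of a regular B-tree,
 * invoked by the bmap layer through bmap->b_ops->bop_propagate.  Looks up
 * the path to the node or data block that @bh belongs to and propagates its
 * dirty state upward, using the virtual-block variant for DAT-mapped bmaps
 * and the physical variant otherwise.
 */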
2066static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2067				 struct buffer_head *bh)
2068{
2069	struct nilfs_btree_path *path;
2070	struct nilfs_btree_node *node;
2071	__u64 key;
2072	int level, ret;
2073
2074	WARN_ON(!buffer_dirty(bh));
2075
2076	path = nilfs_btree_alloc_path();
2077	if (path == NULL)
2078		return -ENOMEM;
2079
2080	if (buffer_nilfs_node(bh)) {
2081		node = (struct nilfs_btree_node *)bh->b_data;
2082		key = nilfs_btree_node_get_key(node, 0);
2083		level = nilfs_btree_node_get_level(node);
2084	} else {
2085		key = nilfs_bmap_data_get_key(btree, bh);
2086		level = NILFS_BTREE_LEVEL_DATA;
2087	}
2088
2089	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2090	if (ret < 0) {
2091		if (unlikely(ret == -ENOENT))
2092			nilfs_crit(btree->b_inode->i_sb,
2093				   "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
2094				   btree->b_inode->i_ino,
2095				   (unsigned long long)key, level);
2096		goto out;
2097	}
2098
2099	ret = NILFS_BMAP_USE_VBN(btree) ?
2100		nilfs_btree_propagate_v(btree, path, level, bh) :
2101		nilfs_btree_propagate_p(btree, path, level, bh);
2102
2103 out:
2104	nilfs_btree_free_path(path);
2105
2106	return ret;
2107}
2108
2109static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2110				    struct buffer_head *bh)
2111{
2112	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2113}
2114
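/*
 * nilfs_btree_add_dirty_buffer - queue the dirty node buffer @bh on the list
 * in @lists that corresponds to its level, keeping each list sorted by the
 * node's first key.  Buffers whose level field is out of range are reported
 * and skipped.
 */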
2115static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2116					 struct list_head *lists,
2117					 struct buffer_head *bh)
2118{
2119	struct list_head *head;
2120	struct buffer_head *cbh;
2121	struct nilfs_btree_node *node, *cnode;
2122	__u64 key, ckey;
2123	int level;
2124
2125	get_bh(bh);
2126	node = (struct nilfs_btree_node *)bh->b_data;
2127	key = nilfs_btree_node_get_key(node, 0);
2128	level = nilfs_btree_node_get_level(node);
2129	if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2130	    level >= NILFS_BTREE_LEVEL_MAX) {
2131		dump_stack();
2132		nilfs_warn(btree->b_inode->i_sb,
2133			   "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
2134			   level, (unsigned long long)key,
2135			   btree->b_inode->i_ino,
2136			   (unsigned long long)bh->b_blocknr);
2137		return;
2138	}
2139
2140	list_for_each(head, &lists[level]) {
2141		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2142		cnode = (struct nilfs_btree_node *)cbh->b_data;
2143		ckey = nilfs_btree_node_get_key(cnode, 0);
2144		if (key < ckey)
2145			break;
2146	}
2147	list_add_tail(&bh->b_assoc_buffers, head);
2148}
2149
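/*
 * nilfs_btree_lookup_dirty_buffers - .bop_lookup_dirty_buffers operation:
 * scan the btnode cache for dirty pages, collect their dirty buffers into
 * per-level lists sorted by key, and splice those lists onto @listp from the
 * lowest level upward, so child nodes come before their parents.
 */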
2150static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2151					     struct list_head *listp)
2152{
2153	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
2154	struct address_space *btcache = btnc_inode->i_mapping;
2155	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2156	struct pagevec pvec;
2157	struct buffer_head *bh, *head;
2158	pgoff_t index = 0;
2159	int level, i;
2160
2161	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2162	     level < NILFS_BTREE_LEVEL_MAX;
2163	     level++)
2164		INIT_LIST_HEAD(&lists[level]);
2165
2166	pagevec_init(&pvec);
2167
2168	while (pagevec_lookup_tag(&pvec, btcache, &index,
2169					PAGECACHE_TAG_DIRTY)) {
2170		for (i = 0; i < pagevec_count(&pvec); i++) {
2171			bh = head = page_buffers(pvec.pages[i]);
2172			do {
2173				if (buffer_dirty(bh))
2174					nilfs_btree_add_dirty_buffer(btree,
2175								     lists, bh);
2176			} while ((bh = bh->b_this_page) != head);
2177		}
2178		pagevec_release(&pvec);
2179		cond_resched();
2180	}
2181
2182	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2183	     level < NILFS_BTREE_LEVEL_MAX;
2184	     level++)
2185		list_splice_tail(&lists[level], listp);
2186}
2187
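/*
 * nilfs_btree_assign_p - assign the real disk address @blocknr to a block of
 * a B-tree that uses physical pointers: re-key node buffers in the btnode
 * cache, store @blocknr in the parent node, and record the block offset and
 * level in the on-disk binfo.
 */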
2188static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2189				struct nilfs_btree_path *path,
2190				int level,
2191				struct buffer_head **bh,
2192				sector_t blocknr,
2193				union nilfs_binfo *binfo)
2194{
2195	struct nilfs_btree_node *parent;
2196	__u64 key;
2197	__u64 ptr;
2198	int ncmax, ret;
2199
2200	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2201	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2202				       ncmax);
2203	if (buffer_nilfs_node(*bh)) {
2204		path[level].bp_ctxt.oldkey = ptr;
2205		path[level].bp_ctxt.newkey = blocknr;
2206		path[level].bp_ctxt.bh = *bh;
2207		ret = nilfs_btnode_prepare_change_key(
2208			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2209			&path[level].bp_ctxt);
2210		if (ret < 0)
2211			return ret;
2212		nilfs_btnode_commit_change_key(
2213			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2214			&path[level].bp_ctxt);
2215		*bh = path[level].bp_ctxt.bh;
2216	}
2217
2218	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2219				 ncmax);
2220
2221	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2222	/* on-disk format */
2223	binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2224	binfo->bi_dat.bi_level = level;
2225
2226	return 0;
2227}
2228
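/*
 * nilfs_btree_assign_v - assign the disk address @blocknr to a block of a
 * B-tree that uses virtual block numbers: bind the block's virtual number to
 * @blocknr in the DAT and record the virtual number and block offset in the
 * on-disk binfo.
 */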
2229static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2230				struct nilfs_btree_path *path,
2231				int level,
2232				struct buffer_head **bh,
2233				sector_t blocknr,
2234				union nilfs_binfo *binfo)
2235{
2236	struct nilfs_btree_node *parent;
2237	struct inode *dat = nilfs_bmap_get_dat(btree);
2238	__u64 key;
2239	__u64 ptr;
2240	union nilfs_bmap_ptr_req req;
2241	int ncmax, ret;
2242
2243	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2244	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2245				       ncmax);
2246	req.bpr_ptr = ptr;
2247	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2248	if (ret < 0)
2249		return ret;
2250	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2251
2252	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2253	/* on-disk format */
2254	binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2255	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2256
2257	return 0;
2258}
2259
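/*
 * nilfs_btree_assign - .bop_assign operation of a regular B-tree: look up
 * the path to the block being written out and delegate to the virtual or
 * physical variant above to record its new disk address.
 */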
2260static int nilfs_btree_assign(struct nilfs_bmap *btree,
2261			      struct buffer_head **bh,
2262			      sector_t blocknr,
2263			      union nilfs_binfo *binfo)
2264{
2265	struct nilfs_btree_path *path;
2266	struct nilfs_btree_node *node;
2267	__u64 key;
2268	int level, ret;
2269
2270	path = nilfs_btree_alloc_path();
2271	if (path == NULL)
2272		return -ENOMEM;
2273
2274	if (buffer_nilfs_node(*bh)) {
2275		node = (struct nilfs_btree_node *)(*bh)->b_data;
2276		key = nilfs_btree_node_get_key(node, 0);
2277		level = nilfs_btree_node_get_level(node);
2278	} else {
2279		key = nilfs_bmap_data_get_key(btree, *bh);
2280		level = NILFS_BTREE_LEVEL_DATA;
2281	}
2282
2283	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2284	if (ret < 0) {
2285		WARN_ON(ret == -ENOENT);
2286		goto out;
2287	}
2288
2289	ret = NILFS_BMAP_USE_VBN(btree) ?
2290		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2291		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2292
2293 out:
2294	nilfs_btree_free_path(path);
2295
2296	return ret;
2297}
2298
2299static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2300				 struct buffer_head **bh,
2301				 sector_t blocknr,
2302				 union nilfs_binfo *binfo)
2303{
2304	struct nilfs_btree_node *node;
2305	__u64 key;
2306	int ret;
2307
2308	ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2309			     blocknr);
2310	if (ret < 0)
2311		return ret;
2312
2313	if (buffer_nilfs_node(*bh)) {
2314		node = (struct nilfs_btree_node *)(*bh)->b_data;
2315		key = nilfs_btree_node_get_key(node, 0);
2316	} else
2317		key = nilfs_bmap_data_get_key(btree, *bh);
2318
2319	/* on-disk format */
2320	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2321	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2322
2323	return 0;
2324}
2325
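/*
 * nilfs_btree_mark - .bop_mark operation: look up the block at @key and
 * @level, read it in if necessary, and mark its buffer and the bmap dirty.
 */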
2326static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2327{
2328	struct buffer_head *bh;
2329	struct nilfs_btree_path *path;
2330	__u64 ptr;
2331	int ret;
2332
2333	path = nilfs_btree_alloc_path();
2334	if (path == NULL)
2335		return -ENOMEM;
2336
2337	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2338	if (ret < 0) {
2339		WARN_ON(ret == -ENOENT);
2340		goto out;
2341	}
2342	ret = nilfs_btree_get_block(btree, ptr, &bh);
2343	if (ret < 0) {
2344		WARN_ON(ret == -ENOENT);
2345		goto out;
2346	}
2347
2348	if (!buffer_dirty(bh))
2349		mark_buffer_dirty(bh);
2350	brelse(bh);
2351	if (!nilfs_bmap_dirty(btree))
2352		nilfs_bmap_set_dirty(btree);
2353
2354 out:
2355	nilfs_btree_free_path(path);
2356	return ret;
2357}
2358
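/*
 * Operation tables hooked up by nilfs_btree_init() and nilfs_btree_init_gc()
 * below.  The GC variant only provides bop_propagate, bop_lookup_dirty_buffers
 * and bop_assign (backed by the DAT-based helpers above); everything else is
 * left NULL.  The bmap layer dispatches through these tables, roughly as in
 * this sketch (caller-side code, not part of this file):
 *
 *	if (bmap->b_ops->bop_propagate)
 *		ret = bmap->b_ops->bop_propagate(bmap, bh);
 */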
2359static const struct nilfs_bmap_operations nilfs_btree_ops = {
2360	.bop_lookup		=	nilfs_btree_lookup,
2361	.bop_lookup_contig	=	nilfs_btree_lookup_contig,
2362	.bop_insert		=	nilfs_btree_insert,
2363	.bop_delete		=	nilfs_btree_delete,
2364	.bop_clear		=	NULL,
2365
2366	.bop_propagate		=	nilfs_btree_propagate,
2367
2368	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2369
2370	.bop_assign		=	nilfs_btree_assign,
2371	.bop_mark		=	nilfs_btree_mark,
2372
2373	.bop_seek_key		=	nilfs_btree_seek_key,
2374	.bop_last_key		=	nilfs_btree_last_key,
2375
2376	.bop_check_insert	=	NULL,
2377	.bop_check_delete	=	nilfs_btree_check_delete,
2378	.bop_gather_data	=	nilfs_btree_gather_data,
2379};
2380
2381static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2382	.bop_lookup		=	NULL,
2383	.bop_lookup_contig	=	NULL,
2384	.bop_insert		=	NULL,
2385	.bop_delete		=	NULL,
2386	.bop_clear		=	NULL,
2387
2388	.bop_propagate		=	nilfs_btree_propagate_gc,
2389
2390	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2391
2392	.bop_assign		=	nilfs_btree_assign_gc,
2393	.bop_mark		=	NULL,
2394
2395	.bop_seek_key		=	NULL,
2396	.bop_last_key		=	NULL,
2397
2398	.bop_check_insert	=	NULL,
2399	.bop_check_delete	=	NULL,
2400	.bop_gather_data	=	NULL,
2401};
2402
2403static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2404{
2405	bmap->b_ops = &nilfs_btree_ops;
2406	bmap->b_nchildren_per_block =
2407		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2408}
2409
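/*
 * nilfs_btree_init - attach the regular B-tree operations to @bmap, verify
 * that the root node embedded in the inode is not broken, and set up the
 * btnode cache for the inode.  Returns 0 on success, -EIO if the root node
 * is corrupted, or the error from attaching the node cache.
 */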
2410int nilfs_btree_init(struct nilfs_bmap *bmap)
2411{
2412	int ret = 0;
2413
2414	__nilfs_btree_init(bmap);
2415
2416	if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
2417		ret = -EIO;
2418	else
2419		ret = nilfs_attach_btree_node_cache(
2420			&NILFS_BMAP_I(bmap)->vfs_inode);
2421
2422	return ret;
2423}
2424
2425void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2426{
2427	bmap->b_ops = &nilfs_btree_ops_gc;
2428	bmap->b_nchildren_per_block =
2429		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2430}
2431