xref: /kernel/linux/linux-6.6/fs/nilfs2/btree.c (revision 62306a36)
1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * NILFS B-tree.
4 *
5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
6 *
7 * Written by Koji Sato.
8 */
9
10#include <linux/slab.h>
11#include <linux/string.h>
12#include <linux/errno.h>
13#include <linux/pagevec.h>
14#include "nilfs.h"
15#include "page.h"
16#include "btnode.h"
17#include "btree.h"
18#include "alloc.h"
19#include "dat.h"
20
21static void __nilfs_btree_init(struct nilfs_bmap *bmap);
22
23static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
24{
25	struct nilfs_btree_path *path;
26	int level = NILFS_BTREE_LEVEL_DATA;
27
28	path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
29	if (path == NULL)
30		goto out;
31
32	for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
33		path[level].bp_bh = NULL;
34		path[level].bp_sib_bh = NULL;
35		path[level].bp_index = 0;
36		path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
37		path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
38		path[level].bp_op = NULL;
39	}
40
41out:
42	return path;
43}
44
45static void nilfs_btree_free_path(struct nilfs_btree_path *path)
46{
47	int level = NILFS_BTREE_LEVEL_DATA;
48
49	for (; level < NILFS_BTREE_LEVEL_MAX; level++)
50		brelse(path[level].bp_bh);
51
52	kmem_cache_free(nilfs_btree_path_cache, path);
53}
54
55/*
56 * B-tree node operations
57 */
58static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
59				     __u64 ptr, struct buffer_head **bhp)
60{
61	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
62	struct address_space *btnc = btnc_inode->i_mapping;
63	struct buffer_head *bh;
64
65	bh = nilfs_btnode_create_block(btnc, ptr);
66	if (!bh)
67		return -ENOMEM;
68
69	set_buffer_nilfs_volatile(bh);
70	*bhp = bh;
71	return 0;
72}
73
74static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
75{
76	return node->bn_flags;
77}
78
79static void
80nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
81{
82	node->bn_flags = flags;
83}
84
85static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
86{
87	return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
88}
89
90static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
91{
92	return node->bn_level;
93}
94
95static void
96nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
97{
98	node->bn_level = level;
99}
100
101static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
102{
103	return le16_to_cpu(node->bn_nchildren);
104}
105
106static void
107nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
108{
109	node->bn_nchildren = cpu_to_le16(nchildren);
110}
111
112static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
113{
114	return i_blocksize(btree->b_inode);
115}
116
117static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
118{
119	return btree->b_nchildren_per_block;
120}
121
122static __le64 *
123nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
124{
125	return (__le64 *)((char *)(node + 1) +
126			  (nilfs_btree_node_root(node) ?
127			   0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
128}
129
130static __le64 *
131nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
132{
133	return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
134}
135
136static __u64
137nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
138{
139	return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
140}
141
142static void
143nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
144{
145	*(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
146}
147
148static __u64
149nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
150			 int ncmax)
151{
152	return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
153}
154
155static void
156nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
157			 int ncmax)
158{
159	*(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
160}
161
162static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
163				  int level, int nchildren, int ncmax,
164				  const __u64 *keys, const __u64 *ptrs)
165{
166	__le64 *dkeys;
167	__le64 *dptrs;
168	int i;
169
170	nilfs_btree_node_set_flags(node, flags);
171	nilfs_btree_node_set_level(node, level);
172	nilfs_btree_node_set_nchildren(node, nchildren);
173
174	dkeys = nilfs_btree_node_dkeys(node);
175	dptrs = nilfs_btree_node_dptrs(node, ncmax);
176	for (i = 0; i < nchildren; i++) {
177		dkeys[i] = cpu_to_le64(keys[i]);
178		dptrs[i] = cpu_to_le64(ptrs[i]);
179	}
180}
181
182/* Assume the buffer heads corresponding to left and right are locked. */
183static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
184				       struct nilfs_btree_node *right,
185				       int n, int lncmax, int rncmax)
186{
187	__le64 *ldkeys, *rdkeys;
188	__le64 *ldptrs, *rdptrs;
189	int lnchildren, rnchildren;
190
191	ldkeys = nilfs_btree_node_dkeys(left);
192	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
193	lnchildren = nilfs_btree_node_get_nchildren(left);
194
195	rdkeys = nilfs_btree_node_dkeys(right);
196	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
197	rnchildren = nilfs_btree_node_get_nchildren(right);
198
199	memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
200	memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
201	memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
202	memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
203
204	lnchildren += n;
205	rnchildren -= n;
206	nilfs_btree_node_set_nchildren(left, lnchildren);
207	nilfs_btree_node_set_nchildren(right, rnchildren);
208}
209
210/* Assume that the buffer heads corresponding to left and right are locked. */
211static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
212					struct nilfs_btree_node *right,
213					int n, int lncmax, int rncmax)
214{
215	__le64 *ldkeys, *rdkeys;
216	__le64 *ldptrs, *rdptrs;
217	int lnchildren, rnchildren;
218
219	ldkeys = nilfs_btree_node_dkeys(left);
220	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
221	lnchildren = nilfs_btree_node_get_nchildren(left);
222
223	rdkeys = nilfs_btree_node_dkeys(right);
224	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
225	rnchildren = nilfs_btree_node_get_nchildren(right);
226
227	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
228	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
229	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
230	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
231
232	lnchildren -= n;
233	rnchildren += n;
234	nilfs_btree_node_set_nchildren(left, lnchildren);
235	nilfs_btree_node_set_nchildren(right, rnchildren);
236}
237
238/* Assume that the buffer head corresponding to node is locked. */
239static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
240				    __u64 key, __u64 ptr, int ncmax)
241{
242	__le64 *dkeys;
243	__le64 *dptrs;
244	int nchildren;
245
246	dkeys = nilfs_btree_node_dkeys(node);
247	dptrs = nilfs_btree_node_dptrs(node, ncmax);
248	nchildren = nilfs_btree_node_get_nchildren(node);
249	if (index < nchildren) {
250		memmove(dkeys + index + 1, dkeys + index,
251			(nchildren - index) * sizeof(*dkeys));
252		memmove(dptrs + index + 1, dptrs + index,
253			(nchildren - index) * sizeof(*dptrs));
254	}
255	dkeys[index] = cpu_to_le64(key);
256	dptrs[index] = cpu_to_le64(ptr);
257	nchildren++;
258	nilfs_btree_node_set_nchildren(node, nchildren);
259}
260
261/* Assume that the buffer head corresponding to node is locked. */
262static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
263				    __u64 *keyp, __u64 *ptrp, int ncmax)
264{
265	__u64 key;
266	__u64 ptr;
267	__le64 *dkeys;
268	__le64 *dptrs;
269	int nchildren;
270
271	dkeys = nilfs_btree_node_dkeys(node);
272	dptrs = nilfs_btree_node_dptrs(node, ncmax);
273	key = le64_to_cpu(dkeys[index]);
274	ptr = le64_to_cpu(dptrs[index]);
275	nchildren = nilfs_btree_node_get_nchildren(node);
276	if (keyp != NULL)
277		*keyp = key;
278	if (ptrp != NULL)
279		*ptrp = ptr;
280
281	if (index < nchildren - 1) {
282		memmove(dkeys + index, dkeys + index + 1,
283			(nchildren - index - 1) * sizeof(*dkeys));
284		memmove(dptrs + index, dptrs + index + 1,
285			(nchildren - index - 1) * sizeof(*dptrs));
286	}
287	nchildren--;
288	nilfs_btree_node_set_nchildren(node, nchildren);
289}
290
291static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
292				   __u64 key, int *indexp)
293{
294	__u64 nkey;
295	int index, low, high, s;
296
297	/* binary search */
298	low = 0;
299	high = nilfs_btree_node_get_nchildren(node) - 1;
300	index = 0;
301	s = 0;
302	while (low <= high) {
303		index = (low + high) / 2;
304		nkey = nilfs_btree_node_get_key(node, index);
305		if (nkey == key) {
306			s = 0;
307			goto out;
308		} else if (nkey < key) {
309			low = index + 1;
310			s = -1;
311		} else {
312			high = index - 1;
313			s = 1;
314		}
315	}
316
317	/* adjust index */
318	if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
319		if (s > 0 && index > 0)
320			index--;
321	} else if (s < 0)
322		index++;
323
324 out:
325	*indexp = index;
326
327	return s == 0;
328}
329
330/**
331 * nilfs_btree_node_broken - verify consistency of btree node
332 * @node: btree node block to be examined
333 * @size: node size (in bytes)
334 * @inode: host inode of btree
335 * @blocknr: block number
336 *
337 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
338 */
339static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
340				   size_t size, struct inode *inode,
341				   sector_t blocknr)
342{
343	int level, flags, nchildren;
344	int ret = 0;
345
346	level = nilfs_btree_node_get_level(node);
347	flags = nilfs_btree_node_get_flags(node);
348	nchildren = nilfs_btree_node_get_nchildren(node);
349
350	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
351		     level >= NILFS_BTREE_LEVEL_MAX ||
352		     (flags & NILFS_BTREE_NODE_ROOT) ||
353		     nchildren < 0 ||
354		     nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
355		nilfs_crit(inode->i_sb,
356			   "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
357			   inode->i_ino, (unsigned long long)blocknr, level,
358			   flags, nchildren);
359		ret = 1;
360	}
361	return ret;
362}
363
364/**
365 * nilfs_btree_root_broken - verify consistency of btree root node
366 * @node: btree root node to be examined
367 * @inode: host inode of btree
368 *
369 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
370 */
371static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
372				   struct inode *inode)
373{
374	int level, flags, nchildren;
375	int ret = 0;
376
377	level = nilfs_btree_node_get_level(node);
378	flags = nilfs_btree_node_get_flags(node);
379	nchildren = nilfs_btree_node_get_nchildren(node);
380
381	if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
382		     level >= NILFS_BTREE_LEVEL_MAX ||
383		     nchildren < 0 ||
384		     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
385		nilfs_crit(inode->i_sb,
386			   "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
387			   inode->i_ino, level, flags, nchildren);
388		ret = 1;
389	}
390	return ret;
391}
392
393int nilfs_btree_broken_node_block(struct buffer_head *bh)
394{
395	struct inode *inode;
396	int ret;
397
398	if (buffer_nilfs_checked(bh))
399		return 0;
400
401	inode = bh->b_folio->mapping->host;
402	ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
403				      bh->b_size, inode, bh->b_blocknr);
404	if (likely(!ret))
405		set_buffer_nilfs_checked(bh);
406	return ret;
407}
408
409static struct nilfs_btree_node *
410nilfs_btree_get_root(const struct nilfs_bmap *btree)
411{
412	return (struct nilfs_btree_node *)btree->b_u.u_data;
413}
414
415static struct nilfs_btree_node *
416nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
417{
418	return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
419}
420
421static struct nilfs_btree_node *
422nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
423{
424	return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
425}
426
427static int nilfs_btree_height(const struct nilfs_bmap *btree)
428{
429	return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
430}
431
432static struct nilfs_btree_node *
433nilfs_btree_get_node(const struct nilfs_bmap *btree,
434		     const struct nilfs_btree_path *path,
435		     int level, int *ncmaxp)
436{
437	struct nilfs_btree_node *node;
438
439	if (level == nilfs_btree_height(btree) - 1) {
440		node = nilfs_btree_get_root(btree);
441		*ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
442	} else {
443		node = nilfs_btree_get_nonroot_node(path, level);
444		*ncmaxp = nilfs_btree_nchildren_per_block(btree);
445	}
446	return node;
447}
448
449static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
450				struct nilfs_btree_node *node, int level)
451{
452	if (unlikely(nilfs_btree_node_get_level(node) != level)) {
453		dump_stack();
454		nilfs_crit(btree->b_inode->i_sb,
455			   "btree level mismatch (ino=%lu): %d != %d",
456			   btree->b_inode->i_ino,
457			   nilfs_btree_node_get_level(node), level);
458		return 1;
459	}
460	return 0;
461}
462
463struct nilfs_btree_readahead_info {
464	struct nilfs_btree_node *node;	/* parent node */
465	int max_ra_blocks;		/* max nof blocks to read ahead */
466	int index;			/* current index on the parent node */
467	int ncmax;			/* nof children in the parent node */
468};
469
470static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
471				   struct buffer_head **bhp,
472				   const struct nilfs_btree_readahead_info *ra)
473{
474	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
475	struct address_space *btnc = btnc_inode->i_mapping;
476	struct buffer_head *bh, *ra_bh;
477	sector_t submit_ptr = 0;
478	int ret;
479
480	ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh,
481					&submit_ptr);
482	if (ret) {
483		if (likely(ret == -EEXIST))
484			goto out_check;
485		if (ret == -ENOENT) {
486			/*
487			 * Block address translation failed due to invalid
488			 * value of 'ptr'.  In this case, return internal code
489			 * -EINVAL (broken bmap) to notify bmap layer of fatal
490			 * metadata corruption.
491			 */
492			ret = -EINVAL;
493		}
494		return ret;
495	}
496
497	if (ra) {
498		int i, n;
499		__u64 ptr2;
500
501		/* read ahead sibling nodes */
502		for (n = ra->max_ra_blocks, i = ra->index + 1;
503		     n > 0 && i < ra->ncmax; n--, i++) {
504			ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
505
506			ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
507						REQ_OP_READ | REQ_RAHEAD,
508						&ra_bh, &submit_ptr);
509			if (likely(!ret || ret == -EEXIST))
510				brelse(ra_bh);
511			else if (ret != -EBUSY)
512				break;
513			if (!buffer_locked(bh))
514				goto out_no_wait;
515		}
516	}
517
518	wait_on_buffer(bh);
519
520 out_no_wait:
521	if (!buffer_uptodate(bh)) {
522		nilfs_err(btree->b_inode->i_sb,
523			  "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
524			  btree->b_inode->i_ino, (unsigned long long)ptr);
525		brelse(bh);
526		return -EIO;
527	}
528
529 out_check:
530	if (nilfs_btree_broken_node_block(bh)) {
531		clear_buffer_uptodate(bh);
532		brelse(bh);
533		return -EINVAL;
534	}
535
536	*bhp = bh;
537	return 0;
538}
539
540static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
541				   struct buffer_head **bhp)
542{
543	return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
544}
545
546static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
547				 struct nilfs_btree_path *path,
548				 __u64 key, __u64 *ptrp, int minlevel,
549				 int readahead)
550{
551	struct nilfs_btree_node *node;
552	struct nilfs_btree_readahead_info p, *ra;
553	__u64 ptr;
554	int level, index, found, ncmax, ret;
555
556	node = nilfs_btree_get_root(btree);
557	level = nilfs_btree_node_get_level(node);
558	if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
559		return -ENOENT;
560
561	found = nilfs_btree_node_lookup(node, key, &index);
562	ptr = nilfs_btree_node_get_ptr(node, index,
563				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
564	path[level].bp_bh = NULL;
565	path[level].bp_index = index;
566
567	ncmax = nilfs_btree_nchildren_per_block(btree);
568
569	while (--level >= minlevel) {
570		ra = NULL;
571		if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
572			p.node = nilfs_btree_get_node(btree, path, level + 1,
573						      &p.ncmax);
574			p.index = index;
575			p.max_ra_blocks = 7;
576			ra = &p;
577		}
578		ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
579					      ra);
580		if (ret < 0)
581			return ret;
582
583		node = nilfs_btree_get_nonroot_node(path, level);
584		if (nilfs_btree_bad_node(btree, node, level))
585			return -EINVAL;
586		if (!found)
587			found = nilfs_btree_node_lookup(node, key, &index);
588		else
589			index = 0;
590		if (index < ncmax) {
591			ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
592		} else {
593			WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
594			/* insert */
595			ptr = NILFS_BMAP_INVALID_PTR;
596		}
597		path[level].bp_index = index;
598	}
599	if (!found)
600		return -ENOENT;
601
602	if (ptrp != NULL)
603		*ptrp = ptr;
604
605	return 0;
606}
607
608static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
609				      struct nilfs_btree_path *path,
610				      __u64 *keyp, __u64 *ptrp)
611{
612	struct nilfs_btree_node *node;
613	__u64 ptr;
614	int index, level, ncmax, ret;
615
616	node = nilfs_btree_get_root(btree);
617	index = nilfs_btree_node_get_nchildren(node) - 1;
618	if (index < 0)
619		return -ENOENT;
620	level = nilfs_btree_node_get_level(node);
621	ptr = nilfs_btree_node_get_ptr(node, index,
622				       NILFS_BTREE_ROOT_NCHILDREN_MAX);
623	path[level].bp_bh = NULL;
624	path[level].bp_index = index;
625	ncmax = nilfs_btree_nchildren_per_block(btree);
626
627	for (level--; level > 0; level--) {
628		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
629		if (ret < 0)
630			return ret;
631		node = nilfs_btree_get_nonroot_node(path, level);
632		if (nilfs_btree_bad_node(btree, node, level))
633			return -EINVAL;
634		index = nilfs_btree_node_get_nchildren(node) - 1;
635		ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
636		path[level].bp_index = index;
637	}
638
639	if (keyp != NULL)
640		*keyp = nilfs_btree_node_get_key(node, index);
641	if (ptrp != NULL)
642		*ptrp = ptr;
643
644	return 0;
645}
646
647/**
648 * nilfs_btree_get_next_key - get next valid key from btree path array
649 * @btree: bmap struct of btree
650 * @path: array of nilfs_btree_path struct
651 * @minlevel: start level
652 * @nextkey: place to store the next valid key
653 *
654 * Return Value: If a next key was found, 0 is returned. Otherwise,
655 * -ENOENT is returned.
656 */
657static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
658				    const struct nilfs_btree_path *path,
659				    int minlevel, __u64 *nextkey)
660{
661	struct nilfs_btree_node *node;
662	int maxlevel = nilfs_btree_height(btree) - 1;
663	int index, next_adj, level;
664
665	/* Next index is already set to bp_index for leaf nodes. */
666	next_adj = 0;
667	for (level = minlevel; level <= maxlevel; level++) {
668		if (level == maxlevel)
669			node = nilfs_btree_get_root(btree);
670		else
671			node = nilfs_btree_get_nonroot_node(path, level);
672
673		index = path[level].bp_index + next_adj;
674		if (index < nilfs_btree_node_get_nchildren(node)) {
675			/* Next key is in this node */
676			*nextkey = nilfs_btree_node_get_key(node, index);
677			return 0;
678		}
679		/* For non-leaf nodes, next index is stored at bp_index + 1. */
680		next_adj = 1;
681	}
682	return -ENOENT;
683}
684
685static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
686			      __u64 key, int level, __u64 *ptrp)
687{
688	struct nilfs_btree_path *path;
689	int ret;
690
691	path = nilfs_btree_alloc_path();
692	if (path == NULL)
693		return -ENOMEM;
694
695	ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
696
697	nilfs_btree_free_path(path);
698
699	return ret;
700}
701
702static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
703				     __u64 key, __u64 *ptrp,
704				     unsigned int maxblocks)
705{
706	struct nilfs_btree_path *path;
707	struct nilfs_btree_node *node;
708	struct inode *dat = NULL;
709	__u64 ptr, ptr2;
710	sector_t blocknr;
711	int level = NILFS_BTREE_LEVEL_NODE_MIN;
712	int ret, cnt, index, maxlevel, ncmax;
713	struct nilfs_btree_readahead_info p;
714
715	path = nilfs_btree_alloc_path();
716	if (path == NULL)
717		return -ENOMEM;
718
719	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
720	if (ret < 0)
721		goto out;
722
723	if (NILFS_BMAP_USE_VBN(btree)) {
724		dat = nilfs_bmap_get_dat(btree);
725		ret = nilfs_dat_translate(dat, ptr, &blocknr);
726		if (ret < 0)
727			goto out;
728		ptr = blocknr;
729	}
730	cnt = 1;
731	if (cnt == maxblocks)
732		goto end;
733
734	maxlevel = nilfs_btree_height(btree) - 1;
735	node = nilfs_btree_get_node(btree, path, level, &ncmax);
736	index = path[level].bp_index + 1;
737	for (;;) {
738		while (index < nilfs_btree_node_get_nchildren(node)) {
739			if (nilfs_btree_node_get_key(node, index) !=
740			    key + cnt)
741				goto end;
742			ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
743			if (dat) {
744				ret = nilfs_dat_translate(dat, ptr2, &blocknr);
745				if (ret < 0)
746					goto out;
747				ptr2 = blocknr;
748			}
749			if (ptr2 != ptr + cnt || ++cnt == maxblocks)
750				goto end;
751			index++;
752		}
753		if (level == maxlevel)
754			break;
755
756		/* look-up right sibling node */
757		p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
758		p.index = path[level + 1].bp_index + 1;
759		p.max_ra_blocks = 7;
760		if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
761		    nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
762			break;
763		ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
764		path[level + 1].bp_index = p.index;
765
766		brelse(path[level].bp_bh);
767		path[level].bp_bh = NULL;
768
769		ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
770					      &p);
771		if (ret < 0)
772			goto out;
773		node = nilfs_btree_get_nonroot_node(path, level);
774		ncmax = nilfs_btree_nchildren_per_block(btree);
775		index = 0;
776		path[level].bp_index = index;
777	}
778 end:
779	*ptrp = ptr;
780	ret = cnt;
781 out:
782	nilfs_btree_free_path(path);
783	return ret;
784}
785
786static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
787				    struct nilfs_btree_path *path,
788				    int level, __u64 key)
789{
790	if (level < nilfs_btree_height(btree) - 1) {
791		do {
792			nilfs_btree_node_set_key(
793				nilfs_btree_get_nonroot_node(path, level),
794				path[level].bp_index, key);
795			if (!buffer_dirty(path[level].bp_bh))
796				mark_buffer_dirty(path[level].bp_bh);
797		} while ((path[level].bp_index == 0) &&
798			 (++level < nilfs_btree_height(btree) - 1));
799	}
800
801	/* root */
802	if (level == nilfs_btree_height(btree) - 1) {
803		nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
804					 path[level].bp_index, key);
805	}
806}
807
808static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
809				  struct nilfs_btree_path *path,
810				  int level, __u64 *keyp, __u64 *ptrp)
811{
812	struct nilfs_btree_node *node;
813	int ncblk;
814
815	if (level < nilfs_btree_height(btree) - 1) {
816		node = nilfs_btree_get_nonroot_node(path, level);
817		ncblk = nilfs_btree_nchildren_per_block(btree);
818		nilfs_btree_node_insert(node, path[level].bp_index,
819					*keyp, *ptrp, ncblk);
820		if (!buffer_dirty(path[level].bp_bh))
821			mark_buffer_dirty(path[level].bp_bh);
822
823		if (path[level].bp_index == 0)
824			nilfs_btree_promote_key(btree, path, level + 1,
825						nilfs_btree_node_get_key(node,
826									 0));
827	} else {
828		node = nilfs_btree_get_root(btree);
829		nilfs_btree_node_insert(node, path[level].bp_index,
830					*keyp, *ptrp,
831					NILFS_BTREE_ROOT_NCHILDREN_MAX);
832	}
833}
834
835static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
836				   struct nilfs_btree_path *path,
837				   int level, __u64 *keyp, __u64 *ptrp)
838{
839	struct nilfs_btree_node *node, *left;
840	int nchildren, lnchildren, n, move, ncblk;
841
842	node = nilfs_btree_get_nonroot_node(path, level);
843	left = nilfs_btree_get_sib_node(path, level);
844	nchildren = nilfs_btree_node_get_nchildren(node);
845	lnchildren = nilfs_btree_node_get_nchildren(left);
846	ncblk = nilfs_btree_nchildren_per_block(btree);
847	move = 0;
848
849	n = (nchildren + lnchildren + 1) / 2 - lnchildren;
850	if (n > path[level].bp_index) {
851		/* move insert point */
852		n--;
853		move = 1;
854	}
855
856	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
857
858	if (!buffer_dirty(path[level].bp_bh))
859		mark_buffer_dirty(path[level].bp_bh);
860	if (!buffer_dirty(path[level].bp_sib_bh))
861		mark_buffer_dirty(path[level].bp_sib_bh);
862
863	nilfs_btree_promote_key(btree, path, level + 1,
864				nilfs_btree_node_get_key(node, 0));
865
866	if (move) {
867		brelse(path[level].bp_bh);
868		path[level].bp_bh = path[level].bp_sib_bh;
869		path[level].bp_sib_bh = NULL;
870		path[level].bp_index += lnchildren;
871		path[level + 1].bp_index--;
872	} else {
873		brelse(path[level].bp_sib_bh);
874		path[level].bp_sib_bh = NULL;
875		path[level].bp_index -= n;
876	}
877
878	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
879}
880
881static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
882				    struct nilfs_btree_path *path,
883				    int level, __u64 *keyp, __u64 *ptrp)
884{
885	struct nilfs_btree_node *node, *right;
886	int nchildren, rnchildren, n, move, ncblk;
887
888	node = nilfs_btree_get_nonroot_node(path, level);
889	right = nilfs_btree_get_sib_node(path, level);
890	nchildren = nilfs_btree_node_get_nchildren(node);
891	rnchildren = nilfs_btree_node_get_nchildren(right);
892	ncblk = nilfs_btree_nchildren_per_block(btree);
893	move = 0;
894
895	n = (nchildren + rnchildren + 1) / 2 - rnchildren;
896	if (n > nchildren - path[level].bp_index) {
897		/* move insert point */
898		n--;
899		move = 1;
900	}
901
902	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
903
904	if (!buffer_dirty(path[level].bp_bh))
905		mark_buffer_dirty(path[level].bp_bh);
906	if (!buffer_dirty(path[level].bp_sib_bh))
907		mark_buffer_dirty(path[level].bp_sib_bh);
908
909	path[level + 1].bp_index++;
910	nilfs_btree_promote_key(btree, path, level + 1,
911				nilfs_btree_node_get_key(right, 0));
912	path[level + 1].bp_index--;
913
914	if (move) {
915		brelse(path[level].bp_bh);
916		path[level].bp_bh = path[level].bp_sib_bh;
917		path[level].bp_sib_bh = NULL;
918		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
919		path[level + 1].bp_index++;
920	} else {
921		brelse(path[level].bp_sib_bh);
922		path[level].bp_sib_bh = NULL;
923	}
924
925	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
926}
927
928static void nilfs_btree_split(struct nilfs_bmap *btree,
929			      struct nilfs_btree_path *path,
930			      int level, __u64 *keyp, __u64 *ptrp)
931{
932	struct nilfs_btree_node *node, *right;
933	int nchildren, n, move, ncblk;
934
935	node = nilfs_btree_get_nonroot_node(path, level);
936	right = nilfs_btree_get_sib_node(path, level);
937	nchildren = nilfs_btree_node_get_nchildren(node);
938	ncblk = nilfs_btree_nchildren_per_block(btree);
939	move = 0;
940
941	n = (nchildren + 1) / 2;
942	if (n > nchildren - path[level].bp_index) {
943		n--;
944		move = 1;
945	}
946
947	nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
948
949	if (!buffer_dirty(path[level].bp_bh))
950		mark_buffer_dirty(path[level].bp_bh);
951	if (!buffer_dirty(path[level].bp_sib_bh))
952		mark_buffer_dirty(path[level].bp_sib_bh);
953
954	if (move) {
955		path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
956		nilfs_btree_node_insert(right, path[level].bp_index,
957					*keyp, *ptrp, ncblk);
958
959		*keyp = nilfs_btree_node_get_key(right, 0);
960		*ptrp = path[level].bp_newreq.bpr_ptr;
961
962		brelse(path[level].bp_bh);
963		path[level].bp_bh = path[level].bp_sib_bh;
964		path[level].bp_sib_bh = NULL;
965	} else {
966		nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
967
968		*keyp = nilfs_btree_node_get_key(right, 0);
969		*ptrp = path[level].bp_newreq.bpr_ptr;
970
971		brelse(path[level].bp_sib_bh);
972		path[level].bp_sib_bh = NULL;
973	}
974
975	path[level + 1].bp_index++;
976}
977
978static void nilfs_btree_grow(struct nilfs_bmap *btree,
979			     struct nilfs_btree_path *path,
980			     int level, __u64 *keyp, __u64 *ptrp)
981{
982	struct nilfs_btree_node *root, *child;
983	int n, ncblk;
984
985	root = nilfs_btree_get_root(btree);
986	child = nilfs_btree_get_sib_node(path, level);
987	ncblk = nilfs_btree_nchildren_per_block(btree);
988
989	n = nilfs_btree_node_get_nchildren(root);
990
991	nilfs_btree_node_move_right(root, child, n,
992				    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
993	nilfs_btree_node_set_level(root, level + 1);
994
995	if (!buffer_dirty(path[level].bp_sib_bh))
996		mark_buffer_dirty(path[level].bp_sib_bh);
997
998	path[level].bp_bh = path[level].bp_sib_bh;
999	path[level].bp_sib_bh = NULL;
1000
1001	nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
1002
1003	*keyp = nilfs_btree_node_get_key(child, 0);
1004	*ptrp = path[level].bp_newreq.bpr_ptr;
1005}
1006
1007static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
1008				   const struct nilfs_btree_path *path)
1009{
1010	struct nilfs_btree_node *node;
1011	int level, ncmax;
1012
1013	if (path == NULL)
1014		return NILFS_BMAP_INVALID_PTR;
1015
1016	/* left sibling */
1017	level = NILFS_BTREE_LEVEL_NODE_MIN;
1018	if (path[level].bp_index > 0) {
1019		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1020		return nilfs_btree_node_get_ptr(node,
1021						path[level].bp_index - 1,
1022						ncmax);
1023	}
1024
1025	/* parent */
1026	level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1027	if (level <= nilfs_btree_height(btree) - 1) {
1028		node = nilfs_btree_get_node(btree, path, level, &ncmax);
1029		return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1030						ncmax);
1031	}
1032
1033	return NILFS_BMAP_INVALID_PTR;
1034}
1035
1036static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1037				       const struct nilfs_btree_path *path,
1038				       __u64 key)
1039{
1040	__u64 ptr;
1041
1042	ptr = nilfs_bmap_find_target_seq(btree, key);
1043	if (ptr != NILFS_BMAP_INVALID_PTR)
1044		/* sequential access */
1045		return ptr;
1046
1047	ptr = nilfs_btree_find_near(btree, path);
1048	if (ptr != NILFS_BMAP_INVALID_PTR)
1049		/* near */
1050		return ptr;
1051
1052	/* block group */
1053	return nilfs_bmap_find_target_in_group(btree);
1054}
1055
1056static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1057				      struct nilfs_btree_path *path,
1058				      int *levelp, __u64 key, __u64 ptr,
1059				      struct nilfs_bmap_stats *stats)
1060{
1061	struct buffer_head *bh;
1062	struct nilfs_btree_node *node, *parent, *sib;
1063	__u64 sibptr;
1064	int pindex, level, ncmax, ncblk, ret;
1065	struct inode *dat = NULL;
1066
1067	stats->bs_nblocks = 0;
1068	level = NILFS_BTREE_LEVEL_DATA;
1069
1070	/* allocate a new ptr for data block */
1071	if (NILFS_BMAP_USE_VBN(btree)) {
1072		path[level].bp_newreq.bpr_ptr =
1073			nilfs_btree_find_target_v(btree, path, key);
1074		dat = nilfs_bmap_get_dat(btree);
1075	}
1076
1077	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1078	if (ret < 0)
1079		goto err_out_data;
1080
1081	ncblk = nilfs_btree_nchildren_per_block(btree);
1082
1083	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1084	     level < nilfs_btree_height(btree) - 1;
1085	     level++) {
1086		node = nilfs_btree_get_nonroot_node(path, level);
1087		if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1088			path[level].bp_op = nilfs_btree_do_insert;
1089			stats->bs_nblocks++;
1090			goto out;
1091		}
1092
1093		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1094		pindex = path[level + 1].bp_index;
1095
1096		/* left sibling */
1097		if (pindex > 0) {
1098			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1099							  ncmax);
1100			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1101			if (ret < 0)
1102				goto err_out_child_node;
1103			sib = (struct nilfs_btree_node *)bh->b_data;
1104			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1105				path[level].bp_sib_bh = bh;
1106				path[level].bp_op = nilfs_btree_carry_left;
1107				stats->bs_nblocks++;
1108				goto out;
1109			} else {
1110				brelse(bh);
1111			}
1112		}
1113
1114		/* right sibling */
1115		if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1116			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1117							  ncmax);
1118			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1119			if (ret < 0)
1120				goto err_out_child_node;
1121			sib = (struct nilfs_btree_node *)bh->b_data;
1122			if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1123				path[level].bp_sib_bh = bh;
1124				path[level].bp_op = nilfs_btree_carry_right;
1125				stats->bs_nblocks++;
1126				goto out;
1127			} else {
1128				brelse(bh);
1129			}
1130		}
1131
1132		/* split */
1133		path[level].bp_newreq.bpr_ptr =
1134			path[level - 1].bp_newreq.bpr_ptr + 1;
1135		ret = nilfs_bmap_prepare_alloc_ptr(btree,
1136						   &path[level].bp_newreq, dat);
1137		if (ret < 0)
1138			goto err_out_child_node;
1139		ret = nilfs_btree_get_new_block(btree,
1140						path[level].bp_newreq.bpr_ptr,
1141						&bh);
1142		if (ret < 0)
1143			goto err_out_curr_node;
1144
1145		stats->bs_nblocks++;
1146
1147		sib = (struct nilfs_btree_node *)bh->b_data;
1148		nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1149		path[level].bp_sib_bh = bh;
1150		path[level].bp_op = nilfs_btree_split;
1151	}
1152
1153	/* root */
1154	node = nilfs_btree_get_root(btree);
1155	if (nilfs_btree_node_get_nchildren(node) <
1156	    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1157		path[level].bp_op = nilfs_btree_do_insert;
1158		stats->bs_nblocks++;
1159		goto out;
1160	}
1161
1162	/* grow */
1163	path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1164	ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1165	if (ret < 0)
1166		goto err_out_child_node;
1167	ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1168					&bh);
1169	if (ret < 0)
1170		goto err_out_curr_node;
1171
1172	nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1173			      0, level, 0, ncblk, NULL, NULL);
1174	path[level].bp_sib_bh = bh;
1175	path[level].bp_op = nilfs_btree_grow;
1176
1177	level++;
1178	path[level].bp_op = nilfs_btree_do_insert;
1179
1180	/* a newly-created node block and a data block are added */
1181	stats->bs_nblocks += 2;
1182
1183	/* success */
1184 out:
1185	*levelp = level;
1186	return ret;
1187
1188	/* error */
1189 err_out_curr_node:
1190	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1191 err_out_child_node:
1192	for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1193		nilfs_btnode_delete(path[level].bp_sib_bh);
1194		nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1195
1196	}
1197
1198	nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1199 err_out_data:
1200	*levelp = level;
1201	stats->bs_nblocks = 0;
1202	return ret;
1203}
1204
1205static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1206				      struct nilfs_btree_path *path,
1207				      int maxlevel, __u64 key, __u64 ptr)
1208{
1209	struct inode *dat = NULL;
1210	int level;
1211
1212	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1213	ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1214	if (NILFS_BMAP_USE_VBN(btree)) {
1215		nilfs_bmap_set_target_v(btree, key, ptr);
1216		dat = nilfs_bmap_get_dat(btree);
1217	}
1218
1219	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1220		nilfs_bmap_commit_alloc_ptr(btree,
1221					    &path[level - 1].bp_newreq, dat);
1222		path[level].bp_op(btree, path, level, &key, &ptr);
1223	}
1224
1225	if (!nilfs_bmap_dirty(btree))
1226		nilfs_bmap_set_dirty(btree);
1227}
1228
1229static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1230{
1231	struct nilfs_btree_path *path;
1232	struct nilfs_bmap_stats stats;
1233	int level, ret;
1234
1235	path = nilfs_btree_alloc_path();
1236	if (path == NULL)
1237		return -ENOMEM;
1238
1239	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1240				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1241	if (ret != -ENOENT) {
1242		if (ret == 0)
1243			ret = -EEXIST;
1244		goto out;
1245	}
1246
1247	ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1248	if (ret < 0)
1249		goto out;
1250	nilfs_btree_commit_insert(btree, path, level, key, ptr);
1251	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1252
1253 out:
1254	nilfs_btree_free_path(path);
1255	return ret;
1256}
1257
1258static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1259				  struct nilfs_btree_path *path,
1260				  int level, __u64 *keyp, __u64 *ptrp)
1261{
1262	struct nilfs_btree_node *node;
1263	int ncblk;
1264
1265	if (level < nilfs_btree_height(btree) - 1) {
1266		node = nilfs_btree_get_nonroot_node(path, level);
1267		ncblk = nilfs_btree_nchildren_per_block(btree);
1268		nilfs_btree_node_delete(node, path[level].bp_index,
1269					keyp, ptrp, ncblk);
1270		if (!buffer_dirty(path[level].bp_bh))
1271			mark_buffer_dirty(path[level].bp_bh);
1272		if (path[level].bp_index == 0)
1273			nilfs_btree_promote_key(btree, path, level + 1,
1274				nilfs_btree_node_get_key(node, 0));
1275	} else {
1276		node = nilfs_btree_get_root(btree);
1277		nilfs_btree_node_delete(node, path[level].bp_index,
1278					keyp, ptrp,
1279					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1280	}
1281}
1282
1283static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1284				    struct nilfs_btree_path *path,
1285				    int level, __u64 *keyp, __u64 *ptrp)
1286{
1287	struct nilfs_btree_node *node, *left;
1288	int nchildren, lnchildren, n, ncblk;
1289
1290	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1291
1292	node = nilfs_btree_get_nonroot_node(path, level);
1293	left = nilfs_btree_get_sib_node(path, level);
1294	nchildren = nilfs_btree_node_get_nchildren(node);
1295	lnchildren = nilfs_btree_node_get_nchildren(left);
1296	ncblk = nilfs_btree_nchildren_per_block(btree);
1297
1298	n = (nchildren + lnchildren) / 2 - nchildren;
1299
1300	nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1301
1302	if (!buffer_dirty(path[level].bp_bh))
1303		mark_buffer_dirty(path[level].bp_bh);
1304	if (!buffer_dirty(path[level].bp_sib_bh))
1305		mark_buffer_dirty(path[level].bp_sib_bh);
1306
1307	nilfs_btree_promote_key(btree, path, level + 1,
1308				nilfs_btree_node_get_key(node, 0));
1309
1310	brelse(path[level].bp_sib_bh);
1311	path[level].bp_sib_bh = NULL;
1312	path[level].bp_index += n;
1313}
1314
1315static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1316				     struct nilfs_btree_path *path,
1317				     int level, __u64 *keyp, __u64 *ptrp)
1318{
1319	struct nilfs_btree_node *node, *right;
1320	int nchildren, rnchildren, n, ncblk;
1321
1322	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1323
1324	node = nilfs_btree_get_nonroot_node(path, level);
1325	right = nilfs_btree_get_sib_node(path, level);
1326	nchildren = nilfs_btree_node_get_nchildren(node);
1327	rnchildren = nilfs_btree_node_get_nchildren(right);
1328	ncblk = nilfs_btree_nchildren_per_block(btree);
1329
1330	n = (nchildren + rnchildren) / 2 - nchildren;
1331
1332	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1333
1334	if (!buffer_dirty(path[level].bp_bh))
1335		mark_buffer_dirty(path[level].bp_bh);
1336	if (!buffer_dirty(path[level].bp_sib_bh))
1337		mark_buffer_dirty(path[level].bp_sib_bh);
1338
1339	path[level + 1].bp_index++;
1340	nilfs_btree_promote_key(btree, path, level + 1,
1341				nilfs_btree_node_get_key(right, 0));
1342	path[level + 1].bp_index--;
1343
1344	brelse(path[level].bp_sib_bh);
1345	path[level].bp_sib_bh = NULL;
1346}
1347
1348static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1349				    struct nilfs_btree_path *path,
1350				    int level, __u64 *keyp, __u64 *ptrp)
1351{
1352	struct nilfs_btree_node *node, *left;
1353	int n, ncblk;
1354
1355	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1356
1357	node = nilfs_btree_get_nonroot_node(path, level);
1358	left = nilfs_btree_get_sib_node(path, level);
1359	ncblk = nilfs_btree_nchildren_per_block(btree);
1360
1361	n = nilfs_btree_node_get_nchildren(node);
1362
1363	nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1364
1365	if (!buffer_dirty(path[level].bp_sib_bh))
1366		mark_buffer_dirty(path[level].bp_sib_bh);
1367
1368	nilfs_btnode_delete(path[level].bp_bh);
1369	path[level].bp_bh = path[level].bp_sib_bh;
1370	path[level].bp_sib_bh = NULL;
1371	path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1372}
1373
1374static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1375				     struct nilfs_btree_path *path,
1376				     int level, __u64 *keyp, __u64 *ptrp)
1377{
1378	struct nilfs_btree_node *node, *right;
1379	int n, ncblk;
1380
1381	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1382
1383	node = nilfs_btree_get_nonroot_node(path, level);
1384	right = nilfs_btree_get_sib_node(path, level);
1385	ncblk = nilfs_btree_nchildren_per_block(btree);
1386
1387	n = nilfs_btree_node_get_nchildren(right);
1388
1389	nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1390
1391	if (!buffer_dirty(path[level].bp_bh))
1392		mark_buffer_dirty(path[level].bp_bh);
1393
1394	nilfs_btnode_delete(path[level].bp_sib_bh);
1395	path[level].bp_sib_bh = NULL;
1396	path[level + 1].bp_index++;
1397}
1398
1399static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1400			       struct nilfs_btree_path *path,
1401			       int level, __u64 *keyp, __u64 *ptrp)
1402{
1403	struct nilfs_btree_node *root, *child;
1404	int n, ncblk;
1405
1406	nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1407
1408	root = nilfs_btree_get_root(btree);
1409	child = nilfs_btree_get_nonroot_node(path, level);
1410	ncblk = nilfs_btree_nchildren_per_block(btree);
1411
1412	nilfs_btree_node_delete(root, 0, NULL, NULL,
1413				NILFS_BTREE_ROOT_NCHILDREN_MAX);
1414	nilfs_btree_node_set_level(root, level);
1415	n = nilfs_btree_node_get_nchildren(child);
1416	nilfs_btree_node_move_left(root, child, n,
1417				   NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1418
1419	nilfs_btnode_delete(path[level].bp_bh);
1420	path[level].bp_bh = NULL;
1421}
1422
1423static void nilfs_btree_nop(struct nilfs_bmap *btree,
1424			    struct nilfs_btree_path *path,
1425			    int level, __u64 *keyp, __u64 *ptrp)
1426{
1427}
1428
1429static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1430				      struct nilfs_btree_path *path,
1431				      int *levelp,
1432				      struct nilfs_bmap_stats *stats,
1433				      struct inode *dat)
1434{
1435	struct buffer_head *bh;
1436	struct nilfs_btree_node *node, *parent, *sib;
1437	__u64 sibptr;
1438	int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1439
1440	ret = 0;
1441	stats->bs_nblocks = 0;
1442	ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1443	ncblk = nilfs_btree_nchildren_per_block(btree);
1444
1445	for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1446	     level < nilfs_btree_height(btree) - 1;
1447	     level++) {
1448		node = nilfs_btree_get_nonroot_node(path, level);
1449		path[level].bp_oldreq.bpr_ptr =
1450			nilfs_btree_node_get_ptr(node, dindex, ncblk);
1451		ret = nilfs_bmap_prepare_end_ptr(btree,
1452						 &path[level].bp_oldreq, dat);
1453		if (ret < 0)
1454			goto err_out_child_node;
1455
1456		if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1457			path[level].bp_op = nilfs_btree_do_delete;
1458			stats->bs_nblocks++;
1459			goto out;
1460		}
1461
1462		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1463		pindex = path[level + 1].bp_index;
1464		dindex = pindex;
1465
1466		if (pindex > 0) {
1467			/* left sibling */
1468			sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1469							  ncmax);
1470			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1471			if (ret < 0)
1472				goto err_out_curr_node;
1473			sib = (struct nilfs_btree_node *)bh->b_data;
1474			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1475				path[level].bp_sib_bh = bh;
1476				path[level].bp_op = nilfs_btree_borrow_left;
1477				stats->bs_nblocks++;
1478				goto out;
1479			} else {
1480				path[level].bp_sib_bh = bh;
1481				path[level].bp_op = nilfs_btree_concat_left;
1482				stats->bs_nblocks++;
1483				/* continue; */
1484			}
1485		} else if (pindex <
1486			   nilfs_btree_node_get_nchildren(parent) - 1) {
1487			/* right sibling */
1488			sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1489							  ncmax);
1490			ret = nilfs_btree_get_block(btree, sibptr, &bh);
1491			if (ret < 0)
1492				goto err_out_curr_node;
1493			sib = (struct nilfs_btree_node *)bh->b_data;
1494			if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1495				path[level].bp_sib_bh = bh;
1496				path[level].bp_op = nilfs_btree_borrow_right;
1497				stats->bs_nblocks++;
1498				goto out;
1499			} else {
1500				path[level].bp_sib_bh = bh;
1501				path[level].bp_op = nilfs_btree_concat_right;
1502				stats->bs_nblocks++;
1503				/*
1504				 * When merging right sibling node
1505				 * into the current node, pointer to
1506				 * the right sibling node must be
1507				 * terminated instead.  The adjustment
1508				 * below is required for that.
1509				 */
1510				dindex = pindex + 1;
1511				/* continue; */
1512			}
1513		} else {
1514			/* no siblings */
1515			/* the only child of the root node */
1516			WARN_ON(level != nilfs_btree_height(btree) - 2);
1517			if (nilfs_btree_node_get_nchildren(node) - 1 <=
1518			    NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1519				path[level].bp_op = nilfs_btree_shrink;
1520				stats->bs_nblocks += 2;
1521				level++;
1522				path[level].bp_op = nilfs_btree_nop;
1523				goto shrink_root_child;
1524			} else {
1525				path[level].bp_op = nilfs_btree_do_delete;
1526				stats->bs_nblocks++;
1527				goto out;
1528			}
1529		}
1530	}
1531
1532	/* child of the root node is deleted */
1533	path[level].bp_op = nilfs_btree_do_delete;
1534	stats->bs_nblocks++;
1535
1536shrink_root_child:
1537	node = nilfs_btree_get_root(btree);
1538	path[level].bp_oldreq.bpr_ptr =
1539		nilfs_btree_node_get_ptr(node, dindex,
1540					 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1541
1542	ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1543	if (ret < 0)
1544		goto err_out_child_node;
1545
1546	/* success */
1547 out:
1548	*levelp = level;
1549	return ret;
1550
1551	/* error */
1552 err_out_curr_node:
1553	nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1554 err_out_child_node:
1555	for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1556		brelse(path[level].bp_sib_bh);
1557		nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1558	}
1559	*levelp = level;
1560	stats->bs_nblocks = 0;
1561	return ret;
1562}
1563
1564static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1565				      struct nilfs_btree_path *path,
1566				      int maxlevel, struct inode *dat)
1567{
1568	int level;
1569
1570	for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1571		nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1572		path[level].bp_op(btree, path, level, NULL, NULL);
1573	}
1574
1575	if (!nilfs_bmap_dirty(btree))
1576		nilfs_bmap_set_dirty(btree);
1577}
1578
1579static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1580
1581{
1582	struct nilfs_btree_path *path;
1583	struct nilfs_bmap_stats stats;
1584	struct inode *dat;
1585	int level, ret;
1586
1587	path = nilfs_btree_alloc_path();
1588	if (path == NULL)
1589		return -ENOMEM;
1590
1591	ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1592				    NILFS_BTREE_LEVEL_NODE_MIN, 0);
1593	if (ret < 0)
1594		goto out;
1595
1596
1597	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1598
1599	ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1600	if (ret < 0)
1601		goto out;
1602	nilfs_btree_commit_delete(btree, path, level, dat);
1603	nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1604
1605out:
1606	nilfs_btree_free_path(path);
1607	return ret;
1608}
1609
1610static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1611				__u64 *keyp)
1612{
1613	struct nilfs_btree_path *path;
1614	const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1615	int ret;
1616
1617	path = nilfs_btree_alloc_path();
1618	if (!path)
1619		return -ENOMEM;
1620
1621	ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1622	if (!ret)
1623		*keyp = start;
1624	else if (ret == -ENOENT)
1625		ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1626
1627	nilfs_btree_free_path(path);
1628	return ret;
1629}
1630
1631static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1632{
1633	struct nilfs_btree_path *path;
1634	int ret;
1635
1636	path = nilfs_btree_alloc_path();
1637	if (path == NULL)
1638		return -ENOMEM;
1639
1640	ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1641
1642	nilfs_btree_free_path(path);
1643
1644	return ret;
1645}
1646
1647static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1648{
1649	struct buffer_head *bh;
1650	struct nilfs_btree_node *root, *node;
1651	__u64 maxkey, nextmaxkey;
1652	__u64 ptr;
1653	int nchildren, ret;
1654
1655	root = nilfs_btree_get_root(btree);
1656	switch (nilfs_btree_height(btree)) {
1657	case 2:
1658		bh = NULL;
1659		node = root;
1660		break;
1661	case 3:
1662		nchildren = nilfs_btree_node_get_nchildren(root);
1663		if (nchildren > 1)
1664			return 0;
1665		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1666					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1667		ret = nilfs_btree_get_block(btree, ptr, &bh);
1668		if (ret < 0)
1669			return ret;
1670		node = (struct nilfs_btree_node *)bh->b_data;
1671		break;
1672	default:
1673		return 0;
1674	}
1675
1676	nchildren = nilfs_btree_node_get_nchildren(node);
1677	maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1678	nextmaxkey = (nchildren > 1) ?
1679		nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1680	brelse(bh);
1681
1682	return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1683}
1684
1685static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1686				   __u64 *keys, __u64 *ptrs, int nitems)
1687{
1688	struct buffer_head *bh;
1689	struct nilfs_btree_node *node, *root;
1690	__le64 *dkeys;
1691	__le64 *dptrs;
1692	__u64 ptr;
1693	int nchildren, ncmax, i, ret;
1694
1695	root = nilfs_btree_get_root(btree);
1696	switch (nilfs_btree_height(btree)) {
1697	case 2:
1698		bh = NULL;
1699		node = root;
1700		ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1701		break;
1702	case 3:
1703		nchildren = nilfs_btree_node_get_nchildren(root);
1704		WARN_ON(nchildren > 1);
1705		ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1706					       NILFS_BTREE_ROOT_NCHILDREN_MAX);
1707		ret = nilfs_btree_get_block(btree, ptr, &bh);
1708		if (ret < 0)
1709			return ret;
1710		node = (struct nilfs_btree_node *)bh->b_data;
1711		ncmax = nilfs_btree_nchildren_per_block(btree);
1712		break;
1713	default:
1714		node = NULL;
1715		return -EINVAL;
1716	}
1717
1718	nchildren = nilfs_btree_node_get_nchildren(node);
1719	if (nchildren < nitems)
1720		nitems = nchildren;
1721	dkeys = nilfs_btree_node_dkeys(node);
1722	dptrs = nilfs_btree_node_dptrs(node, ncmax);
1723	for (i = 0; i < nitems; i++) {
1724		keys[i] = le64_to_cpu(dkeys[i]);
1725		ptrs[i] = le64_to_cpu(dptrs[i]);
1726	}
1727
1728	brelse(bh);
1729
1730	return nitems;
1731}
1732
1733static int
1734nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1735				       union nilfs_bmap_ptr_req *dreq,
1736				       union nilfs_bmap_ptr_req *nreq,
1737				       struct buffer_head **bhp,
1738				       struct nilfs_bmap_stats *stats)
1739{
1740	struct buffer_head *bh;
1741	struct inode *dat = NULL;
1742	int ret;
1743
1744	stats->bs_nblocks = 0;
1745
1746	/* for data */
1747	/* cannot find near ptr */
1748	if (NILFS_BMAP_USE_VBN(btree)) {
1749		dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1750		dat = nilfs_bmap_get_dat(btree);
1751	}
1752
1753	ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode);
1754	if (ret < 0)
1755		return ret;
1756
1757	ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1758	if (ret < 0)
1759		return ret;
1760
1761	*bhp = NULL;
1762	stats->bs_nblocks++;
1763	if (nreq != NULL) {
1764		nreq->bpr_ptr = dreq->bpr_ptr + 1;
1765		ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1766		if (ret < 0)
1767			goto err_out_dreq;
1768
1769		ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1770		if (ret < 0)
1771			goto err_out_nreq;
1772
1773		*bhp = bh;
1774		stats->bs_nblocks++;
1775	}
1776
1777	/* success */
1778	return 0;
1779
1780	/* error */
1781 err_out_nreq:
1782	nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1783 err_out_dreq:
1784	nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1785	stats->bs_nblocks = 0;
1786	return ret;
1787
1788}
1789
1790static void
1791nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1792				      __u64 key, __u64 ptr,
1793				      const __u64 *keys, const __u64 *ptrs,
1794				      int n,
1795				      union nilfs_bmap_ptr_req *dreq,
1796				      union nilfs_bmap_ptr_req *nreq,
1797				      struct buffer_head *bh)
1798{
1799	struct nilfs_btree_node *node;
1800	struct inode *dat;
1801	__u64 tmpptr;
1802	int ncblk;
1803
1804	/* free resources */
1805	if (btree->b_ops->bop_clear != NULL)
1806		btree->b_ops->bop_clear(btree);
1807
1808	/* ptr must be a pointer to a buffer head. */
1809	set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1810
1811	/* convert and insert */
1812	dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1813	__nilfs_btree_init(btree);
1814	if (nreq != NULL) {
1815		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1816		nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1817
1818		/* create child node at level 1 */
1819		node = (struct nilfs_btree_node *)bh->b_data;
1820		ncblk = nilfs_btree_nchildren_per_block(btree);
1821		nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1822		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1823		if (!buffer_dirty(bh))
1824			mark_buffer_dirty(bh);
1825		if (!nilfs_bmap_dirty(btree))
1826			nilfs_bmap_set_dirty(btree);
1827
1828		brelse(bh);
1829
1830		/* create root node at level 2 */
1831		node = nilfs_btree_get_root(btree);
1832		tmpptr = nreq->bpr_ptr;
1833		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1834				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1835				      &keys[0], &tmpptr);
1836	} else {
1837		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1838
1839		/* create root node at level 1 */
1840		node = nilfs_btree_get_root(btree);
1841		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1842				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
1843				      keys, ptrs);
1844		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1845					NILFS_BTREE_ROOT_NCHILDREN_MAX);
1846		if (!nilfs_bmap_dirty(btree))
1847			nilfs_bmap_set_dirty(btree);
1848	}
1849
1850	if (NILFS_BMAP_USE_VBN(btree))
1851		nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1852}
1853
1854/**
1855 * nilfs_btree_convert_and_insert -
1856 * @bmap:
1857 * @key:
1858 * @ptr:
1859 * @keys:
1860 * @ptrs:
1861 * @n:
1862 */
1863int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1864				   __u64 key, __u64 ptr,
1865				   const __u64 *keys, const __u64 *ptrs, int n)
1866{
1867	struct buffer_head *bh = NULL;
1868	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1869	struct nilfs_bmap_stats stats;
1870	int ret;
1871
1872	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1873		di = &dreq;
1874		ni = NULL;
1875	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1876			   nilfs_btree_node_size(btree))) {
1877		di = &dreq;
1878		ni = &nreq;
1879	} else {
1880		di = NULL;
1881		ni = NULL;
1882		BUG();
1883	}
1884
1885	ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1886						     &stats);
1887	if (ret < 0)
1888		return ret;
1889	nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1890					      di, ni, bh);
1891	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1892	return 0;
1893}
1894
1895static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1896				   struct nilfs_btree_path *path,
1897				   int level,
1898				   struct buffer_head *bh)
1899{
1900	while ((++level < nilfs_btree_height(btree) - 1) &&
1901	       !buffer_dirty(path[level].bp_bh))
1902		mark_buffer_dirty(path[level].bp_bh);
1903
1904	return 0;
1905}
1906
1907static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1908					struct nilfs_btree_path *path,
1909					int level, struct inode *dat)
1910{
1911	struct nilfs_btree_node *parent;
1912	int ncmax, ret;
1913
1914	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1915	path[level].bp_oldreq.bpr_ptr =
1916		nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1917					 ncmax);
1918	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1919	ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1920				       &path[level].bp_newreq.bpr_req);
1921	if (ret < 0)
1922		return ret;
1923
1924	if (buffer_nilfs_node(path[level].bp_bh)) {
1925		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1926		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1927		path[level].bp_ctxt.bh = path[level].bp_bh;
1928		ret = nilfs_btnode_prepare_change_key(
1929			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1930			&path[level].bp_ctxt);
1931		if (ret < 0) {
1932			nilfs_dat_abort_update(dat,
1933					       &path[level].bp_oldreq.bpr_req,
1934					       &path[level].bp_newreq.bpr_req);
1935			return ret;
1936		}
1937	}
1938
1939	return 0;
1940}
1941
1942static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1943					struct nilfs_btree_path *path,
1944					int level, struct inode *dat)
1945{
1946	struct nilfs_btree_node *parent;
1947	int ncmax;
1948
1949	nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1950				&path[level].bp_newreq.bpr_req,
1951				btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1952
1953	if (buffer_nilfs_node(path[level].bp_bh)) {
1954		nilfs_btnode_commit_change_key(
1955			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1956			&path[level].bp_ctxt);
1957		path[level].bp_bh = path[level].bp_ctxt.bh;
1958	}
1959	set_buffer_nilfs_volatile(path[level].bp_bh);
1960
1961	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1962	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1963				 path[level].bp_newreq.bpr_ptr, ncmax);
1964}
1965
1966static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1967				       struct nilfs_btree_path *path,
1968				       int level, struct inode *dat)
1969{
1970	nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1971			       &path[level].bp_newreq.bpr_req);
1972	if (buffer_nilfs_node(path[level].bp_bh))
1973		nilfs_btnode_abort_change_key(
1974			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
1975			&path[level].bp_ctxt);
1976}
1977
1978static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1979					   struct nilfs_btree_path *path,
1980					   int minlevel, int *maxlevelp,
1981					   struct inode *dat)
1982{
1983	int level, ret;
1984
1985	level = minlevel;
1986	if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1987		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1988		if (ret < 0)
1989			return ret;
1990	}
1991	while ((++level < nilfs_btree_height(btree) - 1) &&
1992	       !buffer_dirty(path[level].bp_bh)) {
1993
1994		WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1995		ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1996		if (ret < 0)
1997			goto out;
1998	}
1999
2000	/* success */
2001	*maxlevelp = level - 1;
2002	return 0;
2003
2004	/* error */
2005 out:
2006	while (--level > minlevel)
2007		nilfs_btree_abort_update_v(btree, path, level, dat);
2008	if (!buffer_nilfs_volatile(path[level].bp_bh))
2009		nilfs_btree_abort_update_v(btree, path, level, dat);
2010	return ret;
2011}
2012
2013static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2014					   struct nilfs_btree_path *path,
2015					   int minlevel, int maxlevel,
2016					   struct buffer_head *bh,
2017					   struct inode *dat)
2018{
2019	int level;
2020
2021	if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2022		nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2023
2024	for (level = minlevel + 1; level <= maxlevel; level++)
2025		nilfs_btree_commit_update_v(btree, path, level, dat);
2026}
2027
2028static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2029				   struct nilfs_btree_path *path,
2030				   int level, struct buffer_head *bh)
2031{
2032	int maxlevel = 0, ret;
2033	struct nilfs_btree_node *parent;
2034	struct inode *dat = nilfs_bmap_get_dat(btree);
2035	__u64 ptr;
2036	int ncmax;
2037
2038	get_bh(bh);
2039	path[level].bp_bh = bh;
2040	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2041					      dat);
2042	if (ret < 0)
2043		goto out;
2044
2045	if (buffer_nilfs_volatile(path[level].bp_bh)) {
2046		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2047		ptr = nilfs_btree_node_get_ptr(parent,
2048					       path[level + 1].bp_index,
2049					       ncmax);
2050		ret = nilfs_dat_mark_dirty(dat, ptr);
2051		if (ret < 0)
2052			goto out;
2053	}
2054
2055	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2056
2057 out:
2058	brelse(path[level].bp_bh);
2059	path[level].bp_bh = NULL;
2060	return ret;
2061}
2062
2063static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2064				 struct buffer_head *bh)
2065{
2066	struct nilfs_btree_path *path;
2067	struct nilfs_btree_node *node;
2068	__u64 key;
2069	int level, ret;
2070
2071	WARN_ON(!buffer_dirty(bh));
2072
2073	path = nilfs_btree_alloc_path();
2074	if (path == NULL)
2075		return -ENOMEM;
2076
2077	if (buffer_nilfs_node(bh)) {
2078		node = (struct nilfs_btree_node *)bh->b_data;
2079		key = nilfs_btree_node_get_key(node, 0);
2080		level = nilfs_btree_node_get_level(node);
2081	} else {
2082		key = nilfs_bmap_data_get_key(btree, bh);
2083		level = NILFS_BTREE_LEVEL_DATA;
2084	}
2085
2086	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2087	if (ret < 0) {
2088		if (unlikely(ret == -ENOENT))
2089			nilfs_crit(btree->b_inode->i_sb,
2090				   "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
2091				   btree->b_inode->i_ino,
2092				   (unsigned long long)key, level);
2093		goto out;
2094	}
2095
2096	ret = NILFS_BMAP_USE_VBN(btree) ?
2097		nilfs_btree_propagate_v(btree, path, level, bh) :
2098		nilfs_btree_propagate_p(btree, path, level, bh);
2099
2100 out:
2101	nilfs_btree_free_path(path);
2102
2103	return ret;
2104}
2105
2106static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2107				    struct buffer_head *bh)
2108{
2109	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2110}
2111
2112static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2113					 struct list_head *lists,
2114					 struct buffer_head *bh)
2115{
2116	struct list_head *head;
2117	struct buffer_head *cbh;
2118	struct nilfs_btree_node *node, *cnode;
2119	__u64 key, ckey;
2120	int level;
2121
2122	get_bh(bh);
2123	node = (struct nilfs_btree_node *)bh->b_data;
2124	key = nilfs_btree_node_get_key(node, 0);
2125	level = nilfs_btree_node_get_level(node);
2126	if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2127	    level >= NILFS_BTREE_LEVEL_MAX) {
2128		dump_stack();
2129		nilfs_warn(btree->b_inode->i_sb,
2130			   "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
2131			   level, (unsigned long long)key,
2132			   btree->b_inode->i_ino,
2133			   (unsigned long long)bh->b_blocknr);
2134		return;
2135	}
2136
2137	list_for_each(head, &lists[level]) {
2138		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2139		cnode = (struct nilfs_btree_node *)cbh->b_data;
2140		ckey = nilfs_btree_node_get_key(cnode, 0);
2141		if (key < ckey)
2142			break;
2143	}
2144	list_add_tail(&bh->b_assoc_buffers, head);
2145}
2146
2147static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2148					     struct list_head *listp)
2149{
2150	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
2151	struct address_space *btcache = btnc_inode->i_mapping;
2152	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2153	struct folio_batch fbatch;
2154	struct buffer_head *bh, *head;
2155	pgoff_t index = 0;
2156	int level, i;
2157
2158	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2159	     level < NILFS_BTREE_LEVEL_MAX;
2160	     level++)
2161		INIT_LIST_HEAD(&lists[level]);
2162
2163	folio_batch_init(&fbatch);
2164
2165	while (filemap_get_folios_tag(btcache, &index, (pgoff_t)-1,
2166				PAGECACHE_TAG_DIRTY, &fbatch)) {
2167		for (i = 0; i < folio_batch_count(&fbatch); i++) {
2168			bh = head = folio_buffers(fbatch.folios[i]);
2169			do {
2170				if (buffer_dirty(bh))
2171					nilfs_btree_add_dirty_buffer(btree,
2172								     lists, bh);
2173			} while ((bh = bh->b_this_page) != head);
2174		}
2175		folio_batch_release(&fbatch);
2176		cond_resched();
2177	}
2178
2179	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2180	     level < NILFS_BTREE_LEVEL_MAX;
2181	     level++)
2182		list_splice_tail(&lists[level], listp);
2183}
2184
2185static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2186				struct nilfs_btree_path *path,
2187				int level,
2188				struct buffer_head **bh,
2189				sector_t blocknr,
2190				union nilfs_binfo *binfo)
2191{
2192	struct nilfs_btree_node *parent;
2193	__u64 key;
2194	__u64 ptr;
2195	int ncmax, ret;
2196
2197	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2198	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2199				       ncmax);
2200	if (buffer_nilfs_node(*bh)) {
2201		path[level].bp_ctxt.oldkey = ptr;
2202		path[level].bp_ctxt.newkey = blocknr;
2203		path[level].bp_ctxt.bh = *bh;
2204		ret = nilfs_btnode_prepare_change_key(
2205			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2206			&path[level].bp_ctxt);
2207		if (ret < 0)
2208			return ret;
2209		nilfs_btnode_commit_change_key(
2210			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
2211			&path[level].bp_ctxt);
2212		*bh = path[level].bp_ctxt.bh;
2213	}
2214
2215	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2216				 ncmax);
2217
2218	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2219	/* on-disk format */
2220	binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2221	binfo->bi_dat.bi_level = level;
2222	memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad));
2223
2224	return 0;
2225}
2226
2227static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2228				struct nilfs_btree_path *path,
2229				int level,
2230				struct buffer_head **bh,
2231				sector_t blocknr,
2232				union nilfs_binfo *binfo)
2233{
2234	struct nilfs_btree_node *parent;
2235	struct inode *dat = nilfs_bmap_get_dat(btree);
2236	__u64 key;
2237	__u64 ptr;
2238	union nilfs_bmap_ptr_req req;
2239	int ncmax, ret;
2240
2241	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2242	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2243				       ncmax);
2244	req.bpr_ptr = ptr;
2245	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2246	if (ret < 0)
2247		return ret;
2248	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2249
2250	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2251	/* on-disk format */
2252	binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2253	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2254
2255	return 0;
2256}
2257
2258static int nilfs_btree_assign(struct nilfs_bmap *btree,
2259			      struct buffer_head **bh,
2260			      sector_t blocknr,
2261			      union nilfs_binfo *binfo)
2262{
2263	struct nilfs_btree_path *path;
2264	struct nilfs_btree_node *node;
2265	__u64 key;
2266	int level, ret;
2267
2268	path = nilfs_btree_alloc_path();
2269	if (path == NULL)
2270		return -ENOMEM;
2271
2272	if (buffer_nilfs_node(*bh)) {
2273		node = (struct nilfs_btree_node *)(*bh)->b_data;
2274		key = nilfs_btree_node_get_key(node, 0);
2275		level = nilfs_btree_node_get_level(node);
2276	} else {
2277		key = nilfs_bmap_data_get_key(btree, *bh);
2278		level = NILFS_BTREE_LEVEL_DATA;
2279	}
2280
2281	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2282	if (ret < 0) {
2283		WARN_ON(ret == -ENOENT);
2284		goto out;
2285	}
2286
2287	ret = NILFS_BMAP_USE_VBN(btree) ?
2288		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2289		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2290
2291 out:
2292	nilfs_btree_free_path(path);
2293
2294	return ret;
2295}
2296
2297static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2298				 struct buffer_head **bh,
2299				 sector_t blocknr,
2300				 union nilfs_binfo *binfo)
2301{
2302	struct nilfs_btree_node *node;
2303	__u64 key;
2304	int ret;
2305
2306	ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2307			     blocknr);
2308	if (ret < 0)
2309		return ret;
2310
2311	if (buffer_nilfs_node(*bh)) {
2312		node = (struct nilfs_btree_node *)(*bh)->b_data;
2313		key = nilfs_btree_node_get_key(node, 0);
2314	} else
2315		key = nilfs_bmap_data_get_key(btree, *bh);
2316
2317	/* on-disk format */
2318	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2319	binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2320
2321	return 0;
2322}
2323
2324static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2325{
2326	struct buffer_head *bh;
2327	struct nilfs_btree_path *path;
2328	__u64 ptr;
2329	int ret;
2330
2331	path = nilfs_btree_alloc_path();
2332	if (path == NULL)
2333		return -ENOMEM;
2334
2335	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2336	if (ret < 0) {
2337		WARN_ON(ret == -ENOENT);
2338		goto out;
2339	}
2340	ret = nilfs_btree_get_block(btree, ptr, &bh);
2341	if (ret < 0) {
2342		WARN_ON(ret == -ENOENT);
2343		goto out;
2344	}
2345
2346	if (!buffer_dirty(bh))
2347		mark_buffer_dirty(bh);
2348	brelse(bh);
2349	if (!nilfs_bmap_dirty(btree))
2350		nilfs_bmap_set_dirty(btree);
2351
2352 out:
2353	nilfs_btree_free_path(path);
2354	return ret;
2355}
2356
2357static const struct nilfs_bmap_operations nilfs_btree_ops = {
2358	.bop_lookup		=	nilfs_btree_lookup,
2359	.bop_lookup_contig	=	nilfs_btree_lookup_contig,
2360	.bop_insert		=	nilfs_btree_insert,
2361	.bop_delete		=	nilfs_btree_delete,
2362	.bop_clear		=	NULL,
2363
2364	.bop_propagate		=	nilfs_btree_propagate,
2365
2366	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2367
2368	.bop_assign		=	nilfs_btree_assign,
2369	.bop_mark		=	nilfs_btree_mark,
2370
2371	.bop_seek_key		=	nilfs_btree_seek_key,
2372	.bop_last_key		=	nilfs_btree_last_key,
2373
2374	.bop_check_insert	=	NULL,
2375	.bop_check_delete	=	nilfs_btree_check_delete,
2376	.bop_gather_data	=	nilfs_btree_gather_data,
2377};
2378
2379static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2380	.bop_lookup		=	NULL,
2381	.bop_lookup_contig	=	NULL,
2382	.bop_insert		=	NULL,
2383	.bop_delete		=	NULL,
2384	.bop_clear		=	NULL,
2385
2386	.bop_propagate		=	nilfs_btree_propagate_gc,
2387
2388	.bop_lookup_dirty_buffers =	nilfs_btree_lookup_dirty_buffers,
2389
2390	.bop_assign		=	nilfs_btree_assign_gc,
2391	.bop_mark		=	NULL,
2392
2393	.bop_seek_key		=	NULL,
2394	.bop_last_key		=	NULL,
2395
2396	.bop_check_insert	=	NULL,
2397	.bop_check_delete	=	NULL,
2398	.bop_gather_data	=	NULL,
2399};
2400
2401static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2402{
2403	bmap->b_ops = &nilfs_btree_ops;
2404	bmap->b_nchildren_per_block =
2405		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2406}
2407
2408int nilfs_btree_init(struct nilfs_bmap *bmap)
2409{
2410	int ret = 0;
2411
2412	__nilfs_btree_init(bmap);
2413
2414	if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
2415		ret = -EIO;
2416	else
2417		ret = nilfs_attach_btree_node_cache(
2418			&NILFS_BMAP_I(bmap)->vfs_inode);
2419
2420	return ret;
2421}
2422
2423void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2424{
2425	bmap->b_ops = &nilfs_btree_ops_gc;
2426	bmap->b_nchildren_per_block =
2427		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2428}
2429