1// SPDX-License-Identifier: GPL-2.0+ 2/* 3 * btree.c - NILFS B-tree. 4 * 5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. 6 * 7 * Written by Koji Sato. 8 */ 9 10#include <linux/slab.h> 11#include <linux/string.h> 12#include <linux/errno.h> 13#include <linux/pagevec.h> 14#include "nilfs.h" 15#include "page.h" 16#include "btnode.h" 17#include "btree.h" 18#include "alloc.h" 19#include "dat.h" 20 21static void __nilfs_btree_init(struct nilfs_bmap *bmap); 22 23static struct nilfs_btree_path *nilfs_btree_alloc_path(void) 24{ 25 struct nilfs_btree_path *path; 26 int level = NILFS_BTREE_LEVEL_DATA; 27 28 path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); 29 if (path == NULL) 30 goto out; 31 32 for (; level < NILFS_BTREE_LEVEL_MAX; level++) { 33 path[level].bp_bh = NULL; 34 path[level].bp_sib_bh = NULL; 35 path[level].bp_index = 0; 36 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 37 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 38 path[level].bp_op = NULL; 39 } 40 41out: 42 return path; 43} 44 45static void nilfs_btree_free_path(struct nilfs_btree_path *path) 46{ 47 int level = NILFS_BTREE_LEVEL_DATA; 48 49 for (; level < NILFS_BTREE_LEVEL_MAX; level++) 50 brelse(path[level].bp_bh); 51 52 kmem_cache_free(nilfs_btree_path_cache, path); 53} 54 55/* 56 * B-tree node operations 57 */ 58static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, 59 __u64 ptr, struct buffer_head **bhp) 60{ 61 struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; 62 struct address_space *btnc = btnc_inode->i_mapping; 63 struct buffer_head *bh; 64 65 bh = nilfs_btnode_create_block(btnc, ptr); 66 if (!bh) 67 return -ENOMEM; 68 69 set_buffer_nilfs_volatile(bh); 70 *bhp = bh; 71 return 0; 72} 73 74static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node) 75{ 76 return node->bn_flags; 77} 78 79static void 80nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags) 81{ 82 node->bn_flags = flags; 83} 84 85static int nilfs_btree_node_root(const struct nilfs_btree_node *node) 86{ 87 return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT; 88} 89 90static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node) 91{ 92 return node->bn_level; 93} 94 95static void 96nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) 97{ 98 node->bn_level = level; 99} 100 101static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) 102{ 103 return le16_to_cpu(node->bn_nchildren); 104} 105 106static void 107nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) 108{ 109 node->bn_nchildren = cpu_to_le16(nchildren); 110} 111 112static int nilfs_btree_node_size(const struct nilfs_bmap *btree) 113{ 114 return i_blocksize(btree->b_inode); 115} 116 117static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree) 118{ 119 return btree->b_nchildren_per_block; 120} 121 122static __le64 * 123nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) 124{ 125 return (__le64 *)((char *)(node + 1) + 126 (nilfs_btree_node_root(node) ? 
127 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); 128} 129 130static __le64 * 131nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax) 132{ 133 return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax); 134} 135 136static __u64 137nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) 138{ 139 return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index)); 140} 141 142static void 143nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) 144{ 145 *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key); 146} 147 148static __u64 149nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index, 150 int ncmax) 151{ 152 return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index)); 153} 154 155static void 156nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr, 157 int ncmax) 158{ 159 *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr); 160} 161 162static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags, 163 int level, int nchildren, int ncmax, 164 const __u64 *keys, const __u64 *ptrs) 165{ 166 __le64 *dkeys; 167 __le64 *dptrs; 168 int i; 169 170 nilfs_btree_node_set_flags(node, flags); 171 nilfs_btree_node_set_level(node, level); 172 nilfs_btree_node_set_nchildren(node, nchildren); 173 174 dkeys = nilfs_btree_node_dkeys(node); 175 dptrs = nilfs_btree_node_dptrs(node, ncmax); 176 for (i = 0; i < nchildren; i++) { 177 dkeys[i] = cpu_to_le64(keys[i]); 178 dptrs[i] = cpu_to_le64(ptrs[i]); 179 } 180} 181 182/* Assume the buffer heads corresponding to left and right are locked. */ 183static void nilfs_btree_node_move_left(struct nilfs_btree_node *left, 184 struct nilfs_btree_node *right, 185 int n, int lncmax, int rncmax) 186{ 187 __le64 *ldkeys, *rdkeys; 188 __le64 *ldptrs, *rdptrs; 189 int lnchildren, rnchildren; 190 191 ldkeys = nilfs_btree_node_dkeys(left); 192 ldptrs = nilfs_btree_node_dptrs(left, lncmax); 193 lnchildren = nilfs_btree_node_get_nchildren(left); 194 195 rdkeys = nilfs_btree_node_dkeys(right); 196 rdptrs = nilfs_btree_node_dptrs(right, rncmax); 197 rnchildren = nilfs_btree_node_get_nchildren(right); 198 199 memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); 200 memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs)); 201 memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys)); 202 memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs)); 203 204 lnchildren += n; 205 rnchildren -= n; 206 nilfs_btree_node_set_nchildren(left, lnchildren); 207 nilfs_btree_node_set_nchildren(right, rnchildren); 208} 209 210/* Assume that the buffer heads corresponding to left and right are locked. 
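 * nilfs_btree_node_move_right() shifts the last n key/pointer pairs of 'left'
 * onto the head of 'right' with memmove()/memcpy() and adjusts bn_nchildren
 * on both nodes; rebalancing policy and key promotion are left to callers
 * such as nilfs_btree_carry_right() and nilfs_btree_split().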
 */
static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
					struct nilfs_btree_node *right,
					int n, int lncmax, int rncmax)
{
	__le64 *ldkeys, *rdkeys;
	__le64 *ldptrs, *rdptrs;
	int lnchildren, rnchildren;

	ldkeys = nilfs_btree_node_dkeys(left);
	ldptrs = nilfs_btree_node_dptrs(left, lncmax);
	lnchildren = nilfs_btree_node_get_nchildren(left);

	rdkeys = nilfs_btree_node_dkeys(right);
	rdptrs = nilfs_btree_node_dptrs(right, rncmax);
	rnchildren = nilfs_btree_node_get_nchildren(right);

	memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
	memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
	memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
	memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));

	lnchildren -= n;
	rnchildren += n;
	nilfs_btree_node_set_nchildren(left, lnchildren);
	nilfs_btree_node_set_nchildren(right, rnchildren);
}

/* Assume that the buffer head corresponding to node is locked. */
static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
				    __u64 key, __u64 ptr, int ncmax)
{
	__le64 *dkeys;
	__le64 *dptrs;
	int nchildren;

	dkeys = nilfs_btree_node_dkeys(node);
	dptrs = nilfs_btree_node_dptrs(node, ncmax);
	nchildren = nilfs_btree_node_get_nchildren(node);
	if (index < nchildren) {
		memmove(dkeys + index + 1, dkeys + index,
			(nchildren - index) * sizeof(*dkeys));
		memmove(dptrs + index + 1, dptrs + index,
			(nchildren - index) * sizeof(*dptrs));
	}
	dkeys[index] = cpu_to_le64(key);
	dptrs[index] = cpu_to_le64(ptr);
	nchildren++;
	nilfs_btree_node_set_nchildren(node, nchildren);
}

/* Assume that the buffer head corresponding to node is locked.
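 * nilfs_btree_node_delete() below hands the removed key and pointer back
 * through keyp/ptrp when they are non-NULL, then closes the gap with
 * memmove() and decrements bn_nchildren.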
*/ 262static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index, 263 __u64 *keyp, __u64 *ptrp, int ncmax) 264{ 265 __u64 key; 266 __u64 ptr; 267 __le64 *dkeys; 268 __le64 *dptrs; 269 int nchildren; 270 271 dkeys = nilfs_btree_node_dkeys(node); 272 dptrs = nilfs_btree_node_dptrs(node, ncmax); 273 key = le64_to_cpu(dkeys[index]); 274 ptr = le64_to_cpu(dptrs[index]); 275 nchildren = nilfs_btree_node_get_nchildren(node); 276 if (keyp != NULL) 277 *keyp = key; 278 if (ptrp != NULL) 279 *ptrp = ptr; 280 281 if (index < nchildren - 1) { 282 memmove(dkeys + index, dkeys + index + 1, 283 (nchildren - index - 1) * sizeof(*dkeys)); 284 memmove(dptrs + index, dptrs + index + 1, 285 (nchildren - index - 1) * sizeof(*dptrs)); 286 } 287 nchildren--; 288 nilfs_btree_node_set_nchildren(node, nchildren); 289} 290 291static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, 292 __u64 key, int *indexp) 293{ 294 __u64 nkey; 295 int index, low, high, s; 296 297 /* binary search */ 298 low = 0; 299 high = nilfs_btree_node_get_nchildren(node) - 1; 300 index = 0; 301 s = 0; 302 while (low <= high) { 303 index = (low + high) / 2; 304 nkey = nilfs_btree_node_get_key(node, index); 305 if (nkey == key) { 306 s = 0; 307 goto out; 308 } else if (nkey < key) { 309 low = index + 1; 310 s = -1; 311 } else { 312 high = index - 1; 313 s = 1; 314 } 315 } 316 317 /* adjust index */ 318 if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) { 319 if (s > 0 && index > 0) 320 index--; 321 } else if (s < 0) 322 index++; 323 324 out: 325 *indexp = index; 326 327 return s == 0; 328} 329 330/** 331 * nilfs_btree_node_broken - verify consistency of btree node 332 * @node: btree node block to be examined 333 * @size: node size (in bytes) 334 * @inode: host inode of btree 335 * @blocknr: block number 336 * 337 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 338 */ 339static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, 340 size_t size, struct inode *inode, 341 sector_t blocknr) 342{ 343 int level, flags, nchildren; 344 int ret = 0; 345 346 level = nilfs_btree_node_get_level(node); 347 flags = nilfs_btree_node_get_flags(node); 348 nchildren = nilfs_btree_node_get_nchildren(node); 349 350 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 351 level >= NILFS_BTREE_LEVEL_MAX || 352 (flags & NILFS_BTREE_NODE_ROOT) || 353 nchildren < 0 || 354 nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { 355 nilfs_crit(inode->i_sb, 356 "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", 357 inode->i_ino, (unsigned long long)blocknr, level, 358 flags, nchildren); 359 ret = 1; 360 } 361 return ret; 362} 363 364/** 365 * nilfs_btree_root_broken - verify consistency of btree root node 366 * @node: btree root node to be examined 367 * @inode: host inode of btree 368 * 369 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 
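 *
 * Unlike nilfs_btree_node_broken(), this check uses the root-specific limit
 * NILFS_BTREE_ROOT_NCHILDREN_MAX and does not reject the
 * NILFS_BTREE_NODE_ROOT flag, since the node examined here is the root
 * embedded in the bmap.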
370 */ 371static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, 372 struct inode *inode) 373{ 374 int level, flags, nchildren; 375 int ret = 0; 376 377 level = nilfs_btree_node_get_level(node); 378 flags = nilfs_btree_node_get_flags(node); 379 nchildren = nilfs_btree_node_get_nchildren(node); 380 381 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 382 level >= NILFS_BTREE_LEVEL_MAX || 383 nchildren < 0 || 384 nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { 385 nilfs_crit(inode->i_sb, 386 "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", 387 inode->i_ino, level, flags, nchildren); 388 ret = 1; 389 } 390 return ret; 391} 392 393int nilfs_btree_broken_node_block(struct buffer_head *bh) 394{ 395 struct inode *inode; 396 int ret; 397 398 if (buffer_nilfs_checked(bh)) 399 return 0; 400 401 inode = bh->b_page->mapping->host; 402 ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data, 403 bh->b_size, inode, bh->b_blocknr); 404 if (likely(!ret)) 405 set_buffer_nilfs_checked(bh); 406 return ret; 407} 408 409static struct nilfs_btree_node * 410nilfs_btree_get_root(const struct nilfs_bmap *btree) 411{ 412 return (struct nilfs_btree_node *)btree->b_u.u_data; 413} 414 415static struct nilfs_btree_node * 416nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) 417{ 418 return (struct nilfs_btree_node *)path[level].bp_bh->b_data; 419} 420 421static struct nilfs_btree_node * 422nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) 423{ 424 return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; 425} 426 427static int nilfs_btree_height(const struct nilfs_bmap *btree) 428{ 429 return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; 430} 431 432static struct nilfs_btree_node * 433nilfs_btree_get_node(const struct nilfs_bmap *btree, 434 const struct nilfs_btree_path *path, 435 int level, int *ncmaxp) 436{ 437 struct nilfs_btree_node *node; 438 439 if (level == nilfs_btree_height(btree) - 1) { 440 node = nilfs_btree_get_root(btree); 441 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX; 442 } else { 443 node = nilfs_btree_get_nonroot_node(path, level); 444 *ncmaxp = nilfs_btree_nchildren_per_block(btree); 445 } 446 return node; 447} 448 449static int nilfs_btree_bad_node(const struct nilfs_bmap *btree, 450 struct nilfs_btree_node *node, int level) 451{ 452 if (unlikely(nilfs_btree_node_get_level(node) != level)) { 453 dump_stack(); 454 nilfs_crit(btree->b_inode->i_sb, 455 "btree level mismatch (ino=%lu): %d != %d", 456 btree->b_inode->i_ino, 457 nilfs_btree_node_get_level(node), level); 458 return 1; 459 } 460 return 0; 461} 462 463struct nilfs_btree_readahead_info { 464 struct nilfs_btree_node *node; /* parent node */ 465 int max_ra_blocks; /* max nof blocks to read ahead */ 466 int index; /* current index on the parent node */ 467 int ncmax; /* nof children in the parent node */ 468}; 469 470static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 471 struct buffer_head **bhp, 472 const struct nilfs_btree_readahead_info *ra) 473{ 474 struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; 475 struct address_space *btnc = btnc_inode->i_mapping; 476 struct buffer_head *bh, *ra_bh; 477 sector_t submit_ptr = 0; 478 int ret; 479 480 ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh, 481 &submit_ptr); 482 if (ret) { 483 if (likely(ret == -EEXIST)) 484 goto out_check; 485 if (ret == -ENOENT) { 486 /* 487 * Block address translation failed due to invalid 488 * value 
of 'ptr'. In this case, return internal code 489 * -EINVAL (broken bmap) to notify bmap layer of fatal 490 * metadata corruption. 491 */ 492 ret = -EINVAL; 493 } 494 return ret; 495 } 496 497 if (ra) { 498 int i, n; 499 __u64 ptr2; 500 501 /* read ahead sibling nodes */ 502 for (n = ra->max_ra_blocks, i = ra->index + 1; 503 n > 0 && i < ra->ncmax; n--, i++) { 504 ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); 505 506 ret = nilfs_btnode_submit_block(btnc, ptr2, 0, 507 REQ_OP_READ, REQ_RAHEAD, 508 &ra_bh, &submit_ptr); 509 if (likely(!ret || ret == -EEXIST)) 510 brelse(ra_bh); 511 else if (ret != -EBUSY) 512 break; 513 if (!buffer_locked(bh)) 514 goto out_no_wait; 515 } 516 } 517 518 wait_on_buffer(bh); 519 520 out_no_wait: 521 if (!buffer_uptodate(bh)) { 522 nilfs_err(btree->b_inode->i_sb, 523 "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)", 524 btree->b_inode->i_ino, (unsigned long long)ptr); 525 brelse(bh); 526 return -EIO; 527 } 528 529 out_check: 530 if (nilfs_btree_broken_node_block(bh)) { 531 clear_buffer_uptodate(bh); 532 brelse(bh); 533 return -EINVAL; 534 } 535 536 *bhp = bh; 537 return 0; 538} 539 540static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 541 struct buffer_head **bhp) 542{ 543 return __nilfs_btree_get_block(btree, ptr, bhp, NULL); 544} 545 546static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree, 547 struct nilfs_btree_path *path, 548 __u64 key, __u64 *ptrp, int minlevel, 549 int readahead) 550{ 551 struct nilfs_btree_node *node; 552 struct nilfs_btree_readahead_info p, *ra; 553 __u64 ptr; 554 int level, index, found, ncmax, ret; 555 556 node = nilfs_btree_get_root(btree); 557 level = nilfs_btree_node_get_level(node); 558 if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0) 559 return -ENOENT; 560 561 found = nilfs_btree_node_lookup(node, key, &index); 562 ptr = nilfs_btree_node_get_ptr(node, index, 563 NILFS_BTREE_ROOT_NCHILDREN_MAX); 564 path[level].bp_bh = NULL; 565 path[level].bp_index = index; 566 567 ncmax = nilfs_btree_nchildren_per_block(btree); 568 569 while (--level >= minlevel) { 570 ra = NULL; 571 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) { 572 p.node = nilfs_btree_get_node(btree, path, level + 1, 573 &p.ncmax); 574 p.index = index; 575 p.max_ra_blocks = 7; 576 ra = &p; 577 } 578 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh, 579 ra); 580 if (ret < 0) 581 return ret; 582 583 node = nilfs_btree_get_nonroot_node(path, level); 584 if (nilfs_btree_bad_node(btree, node, level)) 585 return -EINVAL; 586 if (!found) 587 found = nilfs_btree_node_lookup(node, key, &index); 588 else 589 index = 0; 590 if (index < ncmax) { 591 ptr = nilfs_btree_node_get_ptr(node, index, ncmax); 592 } else { 593 WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); 594 /* insert */ 595 ptr = NILFS_BMAP_INVALID_PTR; 596 } 597 path[level].bp_index = index; 598 } 599 if (!found) 600 return -ENOENT; 601 602 if (ptrp != NULL) 603 *ptrp = ptr; 604 605 return 0; 606} 607 608static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, 609 struct nilfs_btree_path *path, 610 __u64 *keyp, __u64 *ptrp) 611{ 612 struct nilfs_btree_node *node; 613 __u64 ptr; 614 int index, level, ncmax, ret; 615 616 node = nilfs_btree_get_root(btree); 617 index = nilfs_btree_node_get_nchildren(node) - 1; 618 if (index < 0) 619 return -ENOENT; 620 level = nilfs_btree_node_get_level(node); 621 ptr = nilfs_btree_node_get_ptr(node, index, 622 NILFS_BTREE_ROOT_NCHILDREN_MAX); 623 path[level].bp_bh = NULL; 624 
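	/*
	 * No buffer is pinned for the root level above: the root node is
	 * embedded in the bmap itself (see nilfs_btree_get_root()).
	 */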
	path[level].bp_index = index;
	ncmax = nilfs_btree_nchildren_per_block(btree);

	for (level--; level > 0; level--) {
		ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
		if (ret < 0)
			return ret;
		node = nilfs_btree_get_nonroot_node(path, level);
		if (nilfs_btree_bad_node(btree, node, level))
			return -EINVAL;
		index = nilfs_btree_node_get_nchildren(node) - 1;
		ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
		path[level].bp_index = index;
	}

	if (keyp != NULL)
		*keyp = nilfs_btree_node_get_key(node, index);
	if (ptrp != NULL)
		*ptrp = ptr;

	return 0;
}

/**
 * nilfs_btree_get_next_key - get next valid key from btree path array
 * @btree: bmap struct of btree
 * @path: array of nilfs_btree_path struct
 * @minlevel: start level
 * @nextkey: place to store the next valid key
 *
 * Return Value: If a next key was found, 0 is returned. Otherwise,
 * -ENOENT is returned.
 */
static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
				    const struct nilfs_btree_path *path,
				    int minlevel, __u64 *nextkey)
{
	struct nilfs_btree_node *node;
	int maxlevel = nilfs_btree_height(btree) - 1;
	int index, next_adj, level;

	/* Next index is already set to bp_index for leaf nodes. */
	next_adj = 0;
	for (level = minlevel; level <= maxlevel; level++) {
		if (level == maxlevel)
			node = nilfs_btree_get_root(btree);
		else
			node = nilfs_btree_get_nonroot_node(path, level);

		index = path[level].bp_index + next_adj;
		if (index < nilfs_btree_node_get_nchildren(node)) {
			/* Next key is in this node */
			*nextkey = nilfs_btree_node_get_key(node, index);
			return 0;
		}
		/* For non-leaf nodes, next index is stored at bp_index + 1.
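		 * From the second iteration on, the scan therefore resumes at
		 * the slot to the right of the child this path descended
		 * through, which is where the next larger key can appear.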
*/ 680 next_adj = 1; 681 } 682 return -ENOENT; 683} 684 685static int nilfs_btree_lookup(const struct nilfs_bmap *btree, 686 __u64 key, int level, __u64 *ptrp) 687{ 688 struct nilfs_btree_path *path; 689 int ret; 690 691 path = nilfs_btree_alloc_path(); 692 if (path == NULL) 693 return -ENOMEM; 694 695 ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0); 696 697 nilfs_btree_free_path(path); 698 699 return ret; 700} 701 702static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, 703 __u64 key, __u64 *ptrp, 704 unsigned int maxblocks) 705{ 706 struct nilfs_btree_path *path; 707 struct nilfs_btree_node *node; 708 struct inode *dat = NULL; 709 __u64 ptr, ptr2; 710 sector_t blocknr; 711 int level = NILFS_BTREE_LEVEL_NODE_MIN; 712 int ret, cnt, index, maxlevel, ncmax; 713 struct nilfs_btree_readahead_info p; 714 715 path = nilfs_btree_alloc_path(); 716 if (path == NULL) 717 return -ENOMEM; 718 719 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1); 720 if (ret < 0) 721 goto out; 722 723 if (NILFS_BMAP_USE_VBN(btree)) { 724 dat = nilfs_bmap_get_dat(btree); 725 ret = nilfs_dat_translate(dat, ptr, &blocknr); 726 if (ret < 0) 727 goto out; 728 ptr = blocknr; 729 } 730 cnt = 1; 731 if (cnt == maxblocks) 732 goto end; 733 734 maxlevel = nilfs_btree_height(btree) - 1; 735 node = nilfs_btree_get_node(btree, path, level, &ncmax); 736 index = path[level].bp_index + 1; 737 for (;;) { 738 while (index < nilfs_btree_node_get_nchildren(node)) { 739 if (nilfs_btree_node_get_key(node, index) != 740 key + cnt) 741 goto end; 742 ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax); 743 if (dat) { 744 ret = nilfs_dat_translate(dat, ptr2, &blocknr); 745 if (ret < 0) 746 goto out; 747 ptr2 = blocknr; 748 } 749 if (ptr2 != ptr + cnt || ++cnt == maxblocks) 750 goto end; 751 index++; 752 continue; 753 } 754 if (level == maxlevel) 755 break; 756 757 /* look-up right sibling node */ 758 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); 759 p.index = path[level + 1].bp_index + 1; 760 p.max_ra_blocks = 7; 761 if (p.index >= nilfs_btree_node_get_nchildren(p.node) || 762 nilfs_btree_node_get_key(p.node, p.index) != key + cnt) 763 break; 764 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax); 765 path[level + 1].bp_index = p.index; 766 767 brelse(path[level].bp_bh); 768 path[level].bp_bh = NULL; 769 770 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh, 771 &p); 772 if (ret < 0) 773 goto out; 774 node = nilfs_btree_get_nonroot_node(path, level); 775 ncmax = nilfs_btree_nchildren_per_block(btree); 776 index = 0; 777 path[level].bp_index = index; 778 } 779 end: 780 *ptrp = ptr; 781 ret = cnt; 782 out: 783 nilfs_btree_free_path(path); 784 return ret; 785} 786 787static void nilfs_btree_promote_key(struct nilfs_bmap *btree, 788 struct nilfs_btree_path *path, 789 int level, __u64 key) 790{ 791 if (level < nilfs_btree_height(btree) - 1) { 792 do { 793 nilfs_btree_node_set_key( 794 nilfs_btree_get_nonroot_node(path, level), 795 path[level].bp_index, key); 796 if (!buffer_dirty(path[level].bp_bh)) 797 mark_buffer_dirty(path[level].bp_bh); 798 } while ((path[level].bp_index == 0) && 799 (++level < nilfs_btree_height(btree) - 1)); 800 } 801 802 /* root */ 803 if (level == nilfs_btree_height(btree) - 1) { 804 nilfs_btree_node_set_key(nilfs_btree_get_root(btree), 805 path[level].bp_index, key); 806 } 807} 808 809static void nilfs_btree_do_insert(struct nilfs_bmap *btree, 810 struct nilfs_btree_path *path, 811 int level, __u64 *keyp, __u64 *ptrp) 812{ 813 struct 
nilfs_btree_node *node; 814 int ncblk; 815 816 if (level < nilfs_btree_height(btree) - 1) { 817 node = nilfs_btree_get_nonroot_node(path, level); 818 ncblk = nilfs_btree_nchildren_per_block(btree); 819 nilfs_btree_node_insert(node, path[level].bp_index, 820 *keyp, *ptrp, ncblk); 821 if (!buffer_dirty(path[level].bp_bh)) 822 mark_buffer_dirty(path[level].bp_bh); 823 824 if (path[level].bp_index == 0) 825 nilfs_btree_promote_key(btree, path, level + 1, 826 nilfs_btree_node_get_key(node, 827 0)); 828 } else { 829 node = nilfs_btree_get_root(btree); 830 nilfs_btree_node_insert(node, path[level].bp_index, 831 *keyp, *ptrp, 832 NILFS_BTREE_ROOT_NCHILDREN_MAX); 833 } 834} 835 836static void nilfs_btree_carry_left(struct nilfs_bmap *btree, 837 struct nilfs_btree_path *path, 838 int level, __u64 *keyp, __u64 *ptrp) 839{ 840 struct nilfs_btree_node *node, *left; 841 int nchildren, lnchildren, n, move, ncblk; 842 843 node = nilfs_btree_get_nonroot_node(path, level); 844 left = nilfs_btree_get_sib_node(path, level); 845 nchildren = nilfs_btree_node_get_nchildren(node); 846 lnchildren = nilfs_btree_node_get_nchildren(left); 847 ncblk = nilfs_btree_nchildren_per_block(btree); 848 move = 0; 849 850 n = (nchildren + lnchildren + 1) / 2 - lnchildren; 851 if (n > path[level].bp_index) { 852 /* move insert point */ 853 n--; 854 move = 1; 855 } 856 857 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 858 859 if (!buffer_dirty(path[level].bp_bh)) 860 mark_buffer_dirty(path[level].bp_bh); 861 if (!buffer_dirty(path[level].bp_sib_bh)) 862 mark_buffer_dirty(path[level].bp_sib_bh); 863 864 nilfs_btree_promote_key(btree, path, level + 1, 865 nilfs_btree_node_get_key(node, 0)); 866 867 if (move) { 868 brelse(path[level].bp_bh); 869 path[level].bp_bh = path[level].bp_sib_bh; 870 path[level].bp_sib_bh = NULL; 871 path[level].bp_index += lnchildren; 872 path[level + 1].bp_index--; 873 } else { 874 brelse(path[level].bp_sib_bh); 875 path[level].bp_sib_bh = NULL; 876 path[level].bp_index -= n; 877 } 878 879 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 880} 881 882static void nilfs_btree_carry_right(struct nilfs_bmap *btree, 883 struct nilfs_btree_path *path, 884 int level, __u64 *keyp, __u64 *ptrp) 885{ 886 struct nilfs_btree_node *node, *right; 887 int nchildren, rnchildren, n, move, ncblk; 888 889 node = nilfs_btree_get_nonroot_node(path, level); 890 right = nilfs_btree_get_sib_node(path, level); 891 nchildren = nilfs_btree_node_get_nchildren(node); 892 rnchildren = nilfs_btree_node_get_nchildren(right); 893 ncblk = nilfs_btree_nchildren_per_block(btree); 894 move = 0; 895 896 n = (nchildren + rnchildren + 1) / 2 - rnchildren; 897 if (n > nchildren - path[level].bp_index) { 898 /* move insert point */ 899 n--; 900 move = 1; 901 } 902 903 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 904 905 if (!buffer_dirty(path[level].bp_bh)) 906 mark_buffer_dirty(path[level].bp_bh); 907 if (!buffer_dirty(path[level].bp_sib_bh)) 908 mark_buffer_dirty(path[level].bp_sib_bh); 909 910 path[level + 1].bp_index++; 911 nilfs_btree_promote_key(btree, path, level + 1, 912 nilfs_btree_node_get_key(right, 0)); 913 path[level + 1].bp_index--; 914 915 if (move) { 916 brelse(path[level].bp_bh); 917 path[level].bp_bh = path[level].bp_sib_bh; 918 path[level].bp_sib_bh = NULL; 919 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 920 path[level + 1].bp_index++; 921 } else { 922 brelse(path[level].bp_sib_bh); 923 path[level].bp_sib_bh = NULL; 924 } 925 926 nilfs_btree_do_insert(btree, path, level, keyp, 
ptrp); 927} 928 929static void nilfs_btree_split(struct nilfs_bmap *btree, 930 struct nilfs_btree_path *path, 931 int level, __u64 *keyp, __u64 *ptrp) 932{ 933 struct nilfs_btree_node *node, *right; 934 int nchildren, n, move, ncblk; 935 936 node = nilfs_btree_get_nonroot_node(path, level); 937 right = nilfs_btree_get_sib_node(path, level); 938 nchildren = nilfs_btree_node_get_nchildren(node); 939 ncblk = nilfs_btree_nchildren_per_block(btree); 940 move = 0; 941 942 n = (nchildren + 1) / 2; 943 if (n > nchildren - path[level].bp_index) { 944 n--; 945 move = 1; 946 } 947 948 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 949 950 if (!buffer_dirty(path[level].bp_bh)) 951 mark_buffer_dirty(path[level].bp_bh); 952 if (!buffer_dirty(path[level].bp_sib_bh)) 953 mark_buffer_dirty(path[level].bp_sib_bh); 954 955 if (move) { 956 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 957 nilfs_btree_node_insert(right, path[level].bp_index, 958 *keyp, *ptrp, ncblk); 959 960 *keyp = nilfs_btree_node_get_key(right, 0); 961 *ptrp = path[level].bp_newreq.bpr_ptr; 962 963 brelse(path[level].bp_bh); 964 path[level].bp_bh = path[level].bp_sib_bh; 965 path[level].bp_sib_bh = NULL; 966 } else { 967 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 968 969 *keyp = nilfs_btree_node_get_key(right, 0); 970 *ptrp = path[level].bp_newreq.bpr_ptr; 971 972 brelse(path[level].bp_sib_bh); 973 path[level].bp_sib_bh = NULL; 974 } 975 976 path[level + 1].bp_index++; 977} 978 979static void nilfs_btree_grow(struct nilfs_bmap *btree, 980 struct nilfs_btree_path *path, 981 int level, __u64 *keyp, __u64 *ptrp) 982{ 983 struct nilfs_btree_node *root, *child; 984 int n, ncblk; 985 986 root = nilfs_btree_get_root(btree); 987 child = nilfs_btree_get_sib_node(path, level); 988 ncblk = nilfs_btree_nchildren_per_block(btree); 989 990 n = nilfs_btree_node_get_nchildren(root); 991 992 nilfs_btree_node_move_right(root, child, n, 993 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 994 nilfs_btree_node_set_level(root, level + 1); 995 996 if (!buffer_dirty(path[level].bp_sib_bh)) 997 mark_buffer_dirty(path[level].bp_sib_bh); 998 999 path[level].bp_bh = path[level].bp_sib_bh; 1000 path[level].bp_sib_bh = NULL; 1001 1002 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 1003 1004 *keyp = nilfs_btree_node_get_key(child, 0); 1005 *ptrp = path[level].bp_newreq.bpr_ptr; 1006} 1007 1008static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree, 1009 const struct nilfs_btree_path *path) 1010{ 1011 struct nilfs_btree_node *node; 1012 int level, ncmax; 1013 1014 if (path == NULL) 1015 return NILFS_BMAP_INVALID_PTR; 1016 1017 /* left sibling */ 1018 level = NILFS_BTREE_LEVEL_NODE_MIN; 1019 if (path[level].bp_index > 0) { 1020 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1021 return nilfs_btree_node_get_ptr(node, 1022 path[level].bp_index - 1, 1023 ncmax); 1024 } 1025 1026 /* parent */ 1027 level = NILFS_BTREE_LEVEL_NODE_MIN + 1; 1028 if (level <= nilfs_btree_height(btree) - 1) { 1029 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1030 return nilfs_btree_node_get_ptr(node, path[level].bp_index, 1031 ncmax); 1032 } 1033 1034 return NILFS_BMAP_INVALID_PTR; 1035} 1036 1037static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree, 1038 const struct nilfs_btree_path *path, 1039 __u64 key) 1040{ 1041 __u64 ptr; 1042 1043 ptr = nilfs_bmap_find_target_seq(btree, key); 1044 if (ptr != NILFS_BMAP_INVALID_PTR) 1045 /* sequential access */ 1046 return ptr; 1047 1048 ptr = nilfs_btree_find_near(btree, 
path); 1049 if (ptr != NILFS_BMAP_INVALID_PTR) 1050 /* near */ 1051 return ptr; 1052 1053 /* block group */ 1054 return nilfs_bmap_find_target_in_group(btree); 1055} 1056 1057static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree, 1058 struct nilfs_btree_path *path, 1059 int *levelp, __u64 key, __u64 ptr, 1060 struct nilfs_bmap_stats *stats) 1061{ 1062 struct buffer_head *bh; 1063 struct nilfs_btree_node *node, *parent, *sib; 1064 __u64 sibptr; 1065 int pindex, level, ncmax, ncblk, ret; 1066 struct inode *dat = NULL; 1067 1068 stats->bs_nblocks = 0; 1069 level = NILFS_BTREE_LEVEL_DATA; 1070 1071 /* allocate a new ptr for data block */ 1072 if (NILFS_BMAP_USE_VBN(btree)) { 1073 path[level].bp_newreq.bpr_ptr = 1074 nilfs_btree_find_target_v(btree, path, key); 1075 dat = nilfs_bmap_get_dat(btree); 1076 } 1077 1078 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1079 if (ret < 0) 1080 goto err_out_data; 1081 1082 ncblk = nilfs_btree_nchildren_per_block(btree); 1083 1084 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 1085 level < nilfs_btree_height(btree) - 1; 1086 level++) { 1087 node = nilfs_btree_get_nonroot_node(path, level); 1088 if (nilfs_btree_node_get_nchildren(node) < ncblk) { 1089 path[level].bp_op = nilfs_btree_do_insert; 1090 stats->bs_nblocks++; 1091 goto out; 1092 } 1093 1094 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1095 pindex = path[level + 1].bp_index; 1096 1097 /* left sibling */ 1098 if (pindex > 0) { 1099 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1100 ncmax); 1101 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1102 if (ret < 0) 1103 goto err_out_child_node; 1104 sib = (struct nilfs_btree_node *)bh->b_data; 1105 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1106 path[level].bp_sib_bh = bh; 1107 path[level].bp_op = nilfs_btree_carry_left; 1108 stats->bs_nblocks++; 1109 goto out; 1110 } else { 1111 brelse(bh); 1112 } 1113 } 1114 1115 /* right sibling */ 1116 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) { 1117 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1118 ncmax); 1119 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1120 if (ret < 0) 1121 goto err_out_child_node; 1122 sib = (struct nilfs_btree_node *)bh->b_data; 1123 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1124 path[level].bp_sib_bh = bh; 1125 path[level].bp_op = nilfs_btree_carry_right; 1126 stats->bs_nblocks++; 1127 goto out; 1128 } else { 1129 brelse(bh); 1130 } 1131 } 1132 1133 /* split */ 1134 path[level].bp_newreq.bpr_ptr = 1135 path[level - 1].bp_newreq.bpr_ptr + 1; 1136 ret = nilfs_bmap_prepare_alloc_ptr(btree, 1137 &path[level].bp_newreq, dat); 1138 if (ret < 0) 1139 goto err_out_child_node; 1140 ret = nilfs_btree_get_new_block(btree, 1141 path[level].bp_newreq.bpr_ptr, 1142 &bh); 1143 if (ret < 0) 1144 goto err_out_curr_node; 1145 1146 stats->bs_nblocks++; 1147 1148 sib = (struct nilfs_btree_node *)bh->b_data; 1149 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL); 1150 path[level].bp_sib_bh = bh; 1151 path[level].bp_op = nilfs_btree_split; 1152 } 1153 1154 /* root */ 1155 node = nilfs_btree_get_root(btree); 1156 if (nilfs_btree_node_get_nchildren(node) < 1157 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1158 path[level].bp_op = nilfs_btree_do_insert; 1159 stats->bs_nblocks++; 1160 goto out; 1161 } 1162 1163 /* grow */ 1164 path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; 1165 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1166 if (ret < 0) 1167 goto err_out_child_node; 
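	/*
	 * The block allocated here becomes the old root's only child: in
	 * nilfs_btree_grow() all of the root's entries are moved into it and
	 * the root's level is raised by one.
	 */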
1168 ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, 1169 &bh); 1170 if (ret < 0) 1171 goto err_out_curr_node; 1172 1173 nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data, 1174 0, level, 0, ncblk, NULL, NULL); 1175 path[level].bp_sib_bh = bh; 1176 path[level].bp_op = nilfs_btree_grow; 1177 1178 level++; 1179 path[level].bp_op = nilfs_btree_do_insert; 1180 1181 /* a newly-created node block and a data block are added */ 1182 stats->bs_nblocks += 2; 1183 1184 /* success */ 1185 out: 1186 *levelp = level; 1187 return ret; 1188 1189 /* error */ 1190 err_out_curr_node: 1191 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1192 err_out_child_node: 1193 for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { 1194 nilfs_btnode_delete(path[level].bp_sib_bh); 1195 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1196 1197 } 1198 1199 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1200 err_out_data: 1201 *levelp = level; 1202 stats->bs_nblocks = 0; 1203 return ret; 1204} 1205 1206static void nilfs_btree_commit_insert(struct nilfs_bmap *btree, 1207 struct nilfs_btree_path *path, 1208 int maxlevel, __u64 key, __u64 ptr) 1209{ 1210 struct inode *dat = NULL; 1211 int level; 1212 1213 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1214 ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; 1215 if (NILFS_BMAP_USE_VBN(btree)) { 1216 nilfs_bmap_set_target_v(btree, key, ptr); 1217 dat = nilfs_bmap_get_dat(btree); 1218 } 1219 1220 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1221 nilfs_bmap_commit_alloc_ptr(btree, 1222 &path[level - 1].bp_newreq, dat); 1223 path[level].bp_op(btree, path, level, &key, &ptr); 1224 } 1225 1226 if (!nilfs_bmap_dirty(btree)) 1227 nilfs_bmap_set_dirty(btree); 1228} 1229 1230static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr) 1231{ 1232 struct nilfs_btree_path *path; 1233 struct nilfs_bmap_stats stats; 1234 int level, ret; 1235 1236 path = nilfs_btree_alloc_path(); 1237 if (path == NULL) 1238 return -ENOMEM; 1239 1240 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1241 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1242 if (ret != -ENOENT) { 1243 if (ret == 0) 1244 ret = -EEXIST; 1245 goto out; 1246 } 1247 1248 ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats); 1249 if (ret < 0) 1250 goto out; 1251 nilfs_btree_commit_insert(btree, path, level, key, ptr); 1252 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1253 1254 out: 1255 nilfs_btree_free_path(path); 1256 return ret; 1257} 1258 1259static void nilfs_btree_do_delete(struct nilfs_bmap *btree, 1260 struct nilfs_btree_path *path, 1261 int level, __u64 *keyp, __u64 *ptrp) 1262{ 1263 struct nilfs_btree_node *node; 1264 int ncblk; 1265 1266 if (level < nilfs_btree_height(btree) - 1) { 1267 node = nilfs_btree_get_nonroot_node(path, level); 1268 ncblk = nilfs_btree_nchildren_per_block(btree); 1269 nilfs_btree_node_delete(node, path[level].bp_index, 1270 keyp, ptrp, ncblk); 1271 if (!buffer_dirty(path[level].bp_bh)) 1272 mark_buffer_dirty(path[level].bp_bh); 1273 if (path[level].bp_index == 0) 1274 nilfs_btree_promote_key(btree, path, level + 1, 1275 nilfs_btree_node_get_key(node, 0)); 1276 } else { 1277 node = nilfs_btree_get_root(btree); 1278 nilfs_btree_node_delete(node, path[level].bp_index, 1279 keyp, ptrp, 1280 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1281 } 1282} 1283 1284static void nilfs_btree_borrow_left(struct nilfs_bmap *btree, 1285 struct nilfs_btree_path 
*path, 1286 int level, __u64 *keyp, __u64 *ptrp) 1287{ 1288 struct nilfs_btree_node *node, *left; 1289 int nchildren, lnchildren, n, ncblk; 1290 1291 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1292 1293 node = nilfs_btree_get_nonroot_node(path, level); 1294 left = nilfs_btree_get_sib_node(path, level); 1295 nchildren = nilfs_btree_node_get_nchildren(node); 1296 lnchildren = nilfs_btree_node_get_nchildren(left); 1297 ncblk = nilfs_btree_nchildren_per_block(btree); 1298 1299 n = (nchildren + lnchildren) / 2 - nchildren; 1300 1301 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); 1302 1303 if (!buffer_dirty(path[level].bp_bh)) 1304 mark_buffer_dirty(path[level].bp_bh); 1305 if (!buffer_dirty(path[level].bp_sib_bh)) 1306 mark_buffer_dirty(path[level].bp_sib_bh); 1307 1308 nilfs_btree_promote_key(btree, path, level + 1, 1309 nilfs_btree_node_get_key(node, 0)); 1310 1311 brelse(path[level].bp_sib_bh); 1312 path[level].bp_sib_bh = NULL; 1313 path[level].bp_index += n; 1314} 1315 1316static void nilfs_btree_borrow_right(struct nilfs_bmap *btree, 1317 struct nilfs_btree_path *path, 1318 int level, __u64 *keyp, __u64 *ptrp) 1319{ 1320 struct nilfs_btree_node *node, *right; 1321 int nchildren, rnchildren, n, ncblk; 1322 1323 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1324 1325 node = nilfs_btree_get_nonroot_node(path, level); 1326 right = nilfs_btree_get_sib_node(path, level); 1327 nchildren = nilfs_btree_node_get_nchildren(node); 1328 rnchildren = nilfs_btree_node_get_nchildren(right); 1329 ncblk = nilfs_btree_nchildren_per_block(btree); 1330 1331 n = (nchildren + rnchildren) / 2 - nchildren; 1332 1333 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1334 1335 if (!buffer_dirty(path[level].bp_bh)) 1336 mark_buffer_dirty(path[level].bp_bh); 1337 if (!buffer_dirty(path[level].bp_sib_bh)) 1338 mark_buffer_dirty(path[level].bp_sib_bh); 1339 1340 path[level + 1].bp_index++; 1341 nilfs_btree_promote_key(btree, path, level + 1, 1342 nilfs_btree_node_get_key(right, 0)); 1343 path[level + 1].bp_index--; 1344 1345 brelse(path[level].bp_sib_bh); 1346 path[level].bp_sib_bh = NULL; 1347} 1348 1349static void nilfs_btree_concat_left(struct nilfs_bmap *btree, 1350 struct nilfs_btree_path *path, 1351 int level, __u64 *keyp, __u64 *ptrp) 1352{ 1353 struct nilfs_btree_node *node, *left; 1354 int n, ncblk; 1355 1356 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1357 1358 node = nilfs_btree_get_nonroot_node(path, level); 1359 left = nilfs_btree_get_sib_node(path, level); 1360 ncblk = nilfs_btree_nchildren_per_block(btree); 1361 1362 n = nilfs_btree_node_get_nchildren(node); 1363 1364 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 1365 1366 if (!buffer_dirty(path[level].bp_sib_bh)) 1367 mark_buffer_dirty(path[level].bp_sib_bh); 1368 1369 nilfs_btnode_delete(path[level].bp_bh); 1370 path[level].bp_bh = path[level].bp_sib_bh; 1371 path[level].bp_sib_bh = NULL; 1372 path[level].bp_index += nilfs_btree_node_get_nchildren(left); 1373} 1374 1375static void nilfs_btree_concat_right(struct nilfs_bmap *btree, 1376 struct nilfs_btree_path *path, 1377 int level, __u64 *keyp, __u64 *ptrp) 1378{ 1379 struct nilfs_btree_node *node, *right; 1380 int n, ncblk; 1381 1382 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1383 1384 node = nilfs_btree_get_nonroot_node(path, level); 1385 right = nilfs_btree_get_sib_node(path, level); 1386 ncblk = nilfs_btree_nchildren_per_block(btree); 1387 1388 n = nilfs_btree_node_get_nchildren(right); 1389 1390 
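	/*
	 * Absorb every entry of the right sibling into this node; the emptied
	 * sibling block is deleted below and path[level + 1].bp_index is
	 * advanced so that the next level removes the parent's pointer to it.
	 */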
nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1391 1392 if (!buffer_dirty(path[level].bp_bh)) 1393 mark_buffer_dirty(path[level].bp_bh); 1394 1395 nilfs_btnode_delete(path[level].bp_sib_bh); 1396 path[level].bp_sib_bh = NULL; 1397 path[level + 1].bp_index++; 1398} 1399 1400static void nilfs_btree_shrink(struct nilfs_bmap *btree, 1401 struct nilfs_btree_path *path, 1402 int level, __u64 *keyp, __u64 *ptrp) 1403{ 1404 struct nilfs_btree_node *root, *child; 1405 int n, ncblk; 1406 1407 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1408 1409 root = nilfs_btree_get_root(btree); 1410 child = nilfs_btree_get_nonroot_node(path, level); 1411 ncblk = nilfs_btree_nchildren_per_block(btree); 1412 1413 nilfs_btree_node_delete(root, 0, NULL, NULL, 1414 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1415 nilfs_btree_node_set_level(root, level); 1416 n = nilfs_btree_node_get_nchildren(child); 1417 nilfs_btree_node_move_left(root, child, n, 1418 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 1419 1420 nilfs_btnode_delete(path[level].bp_bh); 1421 path[level].bp_bh = NULL; 1422} 1423 1424static void nilfs_btree_nop(struct nilfs_bmap *btree, 1425 struct nilfs_btree_path *path, 1426 int level, __u64 *keyp, __u64 *ptrp) 1427{ 1428} 1429 1430static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, 1431 struct nilfs_btree_path *path, 1432 int *levelp, 1433 struct nilfs_bmap_stats *stats, 1434 struct inode *dat) 1435{ 1436 struct buffer_head *bh; 1437 struct nilfs_btree_node *node, *parent, *sib; 1438 __u64 sibptr; 1439 int pindex, dindex, level, ncmin, ncmax, ncblk, ret; 1440 1441 ret = 0; 1442 stats->bs_nblocks = 0; 1443 ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); 1444 ncblk = nilfs_btree_nchildren_per_block(btree); 1445 1446 for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index; 1447 level < nilfs_btree_height(btree) - 1; 1448 level++) { 1449 node = nilfs_btree_get_nonroot_node(path, level); 1450 path[level].bp_oldreq.bpr_ptr = 1451 nilfs_btree_node_get_ptr(node, dindex, ncblk); 1452 ret = nilfs_bmap_prepare_end_ptr(btree, 1453 &path[level].bp_oldreq, dat); 1454 if (ret < 0) 1455 goto err_out_child_node; 1456 1457 if (nilfs_btree_node_get_nchildren(node) > ncmin) { 1458 path[level].bp_op = nilfs_btree_do_delete; 1459 stats->bs_nblocks++; 1460 goto out; 1461 } 1462 1463 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1464 pindex = path[level + 1].bp_index; 1465 dindex = pindex; 1466 1467 if (pindex > 0) { 1468 /* left sibling */ 1469 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1470 ncmax); 1471 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1472 if (ret < 0) 1473 goto err_out_curr_node; 1474 sib = (struct nilfs_btree_node *)bh->b_data; 1475 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1476 path[level].bp_sib_bh = bh; 1477 path[level].bp_op = nilfs_btree_borrow_left; 1478 stats->bs_nblocks++; 1479 goto out; 1480 } else { 1481 path[level].bp_sib_bh = bh; 1482 path[level].bp_op = nilfs_btree_concat_left; 1483 stats->bs_nblocks++; 1484 /* continue; */ 1485 } 1486 } else if (pindex < 1487 nilfs_btree_node_get_nchildren(parent) - 1) { 1488 /* right sibling */ 1489 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1490 ncmax); 1491 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1492 if (ret < 0) 1493 goto err_out_curr_node; 1494 sib = (struct nilfs_btree_node *)bh->b_data; 1495 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1496 path[level].bp_sib_bh = bh; 1497 path[level].bp_op = nilfs_btree_borrow_right; 1498 
stats->bs_nblocks++; 1499 goto out; 1500 } else { 1501 path[level].bp_sib_bh = bh; 1502 path[level].bp_op = nilfs_btree_concat_right; 1503 stats->bs_nblocks++; 1504 /* 1505 * When merging right sibling node 1506 * into the current node, pointer to 1507 * the right sibling node must be 1508 * terminated instead. The adjustment 1509 * below is required for that. 1510 */ 1511 dindex = pindex + 1; 1512 /* continue; */ 1513 } 1514 } else { 1515 /* no siblings */ 1516 /* the only child of the root node */ 1517 WARN_ON(level != nilfs_btree_height(btree) - 2); 1518 if (nilfs_btree_node_get_nchildren(node) - 1 <= 1519 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1520 path[level].bp_op = nilfs_btree_shrink; 1521 stats->bs_nblocks += 2; 1522 level++; 1523 path[level].bp_op = nilfs_btree_nop; 1524 goto shrink_root_child; 1525 } else { 1526 path[level].bp_op = nilfs_btree_do_delete; 1527 stats->bs_nblocks++; 1528 goto out; 1529 } 1530 } 1531 } 1532 1533 /* child of the root node is deleted */ 1534 path[level].bp_op = nilfs_btree_do_delete; 1535 stats->bs_nblocks++; 1536 1537shrink_root_child: 1538 node = nilfs_btree_get_root(btree); 1539 path[level].bp_oldreq.bpr_ptr = 1540 nilfs_btree_node_get_ptr(node, dindex, 1541 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1542 1543 ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); 1544 if (ret < 0) 1545 goto err_out_child_node; 1546 1547 /* success */ 1548 out: 1549 *levelp = level; 1550 return ret; 1551 1552 /* error */ 1553 err_out_curr_node: 1554 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1555 err_out_child_node: 1556 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { 1557 brelse(path[level].bp_sib_bh); 1558 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1559 } 1560 *levelp = level; 1561 stats->bs_nblocks = 0; 1562 return ret; 1563} 1564 1565static void nilfs_btree_commit_delete(struct nilfs_bmap *btree, 1566 struct nilfs_btree_path *path, 1567 int maxlevel, struct inode *dat) 1568{ 1569 int level; 1570 1571 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1572 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat); 1573 path[level].bp_op(btree, path, level, NULL, NULL); 1574 } 1575 1576 if (!nilfs_bmap_dirty(btree)) 1577 nilfs_bmap_set_dirty(btree); 1578} 1579 1580static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key) 1581 1582{ 1583 struct nilfs_btree_path *path; 1584 struct nilfs_bmap_stats stats; 1585 struct inode *dat; 1586 int level, ret; 1587 1588 path = nilfs_btree_alloc_path(); 1589 if (path == NULL) 1590 return -ENOMEM; 1591 1592 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1593 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1594 if (ret < 0) 1595 goto out; 1596 1597 1598 dat = NILFS_BMAP_USE_VBN(btree) ? 
nilfs_bmap_get_dat(btree) : NULL; 1599 1600 ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); 1601 if (ret < 0) 1602 goto out; 1603 nilfs_btree_commit_delete(btree, path, level, dat); 1604 nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks); 1605 1606out: 1607 nilfs_btree_free_path(path); 1608 return ret; 1609} 1610 1611static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start, 1612 __u64 *keyp) 1613{ 1614 struct nilfs_btree_path *path; 1615 const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN; 1616 int ret; 1617 1618 path = nilfs_btree_alloc_path(); 1619 if (!path) 1620 return -ENOMEM; 1621 1622 ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0); 1623 if (!ret) 1624 *keyp = start; 1625 else if (ret == -ENOENT) 1626 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp); 1627 1628 nilfs_btree_free_path(path); 1629 return ret; 1630} 1631 1632static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) 1633{ 1634 struct nilfs_btree_path *path; 1635 int ret; 1636 1637 path = nilfs_btree_alloc_path(); 1638 if (path == NULL) 1639 return -ENOMEM; 1640 1641 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); 1642 1643 nilfs_btree_free_path(path); 1644 1645 return ret; 1646} 1647 1648static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) 1649{ 1650 struct buffer_head *bh; 1651 struct nilfs_btree_node *root, *node; 1652 __u64 maxkey, nextmaxkey; 1653 __u64 ptr; 1654 int nchildren, ret; 1655 1656 root = nilfs_btree_get_root(btree); 1657 switch (nilfs_btree_height(btree)) { 1658 case 2: 1659 bh = NULL; 1660 node = root; 1661 break; 1662 case 3: 1663 nchildren = nilfs_btree_node_get_nchildren(root); 1664 if (nchildren > 1) 1665 return 0; 1666 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1667 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1668 ret = nilfs_btree_get_block(btree, ptr, &bh); 1669 if (ret < 0) 1670 return ret; 1671 node = (struct nilfs_btree_node *)bh->b_data; 1672 break; 1673 default: 1674 return 0; 1675 } 1676 1677 nchildren = nilfs_btree_node_get_nchildren(node); 1678 maxkey = nilfs_btree_node_get_key(node, nchildren - 1); 1679 nextmaxkey = (nchildren > 1) ? 
1680 nilfs_btree_node_get_key(node, nchildren - 2) : 0; 1681 if (bh != NULL) 1682 brelse(bh); 1683 1684 return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); 1685} 1686 1687static int nilfs_btree_gather_data(struct nilfs_bmap *btree, 1688 __u64 *keys, __u64 *ptrs, int nitems) 1689{ 1690 struct buffer_head *bh; 1691 struct nilfs_btree_node *node, *root; 1692 __le64 *dkeys; 1693 __le64 *dptrs; 1694 __u64 ptr; 1695 int nchildren, ncmax, i, ret; 1696 1697 root = nilfs_btree_get_root(btree); 1698 switch (nilfs_btree_height(btree)) { 1699 case 2: 1700 bh = NULL; 1701 node = root; 1702 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX; 1703 break; 1704 case 3: 1705 nchildren = nilfs_btree_node_get_nchildren(root); 1706 WARN_ON(nchildren > 1); 1707 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1708 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1709 ret = nilfs_btree_get_block(btree, ptr, &bh); 1710 if (ret < 0) 1711 return ret; 1712 node = (struct nilfs_btree_node *)bh->b_data; 1713 ncmax = nilfs_btree_nchildren_per_block(btree); 1714 break; 1715 default: 1716 node = NULL; 1717 return -EINVAL; 1718 } 1719 1720 nchildren = nilfs_btree_node_get_nchildren(node); 1721 if (nchildren < nitems) 1722 nitems = nchildren; 1723 dkeys = nilfs_btree_node_dkeys(node); 1724 dptrs = nilfs_btree_node_dptrs(node, ncmax); 1725 for (i = 0; i < nitems; i++) { 1726 keys[i] = le64_to_cpu(dkeys[i]); 1727 ptrs[i] = le64_to_cpu(dptrs[i]); 1728 } 1729 1730 if (bh != NULL) 1731 brelse(bh); 1732 1733 return nitems; 1734} 1735 1736static int 1737nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, 1738 union nilfs_bmap_ptr_req *dreq, 1739 union nilfs_bmap_ptr_req *nreq, 1740 struct buffer_head **bhp, 1741 struct nilfs_bmap_stats *stats) 1742{ 1743 struct buffer_head *bh; 1744 struct inode *dat = NULL; 1745 int ret; 1746 1747 stats->bs_nblocks = 0; 1748 1749 /* for data */ 1750 /* cannot find near ptr */ 1751 if (NILFS_BMAP_USE_VBN(btree)) { 1752 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); 1753 dat = nilfs_bmap_get_dat(btree); 1754 } 1755 1756 ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode); 1757 if (ret < 0) 1758 return ret; 1759 1760 ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); 1761 if (ret < 0) 1762 return ret; 1763 1764 *bhp = NULL; 1765 stats->bs_nblocks++; 1766 if (nreq != NULL) { 1767 nreq->bpr_ptr = dreq->bpr_ptr + 1; 1768 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat); 1769 if (ret < 0) 1770 goto err_out_dreq; 1771 1772 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh); 1773 if (ret < 0) 1774 goto err_out_nreq; 1775 1776 *bhp = bh; 1777 stats->bs_nblocks++; 1778 } 1779 1780 /* success */ 1781 return 0; 1782 1783 /* error */ 1784 err_out_nreq: 1785 nilfs_bmap_abort_alloc_ptr(btree, nreq, dat); 1786 err_out_dreq: 1787 nilfs_bmap_abort_alloc_ptr(btree, dreq, dat); 1788 stats->bs_nblocks = 0; 1789 return ret; 1790 1791} 1792 1793static void 1794nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, 1795 __u64 key, __u64 ptr, 1796 const __u64 *keys, const __u64 *ptrs, 1797 int n, 1798 union nilfs_bmap_ptr_req *dreq, 1799 union nilfs_bmap_ptr_req *nreq, 1800 struct buffer_head *bh) 1801{ 1802 struct nilfs_btree_node *node; 1803 struct inode *dat; 1804 __u64 tmpptr; 1805 int ncblk; 1806 1807 /* free resources */ 1808 if (btree->b_ops->bop_clear != NULL) 1809 btree->b_ops->bop_clear(btree); 1810 1811 /* ptr must be a pointer to a buffer head. 
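	 * The caller passes the data block's buffer_head cast to an unsigned
	 * long in 'ptr', as nilfs_btree_commit_insert() does above, so the
	 * buffer can be tagged volatile before the bmap is switched over to
	 * B-tree form.
	 */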
*/ 1812 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1813 1814 /* convert and insert */ 1815 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1816 __nilfs_btree_init(btree); 1817 if (nreq != NULL) { 1818 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1819 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); 1820 1821 /* create child node at level 1 */ 1822 node = (struct nilfs_btree_node *)bh->b_data; 1823 ncblk = nilfs_btree_nchildren_per_block(btree); 1824 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); 1825 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); 1826 if (!buffer_dirty(bh)) 1827 mark_buffer_dirty(bh); 1828 if (!nilfs_bmap_dirty(btree)) 1829 nilfs_bmap_set_dirty(btree); 1830 1831 brelse(bh); 1832 1833 /* create root node at level 2 */ 1834 node = nilfs_btree_get_root(btree); 1835 tmpptr = nreq->bpr_ptr; 1836 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1, 1837 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1838 &keys[0], &tmpptr); 1839 } else { 1840 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1841 1842 /* create root node at level 1 */ 1843 node = nilfs_btree_get_root(btree); 1844 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n, 1845 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1846 keys, ptrs); 1847 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, 1848 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1849 if (!nilfs_bmap_dirty(btree)) 1850 nilfs_bmap_set_dirty(btree); 1851 } 1852 1853 if (NILFS_BMAP_USE_VBN(btree)) 1854 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr); 1855} 1856 1857/** 1858 * nilfs_btree_convert_and_insert - 1859 * @bmap: 1860 * @key: 1861 * @ptr: 1862 * @keys: 1863 * @ptrs: 1864 * @n: 1865 */ 1866int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, 1867 __u64 key, __u64 ptr, 1868 const __u64 *keys, const __u64 *ptrs, int n) 1869{ 1870 struct buffer_head *bh = NULL; 1871 union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; 1872 struct nilfs_bmap_stats stats; 1873 int ret; 1874 1875 if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1876 di = &dreq; 1877 ni = NULL; 1878 } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( 1879 nilfs_btree_node_size(btree))) { 1880 di = &dreq; 1881 ni = &nreq; 1882 } else { 1883 di = NULL; 1884 ni = NULL; 1885 BUG(); 1886 } 1887 1888 ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh, 1889 &stats); 1890 if (ret < 0) 1891 return ret; 1892 nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, 1893 di, ni, bh); 1894 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1895 return 0; 1896} 1897 1898static int nilfs_btree_propagate_p(struct nilfs_bmap *btree, 1899 struct nilfs_btree_path *path, 1900 int level, 1901 struct buffer_head *bh) 1902{ 1903 while ((++level < nilfs_btree_height(btree) - 1) && 1904 !buffer_dirty(path[level].bp_bh)) 1905 mark_buffer_dirty(path[level].bp_bh); 1906 1907 return 0; 1908} 1909 1910static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, 1911 struct nilfs_btree_path *path, 1912 int level, struct inode *dat) 1913{ 1914 struct nilfs_btree_node *parent; 1915 int ncmax, ret; 1916 1917 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1918 path[level].bp_oldreq.bpr_ptr = 1919 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 1920 ncmax); 1921 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; 1922 ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req, 1923 &path[level].bp_newreq.bpr_req); 1924 if (ret < 0) 1925 return ret; 1926 1927 if 
(buffer_nilfs_node(path[level].bp_bh)) { 1928 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr; 1929 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; 1930 path[level].bp_ctxt.bh = path[level].bp_bh; 1931 ret = nilfs_btnode_prepare_change_key( 1932 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 1933 &path[level].bp_ctxt); 1934 if (ret < 0) { 1935 nilfs_dat_abort_update(dat, 1936 &path[level].bp_oldreq.bpr_req, 1937 &path[level].bp_newreq.bpr_req); 1938 return ret; 1939 } 1940 } 1941 1942 return 0; 1943} 1944 1945static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, 1946 struct nilfs_btree_path *path, 1947 int level, struct inode *dat) 1948{ 1949 struct nilfs_btree_node *parent; 1950 int ncmax; 1951 1952 nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, 1953 &path[level].bp_newreq.bpr_req, 1954 btree->b_ptr_type == NILFS_BMAP_PTR_VS); 1955 1956 if (buffer_nilfs_node(path[level].bp_bh)) { 1957 nilfs_btnode_commit_change_key( 1958 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 1959 &path[level].bp_ctxt); 1960 path[level].bp_bh = path[level].bp_ctxt.bh; 1961 } 1962 set_buffer_nilfs_volatile(path[level].bp_bh); 1963 1964 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1965 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, 1966 path[level].bp_newreq.bpr_ptr, ncmax); 1967} 1968 1969static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, 1970 struct nilfs_btree_path *path, 1971 int level, struct inode *dat) 1972{ 1973 nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req, 1974 &path[level].bp_newreq.bpr_req); 1975 if (buffer_nilfs_node(path[level].bp_bh)) 1976 nilfs_btnode_abort_change_key( 1977 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 1978 &path[level].bp_ctxt); 1979} 1980 1981static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree, 1982 struct nilfs_btree_path *path, 1983 int minlevel, int *maxlevelp, 1984 struct inode *dat) 1985{ 1986 int level, ret; 1987 1988 level = minlevel; 1989 if (!buffer_nilfs_volatile(path[level].bp_bh)) { 1990 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 1991 if (ret < 0) 1992 return ret; 1993 } 1994 while ((++level < nilfs_btree_height(btree) - 1) && 1995 !buffer_dirty(path[level].bp_bh)) { 1996 1997 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); 1998 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 1999 if (ret < 0) 2000 goto out; 2001 } 2002 2003 /* success */ 2004 *maxlevelp = level - 1; 2005 return 0; 2006 2007 /* error */ 2008 out: 2009 while (--level > minlevel) 2010 nilfs_btree_abort_update_v(btree, path, level, dat); 2011 if (!buffer_nilfs_volatile(path[level].bp_bh)) 2012 nilfs_btree_abort_update_v(btree, path, level, dat); 2013 return ret; 2014} 2015 2016static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree, 2017 struct nilfs_btree_path *path, 2018 int minlevel, int maxlevel, 2019 struct buffer_head *bh, 2020 struct inode *dat) 2021{ 2022 int level; 2023 2024 if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) 2025 nilfs_btree_commit_update_v(btree, path, minlevel, dat); 2026 2027 for (level = minlevel + 1; level <= maxlevel; level++) 2028 nilfs_btree_commit_update_v(btree, path, level, dat); 2029} 2030 2031static int nilfs_btree_propagate_v(struct nilfs_bmap *btree, 2032 struct nilfs_btree_path *path, 2033 int level, struct buffer_head *bh) 2034{ 2035 int maxlevel = 0, ret; 2036 struct nilfs_btree_node *parent; 2037 struct inode *dat = nilfs_bmap_get_dat(btree); 2038 __u64 ptr; 2039 int ncmax; 2040 2041 get_bh(bh); 
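	/*
	 * The extra reference taken above lets bh sit in the path as if it
	 * had been read during lookup; it is dropped again at the "out"
	 * label below.
	 */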
static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
				   struct nilfs_btree_path *path,
				   int level, struct buffer_head *bh)
{
	int maxlevel = 0, ret;
	struct nilfs_btree_node *parent;
	struct inode *dat = nilfs_bmap_get_dat(btree);
	__u64 ptr;
	int ncmax;

	get_bh(bh);
	path[level].bp_bh = bh;
	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
					      dat);
	if (ret < 0)
		goto out;

	if (buffer_nilfs_volatile(path[level].bp_bh)) {
		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
		ptr = nilfs_btree_node_get_ptr(parent,
					       path[level + 1].bp_index,
					       ncmax);
		ret = nilfs_dat_mark_dirty(dat, ptr);
		if (ret < 0)
			goto out;
	}

	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);

 out:
	brelse(path[level].bp_bh);
	path[level].bp_bh = NULL;
	return ret;
}

static int nilfs_btree_propagate(struct nilfs_bmap *btree,
				 struct buffer_head *bh)
{
	struct nilfs_btree_path *path;
	struct nilfs_btree_node *node;
	__u64 key;
	int level, ret;

	WARN_ON(!buffer_dirty(bh));

	path = nilfs_btree_alloc_path();
	if (path == NULL)
		return -ENOMEM;

	if (buffer_nilfs_node(bh)) {
		node = (struct nilfs_btree_node *)bh->b_data;
		key = nilfs_btree_node_get_key(node, 0);
		level = nilfs_btree_node_get_level(node);
	} else {
		key = nilfs_bmap_data_get_key(btree, bh);
		level = NILFS_BTREE_LEVEL_DATA;
	}

	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
	if (ret < 0) {
		if (unlikely(ret == -ENOENT))
			nilfs_crit(btree->b_inode->i_sb,
				   "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
				   btree->b_inode->i_ino,
				   (unsigned long long)key, level);
		goto out;
	}

	ret = NILFS_BMAP_USE_VBN(btree) ?
		nilfs_btree_propagate_v(btree, path, level, bh) :
		nilfs_btree_propagate_p(btree, path, level, bh);

 out:
	nilfs_btree_free_path(path);

	return ret;
}

static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
				    struct buffer_head *bh)
{
	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
}

static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
					 struct list_head *lists,
					 struct buffer_head *bh)
{
	struct list_head *head;
	struct buffer_head *cbh;
	struct nilfs_btree_node *node, *cnode;
	__u64 key, ckey;
	int level;

	get_bh(bh);
	node = (struct nilfs_btree_node *)bh->b_data;
	key = nilfs_btree_node_get_key(node, 0);
	level = nilfs_btree_node_get_level(node);
	if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
	    level >= NILFS_BTREE_LEVEL_MAX) {
		dump_stack();
		nilfs_warn(btree->b_inode->i_sb,
			   "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
			   level, (unsigned long long)key,
			   btree->b_inode->i_ino,
			   (unsigned long long)bh->b_blocknr);
		brelse(bh);	/* drop the reference taken above */
		return;
	}

	list_for_each(head, &lists[level]) {
		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
		cnode = (struct nilfs_btree_node *)cbh->b_data;
		ckey = nilfs_btree_node_get_key(cnode, 0);
		if (key < ckey)
			break;
	}
	list_add_tail(&bh->b_assoc_buffers, head);
}
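/*
 * Dirty b-tree node buffers are collected into one list per level, each
 * list kept sorted by the first key of its nodes;
 * nilfs_btree_lookup_dirty_buffers() below scans the btnode cache for
 * dirty pages, queues their buffers through
 * nilfs_btree_add_dirty_buffer(), and splices the per-level lists onto
 * the caller's list with lower levels first.
 */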
static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
					     struct list_head *listp)
{
	struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode;
	struct address_space *btcache = btnc_inode->i_mapping;
	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
	struct pagevec pvec;
	struct buffer_head *bh, *head;
	pgoff_t index = 0;
	int level, i;

	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
	     level < NILFS_BTREE_LEVEL_MAX;
	     level++)
		INIT_LIST_HEAD(&lists[level]);

	pagevec_init(&pvec);

	while (pagevec_lookup_tag(&pvec, btcache, &index,
				  PAGECACHE_TAG_DIRTY)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			bh = head = page_buffers(pvec.pages[i]);
			do {
				if (buffer_dirty(bh))
					nilfs_btree_add_dirty_buffer(btree,
								     lists, bh);
			} while ((bh = bh->b_this_page) != head);
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
	     level < NILFS_BTREE_LEVEL_MAX;
	     level++)
		list_splice_tail(&lists[level], listp);
}

static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
				struct nilfs_btree_path *path,
				int level,
				struct buffer_head **bh,
				sector_t blocknr,
				union nilfs_binfo *binfo)
{
	struct nilfs_btree_node *parent;
	__u64 key;
	__u64 ptr;
	int ncmax, ret;

	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
				       ncmax);
	if (buffer_nilfs_node(*bh)) {
		path[level].bp_ctxt.oldkey = ptr;
		path[level].bp_ctxt.newkey = blocknr;
		path[level].bp_ctxt.bh = *bh;
		ret = nilfs_btnode_prepare_change_key(
			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
			&path[level].bp_ctxt);
		if (ret < 0)
			return ret;
		nilfs_btnode_commit_change_key(
			NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping,
			&path[level].bp_ctxt);
		*bh = path[level].bp_ctxt.bh;
	}

	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
				 ncmax);

	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
	/* on-disk format */
	binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
	binfo->bi_dat.bi_level = level;

	return 0;
}

static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
				struct nilfs_btree_path *path,
				int level,
				struct buffer_head **bh,
				sector_t blocknr,
				union nilfs_binfo *binfo)
{
	struct nilfs_btree_node *parent;
	struct inode *dat = nilfs_bmap_get_dat(btree);
	__u64 key;
	__u64 ptr;
	union nilfs_bmap_ptr_req req;
	int ncmax, ret;

	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
				       ncmax);
	req.bpr_ptr = ptr;
	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
	if (ret < 0)
		return ret;
	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);

	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
	/* on-disk format */
	binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
	binfo->bi_v.bi_blkoff = cpu_to_le64(key);

	return 0;
}
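/*
 * Both assign helpers record the on-disk binfo describing a block being
 * written out: nilfs_btree_assign_p() rewrites the parent pointer with
 * the real block number and fills the bi_dat form (block offset and
 * b-tree level), while nilfs_btree_assign_v() binds the virtual block
 * number to its new location in the DAT and fills the bi_v form
 * (virtual block number and block offset).
 */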
static int nilfs_btree_assign(struct nilfs_bmap *btree,
			      struct buffer_head **bh,
			      sector_t blocknr,
			      union nilfs_binfo *binfo)
{
	struct nilfs_btree_path *path;
	struct nilfs_btree_node *node;
	__u64 key;
	int level, ret;

	path = nilfs_btree_alloc_path();
	if (path == NULL)
		return -ENOMEM;

	if (buffer_nilfs_node(*bh)) {
		node = (struct nilfs_btree_node *)(*bh)->b_data;
		key = nilfs_btree_node_get_key(node, 0);
		level = nilfs_btree_node_get_level(node);
	} else {
		key = nilfs_bmap_data_get_key(btree, *bh);
		level = NILFS_BTREE_LEVEL_DATA;
	}

	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);
		goto out;
	}

	ret = NILFS_BMAP_USE_VBN(btree) ?
		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);

 out:
	nilfs_btree_free_path(path);

	return ret;
}

static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
				 struct buffer_head **bh,
				 sector_t blocknr,
				 union nilfs_binfo *binfo)
{
	struct nilfs_btree_node *node;
	__u64 key;
	int ret;

	ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
			     blocknr);
	if (ret < 0)
		return ret;

	if (buffer_nilfs_node(*bh)) {
		node = (struct nilfs_btree_node *)(*bh)->b_data;
		key = nilfs_btree_node_get_key(node, 0);
	} else
		key = nilfs_bmap_data_get_key(btree, *bh);

	/* on-disk format */
	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
	binfo->bi_v.bi_blkoff = cpu_to_le64(key);

	return 0;
}

static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
{
	struct buffer_head *bh;
	struct nilfs_btree_path *path;
	__u64 ptr;
	int ret;

	path = nilfs_btree_alloc_path();
	if (path == NULL)
		return -ENOMEM;

	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);
		goto out;
	}
	ret = nilfs_btree_get_block(btree, ptr, &bh);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);
		goto out;
	}

	if (!buffer_dirty(bh))
		mark_buffer_dirty(bh);
	brelse(bh);
	if (!nilfs_bmap_dirty(btree))
		nilfs_bmap_set_dirty(btree);

 out:
	nilfs_btree_free_path(path);
	return ret;
}

static const struct nilfs_bmap_operations nilfs_btree_ops = {
	.bop_lookup = nilfs_btree_lookup,
	.bop_lookup_contig = nilfs_btree_lookup_contig,
	.bop_insert = nilfs_btree_insert,
	.bop_delete = nilfs_btree_delete,
	.bop_clear = NULL,

	.bop_propagate = nilfs_btree_propagate,

	.bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers,

	.bop_assign = nilfs_btree_assign,
	.bop_mark = nilfs_btree_mark,

	.bop_seek_key = nilfs_btree_seek_key,
	.bop_last_key = nilfs_btree_last_key,

	.bop_check_insert = NULL,
	.bop_check_delete = nilfs_btree_check_delete,
	.bop_gather_data = nilfs_btree_gather_data,
};

static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
	.bop_lookup = NULL,
	.bop_lookup_contig = NULL,
	.bop_insert = NULL,
	.bop_delete = NULL,
	.bop_clear = NULL,

	.bop_propagate = nilfs_btree_propagate_gc,

	.bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers,

	.bop_assign = nilfs_btree_assign_gc,
	.bop_mark = NULL,

	.bop_seek_key = NULL,
	.bop_last_key = NULL,

	.bop_check_insert = NULL,
	.bop_check_delete = NULL,
	.bop_gather_data = NULL,
};

static void __nilfs_btree_init(struct nilfs_bmap *bmap)
{
	bmap->b_ops = &nilfs_btree_ops;
	bmap->b_nchildren_per_block =
		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
}
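/*
 * nilfs_btree_init() below validates the root node embedded in the inode
 * and attaches the btnode cache used for intermediate nodes;
 * nilfs_btree_init_gc() instead installs the restricted GC operation
 * table, under which the tree itself is never modified and only DAT
 * propagation and block assignment remain available.
 */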
int nilfs_btree_init(struct nilfs_bmap *bmap)
{
	int ret = 0;

	__nilfs_btree_init(bmap);

	if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
		ret = -EIO;
	else
		ret = nilfs_attach_btree_node_cache(
			&NILFS_BMAP_I(bmap)->vfs_inode);

	return ret;
}

void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
{
	bmap->b_ops = &nilfs_btree_ops_gc;
	bmap->b_nchildren_per_block =
		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
}
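/*
 * Illustrative sketch only: the operation tables defined above are
 * reached through the bmap's b_ops pointer, so callers never invoke the
 * b-tree routines directly.  nilfs_example_propagate() is a hypothetical
 * helper, not part of this file, shown purely to make the dispatch
 * explicit; it is kept under "#if 0" so it is never compiled.
 */
#if 0
static int nilfs_example_propagate(struct nilfs_bmap *bmap,
				   struct buffer_head *bh)
{
	/*
	 * For a bmap set up by nilfs_btree_init() this reaches
	 * nilfs_btree_propagate(); after nilfs_btree_init_gc() it would
	 * reach nilfs_btree_propagate_gc() instead.
	 */
	return bmap->b_ops->bop_propagate(bmap, bh);
}
#endif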