/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-space-map.h"
#include "dm-space-map-common.h"
#include "dm-space-map-metadata.h"

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/device-mapper.h>
#include <linux/kernel.h>

#define DM_MSG_PREFIX "space map metadata"

/*----------------------------------------------------------------*/

/*
 * An edge triggered threshold.
 */
struct threshold {
	bool threshold_set;
	bool value_set;
	dm_block_t threshold;
	dm_block_t current_value;
	dm_sm_threshold_fn fn;
	void *context;
};

static void threshold_init(struct threshold *t)
{
	t->threshold_set = false;
	t->value_set = false;
}

static void set_threshold(struct threshold *t, dm_block_t value,
			  dm_sm_threshold_fn fn, void *context)
{
	t->threshold_set = true;
	t->threshold = value;
	t->fn = fn;
	t->context = context;
}

static bool below_threshold(struct threshold *t, dm_block_t value)
{
	return t->threshold_set && value <= t->threshold;
}

static bool threshold_already_triggered(struct threshold *t)
{
	return t->value_set && below_threshold(t, t->current_value);
}

static void check_threshold(struct threshold *t, dm_block_t value)
{
	if (below_threshold(t, value) &&
	    !threshold_already_triggered(t))
		t->fn(t->context);

	t->value_set = true;
	t->current_value = value;
}

/*----------------------------------------------------------------*/

/*
 * Space map interface.
 *
 * The low level disk format is written using the standard btree and
 * transaction manager. This means that performing disk operations may
 * cause us to recurse into the space map in order to allocate new blocks.
 * For this reason we have a pool of pre-allocated blocks large enough to
 * service any metadata_ll_disk operation.
 */

/*
 * FIXME: we should calculate this based on the size of the device.
 * Only the metadata space map needs this functionality.
 */
#define MAX_RECURSIVE_ALLOCATIONS 1024

enum block_op_type {
	BOP_INC,
	BOP_DEC
};

struct block_op {
	enum block_op_type type;
	dm_block_t block;
};

struct bop_ring_buffer {
	unsigned begin;
	unsigned end;
	struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
};

static void brb_init(struct bop_ring_buffer *brb)
{
	brb->begin = 0;
	brb->end = 0;
}

static bool brb_empty(struct bop_ring_buffer *brb)
{
	return brb->begin == brb->end;
}

static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
{
	unsigned r = old + 1;
	return r >= ARRAY_SIZE(brb->bops) ? 0 : r;
}

static int brb_push(struct bop_ring_buffer *brb,
		    enum block_op_type type, dm_block_t b)
{
	struct block_op *bop;
	unsigned next = brb_next(brb, brb->end);

	/*
	 * We don't allow the last bop to be filled, this way we can
	 * differentiate between full and empty.
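	 * The buffer therefore holds at most MAX_RECURSIVE_ALLOCATIONS
	 * ops, and a full buffer means too many nested block operations
	 * have been queued.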
127 */ 128 if (next == brb->begin) 129 return -ENOMEM; 130 131 bop = brb->bops + brb->end; 132 bop->type = type; 133 bop->block = b; 134 135 brb->end = next; 136 137 return 0; 138} 139 140static int brb_peek(struct bop_ring_buffer *brb, struct block_op *result) 141{ 142 struct block_op *bop; 143 144 if (brb_empty(brb)) 145 return -ENODATA; 146 147 bop = brb->bops + brb->begin; 148 result->type = bop->type; 149 result->block = bop->block; 150 151 return 0; 152} 153 154static int brb_pop(struct bop_ring_buffer *brb) 155{ 156 if (brb_empty(brb)) 157 return -ENODATA; 158 159 brb->begin = brb_next(brb, brb->begin); 160 161 return 0; 162} 163 164/*----------------------------------------------------------------*/ 165 166struct sm_metadata { 167 struct dm_space_map sm; 168 169 struct ll_disk ll; 170 struct ll_disk old_ll; 171 172 dm_block_t begin; 173 174 unsigned recursion_count; 175 unsigned allocated_this_transaction; 176 struct bop_ring_buffer uncommitted; 177 178 struct threshold threshold; 179}; 180 181static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b) 182{ 183 int r = brb_push(&smm->uncommitted, type, b); 184 185 if (r) { 186 DMERR("too many recursive allocations"); 187 return -ENOMEM; 188 } 189 190 return 0; 191} 192 193static int commit_bop(struct sm_metadata *smm, struct block_op *op) 194{ 195 int r = 0; 196 enum allocation_event ev; 197 198 switch (op->type) { 199 case BOP_INC: 200 r = sm_ll_inc(&smm->ll, op->block, &ev); 201 break; 202 203 case BOP_DEC: 204 r = sm_ll_dec(&smm->ll, op->block, &ev); 205 break; 206 } 207 208 return r; 209} 210 211static void in(struct sm_metadata *smm) 212{ 213 smm->recursion_count++; 214} 215 216static int apply_bops(struct sm_metadata *smm) 217{ 218 int r = 0; 219 220 while (!brb_empty(&smm->uncommitted)) { 221 struct block_op bop; 222 223 r = brb_peek(&smm->uncommitted, &bop); 224 if (r) { 225 DMERR("bug in bop ring buffer"); 226 break; 227 } 228 229 r = commit_bop(smm, &bop); 230 if (r) 231 break; 232 233 brb_pop(&smm->uncommitted); 234 } 235 236 return r; 237} 238 239static int out(struct sm_metadata *smm) 240{ 241 int r = 0; 242 243 /* 244 * If we're not recursing then very bad things are happening. 245 */ 246 if (!smm->recursion_count) { 247 DMERR("lost track of recursion depth"); 248 return -ENOMEM; 249 } 250 251 if (smm->recursion_count == 1) 252 r = apply_bops(smm); 253 254 smm->recursion_count--; 255 256 return r; 257} 258 259/* 260 * When using the out() function above, we often want to combine an error 261 * code for the operation run in the recursive context with that from 262 * out(). 263 */ 264static int combine_errors(int r1, int r2) 265{ 266 return r1 ? 
static int recursing(struct sm_metadata *smm)
{
	return smm->recursion_count;
}

static void sm_metadata_destroy(struct dm_space_map *sm)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	kfree(smm);
}

static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}

static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
		 smm->allocated_this_transaction;

	return 0;
}

static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t *result)
{
	int r;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	unsigned adjustment = 0;

	/*
	 * We may have some uncommitted adjustments to add. This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {
		struct block_op *op = smm->uncommitted.bops + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	r = sm_ll_lookup(&smm->ll, b, result);
	if (r)
		return r;

	*result += adjustment;

	return 0;
}

static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
					      dm_block_t b, int *result)
{
	int r, adjustment = 0;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	uint32_t rc;

	/*
	 * We may have some uncommitted adjustments to add. This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {

		struct block_op *op = smm->uncommitted.bops + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	if (adjustment > 1) {
		*result = 1;
		return 0;
	}

	r = sm_ll_lookup_bitmap(&smm->ll, b, &rc);
	if (r)
		return r;

	if (rc == 3)
		/*
		 * We err on the side of caution, and always return true.
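		 * A bitmap entry of 3 means the real count has overflowed
		 * into the ref count tree and is at least 3.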
384 */ 385 *result = 1; 386 else 387 *result = rc + adjustment > 1; 388 389 return 0; 390} 391 392static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b, 393 uint32_t count) 394{ 395 int r, r2; 396 enum allocation_event ev; 397 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 398 399 if (smm->recursion_count) { 400 DMERR("cannot recurse set_count()"); 401 return -EINVAL; 402 } 403 404 in(smm); 405 r = sm_ll_insert(&smm->ll, b, count, &ev); 406 r2 = out(smm); 407 408 return combine_errors(r, r2); 409} 410 411static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b) 412{ 413 int r, r2 = 0; 414 enum allocation_event ev; 415 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 416 417 if (recursing(smm)) 418 r = add_bop(smm, BOP_INC, b); 419 else { 420 in(smm); 421 r = sm_ll_inc(&smm->ll, b, &ev); 422 r2 = out(smm); 423 } 424 425 return combine_errors(r, r2); 426} 427 428static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b) 429{ 430 int r, r2 = 0; 431 enum allocation_event ev; 432 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 433 434 if (recursing(smm)) 435 r = add_bop(smm, BOP_DEC, b); 436 else { 437 in(smm); 438 r = sm_ll_dec(&smm->ll, b, &ev); 439 r2 = out(smm); 440 } 441 442 return combine_errors(r, r2); 443} 444 445static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b) 446{ 447 int r, r2 = 0; 448 enum allocation_event ev; 449 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 450 451 /* 452 * Any block we allocate has to be free in both the old and current ll. 453 */ 454 r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, smm->begin, smm->ll.nr_blocks, b); 455 if (r == -ENOSPC) { 456 /* 457 * There's no free block between smm->begin and the end of the metadata device. 458 * We search before smm->begin in case something has been freed. 
459 */ 460 r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, 0, smm->begin, b); 461 } 462 463 if (r) 464 return r; 465 466 smm->begin = *b + 1; 467 468 if (recursing(smm)) 469 r = add_bop(smm, BOP_INC, *b); 470 else { 471 in(smm); 472 r = sm_ll_inc(&smm->ll, *b, &ev); 473 r2 = out(smm); 474 } 475 476 if (!r) 477 smm->allocated_this_transaction++; 478 479 return combine_errors(r, r2); 480} 481 482static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) 483{ 484 dm_block_t count; 485 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 486 487 int r = sm_metadata_new_block_(sm, b); 488 if (r) { 489 DMERR_LIMIT("unable to allocate new metadata block"); 490 return r; 491 } 492 493 r = sm_metadata_get_nr_free(sm, &count); 494 if (r) { 495 DMERR_LIMIT("couldn't get free block count"); 496 return r; 497 } 498 499 check_threshold(&smm->threshold, count); 500 501 return r; 502} 503 504static int sm_metadata_commit(struct dm_space_map *sm) 505{ 506 int r; 507 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 508 509 r = sm_ll_commit(&smm->ll); 510 if (r) 511 return r; 512 513 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); 514 smm->allocated_this_transaction = 0; 515 516 return 0; 517} 518 519static int sm_metadata_register_threshold_callback(struct dm_space_map *sm, 520 dm_block_t threshold, 521 dm_sm_threshold_fn fn, 522 void *context) 523{ 524 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 525 526 set_threshold(&smm->threshold, threshold, fn, context); 527 528 return 0; 529} 530 531static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result) 532{ 533 *result = sizeof(struct disk_sm_root); 534 535 return 0; 536} 537 538static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max) 539{ 540 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 541 struct disk_sm_root root_le; 542 543 root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks); 544 root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated); 545 root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root); 546 root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root); 547 548 if (max < sizeof(root_le)) 549 return -ENOSPC; 550 551 memcpy(where_le, &root_le, sizeof(root_le)); 552 553 return 0; 554} 555 556static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks); 557 558static const struct dm_space_map ops = { 559 .destroy = sm_metadata_destroy, 560 .extend = sm_metadata_extend, 561 .get_nr_blocks = sm_metadata_get_nr_blocks, 562 .get_nr_free = sm_metadata_get_nr_free, 563 .get_count = sm_metadata_get_count, 564 .count_is_more_than_one = sm_metadata_count_is_more_than_one, 565 .set_count = sm_metadata_set_count, 566 .inc_block = sm_metadata_inc_block, 567 .dec_block = sm_metadata_dec_block, 568 .new_block = sm_metadata_new_block, 569 .commit = sm_metadata_commit, 570 .root_size = sm_metadata_root_size, 571 .copy_root = sm_metadata_copy_root, 572 .register_threshold_callback = sm_metadata_register_threshold_callback 573}; 574 575/*----------------------------------------------------------------*/ 576 577/* 578 * When a new space map is created that manages its own space. We use 579 * this tiny bootstrap allocator. 
580 */ 581static void sm_bootstrap_destroy(struct dm_space_map *sm) 582{ 583} 584 585static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks) 586{ 587 DMERR("bootstrap doesn't support extend"); 588 589 return -EINVAL; 590} 591 592static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) 593{ 594 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 595 596 *count = smm->ll.nr_blocks; 597 598 return 0; 599} 600 601static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count) 602{ 603 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 604 605 *count = smm->ll.nr_blocks - smm->begin; 606 607 return 0; 608} 609 610static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b, 611 uint32_t *result) 612{ 613 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 614 615 *result = (b < smm->begin) ? 1 : 0; 616 617 return 0; 618} 619 620static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm, 621 dm_block_t b, int *result) 622{ 623 *result = 0; 624 625 return 0; 626} 627 628static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b, 629 uint32_t count) 630{ 631 DMERR("bootstrap doesn't support set_count"); 632 633 return -EINVAL; 634} 635 636static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b) 637{ 638 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 639 640 /* 641 * We know the entire device is unused. 642 */ 643 if (smm->begin == smm->ll.nr_blocks) 644 return -ENOSPC; 645 646 *b = smm->begin++; 647 648 return 0; 649} 650 651static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b) 652{ 653 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 654 655 return add_bop(smm, BOP_INC, b); 656} 657 658static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b) 659{ 660 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 661 662 return add_bop(smm, BOP_DEC, b); 663} 664 665static int sm_bootstrap_commit(struct dm_space_map *sm) 666{ 667 return 0; 668} 669 670static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result) 671{ 672 DMERR("bootstrap doesn't support root_size"); 673 674 return -EINVAL; 675} 676 677static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where, 678 size_t max) 679{ 680 DMERR("bootstrap doesn't support copy_root"); 681 682 return -EINVAL; 683} 684 685static const struct dm_space_map bootstrap_ops = { 686 .destroy = sm_bootstrap_destroy, 687 .extend = sm_bootstrap_extend, 688 .get_nr_blocks = sm_bootstrap_get_nr_blocks, 689 .get_nr_free = sm_bootstrap_get_nr_free, 690 .get_count = sm_bootstrap_get_count, 691 .count_is_more_than_one = sm_bootstrap_count_is_more_than_one, 692 .set_count = sm_bootstrap_set_count, 693 .inc_block = sm_bootstrap_inc_block, 694 .dec_block = sm_bootstrap_dec_block, 695 .new_block = sm_bootstrap_new_block, 696 .commit = sm_bootstrap_commit, 697 .root_size = sm_bootstrap_root_size, 698 .copy_root = sm_bootstrap_copy_root, 699 .register_threshold_callback = NULL 700}; 701 702/*----------------------------------------------------------------*/ 703 704static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) 705{ 706 int r, i; 707 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 708 dm_block_t old_len = smm->ll.nr_blocks; 709 710 /* 711 * Flick into a mode where all blocks get allocated in the new area. 
712 */ 713 smm->begin = old_len; 714 memcpy(sm, &bootstrap_ops, sizeof(*sm)); 715 716 /* 717 * Extend. 718 */ 719 r = sm_ll_extend(&smm->ll, extra_blocks); 720 if (r) 721 goto out; 722 723 /* 724 * We repeatedly increment then commit until the commit doesn't 725 * allocate any new blocks. 726 */ 727 do { 728 for (i = old_len; !r && i < smm->begin; i++) 729 r = add_bop(smm, BOP_INC, i); 730 731 if (r) 732 goto out; 733 734 old_len = smm->begin; 735 736 r = apply_bops(smm); 737 if (r) { 738 DMERR("%s: apply_bops failed", __func__); 739 goto out; 740 } 741 742 r = sm_ll_commit(&smm->ll); 743 if (r) 744 goto out; 745 746 } while (old_len != smm->begin); 747 748out: 749 /* 750 * Switch back to normal behaviour. 751 */ 752 memcpy(sm, &ops, sizeof(*sm)); 753 return r; 754} 755 756/*----------------------------------------------------------------*/ 757 758struct dm_space_map *dm_sm_metadata_init(void) 759{ 760 struct sm_metadata *smm; 761 762 smm = kmalloc(sizeof(*smm), GFP_KERNEL); 763 if (!smm) 764 return ERR_PTR(-ENOMEM); 765 766 memcpy(&smm->sm, &ops, sizeof(smm->sm)); 767 768 return &smm->sm; 769} 770 771int dm_sm_metadata_create(struct dm_space_map *sm, 772 struct dm_transaction_manager *tm, 773 dm_block_t nr_blocks, 774 dm_block_t superblock) 775{ 776 int r; 777 dm_block_t i; 778 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 779 780 smm->begin = superblock + 1; 781 smm->recursion_count = 0; 782 smm->allocated_this_transaction = 0; 783 brb_init(&smm->uncommitted); 784 threshold_init(&smm->threshold); 785 786 memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); 787 788 r = sm_ll_new_metadata(&smm->ll, tm); 789 if (!r) { 790 if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) 791 nr_blocks = DM_SM_METADATA_MAX_BLOCKS; 792 r = sm_ll_extend(&smm->ll, nr_blocks); 793 } 794 memcpy(&smm->sm, &ops, sizeof(smm->sm)); 795 if (r) 796 return r; 797 798 /* 799 * Now we need to update the newly created data structures with the 800 * allocated blocks that they were built from. 801 */ 802 for (i = superblock; !r && i < smm->begin; i++) 803 r = add_bop(smm, BOP_INC, i); 804 805 if (r) 806 return r; 807 808 r = apply_bops(smm); 809 if (r) { 810 DMERR("%s: apply_bops failed", __func__); 811 return r; 812 } 813 814 return sm_metadata_commit(sm); 815} 816 817int dm_sm_metadata_open(struct dm_space_map *sm, 818 struct dm_transaction_manager *tm, 819 void *root_le, size_t len) 820{ 821 int r; 822 struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); 823 824 r = sm_ll_open_metadata(&smm->ll, tm, root_le, len); 825 if (r) 826 return r; 827 828 smm->begin = 0; 829 smm->recursion_count = 0; 830 smm->allocated_this_transaction = 0; 831 brb_init(&smm->uncommitted); 832 threshold_init(&smm->threshold); 833 834 memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); 835 return 0; 836} 837