// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/percpu_counter.h>
#include <linux/lockdep.h>
#include <linux/crc32c.h>
#include "misc.h"
#include "tree-log.h"
#include "disk-io.h"
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "locking.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "sysfs.h"
#include "qgroup.h"
#include "ref-verify.h"
#include "space-info.h"
#include "block-rsv.h"
#include "delalloc-space.h"
#include "block-group.h"
#include "discard.h"
#include "rcu-string.h"

#undef SCRAMBLE_DELAYED_REFS


static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_delayed_ref_node *node, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_delayed_ref_node *node,
				     struct btrfs_delayed_extent_op *extent_op);
static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key);

static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
			      u64 start, u64 num_bytes)
{
	u64 end = start + num_bytes - 1;
	set_extent_bits(&fs_info->excluded_extents, start, end,
			EXTENT_UPTODATE);
	return 0;
}

void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;
	u64 start, end;

	start = cache->start;
	end = start + cache->length - 1;

	clear_extent_bits(&fs_info->excluded_extents, start, end,
			  EXTENT_UPTODATE);
}

/* simple helper to search for an existing data extent at a given offset */
int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = start;
	key.offset = len;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
	btrfs_free_path(path);
	return ret;
}

/*
 * Helper function to look up the reference count and flags of a tree block.
 *
 * The head node for a delayed ref is used to store the sum of all the
 * reference count modifications queued up in the rbtree.  The head node may
 * also store the extent flags to set.  This way you can check what the
 * reference count and extent flags will be once all of the queued delayed
 * refs have been processed, without actually running them.
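 *
 * For example, if the extent item on disk records 3 references and the
 * delayed ref head has a pending ref_mod of -1, the reported reference
 * count is 2.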
112 */ 113int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, 114 struct btrfs_fs_info *fs_info, u64 bytenr, 115 u64 offset, int metadata, u64 *refs, u64 *flags) 116{ 117 struct btrfs_delayed_ref_head *head; 118 struct btrfs_delayed_ref_root *delayed_refs; 119 struct btrfs_path *path; 120 struct btrfs_extent_item *ei; 121 struct extent_buffer *leaf; 122 struct btrfs_key key; 123 u32 item_size; 124 u64 num_refs; 125 u64 extent_flags; 126 int ret; 127 128 /* 129 * If we don't have skinny metadata, don't bother doing anything 130 * different 131 */ 132 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) { 133 offset = fs_info->nodesize; 134 metadata = 0; 135 } 136 137 path = btrfs_alloc_path(); 138 if (!path) 139 return -ENOMEM; 140 141 if (!trans) { 142 path->skip_locking = 1; 143 path->search_commit_root = 1; 144 } 145 146search_again: 147 key.objectid = bytenr; 148 key.offset = offset; 149 if (metadata) 150 key.type = BTRFS_METADATA_ITEM_KEY; 151 else 152 key.type = BTRFS_EXTENT_ITEM_KEY; 153 154 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0); 155 if (ret < 0) 156 goto out_free; 157 158 if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) { 159 if (path->slots[0]) { 160 path->slots[0]--; 161 btrfs_item_key_to_cpu(path->nodes[0], &key, 162 path->slots[0]); 163 if (key.objectid == bytenr && 164 key.type == BTRFS_EXTENT_ITEM_KEY && 165 key.offset == fs_info->nodesize) 166 ret = 0; 167 } 168 } 169 170 if (ret == 0) { 171 leaf = path->nodes[0]; 172 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 173 if (item_size >= sizeof(*ei)) { 174 ei = btrfs_item_ptr(leaf, path->slots[0], 175 struct btrfs_extent_item); 176 num_refs = btrfs_extent_refs(leaf, ei); 177 extent_flags = btrfs_extent_flags(leaf, ei); 178 } else { 179 ret = -EINVAL; 180 btrfs_print_v0_err(fs_info); 181 if (trans) 182 btrfs_abort_transaction(trans, ret); 183 else 184 btrfs_handle_fs_error(fs_info, ret, NULL); 185 186 goto out_free; 187 } 188 189 BUG_ON(num_refs == 0); 190 } else { 191 num_refs = 0; 192 extent_flags = 0; 193 ret = 0; 194 } 195 196 if (!trans) 197 goto out; 198 199 delayed_refs = &trans->transaction->delayed_refs; 200 spin_lock(&delayed_refs->lock); 201 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); 202 if (head) { 203 if (!mutex_trylock(&head->mutex)) { 204 refcount_inc(&head->refs); 205 spin_unlock(&delayed_refs->lock); 206 207 btrfs_release_path(path); 208 209 /* 210 * Mutex was contended, block until it's released and try 211 * again 212 */ 213 mutex_lock(&head->mutex); 214 mutex_unlock(&head->mutex); 215 btrfs_put_delayed_ref_head(head); 216 goto search_again; 217 } 218 spin_lock(&head->lock); 219 if (head->extent_op && head->extent_op->update_flags) 220 extent_flags |= head->extent_op->flags_to_set; 221 else 222 BUG_ON(num_refs == 0); 223 224 num_refs += head->ref_mod; 225 spin_unlock(&head->lock); 226 mutex_unlock(&head->mutex); 227 } 228 spin_unlock(&delayed_refs->lock); 229out: 230 WARN_ON(num_refs == 0); 231 if (refs) 232 *refs = num_refs; 233 if (flags) 234 *flags = extent_flags; 235out_free: 236 btrfs_free_path(path); 237 return ret; 238} 239 240/* 241 * Back reference rules. Back refs have three main goals: 242 * 243 * 1) differentiate between all holders of references to an extent so that 244 * when a reference is dropped we can make sure it was a valid reference 245 * before freeing the extent. 246 * 247 * 2) Provide enough information to quickly find the holders of an extent 248 * if we notice a given block is corrupted or bad. 
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * There are two kinds of back refs. Implicit back refs are optimized
 * for pointers in non-shared tree blocks. For a given pointer in a block,
 * back refs of this kind provide information about the block's owner tree
 * and the pointer's key. This information allows us to find the block by
 * b-tree searching. Full back refs are for pointers in tree blocks not
 * referenced by their owner trees. The location of the tree block is
 * recorded in the back refs. Full back refs are actually generic and
 * could be used in all cases where implicit back refs are used. Their
 * major shortcoming is the overhead: every time a tree block gets COWed,
 * we have to update the back ref entries for all pointers in it.
 *
 * For a newly allocated tree block, we use implicit back refs for the
 * pointers in it. This means most tree related operations only involve
 * implicit back refs. For a tree block created in an old transaction, the
 * only way to drop a reference to it is to COW it. So we can detect the
 * event that a tree block loses its owner tree's reference and do the
 * back refs conversion.
 *
 * When a tree block is COWed through a tree, there are four cases:
 *
 * The reference count of the block is one and the tree is the block's
 * owner tree. Nothing to do in this case.
 *
 * The reference count of the block is one and the tree is not the
 * block's owner tree. In this case, full back refs are used for the
 * pointers in the block. Remove these full back refs and add implicit
 * back refs for every pointer in the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * the block's owner tree. In this case, implicit back refs are used for
 * the pointers in the block. Add full back refs for every pointer in the
 * block and increase the lower level extents' reference counts. The
 * original implicit back refs are carried over to the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * not the block's owner tree. Add implicit back refs for every pointer in
 * the new block and increase the lower level extents' reference counts.
 *
 * Back reference key composition:
 *
 * The key objectid corresponds to the first byte in the extent, and the
 * key type is used to differentiate between types of back refs. The
 * meaning of the key offset depends on the type of back ref.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure for the implicit back refs has fields for:
 *
 * - objectid of the subvolume root
 * - objectid of the file holding the reference
 * - original offset in the file
 * - how many bookend extents
 *
 * The key offset for the implicit back refs is the hash of the first
 * three fields.
 *
 * The extent ref structure for the full back refs has a field for:
 *
 * - number of pointers in the tree leaf
 *
 * The key offset for the full back refs is the first byte of
 * the tree leaf.
 *
 * When a file extent is allocated, the implicit back refs are used and
 * the fields are filled in:
 *
 *     (root_key.objectid, inode objectid, offset in file, 1)
 *
 * When a file extent is removed by file truncation, we find the
 * corresponding implicit back refs and check the following fields:
 *
 *     (btrfs_header_owner(leaf), inode objectid, offset in file)
 *
 * Btree extents can be referenced by:
 *
 * - different subvolumes
 *
 * Both the implicit back refs and the full back refs for tree blocks
 * only consist of a key. The key offset for the implicit back refs is
 * the objectid of the block's owner tree. The key offset for the full
 * back refs is the first byte of the parent block.
 *
 * When implicit back refs are used, information about the lowest key and
 * the level of the tree block is required. This information is stored in
 * the tree block info structure.
 */

/*
 * is_data == BTRFS_REF_TYPE_BLOCK, tree block type is required,
 * is_data == BTRFS_REF_TYPE_DATA, data type is required,
 * is_data == BTRFS_REF_TYPE_ANY, either type is OK.
 */
int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
				     struct btrfs_extent_inline_ref *iref,
				     enum btrfs_inline_ref_type is_data)
{
	int type = btrfs_extent_inline_ref_type(eb, iref);
	u64 offset = btrfs_extent_inline_ref_offset(eb, iref);

	if (type == BTRFS_TREE_BLOCK_REF_KEY ||
	    type == BTRFS_SHARED_BLOCK_REF_KEY ||
	    type == BTRFS_SHARED_DATA_REF_KEY ||
	    type == BTRFS_EXTENT_DATA_REF_KEY) {
		if (is_data == BTRFS_REF_TYPE_BLOCK) {
			if (type == BTRFS_TREE_BLOCK_REF_KEY)
				return type;
			if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
				ASSERT(eb->fs_info);
				/*
				 * Every shared one has a parent tree block,
				 * which must be aligned to the sector size.
				 */
				if (offset &&
				    IS_ALIGNED(offset, eb->fs_info->sectorsize))
					return type;
			}
		} else if (is_data == BTRFS_REF_TYPE_DATA) {
			if (type == BTRFS_EXTENT_DATA_REF_KEY)
				return type;
			if (type == BTRFS_SHARED_DATA_REF_KEY) {
				ASSERT(eb->fs_info);
				/*
				 * Every shared one has a parent tree block,
				 * which must be aligned to the sector size.
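				 * A zero or misaligned offset here means the
				 * ref is corrupted and we fall through to
				 * return BTRFS_REF_TYPE_INVALID below.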
383 */ 384 if (offset && 385 IS_ALIGNED(offset, eb->fs_info->sectorsize)) 386 return type; 387 } 388 } else { 389 ASSERT(is_data == BTRFS_REF_TYPE_ANY); 390 return type; 391 } 392 } 393 394 btrfs_print_leaf((struct extent_buffer *)eb); 395 btrfs_err(eb->fs_info, 396 "eb %llu iref 0x%lx invalid extent inline ref type %d", 397 eb->start, (unsigned long)iref, type); 398 WARN_ON(1); 399 400 return BTRFS_REF_TYPE_INVALID; 401} 402 403u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) 404{ 405 u32 high_crc = ~(u32)0; 406 u32 low_crc = ~(u32)0; 407 __le64 lenum; 408 409 lenum = cpu_to_le64(root_objectid); 410 high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum)); 411 lenum = cpu_to_le64(owner); 412 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); 413 lenum = cpu_to_le64(offset); 414 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum)); 415 416 return ((u64)high_crc << 31) ^ (u64)low_crc; 417} 418 419static u64 hash_extent_data_ref_item(struct extent_buffer *leaf, 420 struct btrfs_extent_data_ref *ref) 421{ 422 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref), 423 btrfs_extent_data_ref_objectid(leaf, ref), 424 btrfs_extent_data_ref_offset(leaf, ref)); 425} 426 427static int match_extent_data_ref(struct extent_buffer *leaf, 428 struct btrfs_extent_data_ref *ref, 429 u64 root_objectid, u64 owner, u64 offset) 430{ 431 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid || 432 btrfs_extent_data_ref_objectid(leaf, ref) != owner || 433 btrfs_extent_data_ref_offset(leaf, ref) != offset) 434 return 0; 435 return 1; 436} 437 438static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, 439 struct btrfs_path *path, 440 u64 bytenr, u64 parent, 441 u64 root_objectid, 442 u64 owner, u64 offset) 443{ 444 struct btrfs_root *root = trans->fs_info->extent_root; 445 struct btrfs_key key; 446 struct btrfs_extent_data_ref *ref; 447 struct extent_buffer *leaf; 448 u32 nritems; 449 int ret; 450 int recow; 451 int err = -ENOENT; 452 453 key.objectid = bytenr; 454 if (parent) { 455 key.type = BTRFS_SHARED_DATA_REF_KEY; 456 key.offset = parent; 457 } else { 458 key.type = BTRFS_EXTENT_DATA_REF_KEY; 459 key.offset = hash_extent_data_ref(root_objectid, 460 owner, offset); 461 } 462again: 463 recow = 0; 464 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 465 if (ret < 0) { 466 err = ret; 467 goto fail; 468 } 469 470 if (parent) { 471 if (!ret) 472 return 0; 473 goto fail; 474 } 475 476 leaf = path->nodes[0]; 477 nritems = btrfs_header_nritems(leaf); 478 while (1) { 479 if (path->slots[0] >= nritems) { 480 ret = btrfs_next_leaf(root, path); 481 if (ret < 0) 482 err = ret; 483 if (ret) 484 goto fail; 485 486 leaf = path->nodes[0]; 487 nritems = btrfs_header_nritems(leaf); 488 recow = 1; 489 } 490 491 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 492 if (key.objectid != bytenr || 493 key.type != BTRFS_EXTENT_DATA_REF_KEY) 494 goto fail; 495 496 ref = btrfs_item_ptr(leaf, path->slots[0], 497 struct btrfs_extent_data_ref); 498 499 if (match_extent_data_ref(leaf, ref, root_objectid, 500 owner, offset)) { 501 if (recow) { 502 btrfs_release_path(path); 503 goto again; 504 } 505 err = 0; 506 break; 507 } 508 path->slots[0]++; 509 } 510fail: 511 return err; 512} 513 514static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, 515 struct btrfs_path *path, 516 u64 bytenr, u64 parent, 517 u64 root_objectid, u64 owner, 518 u64 offset, int refs_to_add) 519{ 520 struct btrfs_root *root = trans->fs_info->extent_root; 521 struct btrfs_key 
key; 522 struct extent_buffer *leaf; 523 u32 size; 524 u32 num_refs; 525 int ret; 526 527 key.objectid = bytenr; 528 if (parent) { 529 key.type = BTRFS_SHARED_DATA_REF_KEY; 530 key.offset = parent; 531 size = sizeof(struct btrfs_shared_data_ref); 532 } else { 533 key.type = BTRFS_EXTENT_DATA_REF_KEY; 534 key.offset = hash_extent_data_ref(root_objectid, 535 owner, offset); 536 size = sizeof(struct btrfs_extent_data_ref); 537 } 538 539 ret = btrfs_insert_empty_item(trans, root, path, &key, size); 540 if (ret && ret != -EEXIST) 541 goto fail; 542 543 leaf = path->nodes[0]; 544 if (parent) { 545 struct btrfs_shared_data_ref *ref; 546 ref = btrfs_item_ptr(leaf, path->slots[0], 547 struct btrfs_shared_data_ref); 548 if (ret == 0) { 549 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add); 550 } else { 551 num_refs = btrfs_shared_data_ref_count(leaf, ref); 552 num_refs += refs_to_add; 553 btrfs_set_shared_data_ref_count(leaf, ref, num_refs); 554 } 555 } else { 556 struct btrfs_extent_data_ref *ref; 557 while (ret == -EEXIST) { 558 ref = btrfs_item_ptr(leaf, path->slots[0], 559 struct btrfs_extent_data_ref); 560 if (match_extent_data_ref(leaf, ref, root_objectid, 561 owner, offset)) 562 break; 563 btrfs_release_path(path); 564 key.offset++; 565 ret = btrfs_insert_empty_item(trans, root, path, &key, 566 size); 567 if (ret && ret != -EEXIST) 568 goto fail; 569 570 leaf = path->nodes[0]; 571 } 572 ref = btrfs_item_ptr(leaf, path->slots[0], 573 struct btrfs_extent_data_ref); 574 if (ret == 0) { 575 btrfs_set_extent_data_ref_root(leaf, ref, 576 root_objectid); 577 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 578 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 579 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add); 580 } else { 581 num_refs = btrfs_extent_data_ref_count(leaf, ref); 582 num_refs += refs_to_add; 583 btrfs_set_extent_data_ref_count(leaf, ref, num_refs); 584 } 585 } 586 btrfs_mark_buffer_dirty(leaf); 587 ret = 0; 588fail: 589 btrfs_release_path(path); 590 return ret; 591} 592 593static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, 594 struct btrfs_path *path, 595 int refs_to_drop, int *last_ref) 596{ 597 struct btrfs_key key; 598 struct btrfs_extent_data_ref *ref1 = NULL; 599 struct btrfs_shared_data_ref *ref2 = NULL; 600 struct extent_buffer *leaf; 601 u32 num_refs = 0; 602 int ret = 0; 603 604 leaf = path->nodes[0]; 605 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 606 607 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 608 ref1 = btrfs_item_ptr(leaf, path->slots[0], 609 struct btrfs_extent_data_ref); 610 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 611 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 612 ref2 = btrfs_item_ptr(leaf, path->slots[0], 613 struct btrfs_shared_data_ref); 614 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 615 } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) { 616 btrfs_print_v0_err(trans->fs_info); 617 btrfs_abort_transaction(trans, -EINVAL); 618 return -EINVAL; 619 } else { 620 BUG(); 621 } 622 623 BUG_ON(num_refs < refs_to_drop); 624 num_refs -= refs_to_drop; 625 626 if (num_refs == 0) { 627 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path); 628 *last_ref = 1; 629 } else { 630 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) 631 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); 632 else if (key.type == BTRFS_SHARED_DATA_REF_KEY) 633 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs); 634 btrfs_mark_buffer_dirty(leaf); 635 } 636 return ret; 637} 638 639static noinline u32 
extent_data_ref_count(struct btrfs_path *path, 640 struct btrfs_extent_inline_ref *iref) 641{ 642 struct btrfs_key key; 643 struct extent_buffer *leaf; 644 struct btrfs_extent_data_ref *ref1; 645 struct btrfs_shared_data_ref *ref2; 646 u32 num_refs = 0; 647 int type; 648 649 leaf = path->nodes[0]; 650 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 651 652 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); 653 if (iref) { 654 /* 655 * If type is invalid, we should have bailed out earlier than 656 * this call. 657 */ 658 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA); 659 ASSERT(type != BTRFS_REF_TYPE_INVALID); 660 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 661 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset); 662 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 663 } else { 664 ref2 = (struct btrfs_shared_data_ref *)(iref + 1); 665 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 666 } 667 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 668 ref1 = btrfs_item_ptr(leaf, path->slots[0], 669 struct btrfs_extent_data_ref); 670 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 671 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 672 ref2 = btrfs_item_ptr(leaf, path->slots[0], 673 struct btrfs_shared_data_ref); 674 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 675 } else { 676 WARN_ON(1); 677 } 678 return num_refs; 679} 680 681static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, 682 struct btrfs_path *path, 683 u64 bytenr, u64 parent, 684 u64 root_objectid) 685{ 686 struct btrfs_root *root = trans->fs_info->extent_root; 687 struct btrfs_key key; 688 int ret; 689 690 key.objectid = bytenr; 691 if (parent) { 692 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 693 key.offset = parent; 694 } else { 695 key.type = BTRFS_TREE_BLOCK_REF_KEY; 696 key.offset = root_objectid; 697 } 698 699 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 700 if (ret > 0) 701 ret = -ENOENT; 702 return ret; 703} 704 705static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, 706 struct btrfs_path *path, 707 u64 bytenr, u64 parent, 708 u64 root_objectid) 709{ 710 struct btrfs_key key; 711 int ret; 712 713 key.objectid = bytenr; 714 if (parent) { 715 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 716 key.offset = parent; 717 } else { 718 key.type = BTRFS_TREE_BLOCK_REF_KEY; 719 key.offset = root_objectid; 720 } 721 722 ret = btrfs_insert_empty_item(trans, trans->fs_info->extent_root, 723 path, &key, 0); 724 btrfs_release_path(path); 725 return ret; 726} 727 728static inline int extent_ref_type(u64 parent, u64 owner) 729{ 730 int type; 731 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 732 if (parent > 0) 733 type = BTRFS_SHARED_BLOCK_REF_KEY; 734 else 735 type = BTRFS_TREE_BLOCK_REF_KEY; 736 } else { 737 if (parent > 0) 738 type = BTRFS_SHARED_DATA_REF_KEY; 739 else 740 type = BTRFS_EXTENT_DATA_REF_KEY; 741 } 742 return type; 743} 744 745static int find_next_key(struct btrfs_path *path, int level, 746 struct btrfs_key *key) 747 748{ 749 for (; level < BTRFS_MAX_LEVEL; level++) { 750 if (!path->nodes[level]) 751 break; 752 if (path->slots[level] + 1 >= 753 btrfs_header_nritems(path->nodes[level])) 754 continue; 755 if (level == 0) 756 btrfs_item_key_to_cpu(path->nodes[level], key, 757 path->slots[level] + 1); 758 else 759 btrfs_node_key_to_cpu(path->nodes[level], key, 760 path->slots[level] + 1); 761 return 0; 762 } 763 return 1; 764} 765 766/* 767 * look for inline back ref. 
if back ref is found, *ref_ret is set 768 * to the address of inline back ref, and 0 is returned. 769 * 770 * if back ref isn't found, *ref_ret is set to the address where it 771 * should be inserted, and -ENOENT is returned. 772 * 773 * if insert is true and there are too many inline back refs, the path 774 * points to the extent item, and -EAGAIN is returned. 775 * 776 * NOTE: inline back refs are ordered in the same way that back ref 777 * items in the tree are ordered. 778 */ 779static noinline_for_stack 780int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, 781 struct btrfs_path *path, 782 struct btrfs_extent_inline_ref **ref_ret, 783 u64 bytenr, u64 num_bytes, 784 u64 parent, u64 root_objectid, 785 u64 owner, u64 offset, int insert) 786{ 787 struct btrfs_fs_info *fs_info = trans->fs_info; 788 struct btrfs_root *root = fs_info->extent_root; 789 struct btrfs_key key; 790 struct extent_buffer *leaf; 791 struct btrfs_extent_item *ei; 792 struct btrfs_extent_inline_ref *iref; 793 u64 flags; 794 u64 item_size; 795 unsigned long ptr; 796 unsigned long end; 797 int extra_size; 798 int type; 799 int want; 800 int ret; 801 int err = 0; 802 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA); 803 int needed; 804 805 key.objectid = bytenr; 806 key.type = BTRFS_EXTENT_ITEM_KEY; 807 key.offset = num_bytes; 808 809 want = extent_ref_type(parent, owner); 810 if (insert) { 811 extra_size = btrfs_extent_inline_ref_size(want); 812 path->keep_locks = 1; 813 } else 814 extra_size = -1; 815 816 /* 817 * Owner is our level, so we can just add one to get the level for the 818 * block we are interested in. 819 */ 820 if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) { 821 key.type = BTRFS_METADATA_ITEM_KEY; 822 key.offset = owner; 823 } 824 825again: 826 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); 827 if (ret < 0) { 828 err = ret; 829 goto out; 830 } 831 832 /* 833 * We may be a newly converted file system which still has the old fat 834 * extent entries for metadata, so try and see if we have one of those. 
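	 * (An old style entry here is a BTRFS_EXTENT_ITEM_KEY item whose key
	 * offset is the block size, while a skinny entry is a
	 * BTRFS_METADATA_ITEM_KEY item whose key offset is the block's level.)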
835 */ 836 if (ret > 0 && skinny_metadata) { 837 skinny_metadata = false; 838 if (path->slots[0]) { 839 path->slots[0]--; 840 btrfs_item_key_to_cpu(path->nodes[0], &key, 841 path->slots[0]); 842 if (key.objectid == bytenr && 843 key.type == BTRFS_EXTENT_ITEM_KEY && 844 key.offset == num_bytes) 845 ret = 0; 846 } 847 if (ret) { 848 key.objectid = bytenr; 849 key.type = BTRFS_EXTENT_ITEM_KEY; 850 key.offset = num_bytes; 851 btrfs_release_path(path); 852 goto again; 853 } 854 } 855 856 if (ret && !insert) { 857 err = -ENOENT; 858 goto out; 859 } else if (WARN_ON(ret)) { 860 btrfs_print_leaf(path->nodes[0]); 861 btrfs_err(fs_info, 862"extent item not found for insert, bytenr %llu num_bytes %llu parent %llu root_objectid %llu owner %llu offset %llu", 863 bytenr, num_bytes, parent, root_objectid, owner, 864 offset); 865 err = -EIO; 866 goto out; 867 } 868 869 leaf = path->nodes[0]; 870 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 871 if (unlikely(item_size < sizeof(*ei))) { 872 err = -EINVAL; 873 btrfs_print_v0_err(fs_info); 874 btrfs_abort_transaction(trans, err); 875 goto out; 876 } 877 878 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 879 flags = btrfs_extent_flags(leaf, ei); 880 881 ptr = (unsigned long)(ei + 1); 882 end = (unsigned long)ei + item_size; 883 884 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) { 885 ptr += sizeof(struct btrfs_tree_block_info); 886 BUG_ON(ptr > end); 887 } 888 889 if (owner >= BTRFS_FIRST_FREE_OBJECTID) 890 needed = BTRFS_REF_TYPE_DATA; 891 else 892 needed = BTRFS_REF_TYPE_BLOCK; 893 894 err = -ENOENT; 895 while (1) { 896 if (ptr >= end) { 897 WARN_ON(ptr > end); 898 break; 899 } 900 iref = (struct btrfs_extent_inline_ref *)ptr; 901 type = btrfs_get_extent_inline_ref_type(leaf, iref, needed); 902 if (type == BTRFS_REF_TYPE_INVALID) { 903 err = -EUCLEAN; 904 goto out; 905 } 906 907 if (want < type) 908 break; 909 if (want > type) { 910 ptr += btrfs_extent_inline_ref_size(type); 911 continue; 912 } 913 914 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 915 struct btrfs_extent_data_ref *dref; 916 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 917 if (match_extent_data_ref(leaf, dref, root_objectid, 918 owner, offset)) { 919 err = 0; 920 break; 921 } 922 if (hash_extent_data_ref_item(leaf, dref) < 923 hash_extent_data_ref(root_objectid, owner, offset)) 924 break; 925 } else { 926 u64 ref_offset; 927 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref); 928 if (parent > 0) { 929 if (parent == ref_offset) { 930 err = 0; 931 break; 932 } 933 if (ref_offset < parent) 934 break; 935 } else { 936 if (root_objectid == ref_offset) { 937 err = 0; 938 break; 939 } 940 if (ref_offset < root_objectid) 941 break; 942 } 943 } 944 ptr += btrfs_extent_inline_ref_size(type); 945 } 946 if (err == -ENOENT && insert) { 947 if (item_size + extra_size >= 948 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) { 949 err = -EAGAIN; 950 goto out; 951 } 952 /* 953 * To add new inline back ref, we have to make sure 954 * there is no corresponding back ref item. 
955 * For simplicity, we just do not add new inline back 956 * ref if there is any kind of item for this block 957 */ 958 if (find_next_key(path, 0, &key) == 0 && 959 key.objectid == bytenr && 960 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { 961 err = -EAGAIN; 962 goto out; 963 } 964 } 965 *ref_ret = (struct btrfs_extent_inline_ref *)ptr; 966out: 967 if (insert) { 968 path->keep_locks = 0; 969 btrfs_unlock_up_safe(path, 1); 970 } 971 return err; 972} 973 974/* 975 * helper to add new inline back ref 976 */ 977static noinline_for_stack 978void setup_inline_extent_backref(struct btrfs_fs_info *fs_info, 979 struct btrfs_path *path, 980 struct btrfs_extent_inline_ref *iref, 981 u64 parent, u64 root_objectid, 982 u64 owner, u64 offset, int refs_to_add, 983 struct btrfs_delayed_extent_op *extent_op) 984{ 985 struct extent_buffer *leaf; 986 struct btrfs_extent_item *ei; 987 unsigned long ptr; 988 unsigned long end; 989 unsigned long item_offset; 990 u64 refs; 991 int size; 992 int type; 993 994 leaf = path->nodes[0]; 995 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 996 item_offset = (unsigned long)iref - (unsigned long)ei; 997 998 type = extent_ref_type(parent, owner); 999 size = btrfs_extent_inline_ref_size(type); 1000 1001 btrfs_extend_item(path, size); 1002 1003 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1004 refs = btrfs_extent_refs(leaf, ei); 1005 refs += refs_to_add; 1006 btrfs_set_extent_refs(leaf, ei, refs); 1007 if (extent_op) 1008 __run_delayed_extent_op(extent_op, leaf, ei); 1009 1010 ptr = (unsigned long)ei + item_offset; 1011 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]); 1012 if (ptr < end - size) 1013 memmove_extent_buffer(leaf, ptr + size, ptr, 1014 end - size - ptr); 1015 1016 iref = (struct btrfs_extent_inline_ref *)ptr; 1017 btrfs_set_extent_inline_ref_type(leaf, iref, type); 1018 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1019 struct btrfs_extent_data_ref *dref; 1020 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1021 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid); 1022 btrfs_set_extent_data_ref_objectid(leaf, dref, owner); 1023 btrfs_set_extent_data_ref_offset(leaf, dref, offset); 1024 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add); 1025 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1026 struct btrfs_shared_data_ref *sref; 1027 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1028 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add); 1029 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1030 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { 1031 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1032 } else { 1033 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 1034 } 1035 btrfs_mark_buffer_dirty(leaf); 1036} 1037 1038static int lookup_extent_backref(struct btrfs_trans_handle *trans, 1039 struct btrfs_path *path, 1040 struct btrfs_extent_inline_ref **ref_ret, 1041 u64 bytenr, u64 num_bytes, u64 parent, 1042 u64 root_objectid, u64 owner, u64 offset) 1043{ 1044 int ret; 1045 1046 ret = lookup_inline_extent_backref(trans, path, ref_ret, bytenr, 1047 num_bytes, parent, root_objectid, 1048 owner, offset, 0); 1049 if (ret != -ENOENT) 1050 return ret; 1051 1052 btrfs_release_path(path); 1053 *ref_ret = NULL; 1054 1055 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1056 ret = lookup_tree_block_ref(trans, path, bytenr, parent, 1057 root_objectid); 1058 } else { 1059 ret = lookup_extent_data_ref(trans, path, bytenr, parent, 1060 root_objectid, owner, offset); 1061 
} 1062 return ret; 1063} 1064 1065/* 1066 * helper to update/remove inline back ref 1067 */ 1068static noinline_for_stack 1069void update_inline_extent_backref(struct btrfs_path *path, 1070 struct btrfs_extent_inline_ref *iref, 1071 int refs_to_mod, 1072 struct btrfs_delayed_extent_op *extent_op, 1073 int *last_ref) 1074{ 1075 struct extent_buffer *leaf = path->nodes[0]; 1076 struct btrfs_extent_item *ei; 1077 struct btrfs_extent_data_ref *dref = NULL; 1078 struct btrfs_shared_data_ref *sref = NULL; 1079 unsigned long ptr; 1080 unsigned long end; 1081 u32 item_size; 1082 int size; 1083 int type; 1084 u64 refs; 1085 1086 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1087 refs = btrfs_extent_refs(leaf, ei); 1088 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0); 1089 refs += refs_to_mod; 1090 btrfs_set_extent_refs(leaf, ei, refs); 1091 if (extent_op) 1092 __run_delayed_extent_op(extent_op, leaf, ei); 1093 1094 /* 1095 * If type is invalid, we should have bailed out after 1096 * lookup_inline_extent_backref(). 1097 */ 1098 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY); 1099 ASSERT(type != BTRFS_REF_TYPE_INVALID); 1100 1101 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1102 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1103 refs = btrfs_extent_data_ref_count(leaf, dref); 1104 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1105 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1106 refs = btrfs_shared_data_ref_count(leaf, sref); 1107 } else { 1108 refs = 1; 1109 BUG_ON(refs_to_mod != -1); 1110 } 1111 1112 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod); 1113 refs += refs_to_mod; 1114 1115 if (refs > 0) { 1116 if (type == BTRFS_EXTENT_DATA_REF_KEY) 1117 btrfs_set_extent_data_ref_count(leaf, dref, refs); 1118 else 1119 btrfs_set_shared_data_ref_count(leaf, sref, refs); 1120 } else { 1121 *last_ref = 1; 1122 size = btrfs_extent_inline_ref_size(type); 1123 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1124 ptr = (unsigned long)iref; 1125 end = (unsigned long)ei + item_size; 1126 if (ptr + size < end) 1127 memmove_extent_buffer(leaf, ptr, ptr + size, 1128 end - ptr - size); 1129 item_size -= size; 1130 btrfs_truncate_item(path, item_size, 1); 1131 } 1132 btrfs_mark_buffer_dirty(leaf); 1133} 1134 1135static noinline_for_stack 1136int insert_inline_extent_backref(struct btrfs_trans_handle *trans, 1137 struct btrfs_path *path, 1138 u64 bytenr, u64 num_bytes, u64 parent, 1139 u64 root_objectid, u64 owner, 1140 u64 offset, int refs_to_add, 1141 struct btrfs_delayed_extent_op *extent_op) 1142{ 1143 struct btrfs_extent_inline_ref *iref; 1144 int ret; 1145 1146 ret = lookup_inline_extent_backref(trans, path, &iref, bytenr, 1147 num_bytes, parent, root_objectid, 1148 owner, offset, 1); 1149 if (ret == 0) { 1150 /* 1151 * We're adding refs to a tree block we already own, this 1152 * should not happen at all. 
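		 * (A return of 0 from lookup_inline_extent_backref() means an
		 * inline ref for this owner already exists.  Data refs simply
		 * get their count bumped below, but tree block refs carry no
		 * count, so a duplicate one is treated as corruption.)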
1153 */ 1154 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1155 btrfs_crit(trans->fs_info, 1156"adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu", 1157 bytenr, num_bytes, root_objectid); 1158 if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) { 1159 WARN_ON(1); 1160 btrfs_crit(trans->fs_info, 1161 "path->slots[0]=%d path->nodes[0]:", path->slots[0]); 1162 btrfs_print_leaf(path->nodes[0]); 1163 } 1164 return -EUCLEAN; 1165 } 1166 update_inline_extent_backref(path, iref, refs_to_add, 1167 extent_op, NULL); 1168 } else if (ret == -ENOENT) { 1169 setup_inline_extent_backref(trans->fs_info, path, iref, parent, 1170 root_objectid, owner, offset, 1171 refs_to_add, extent_op); 1172 ret = 0; 1173 } 1174 return ret; 1175} 1176 1177static int remove_extent_backref(struct btrfs_trans_handle *trans, 1178 struct btrfs_path *path, 1179 struct btrfs_extent_inline_ref *iref, 1180 int refs_to_drop, int is_data, int *last_ref) 1181{ 1182 int ret = 0; 1183 1184 BUG_ON(!is_data && refs_to_drop != 1); 1185 if (iref) { 1186 update_inline_extent_backref(path, iref, -refs_to_drop, NULL, 1187 last_ref); 1188 } else if (is_data) { 1189 ret = remove_extent_data_ref(trans, path, refs_to_drop, 1190 last_ref); 1191 } else { 1192 *last_ref = 1; 1193 ret = btrfs_del_item(trans, trans->fs_info->extent_root, path); 1194 } 1195 return ret; 1196} 1197 1198static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len, 1199 u64 *discarded_bytes) 1200{ 1201 int j, ret = 0; 1202 u64 bytes_left, end; 1203 u64 aligned_start = ALIGN(start, 1 << 9); 1204 1205 /* Adjust the range to be aligned to 512B sectors if necessary. */ 1206 if (start != aligned_start) { 1207 len -= aligned_start - start; 1208 len = round_down(len, 1 << 9); 1209 start = aligned_start; 1210 } 1211 1212 *discarded_bytes = 0; 1213 1214 if (!len) 1215 return 0; 1216 1217 end = start + len; 1218 bytes_left = len; 1219 1220 /* Skip any superblocks on this device. */ 1221 for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) { 1222 u64 sb_start = btrfs_sb_offset(j); 1223 u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE; 1224 u64 size = sb_start - start; 1225 1226 if (!in_range(sb_start, start, bytes_left) && 1227 !in_range(sb_end, start, bytes_left) && 1228 !in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE)) 1229 continue; 1230 1231 /* 1232 * Superblock spans beginning of range. Adjust start and 1233 * try again. 1234 */ 1235 if (sb_start <= start) { 1236 start += sb_end - start; 1237 if (start > end) { 1238 bytes_left = 0; 1239 break; 1240 } 1241 bytes_left = end - start; 1242 continue; 1243 } 1244 1245 if (size) { 1246 ret = blkdev_issue_discard(bdev, start >> 9, size >> 9, 1247 GFP_NOFS, 0); 1248 if (!ret) 1249 *discarded_bytes += size; 1250 else if (ret != -EOPNOTSUPP) 1251 return ret; 1252 } 1253 1254 start = sb_end; 1255 if (start > end) { 1256 bytes_left = 0; 1257 break; 1258 } 1259 bytes_left = end - start; 1260 } 1261 1262 if (bytes_left) { 1263 ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9, 1264 GFP_NOFS, 0); 1265 if (!ret) 1266 *discarded_bytes += bytes_left; 1267 } 1268 return ret; 1269} 1270 1271int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, 1272 u64 num_bytes, u64 *actual_bytes) 1273{ 1274 int ret = 0; 1275 u64 discarded_bytes = 0; 1276 u64 end = bytenr + num_bytes; 1277 u64 cur = bytenr; 1278 struct btrfs_bio *bbio = NULL; 1279 1280 1281 /* 1282 * Avoid races with device replace and make sure our bbio has devices 1283 * associated to its stripes that don't go away while we are discarding. 
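	 * The elevated bio counter is dropped again at the out label below.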
	 */
	btrfs_bio_counter_inc_blocked(fs_info);
	while (cur < end) {
		struct btrfs_bio_stripe *stripe;
		int i;

		num_bytes = end - cur;
		/* Tell the block device(s) that the sectors can be discarded */
		ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur,
				      &num_bytes, &bbio, 0);
		/*
		 * Error can be -ENOMEM, -ENOENT (no such chunk mapping) or
		 * -EOPNOTSUPP. For any such error, @num_bytes is not updated,
		 * thus we can't continue anyway.
		 */
		if (ret < 0)
			goto out;

		stripe = bbio->stripes;
		for (i = 0; i < bbio->num_stripes; i++, stripe++) {
			u64 bytes;
			struct request_queue *req_q;
			struct btrfs_device *device = stripe->dev;

			if (!device->bdev) {
				ASSERT(btrfs_test_opt(fs_info, DEGRADED));
				continue;
			}
			req_q = bdev_get_queue(device->bdev);
			if (!blk_queue_discard(req_q))
				continue;

			if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
				continue;

			ret = btrfs_issue_discard(device->bdev,
						  stripe->physical,
						  stripe->length,
						  &bytes);
			if (!ret) {
				discarded_bytes += bytes;
			} else if (ret != -EOPNOTSUPP) {
				/*
				 * Logic errors or -ENOMEM, or -EIO, but
				 * unlikely to happen.
				 *
				 * And since there are two loops, explicitly
				 * go to out to avoid confusion.
				 */
				btrfs_put_bbio(bbio);
				goto out;
			}

			/*
			 * Just in case we get back EOPNOTSUPP for some reason,
			 * just ignore the return value so we don't screw up
			 * people calling discard_extent.
			 */
			ret = 0;
		}
		btrfs_put_bbio(bbio);
		cur += num_bytes;
	}
out:
	btrfs_bio_counter_dec(fs_info);

	if (actual_bytes)
		*actual_bytes = discarded_bytes;

	if (ret == -EOPNOTSUPP)
		ret = 0;
	return ret;
}

/* Can return -ENOMEM */
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_ref *generic_ref)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int ret;

	ASSERT(generic_ref->type != BTRFS_REF_NOT_SET &&
	       generic_ref->action);
	BUG_ON(generic_ref->type == BTRFS_REF_METADATA &&
	       generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID);

	if (generic_ref->type == BTRFS_REF_METADATA)
		ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL);
	else
		ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0);

	btrfs_ref_tree_mod(fs_info, generic_ref);

	return ret;
}

/*
 * __btrfs_inc_extent_ref - insert backreference for a given extent
 *
 * The counterpart is in __btrfs_free_extent(), with examples and more details
 * on how it works.
 *
 * @trans:	    Handle of transaction
 *
 * @node:	    The delayed ref node used to get the bytenr/length for the
 *		    extent whose references are incremented.
 *
 * @parent:	    If this is a shared extent (BTRFS_SHARED_DATA_REF_KEY/
 *		    BTRFS_SHARED_BLOCK_REF_KEY) then it holds the logical
 *		    bytenr of the parent block. Since new extents are always
 *		    created with indirect references, this will only be the
 *		    case when relocating a shared extent. In that case,
 *		    root_objectid will be BTRFS_TREE_RELOC_OBJECTID. Otherwise,
 *		    parent must be 0.
 *
 * @root_objectid:  The id of the root where this modification has originated,
 *		    this can be either one of the well-known metadata trees or
 *		    the subvolume id which references this extent.
 *
 * @owner:	    For data extents it is the inode number of the owning file.
 *		    For metadata extents this parameter holds the level in the
 *		    tree of the extent.
 *
 * @offset:	    For metadata extents the offset is ignored and is currently
 *		    always passed as 0. For data extents it is the file offset
 *		    this extent belongs to.
 *
 * @refs_to_add:    Number of references to add.
 *
 * @extent_op:	    Pointer to a structure holding information necessary for
 *		    updating a tree block's flags.
 */
static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_delayed_ref_node *node,
				  u64 parent, u64 root_objectid,
				  u64 owner, u64 offset, int refs_to_add,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	struct btrfs_key key;
	u64 bytenr = node->bytenr;
	u64 num_bytes = node->num_bytes;
	u64 refs;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;
	/* this will setup the path even if it fails to insert the back ref */
	ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
					   parent, root_objectid, owner,
					   offset, refs_to_add, extent_op);
	if ((ret < 0 && ret != -EAGAIN) || !ret)
		goto out;

	/*
	 * Ok, we had -EAGAIN which means we didn't have space to insert an
	 * inline extent ref, so just update the reference count and add a
	 * normal backref.
	 */
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, item);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	path->leave_spinning = 1;
	/* now insert the actual backref */
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		BUG_ON(refs_to_add != 1);
		ret = insert_tree_block_ref(trans, path, bytenr, parent,
					    root_objectid);
	} else {
		ret = insert_extent_data_ref(trans, path, bytenr, parent,
					     root_objectid, owner, offset,
					     refs_to_add);
	}
	if (ret)
		btrfs_abort_transaction(trans, ret);
out:
	btrfs_free_path(path);
	return ret;
}

static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	u64 flags = 0;

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	ref = btrfs_delayed_node_to_data_ref(node);
	trace_run_delayed_data_ref(trans->fs_info, node, ref, node->action);

	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
		parent = ref->parent;
	ref_root = ref->root;

	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		if (extent_op)
			flags |= extent_op->flags_to_set;
		ret = alloc_reserved_file_extent(trans, parent, ref_root,
						 flags, ref->objectid,
						 ref->offset, &ins,
						 node->ref_mod);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret =
__btrfs_inc_extent_ref(trans, node, parent, ref_root, 1511 ref->objectid, ref->offset, 1512 node->ref_mod, extent_op); 1513 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 1514 ret = __btrfs_free_extent(trans, node, parent, 1515 ref_root, ref->objectid, 1516 ref->offset, node->ref_mod, 1517 extent_op); 1518 } else { 1519 BUG(); 1520 } 1521 return ret; 1522} 1523 1524static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 1525 struct extent_buffer *leaf, 1526 struct btrfs_extent_item *ei) 1527{ 1528 u64 flags = btrfs_extent_flags(leaf, ei); 1529 if (extent_op->update_flags) { 1530 flags |= extent_op->flags_to_set; 1531 btrfs_set_extent_flags(leaf, ei, flags); 1532 } 1533 1534 if (extent_op->update_key) { 1535 struct btrfs_tree_block_info *bi; 1536 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); 1537 bi = (struct btrfs_tree_block_info *)(ei + 1); 1538 btrfs_set_tree_block_key(leaf, bi, &extent_op->key); 1539 } 1540} 1541 1542static int run_delayed_extent_op(struct btrfs_trans_handle *trans, 1543 struct btrfs_delayed_ref_head *head, 1544 struct btrfs_delayed_extent_op *extent_op) 1545{ 1546 struct btrfs_fs_info *fs_info = trans->fs_info; 1547 struct btrfs_key key; 1548 struct btrfs_path *path; 1549 struct btrfs_extent_item *ei; 1550 struct extent_buffer *leaf; 1551 u32 item_size; 1552 int ret; 1553 int err = 0; 1554 int metadata = !extent_op->is_data; 1555 1556 if (TRANS_ABORTED(trans)) 1557 return 0; 1558 1559 if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) 1560 metadata = 0; 1561 1562 path = btrfs_alloc_path(); 1563 if (!path) 1564 return -ENOMEM; 1565 1566 key.objectid = head->bytenr; 1567 1568 if (metadata) { 1569 key.type = BTRFS_METADATA_ITEM_KEY; 1570 key.offset = extent_op->level; 1571 } else { 1572 key.type = BTRFS_EXTENT_ITEM_KEY; 1573 key.offset = head->num_bytes; 1574 } 1575 1576again: 1577 path->leave_spinning = 1; 1578 ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1); 1579 if (ret < 0) { 1580 err = ret; 1581 goto out; 1582 } 1583 if (ret > 0) { 1584 if (metadata) { 1585 if (path->slots[0] > 0) { 1586 path->slots[0]--; 1587 btrfs_item_key_to_cpu(path->nodes[0], &key, 1588 path->slots[0]); 1589 if (key.objectid == head->bytenr && 1590 key.type == BTRFS_EXTENT_ITEM_KEY && 1591 key.offset == head->num_bytes) 1592 ret = 0; 1593 } 1594 if (ret > 0) { 1595 btrfs_release_path(path); 1596 metadata = 0; 1597 1598 key.objectid = head->bytenr; 1599 key.offset = head->num_bytes; 1600 key.type = BTRFS_EXTENT_ITEM_KEY; 1601 goto again; 1602 } 1603 } else { 1604 err = -EIO; 1605 goto out; 1606 } 1607 } 1608 1609 leaf = path->nodes[0]; 1610 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1611 1612 if (unlikely(item_size < sizeof(*ei))) { 1613 err = -EINVAL; 1614 btrfs_print_v0_err(fs_info); 1615 btrfs_abort_transaction(trans, err); 1616 goto out; 1617 } 1618 1619 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1620 __run_delayed_extent_op(extent_op, leaf, ei); 1621 1622 btrfs_mark_buffer_dirty(leaf); 1623out: 1624 btrfs_free_path(path); 1625 return err; 1626} 1627 1628static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, 1629 struct btrfs_delayed_ref_node *node, 1630 struct btrfs_delayed_extent_op *extent_op, 1631 int insert_reserved) 1632{ 1633 int ret = 0; 1634 struct btrfs_delayed_tree_ref *ref; 1635 u64 parent = 0; 1636 u64 ref_root = 0; 1637 1638 ref = btrfs_delayed_node_to_tree_ref(node); 1639 trace_run_delayed_tree_ref(trans->fs_info, node, ref, node->action); 1640 1641 if (node->type 
== BTRFS_SHARED_BLOCK_REF_KEY) 1642 parent = ref->parent; 1643 ref_root = ref->root; 1644 1645 if (unlikely(node->ref_mod != 1)) { 1646 btrfs_err(trans->fs_info, 1647 "btree block %llu has %d references rather than 1: action %d ref_root %llu parent %llu", 1648 node->bytenr, node->ref_mod, node->action, ref_root, 1649 parent); 1650 return -EUCLEAN; 1651 } 1652 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 1653 BUG_ON(!extent_op || !extent_op->update_flags); 1654 ret = alloc_reserved_tree_block(trans, node, extent_op); 1655 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1656 ret = __btrfs_inc_extent_ref(trans, node, parent, ref_root, 1657 ref->level, 0, 1, extent_op); 1658 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 1659 ret = __btrfs_free_extent(trans, node, parent, ref_root, 1660 ref->level, 0, 1, extent_op); 1661 } else { 1662 BUG(); 1663 } 1664 return ret; 1665} 1666 1667/* helper function to actually process a single delayed ref entry */ 1668static int run_one_delayed_ref(struct btrfs_trans_handle *trans, 1669 struct btrfs_delayed_ref_node *node, 1670 struct btrfs_delayed_extent_op *extent_op, 1671 int insert_reserved) 1672{ 1673 int ret = 0; 1674 1675 if (TRANS_ABORTED(trans)) { 1676 if (insert_reserved) 1677 btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1); 1678 return 0; 1679 } 1680 1681 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 1682 node->type == BTRFS_SHARED_BLOCK_REF_KEY) 1683 ret = run_delayed_tree_ref(trans, node, extent_op, 1684 insert_reserved); 1685 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || 1686 node->type == BTRFS_SHARED_DATA_REF_KEY) 1687 ret = run_delayed_data_ref(trans, node, extent_op, 1688 insert_reserved); 1689 else 1690 BUG(); 1691 if (ret && insert_reserved) 1692 btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1); 1693 if (ret < 0) 1694 btrfs_err(trans->fs_info, 1695"failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d", 1696 node->bytenr, node->num_bytes, node->type, 1697 node->action, node->ref_mod, ret); 1698 return ret; 1699} 1700 1701static inline struct btrfs_delayed_ref_node * 1702select_delayed_ref(struct btrfs_delayed_ref_head *head) 1703{ 1704 struct btrfs_delayed_ref_node *ref; 1705 1706 if (RB_EMPTY_ROOT(&head->ref_tree.rb_root)) 1707 return NULL; 1708 1709 /* 1710 * Select a delayed ref of type BTRFS_ADD_DELAYED_REF first. 1711 * This is to prevent a ref count from going down to zero, which deletes 1712 * the extent item from the extent tree, when there still are references 1713 * to add, which would fail because they would not find the extent item. 
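	 *
	 * For example, with one pending drop and one pending add for the same
	 * extent whose current count is 1, running the drop first would delete
	 * the extent item and the later add would then fail to find it;
	 * running the add first keeps the item alive.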
1714 */ 1715 if (!list_empty(&head->ref_add_list)) 1716 return list_first_entry(&head->ref_add_list, 1717 struct btrfs_delayed_ref_node, add_list); 1718 1719 ref = rb_entry(rb_first_cached(&head->ref_tree), 1720 struct btrfs_delayed_ref_node, ref_node); 1721 ASSERT(list_empty(&ref->add_list)); 1722 return ref; 1723} 1724 1725static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, 1726 struct btrfs_delayed_ref_head *head) 1727{ 1728 spin_lock(&delayed_refs->lock); 1729 head->processing = 0; 1730 delayed_refs->num_heads_ready++; 1731 spin_unlock(&delayed_refs->lock); 1732 btrfs_delayed_ref_unlock(head); 1733} 1734 1735static struct btrfs_delayed_extent_op *cleanup_extent_op( 1736 struct btrfs_delayed_ref_head *head) 1737{ 1738 struct btrfs_delayed_extent_op *extent_op = head->extent_op; 1739 1740 if (!extent_op) 1741 return NULL; 1742 1743 if (head->must_insert_reserved) { 1744 head->extent_op = NULL; 1745 btrfs_free_delayed_extent_op(extent_op); 1746 return NULL; 1747 } 1748 return extent_op; 1749} 1750 1751static int run_and_cleanup_extent_op(struct btrfs_trans_handle *trans, 1752 struct btrfs_delayed_ref_head *head) 1753{ 1754 struct btrfs_delayed_extent_op *extent_op; 1755 int ret; 1756 1757 extent_op = cleanup_extent_op(head); 1758 if (!extent_op) 1759 return 0; 1760 head->extent_op = NULL; 1761 spin_unlock(&head->lock); 1762 ret = run_delayed_extent_op(trans, head, extent_op); 1763 btrfs_free_delayed_extent_op(extent_op); 1764 return ret ? ret : 1; 1765} 1766 1767void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, 1768 struct btrfs_delayed_ref_root *delayed_refs, 1769 struct btrfs_delayed_ref_head *head) 1770{ 1771 int nr_items = 1; /* Dropping this ref head update. */ 1772 1773 /* 1774 * We had csum deletions accounted for in our delayed refs rsv, we need 1775 * to drop the csum leaves for this update from our delayed_refs_rsv. 1776 */ 1777 if (head->total_ref_mod < 0 && head->is_data) { 1778 spin_lock(&delayed_refs->lock); 1779 delayed_refs->pending_csums -= head->num_bytes; 1780 spin_unlock(&delayed_refs->lock); 1781 nr_items += btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes); 1782 } 1783 1784 /* 1785 * We were dropping refs, or had a new ref and dropped it, and thus must 1786 * adjust down our total_bytes_pinned, the space may or may not have 1787 * been pinned and so is accounted for properly in the pinned space by 1788 * now. 1789 */ 1790 if (head->total_ref_mod < 0 || 1791 (head->total_ref_mod == 0 && head->must_insert_reserved)) { 1792 u64 flags = btrfs_ref_head_to_space_flags(head); 1793 1794 btrfs_mod_total_bytes_pinned(fs_info, flags, -head->num_bytes); 1795 } 1796 1797 btrfs_delayed_refs_rsv_release(fs_info, nr_items); 1798} 1799 1800static int cleanup_ref_head(struct btrfs_trans_handle *trans, 1801 struct btrfs_delayed_ref_head *head) 1802{ 1803 1804 struct btrfs_fs_info *fs_info = trans->fs_info; 1805 struct btrfs_delayed_ref_root *delayed_refs; 1806 int ret; 1807 1808 delayed_refs = &trans->transaction->delayed_refs; 1809 1810 ret = run_and_cleanup_extent_op(trans, head); 1811 if (ret < 0) { 1812 unselect_delayed_ref_head(delayed_refs, head); 1813 btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); 1814 return ret; 1815 } else if (ret) { 1816 return ret; 1817 } 1818 1819 /* 1820 * Need to drop our head ref lock and re-acquire the delayed ref lock 1821 * and then re-check to make sure nobody got added. 
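	 * If new refs were added in the meantime we return 1 below and the
	 * caller will process this head again.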
1822 */ 1823 spin_unlock(&head->lock); 1824 spin_lock(&delayed_refs->lock); 1825 spin_lock(&head->lock); 1826 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root) || head->extent_op) { 1827 spin_unlock(&head->lock); 1828 spin_unlock(&delayed_refs->lock); 1829 return 1; 1830 } 1831 btrfs_delete_ref_head(delayed_refs, head); 1832 spin_unlock(&head->lock); 1833 spin_unlock(&delayed_refs->lock); 1834 1835 if (head->must_insert_reserved) { 1836 btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1); 1837 if (head->is_data) { 1838 ret = btrfs_del_csums(trans, fs_info->csum_root, 1839 head->bytenr, head->num_bytes); 1840 } 1841 } 1842 1843 btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head); 1844 1845 trace_run_delayed_ref_head(fs_info, head, 0); 1846 btrfs_delayed_ref_unlock(head); 1847 btrfs_put_delayed_ref_head(head); 1848 return ret; 1849} 1850 1851static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head( 1852 struct btrfs_trans_handle *trans) 1853{ 1854 struct btrfs_delayed_ref_root *delayed_refs = 1855 &trans->transaction->delayed_refs; 1856 struct btrfs_delayed_ref_head *head = NULL; 1857 int ret; 1858 1859 spin_lock(&delayed_refs->lock); 1860 head = btrfs_select_ref_head(delayed_refs); 1861 if (!head) { 1862 spin_unlock(&delayed_refs->lock); 1863 return head; 1864 } 1865 1866 /* 1867 * Grab the lock that says we are going to process all the refs for 1868 * this head 1869 */ 1870 ret = btrfs_delayed_ref_lock(delayed_refs, head); 1871 spin_unlock(&delayed_refs->lock); 1872 1873 /* 1874 * We may have dropped the spin lock to get the head mutex lock, and 1875 * that might have given someone else time to free the head. If that's 1876 * true, it has been removed from our list and we can move on. 1877 */ 1878 if (ret == -EAGAIN) 1879 head = ERR_PTR(-EAGAIN); 1880 1881 return head; 1882} 1883 1884static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans, 1885 struct btrfs_delayed_ref_head *locked_ref, 1886 unsigned long *run_refs) 1887{ 1888 struct btrfs_fs_info *fs_info = trans->fs_info; 1889 struct btrfs_delayed_ref_root *delayed_refs; 1890 struct btrfs_delayed_extent_op *extent_op; 1891 struct btrfs_delayed_ref_node *ref; 1892 int must_insert_reserved = 0; 1893 int ret; 1894 1895 delayed_refs = &trans->transaction->delayed_refs; 1896 1897 lockdep_assert_held(&locked_ref->mutex); 1898 lockdep_assert_held(&locked_ref->lock); 1899 1900 while ((ref = select_delayed_ref(locked_ref))) { 1901 if (ref->seq && 1902 btrfs_check_delayed_seq(fs_info, ref->seq)) { 1903 spin_unlock(&locked_ref->lock); 1904 unselect_delayed_ref_head(delayed_refs, locked_ref); 1905 return -EAGAIN; 1906 } 1907 1908 (*run_refs)++; 1909 ref->in_tree = 0; 1910 rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree); 1911 RB_CLEAR_NODE(&ref->ref_node); 1912 if (!list_empty(&ref->add_list)) 1913 list_del(&ref->add_list); 1914 /* 1915 * When we play the delayed ref, also correct the ref_mod on 1916 * head 1917 */ 1918 switch (ref->action) { 1919 case BTRFS_ADD_DELAYED_REF: 1920 case BTRFS_ADD_DELAYED_EXTENT: 1921 locked_ref->ref_mod -= ref->ref_mod; 1922 break; 1923 case BTRFS_DROP_DELAYED_REF: 1924 locked_ref->ref_mod += ref->ref_mod; 1925 break; 1926 default: 1927 WARN_ON(1); 1928 } 1929 atomic_dec(&delayed_refs->num_entries); 1930 1931 /* 1932 * Record the must_insert_reserved flag before we drop the 1933 * spin lock. 
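		 * run_one_delayed_ref() needs it to decide whether to pin the
		 * extent if running the ref fails.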
1934 */ 1935 must_insert_reserved = locked_ref->must_insert_reserved; 1936 locked_ref->must_insert_reserved = 0; 1937 1938 extent_op = locked_ref->extent_op; 1939 locked_ref->extent_op = NULL; 1940 spin_unlock(&locked_ref->lock); 1941 1942 ret = run_one_delayed_ref(trans, ref, extent_op, 1943 must_insert_reserved); 1944 1945 btrfs_free_delayed_extent_op(extent_op); 1946 if (ret) { 1947 unselect_delayed_ref_head(delayed_refs, locked_ref); 1948 btrfs_put_delayed_ref(ref); 1949 return ret; 1950 } 1951 1952 btrfs_put_delayed_ref(ref); 1953 cond_resched(); 1954 1955 spin_lock(&locked_ref->lock); 1956 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref); 1957 } 1958 1959 return 0; 1960} 1961 1962/* 1963 * Returns 0 on success or if called with an already aborted transaction. 1964 * Returns -ENOMEM or -EIO on failure and will abort the transaction. 1965 */ 1966static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 1967 unsigned long nr) 1968{ 1969 struct btrfs_fs_info *fs_info = trans->fs_info; 1970 struct btrfs_delayed_ref_root *delayed_refs; 1971 struct btrfs_delayed_ref_head *locked_ref = NULL; 1972 ktime_t start = ktime_get(); 1973 int ret; 1974 unsigned long count = 0; 1975 unsigned long actual_count = 0; 1976 1977 delayed_refs = &trans->transaction->delayed_refs; 1978 do { 1979 if (!locked_ref) { 1980 locked_ref = btrfs_obtain_ref_head(trans); 1981 if (IS_ERR_OR_NULL(locked_ref)) { 1982 if (PTR_ERR(locked_ref) == -EAGAIN) { 1983 continue; 1984 } else { 1985 break; 1986 } 1987 } 1988 count++; 1989 } 1990 /* 1991 * We need to try and merge add/drops of the same ref since we 1992 * can run into issues with relocate dropping the implicit ref 1993 * and then it being added back again before the drop can 1994 * finish. If we merged anything we need to re-loop so we can 1995 * get a good ref. 1996 * Or we can get node references of the same type that weren't 1997 * merged when created due to bumps in the tree mod seq, and 1998 * we need to merge them to prevent adding an inline extent 1999 * backref before dropping it (triggering a BUG_ON at 2000 * insert_inline_extent_backref()). 2001 */ 2002 spin_lock(&locked_ref->lock); 2003 btrfs_merge_delayed_refs(trans, delayed_refs, locked_ref); 2004 2005 ret = btrfs_run_delayed_refs_for_head(trans, locked_ref, 2006 &actual_count); 2007 if (ret < 0 && ret != -EAGAIN) { 2008 /* 2009 * Error, btrfs_run_delayed_refs_for_head already 2010 * unlocked everything so just bail out 2011 */ 2012 return ret; 2013 } else if (!ret) { 2014 /* 2015 * Success, perform the usual cleanup of a processed 2016 * head 2017 */ 2018 ret = cleanup_ref_head(trans, locked_ref); 2019 if (ret > 0 ) { 2020 /* We dropped our lock, we need to loop. */ 2021 ret = 0; 2022 continue; 2023 } else if (ret) { 2024 return ret; 2025 } 2026 } 2027 2028 /* 2029 * Either success case or btrfs_run_delayed_refs_for_head 2030 * returned -EAGAIN, meaning we need to select another head 2031 */ 2032 2033 locked_ref = NULL; 2034 cond_resched(); 2035 } while ((nr != -1 && count < nr) || locked_ref); 2036 2037 /* 2038 * We don't want to include ref heads since we can have empty ref heads 2039 * and those will drastically skew our runtime down since we just do 2040 * accounting, no actual extent tree updates. 2041 */ 2042 if (actual_count > 0) { 2043 u64 runtime = ktime_to_ns(ktime_sub(ktime_get(), start)); 2044 u64 avg; 2045 2046 /* 2047 * We weigh the current average higher than our current runtime 2048 * to avoid large swings in the average. 
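 * E.g. an old average of 100 and a new runtime of 20 (both in ns) give
 * (3 * 100 + 20) / 4 = 80; the >> 2 below is the divide by four.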
2049 */ 2050 spin_lock(&delayed_refs->lock); 2051 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime; 2052 fs_info->avg_delayed_ref_runtime = avg >> 2; /* div by 4 */ 2053 spin_unlock(&delayed_refs->lock); 2054 } 2055 return 0; 2056} 2057 2058#ifdef SCRAMBLE_DELAYED_REFS 2059/* 2060 * Normally delayed refs get processed in ascending bytenr order. This 2061 * correlates in most cases to the order added. To expose dependencies on this 2062 * order, we start to process the tree in the middle instead of the beginning 2063 */ 2064static u64 find_middle(struct rb_root *root) 2065{ 2066 struct rb_node *n = root->rb_node; 2067 struct btrfs_delayed_ref_node *entry; 2068 int alt = 1; 2069 u64 middle; 2070 u64 first = 0, last = 0; 2071 2072 n = rb_first(root); 2073 if (n) { 2074 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); 2075 first = entry->bytenr; 2076 } 2077 n = rb_last(root); 2078 if (n) { 2079 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); 2080 last = entry->bytenr; 2081 } 2082 n = root->rb_node; 2083 2084 while (n) { 2085 entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); 2086 WARN_ON(!entry->in_tree); 2087 2088 middle = entry->bytenr; 2089 2090 if (alt) 2091 n = n->rb_left; 2092 else 2093 n = n->rb_right; 2094 2095 alt = 1 - alt; 2096 } 2097 return middle; 2098} 2099#endif 2100 2101/* 2102 * Takes the number of bytes to be csumm'ed and figures out how many leaves it 2103 * would require to store the csums for that many bytes. 2104 */ 2105u64 btrfs_csum_bytes_to_leaves(struct btrfs_fs_info *fs_info, u64 csum_bytes) 2106{ 2107 u64 csum_size; 2108 u64 num_csums_per_leaf; 2109 u64 num_csums; 2110 2111 csum_size = BTRFS_MAX_ITEM_SIZE(fs_info); 2112 num_csums_per_leaf = div64_u64(csum_size, 2113 (u64)btrfs_super_csum_size(fs_info->super_copy)); 2114 num_csums = div64_u64(csum_bytes, fs_info->sectorsize); 2115 num_csums += num_csums_per_leaf - 1; 2116 num_csums = div64_u64(num_csums, num_csums_per_leaf); 2117 return num_csums; 2118} 2119 2120/* 2121 * this starts processing the delayed reference count updates and 2122 * extent insertions we have queued up so far. count can be 2123 * 0, which means to process everything in the tree at the start 2124 * of the run (but not newly added entries), or it can be some target 2125 * number you'd like to process. 
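 * Passing (unsigned long)-1 makes us keep looping (the run_all case below)
 * until the delayed ref tree is completely empty, including heads queued
 * while we run.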
2126 * 2127 * Returns 0 on success or if called with an aborted transaction 2128 * Returns <0 on error and aborts the transaction 2129 */ 2130int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2131 unsigned long count) 2132{ 2133 struct btrfs_fs_info *fs_info = trans->fs_info; 2134 struct rb_node *node; 2135 struct btrfs_delayed_ref_root *delayed_refs; 2136 struct btrfs_delayed_ref_head *head; 2137 int ret; 2138 int run_all = count == (unsigned long)-1; 2139 2140 /* We'll clean this up in btrfs_cleanup_transaction */ 2141 if (TRANS_ABORTED(trans)) 2142 return 0; 2143 2144 if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags)) 2145 return 0; 2146 2147 delayed_refs = &trans->transaction->delayed_refs; 2148 if (count == 0) 2149 count = atomic_read(&delayed_refs->num_entries) * 2; 2150 2151again: 2152#ifdef SCRAMBLE_DELAYED_REFS 2153 delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); 2154#endif 2155 ret = __btrfs_run_delayed_refs(trans, count); 2156 if (ret < 0) { 2157 btrfs_abort_transaction(trans, ret); 2158 return ret; 2159 } 2160 2161 if (run_all) { 2162 btrfs_create_pending_block_groups(trans); 2163 2164 spin_lock(&delayed_refs->lock); 2165 node = rb_first_cached(&delayed_refs->href_root); 2166 if (!node) { 2167 spin_unlock(&delayed_refs->lock); 2168 goto out; 2169 } 2170 head = rb_entry(node, struct btrfs_delayed_ref_head, 2171 href_node); 2172 refcount_inc(&head->refs); 2173 spin_unlock(&delayed_refs->lock); 2174 2175 /* Mutex was contended, block until it's released and retry. */ 2176 mutex_lock(&head->mutex); 2177 mutex_unlock(&head->mutex); 2178 2179 btrfs_put_delayed_ref_head(head); 2180 cond_resched(); 2181 goto again; 2182 } 2183out: 2184 return 0; 2185} 2186 2187int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 2188 struct extent_buffer *eb, u64 flags, 2189 int level, int is_data) 2190{ 2191 struct btrfs_delayed_extent_op *extent_op; 2192 int ret; 2193 2194 extent_op = btrfs_alloc_delayed_extent_op(); 2195 if (!extent_op) 2196 return -ENOMEM; 2197 2198 extent_op->flags_to_set = flags; 2199 extent_op->update_flags = true; 2200 extent_op->update_key = false; 2201 extent_op->is_data = is_data ? 
true : false; 2202 extent_op->level = level; 2203 2204 ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op); 2205 if (ret) 2206 btrfs_free_delayed_extent_op(extent_op); 2207 return ret; 2208} 2209 2210static noinline int check_delayed_ref(struct btrfs_root *root, 2211 struct btrfs_path *path, 2212 u64 objectid, u64 offset, u64 bytenr) 2213{ 2214 struct btrfs_delayed_ref_head *head; 2215 struct btrfs_delayed_ref_node *ref; 2216 struct btrfs_delayed_data_ref *data_ref; 2217 struct btrfs_delayed_ref_root *delayed_refs; 2218 struct btrfs_transaction *cur_trans; 2219 struct rb_node *node; 2220 int ret = 0; 2221 2222 spin_lock(&root->fs_info->trans_lock); 2223 cur_trans = root->fs_info->running_transaction; 2224 if (cur_trans) 2225 refcount_inc(&cur_trans->use_count); 2226 spin_unlock(&root->fs_info->trans_lock); 2227 if (!cur_trans) 2228 return 0; 2229 2230 delayed_refs = &cur_trans->delayed_refs; 2231 spin_lock(&delayed_refs->lock); 2232 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); 2233 if (!head) { 2234 spin_unlock(&delayed_refs->lock); 2235 btrfs_put_transaction(cur_trans); 2236 return 0; 2237 } 2238 2239 if (!mutex_trylock(&head->mutex)) { 2240 refcount_inc(&head->refs); 2241 spin_unlock(&delayed_refs->lock); 2242 2243 btrfs_release_path(path); 2244 2245 /* 2246 * Mutex was contended, block until it's released and let 2247 * caller try again 2248 */ 2249 mutex_lock(&head->mutex); 2250 mutex_unlock(&head->mutex); 2251 btrfs_put_delayed_ref_head(head); 2252 btrfs_put_transaction(cur_trans); 2253 return -EAGAIN; 2254 } 2255 spin_unlock(&delayed_refs->lock); 2256 2257 spin_lock(&head->lock); 2258 /* 2259 * XXX: We should replace this with a proper search function in the 2260 * future. 2261 */ 2262 for (node = rb_first_cached(&head->ref_tree); node; 2263 node = rb_next(node)) { 2264 ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node); 2265 /* If it's a shared ref we know a cross reference exists */ 2266 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) { 2267 ret = 1; 2268 break; 2269 } 2270 2271 data_ref = btrfs_delayed_node_to_data_ref(ref); 2272 2273 /* 2274 * If our ref doesn't match the one we're currently looking at 2275 * then we have a cross reference. 
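 * Only a delayed ref from the same root, for the same inode objectid and
 * the same file offset counts as "ours"; anything else means a cross
 * reference exists.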
2276 */ 2277 if (data_ref->root != root->root_key.objectid || 2278 data_ref->objectid != objectid || 2279 data_ref->offset != offset) { 2280 ret = 1; 2281 break; 2282 } 2283 } 2284 spin_unlock(&head->lock); 2285 mutex_unlock(&head->mutex); 2286 btrfs_put_transaction(cur_trans); 2287 return ret; 2288} 2289 2290static noinline int check_committed_ref(struct btrfs_root *root, 2291 struct btrfs_path *path, 2292 u64 objectid, u64 offset, u64 bytenr, 2293 bool strict) 2294{ 2295 struct btrfs_fs_info *fs_info = root->fs_info; 2296 struct btrfs_root *extent_root = fs_info->extent_root; 2297 struct extent_buffer *leaf; 2298 struct btrfs_extent_data_ref *ref; 2299 struct btrfs_extent_inline_ref *iref; 2300 struct btrfs_extent_item *ei; 2301 struct btrfs_key key; 2302 u32 item_size; 2303 int type; 2304 int ret; 2305 2306 key.objectid = bytenr; 2307 key.offset = (u64)-1; 2308 key.type = BTRFS_EXTENT_ITEM_KEY; 2309 2310 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2311 if (ret < 0) 2312 goto out; 2313 BUG_ON(ret == 0); /* Corruption */ 2314 2315 ret = -ENOENT; 2316 if (path->slots[0] == 0) 2317 goto out; 2318 2319 path->slots[0]--; 2320 leaf = path->nodes[0]; 2321 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2322 2323 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY) 2324 goto out; 2325 2326 ret = 1; 2327 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2328 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2329 2330 /* If extent item has more than 1 inline ref then it's shared */ 2331 if (item_size != sizeof(*ei) + 2332 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)) 2333 goto out; 2334 2335 /* 2336 * If extent created before last snapshot => it's shared unless the 2337 * snapshot has been deleted. Use the heuristic if strict is false. 
2338 */ 2339 if (!strict && 2340 (btrfs_extent_generation(leaf, ei) <= 2341 btrfs_root_last_snapshot(&root->root_item))) 2342 goto out; 2343 2344 iref = (struct btrfs_extent_inline_ref *)(ei + 1); 2345 2346 /* If this extent has SHARED_DATA_REF then it's shared */ 2347 type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA); 2348 if (type != BTRFS_EXTENT_DATA_REF_KEY) 2349 goto out; 2350 2351 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 2352 if (btrfs_extent_refs(leaf, ei) != 2353 btrfs_extent_data_ref_count(leaf, ref) || 2354 btrfs_extent_data_ref_root(leaf, ref) != 2355 root->root_key.objectid || 2356 btrfs_extent_data_ref_objectid(leaf, ref) != objectid || 2357 btrfs_extent_data_ref_offset(leaf, ref) != offset) 2358 goto out; 2359 2360 ret = 0; 2361out: 2362 return ret; 2363} 2364 2365int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset, 2366 u64 bytenr, bool strict) 2367{ 2368 struct btrfs_path *path; 2369 int ret; 2370 2371 path = btrfs_alloc_path(); 2372 if (!path) 2373 return -ENOMEM; 2374 2375 do { 2376 ret = check_committed_ref(root, path, objectid, 2377 offset, bytenr, strict); 2378 if (ret && ret != -ENOENT) 2379 goto out; 2380 2381 ret = check_delayed_ref(root, path, objectid, offset, bytenr); 2382 } while (ret == -EAGAIN); 2383 2384out: 2385 btrfs_free_path(path); 2386 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) 2387 WARN_ON(ret > 0); 2388 return ret; 2389} 2390 2391static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2392 struct btrfs_root *root, 2393 struct extent_buffer *buf, 2394 int full_backref, int inc) 2395{ 2396 struct btrfs_fs_info *fs_info = root->fs_info; 2397 u64 bytenr; 2398 u64 num_bytes; 2399 u64 parent; 2400 u64 ref_root; 2401 u32 nritems; 2402 struct btrfs_key key; 2403 struct btrfs_file_extent_item *fi; 2404 struct btrfs_ref generic_ref = { 0 }; 2405 bool for_reloc = btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC); 2406 int i; 2407 int action; 2408 int level; 2409 int ret = 0; 2410 2411 if (btrfs_is_testing(fs_info)) 2412 return 0; 2413 2414 ref_root = btrfs_header_owner(buf); 2415 nritems = btrfs_header_nritems(buf); 2416 level = btrfs_header_level(buf); 2417 2418 if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && level == 0) 2419 return 0; 2420 2421 if (full_backref) 2422 parent = buf->start; 2423 else 2424 parent = 0; 2425 if (inc) 2426 action = BTRFS_ADD_DELAYED_REF; 2427 else 2428 action = BTRFS_DROP_DELAYED_REF; 2429 2430 for (i = 0; i < nritems; i++) { 2431 if (level == 0) { 2432 btrfs_item_key_to_cpu(buf, &key, i); 2433 if (key.type != BTRFS_EXTENT_DATA_KEY) 2434 continue; 2435 fi = btrfs_item_ptr(buf, i, 2436 struct btrfs_file_extent_item); 2437 if (btrfs_file_extent_type(buf, fi) == 2438 BTRFS_FILE_EXTENT_INLINE) 2439 continue; 2440 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2441 if (bytenr == 0) 2442 continue; 2443 2444 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); 2445 key.offset -= btrfs_file_extent_offset(buf, fi); 2446 btrfs_init_generic_ref(&generic_ref, action, bytenr, 2447 num_bytes, parent); 2448 generic_ref.real_root = root->root_key.objectid; 2449 btrfs_init_data_ref(&generic_ref, ref_root, key.objectid, 2450 key.offset); 2451 generic_ref.skip_qgroup = for_reloc; 2452 if (inc) 2453 ret = btrfs_inc_extent_ref(trans, &generic_ref); 2454 else 2455 ret = btrfs_free_extent(trans, &generic_ref); 2456 if (ret) 2457 goto fail; 2458 } else { 2459 bytenr = btrfs_node_blockptr(buf, i); 2460 num_bytes = fs_info->nodesize; 2461 
btrfs_init_generic_ref(&generic_ref, action, bytenr, 2462 num_bytes, parent); 2463 generic_ref.real_root = root->root_key.objectid; 2464 btrfs_init_tree_ref(&generic_ref, level - 1, ref_root); 2465 generic_ref.skip_qgroup = for_reloc; 2466 if (inc) 2467 ret = btrfs_inc_extent_ref(trans, &generic_ref); 2468 else 2469 ret = btrfs_free_extent(trans, &generic_ref); 2470 if (ret) 2471 goto fail; 2472 } 2473 } 2474 return 0; 2475fail: 2476 return ret; 2477} 2478 2479int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2480 struct extent_buffer *buf, int full_backref) 2481{ 2482 return __btrfs_mod_ref(trans, root, buf, full_backref, 1); 2483} 2484 2485int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2486 struct extent_buffer *buf, int full_backref) 2487{ 2488 return __btrfs_mod_ref(trans, root, buf, full_backref, 0); 2489} 2490 2491int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr) 2492{ 2493 struct btrfs_block_group *block_group; 2494 int readonly = 0; 2495 2496 block_group = btrfs_lookup_block_group(fs_info, bytenr); 2497 if (!block_group || block_group->ro) 2498 readonly = 1; 2499 if (block_group) 2500 btrfs_put_block_group(block_group); 2501 return readonly; 2502} 2503 2504static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data) 2505{ 2506 struct btrfs_fs_info *fs_info = root->fs_info; 2507 u64 flags; 2508 u64 ret; 2509 2510 if (data) 2511 flags = BTRFS_BLOCK_GROUP_DATA; 2512 else if (root == fs_info->chunk_root) 2513 flags = BTRFS_BLOCK_GROUP_SYSTEM; 2514 else 2515 flags = BTRFS_BLOCK_GROUP_METADATA; 2516 2517 ret = btrfs_get_alloc_profile(fs_info, flags); 2518 return ret; 2519} 2520 2521static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start) 2522{ 2523 struct btrfs_block_group *cache; 2524 u64 bytenr; 2525 2526 spin_lock(&fs_info->block_group_cache_lock); 2527 bytenr = fs_info->first_logical_byte; 2528 spin_unlock(&fs_info->block_group_cache_lock); 2529 2530 if (bytenr < (u64)-1) 2531 return bytenr; 2532 2533 cache = btrfs_lookup_first_block_group(fs_info, search_start); 2534 if (!cache) 2535 return 0; 2536 2537 bytenr = cache->start; 2538 btrfs_put_block_group(cache); 2539 2540 return bytenr; 2541} 2542 2543static int pin_down_extent(struct btrfs_trans_handle *trans, 2544 struct btrfs_block_group *cache, 2545 u64 bytenr, u64 num_bytes, int reserved) 2546{ 2547 struct btrfs_fs_info *fs_info = cache->fs_info; 2548 2549 spin_lock(&cache->space_info->lock); 2550 spin_lock(&cache->lock); 2551 cache->pinned += num_bytes; 2552 btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info, 2553 num_bytes); 2554 if (reserved) { 2555 cache->reserved -= num_bytes; 2556 cache->space_info->bytes_reserved -= num_bytes; 2557 } 2558 spin_unlock(&cache->lock); 2559 spin_unlock(&cache->space_info->lock); 2560 2561 __btrfs_mod_total_bytes_pinned(cache->space_info, num_bytes); 2562 set_extent_dirty(&trans->transaction->pinned_extents, bytenr, 2563 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); 2564 return 0; 2565} 2566 2567int btrfs_pin_extent(struct btrfs_trans_handle *trans, 2568 u64 bytenr, u64 num_bytes, int reserved) 2569{ 2570 struct btrfs_block_group *cache; 2571 2572 cache = btrfs_lookup_block_group(trans->fs_info, bytenr); 2573 BUG_ON(!cache); /* Logic error */ 2574 2575 pin_down_extent(trans, cache, bytenr, num_bytes, reserved); 2576 2577 btrfs_put_block_group(cache); 2578 return 0; 2579} 2580 2581/* 2582 * this function must be called within transaction 2583 */ 2584int 
btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, 2585 u64 bytenr, u64 num_bytes) 2586{ 2587 struct btrfs_block_group *cache; 2588 int ret; 2589 2590 btrfs_add_excluded_extent(trans->fs_info, bytenr, num_bytes); 2591 2592 cache = btrfs_lookup_block_group(trans->fs_info, bytenr); 2593 if (!cache) 2594 return -EINVAL; 2595 2596 /* 2597 * pull in the free space cache (if any) so that our pin 2598 * removes the free space from the cache. We have load_only set 2599 * to one because the slow code to read in the free extents does check 2600 * the pinned extents. 2601 */ 2602 btrfs_cache_block_group(cache, 1); 2603 2604 pin_down_extent(trans, cache, bytenr, num_bytes, 0); 2605 2606 /* remove us from the free space cache (if we're there at all) */ 2607 ret = btrfs_remove_free_space(cache, bytenr, num_bytes); 2608 btrfs_put_block_group(cache); 2609 return ret; 2610} 2611 2612static int __exclude_logged_extent(struct btrfs_fs_info *fs_info, 2613 u64 start, u64 num_bytes) 2614{ 2615 int ret; 2616 struct btrfs_block_group *block_group; 2617 struct btrfs_caching_control *caching_ctl; 2618 2619 block_group = btrfs_lookup_block_group(fs_info, start); 2620 if (!block_group) 2621 return -EINVAL; 2622 2623 btrfs_cache_block_group(block_group, 0); 2624 caching_ctl = btrfs_get_caching_control(block_group); 2625 2626 if (!caching_ctl) { 2627 /* Logic error */ 2628 BUG_ON(!btrfs_block_group_done(block_group)); 2629 ret = btrfs_remove_free_space(block_group, start, num_bytes); 2630 } else { 2631 mutex_lock(&caching_ctl->mutex); 2632 2633 if (start >= caching_ctl->progress) { 2634 ret = btrfs_add_excluded_extent(fs_info, start, 2635 num_bytes); 2636 } else if (start + num_bytes <= caching_ctl->progress) { 2637 ret = btrfs_remove_free_space(block_group, 2638 start, num_bytes); 2639 } else { 2640 num_bytes = caching_ctl->progress - start; 2641 ret = btrfs_remove_free_space(block_group, 2642 start, num_bytes); 2643 if (ret) 2644 goto out_lock; 2645 2646 num_bytes = (start + num_bytes) - 2647 caching_ctl->progress; 2648 start = caching_ctl->progress; 2649 ret = btrfs_add_excluded_extent(fs_info, start, 2650 num_bytes); 2651 } 2652out_lock: 2653 mutex_unlock(&caching_ctl->mutex); 2654 btrfs_put_caching_control(caching_ctl); 2655 } 2656 btrfs_put_block_group(block_group); 2657 return ret; 2658} 2659 2660int btrfs_exclude_logged_extents(struct extent_buffer *eb) 2661{ 2662 struct btrfs_fs_info *fs_info = eb->fs_info; 2663 struct btrfs_file_extent_item *item; 2664 struct btrfs_key key; 2665 int found_type; 2666 int i; 2667 int ret = 0; 2668 2669 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) 2670 return 0; 2671 2672 for (i = 0; i < btrfs_header_nritems(eb); i++) { 2673 btrfs_item_key_to_cpu(eb, &key, i); 2674 if (key.type != BTRFS_EXTENT_DATA_KEY) 2675 continue; 2676 item = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item); 2677 found_type = btrfs_file_extent_type(eb, item); 2678 if (found_type == BTRFS_FILE_EXTENT_INLINE) 2679 continue; 2680 if (btrfs_file_extent_disk_bytenr(eb, item) == 0) 2681 continue; 2682 key.objectid = btrfs_file_extent_disk_bytenr(eb, item); 2683 key.offset = btrfs_file_extent_disk_num_bytes(eb, item); 2684 ret = __exclude_logged_extent(fs_info, key.objectid, key.offset); 2685 if (ret) 2686 break; 2687 } 2688 2689 return ret; 2690} 2691 2692static void 2693btrfs_inc_block_group_reservations(struct btrfs_block_group *bg) 2694{ 2695 atomic_inc(&bg->reservations); 2696} 2697 2698/* 2699 * Returns the free cluster for the given space info and sets empty_cluster to 2700 * what it 
should be based on the mount options. 2701 */ 2702static struct btrfs_free_cluster * 2703fetch_cluster_info(struct btrfs_fs_info *fs_info, 2704 struct btrfs_space_info *space_info, u64 *empty_cluster) 2705{ 2706 struct btrfs_free_cluster *ret = NULL; 2707 2708 *empty_cluster = 0; 2709 if (btrfs_mixed_space_info(space_info)) 2710 return ret; 2711 2712 if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { 2713 ret = &fs_info->meta_alloc_cluster; 2714 if (btrfs_test_opt(fs_info, SSD)) 2715 *empty_cluster = SZ_2M; 2716 else 2717 *empty_cluster = SZ_64K; 2718 } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) && 2719 btrfs_test_opt(fs_info, SSD_SPREAD)) { 2720 *empty_cluster = SZ_2M; 2721 ret = &fs_info->data_alloc_cluster; 2722 } 2723 2724 return ret; 2725} 2726 2727static int unpin_extent_range(struct btrfs_fs_info *fs_info, 2728 u64 start, u64 end, 2729 const bool return_free_space) 2730{ 2731 struct btrfs_block_group *cache = NULL; 2732 struct btrfs_space_info *space_info; 2733 struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; 2734 struct btrfs_free_cluster *cluster = NULL; 2735 u64 len; 2736 u64 total_unpinned = 0; 2737 u64 empty_cluster = 0; 2738 bool readonly; 2739 2740 while (start <= end) { 2741 readonly = false; 2742 if (!cache || 2743 start >= cache->start + cache->length) { 2744 if (cache) 2745 btrfs_put_block_group(cache); 2746 total_unpinned = 0; 2747 cache = btrfs_lookup_block_group(fs_info, start); 2748 BUG_ON(!cache); /* Logic error */ 2749 2750 cluster = fetch_cluster_info(fs_info, 2751 cache->space_info, 2752 &empty_cluster); 2753 empty_cluster <<= 1; 2754 } 2755 2756 len = cache->start + cache->length - start; 2757 len = min(len, end + 1 - start); 2758 2759 if (start < cache->last_byte_to_unpin && return_free_space) { 2760 u64 add_len = min(len, cache->last_byte_to_unpin - start); 2761 2762 btrfs_add_free_space(cache, start, add_len); 2763 } 2764 2765 start += len; 2766 total_unpinned += len; 2767 space_info = cache->space_info; 2768 2769 /* 2770 * If this space cluster has been marked as fragmented and we've 2771 * unpinned enough in this block group to potentially allow a 2772 * cluster to be created inside of it go ahead and clear the 2773 * fragmented check. 
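 * "Enough" here means more than twice the space_info's usual empty_cluster;
 * empty_cluster was shifted left by one above when we switched block groups.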
2774 */ 2775 if (cluster && cluster->fragmented && 2776 total_unpinned > empty_cluster) { 2777 spin_lock(&cluster->lock); 2778 cluster->fragmented = 0; 2779 spin_unlock(&cluster->lock); 2780 } 2781 2782 spin_lock(&space_info->lock); 2783 spin_lock(&cache->lock); 2784 cache->pinned -= len; 2785 btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len); 2786 space_info->max_extent_size = 0; 2787 __btrfs_mod_total_bytes_pinned(space_info, -len); 2788 if (cache->ro) { 2789 space_info->bytes_readonly += len; 2790 readonly = true; 2791 } 2792 spin_unlock(&cache->lock); 2793 if (!readonly && return_free_space && 2794 global_rsv->space_info == space_info) { 2795 u64 to_add = len; 2796 2797 spin_lock(&global_rsv->lock); 2798 if (!global_rsv->full) { 2799 to_add = min(len, global_rsv->size - 2800 global_rsv->reserved); 2801 global_rsv->reserved += to_add; 2802 btrfs_space_info_update_bytes_may_use(fs_info, 2803 space_info, to_add); 2804 if (global_rsv->reserved >= global_rsv->size) 2805 global_rsv->full = 1; 2806 len -= to_add; 2807 } 2808 spin_unlock(&global_rsv->lock); 2809 } 2810 /* Add to any tickets we may have */ 2811 if (!readonly && return_free_space && len) 2812 btrfs_try_granting_tickets(fs_info, space_info); 2813 spin_unlock(&space_info->lock); 2814 } 2815 2816 if (cache) 2817 btrfs_put_block_group(cache); 2818 return 0; 2819} 2820 2821int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) 2822{ 2823 struct btrfs_fs_info *fs_info = trans->fs_info; 2824 struct btrfs_block_group *block_group, *tmp; 2825 struct list_head *deleted_bgs; 2826 struct extent_io_tree *unpin; 2827 u64 start; 2828 u64 end; 2829 int ret; 2830 2831 unpin = &trans->transaction->pinned_extents; 2832 2833 while (!TRANS_ABORTED(trans)) { 2834 struct extent_state *cached_state = NULL; 2835 2836 mutex_lock(&fs_info->unused_bg_unpin_mutex); 2837 ret = find_first_extent_bit(unpin, 0, &start, &end, 2838 EXTENT_DIRTY, &cached_state); 2839 if (ret) { 2840 mutex_unlock(&fs_info->unused_bg_unpin_mutex); 2841 break; 2842 } 2843 if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) 2844 clear_extent_bits(&fs_info->excluded_extents, start, 2845 end, EXTENT_UPTODATE); 2846 2847 if (btrfs_test_opt(fs_info, DISCARD_SYNC)) 2848 ret = btrfs_discard_extent(fs_info, start, 2849 end + 1 - start, NULL); 2850 2851 clear_extent_dirty(unpin, start, end, &cached_state); 2852 unpin_extent_range(fs_info, start, end, true); 2853 mutex_unlock(&fs_info->unused_bg_unpin_mutex); 2854 free_extent_state(cached_state); 2855 cond_resched(); 2856 } 2857 2858 if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) { 2859 btrfs_discard_calc_delay(&fs_info->discard_ctl); 2860 btrfs_discard_schedule_work(&fs_info->discard_ctl, true); 2861 } 2862 2863 /* 2864 * Transaction is finished. We don't need the lock anymore. We 2865 * do need to clean up the block groups in case of a transaction 2866 * abort. 
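 * Block groups deleted during this transaction still get a final discard
 * below (unless the transaction aborted) before being unfrozen and released.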
2867 */ 2868 deleted_bgs = &trans->transaction->deleted_bgs; 2869 list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) { 2870 u64 trimmed = 0; 2871 2872 ret = -EROFS; 2873 if (!TRANS_ABORTED(trans)) 2874 ret = btrfs_discard_extent(fs_info, 2875 block_group->start, 2876 block_group->length, 2877 &trimmed); 2878 2879 list_del_init(&block_group->bg_list); 2880 btrfs_unfreeze_block_group(block_group); 2881 btrfs_put_block_group(block_group); 2882 2883 if (ret) { 2884 const char *errstr = btrfs_decode_error(ret); 2885 btrfs_warn(fs_info, 2886 "discard failed while removing blockgroup: errno=%d %s", 2887 ret, errstr); 2888 } 2889 } 2890 2891 return 0; 2892} 2893 2894/* 2895 * Drop one or more refs of @node. 2896 * 2897 * 1. Locate the extent refs. 2898 * It's either inline in EXTENT/METADATA_ITEM or in a keyed SHARED_* item. 2899 * Locate it, then reduce the refs number or remove the ref line completely. 2900 * 2901 * 2. Update the refs count in EXTENT/METADATA_ITEM 2902 * 2903 * Inline backref case: 2904 * 2905 * in extent tree we have: 2906 * 2907 * item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82 2908 * refs 2 gen 6 flags DATA 2909 * extent data backref root FS_TREE objectid 258 offset 0 count 1 2910 * extent data backref root FS_TREE objectid 257 offset 0 count 1 2911 * 2912 * This function gets called with: 2913 * 2914 * node->bytenr = 13631488 2915 * node->num_bytes = 1048576 2916 * root_objectid = FS_TREE 2917 * owner_objectid = 257 2918 * owner_offset = 0 2919 * refs_to_drop = 1 2920 * 2921 * Then we should get something like: 2922 * 2923 * item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 16201 itemsize 82 2924 * refs 1 gen 6 flags DATA 2925 * extent data backref root FS_TREE objectid 258 offset 0 count 1 2926 * 2927 * Keyed backref case: 2928 * 2929 * in extent tree we have: 2930 * 2931 * item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24 2932 * refs 754 gen 6 flags DATA 2933 * [...] 2934 * item 2 key (13631488 EXTENT_DATA_REF <HASH>) itemoff 3915 itemsize 28 2935 * extent data backref root FS_TREE objectid 866 offset 0 count 1 2936 * 2937 * This function gets called with: 2938 * 2939 * node->bytenr = 13631488 2940 * node->num_bytes = 1048576 2941 * root_objectid = FS_TREE 2942 * owner_objectid = 866 2943 * owner_offset = 0 2944 * refs_to_drop = 1 2945 * 2946 * Then we should get something like: 2947 * 2948 * item 0 key (13631488 EXTENT_ITEM 1048576) itemoff 3971 itemsize 24 2949 * refs 753 gen 6 flags DATA 2950 * 2951 * And that (13631488 EXTENT_DATA_REF <HASH>) gets removed.
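 * In both cases, once the refs count reaches zero the EXTENT_ITEM itself
 * (and the SHARED_* item, if any) is deleted, csums are dropped for data
 * extents, and the space is returned to the free space tree and the block
 * group counters.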
2952 */ 2953 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 2954 struct btrfs_delayed_ref_node *node, u64 parent, 2955 u64 root_objectid, u64 owner_objectid, 2956 u64 owner_offset, int refs_to_drop, 2957 struct btrfs_delayed_extent_op *extent_op) 2958{ 2959 struct btrfs_fs_info *info = trans->fs_info; 2960 struct btrfs_key key; 2961 struct btrfs_path *path; 2962 struct btrfs_root *extent_root = info->extent_root; 2963 struct extent_buffer *leaf; 2964 struct btrfs_extent_item *ei; 2965 struct btrfs_extent_inline_ref *iref; 2966 int ret; 2967 int is_data; 2968 int extent_slot = 0; 2969 int found_extent = 0; 2970 int num_to_del = 1; 2971 u32 item_size; 2972 u64 refs; 2973 u64 bytenr = node->bytenr; 2974 u64 num_bytes = node->num_bytes; 2975 int last_ref = 0; 2976 bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA); 2977 2978 path = btrfs_alloc_path(); 2979 if (!path) 2980 return -ENOMEM; 2981 2982 path->leave_spinning = 1; 2983 2984 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; 2985 2986 if (!is_data && refs_to_drop != 1) { 2987 btrfs_crit(info, 2988"invalid refs_to_drop, dropping more than 1 refs for tree block %llu refs_to_drop %u", 2989 node->bytenr, refs_to_drop); 2990 ret = -EINVAL; 2991 btrfs_abort_transaction(trans, ret); 2992 goto out; 2993 } 2994 2995 if (is_data) 2996 skinny_metadata = false; 2997 2998 ret = lookup_extent_backref(trans, path, &iref, bytenr, num_bytes, 2999 parent, root_objectid, owner_objectid, 3000 owner_offset); 3001 if (ret == 0) { 3002 /* 3003 * Either the inline backref or the SHARED_DATA_REF/ 3004 * SHARED_BLOCK_REF is found 3005 * 3006 * Here is a quick path to locate EXTENT/METADATA_ITEM. 3007 * It's possible the EXTENT/METADATA_ITEM is near the current slot. 3008 */ 3009 extent_slot = path->slots[0]; 3010 while (extent_slot >= 0) { 3011 btrfs_item_key_to_cpu(path->nodes[0], &key, 3012 extent_slot); 3013 if (key.objectid != bytenr) 3014 break; 3015 if (key.type == BTRFS_EXTENT_ITEM_KEY && 3016 key.offset == num_bytes) { 3017 found_extent = 1; 3018 break; 3019 } 3020 if (key.type == BTRFS_METADATA_ITEM_KEY && 3021 key.offset == owner_objectid) { 3022 found_extent = 1; 3023 break; 3024 } 3025 3026 /* Quick path didn't find the EXTENT/METADATA_ITEM */ 3027 if (path->slots[0] - extent_slot > 5) 3028 break; 3029 extent_slot--; 3030 } 3031 3032 if (!found_extent) { 3033 if (iref) { 3034 btrfs_crit(info, 3035"invalid iref, no EXTENT/METADATA_ITEM found but has inline extent ref"); 3036 btrfs_abort_transaction(trans, -EUCLEAN); 3037 goto err_dump; 3038 } 3039 /* Must be SHARED_* item, remove the backref first */ 3040 ret = remove_extent_backref(trans, path, NULL, 3041 refs_to_drop, 3042 is_data, &last_ref); 3043 if (ret) { 3044 btrfs_abort_transaction(trans, ret); 3045 goto out; 3046 } 3047 btrfs_release_path(path); 3048 path->leave_spinning = 1; 3049 3050 /* Slow path to locate EXTENT/METADATA_ITEM */ 3051 key.objectid = bytenr; 3052 key.type = BTRFS_EXTENT_ITEM_KEY; 3053 key.offset = num_bytes; 3054 3055 if (!is_data && skinny_metadata) { 3056 key.type = BTRFS_METADATA_ITEM_KEY; 3057 key.offset = owner_objectid; 3058 } 3059 3060 ret = btrfs_search_slot(trans, extent_root, 3061 &key, path, -1, 1); 3062 if (ret > 0 && skinny_metadata && path->slots[0]) { 3063 /* 3064 * Couldn't find our skinny metadata item, 3065 * see if we have ye olde extent item.
3066 */ 3067 path->slots[0]--; 3068 btrfs_item_key_to_cpu(path->nodes[0], &key, 3069 path->slots[0]); 3070 if (key.objectid == bytenr && 3071 key.type == BTRFS_EXTENT_ITEM_KEY && 3072 key.offset == num_bytes) 3073 ret = 0; 3074 } 3075 3076 if (ret > 0 && skinny_metadata) { 3077 skinny_metadata = false; 3078 key.objectid = bytenr; 3079 key.type = BTRFS_EXTENT_ITEM_KEY; 3080 key.offset = num_bytes; 3081 btrfs_release_path(path); 3082 ret = btrfs_search_slot(trans, extent_root, 3083 &key, path, -1, 1); 3084 } 3085 3086 if (ret) { 3087 btrfs_err(info, 3088 "umm, got %d back from search, was looking for %llu", 3089 ret, bytenr); 3090 if (ret > 0) 3091 btrfs_print_leaf(path->nodes[0]); 3092 } 3093 if (ret < 0) { 3094 btrfs_abort_transaction(trans, ret); 3095 goto out; 3096 } 3097 extent_slot = path->slots[0]; 3098 } 3099 } else if (WARN_ON(ret == -ENOENT)) { 3100 btrfs_print_leaf(path->nodes[0]); 3101 btrfs_err(info, 3102 "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu", 3103 bytenr, parent, root_objectid, owner_objectid, 3104 owner_offset); 3105 btrfs_abort_transaction(trans, ret); 3106 goto out; 3107 } else { 3108 btrfs_abort_transaction(trans, ret); 3109 goto out; 3110 } 3111 3112 leaf = path->nodes[0]; 3113 item_size = btrfs_item_size_nr(leaf, extent_slot); 3114 if (unlikely(item_size < sizeof(*ei))) { 3115 ret = -EINVAL; 3116 btrfs_print_v0_err(info); 3117 btrfs_abort_transaction(trans, ret); 3118 goto out; 3119 } 3120 ei = btrfs_item_ptr(leaf, extent_slot, 3121 struct btrfs_extent_item); 3122 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID && 3123 key.type == BTRFS_EXTENT_ITEM_KEY) { 3124 struct btrfs_tree_block_info *bi; 3125 if (item_size < sizeof(*ei) + sizeof(*bi)) { 3126 btrfs_crit(info, 3127"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %zu", 3128 key.objectid, key.type, key.offset, 3129 owner_objectid, item_size, 3130 sizeof(*ei) + sizeof(*bi)); 3131 btrfs_abort_transaction(trans, -EUCLEAN); 3132 goto err_dump; 3133 } 3134 bi = (struct btrfs_tree_block_info *)(ei + 1); 3135 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); 3136 } 3137 3138 refs = btrfs_extent_refs(leaf, ei); 3139 if (refs < refs_to_drop) { 3140 btrfs_crit(info, 3141 "trying to drop %d refs but we only have %llu for bytenr %llu", 3142 refs_to_drop, refs, bytenr); 3143 btrfs_abort_transaction(trans, -EUCLEAN); 3144 goto err_dump; 3145 } 3146 refs -= refs_to_drop; 3147 3148 if (refs > 0) { 3149 if (extent_op) 3150 __run_delayed_extent_op(extent_op, leaf, ei); 3151 /* 3152 * In the case of inline back ref, reference count will 3153 * be updated by remove_extent_backref 3154 */ 3155 if (iref) { 3156 if (!found_extent) { 3157 btrfs_crit(info, 3158"invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found"); 3159 btrfs_abort_transaction(trans, -EUCLEAN); 3160 goto err_dump; 3161 } 3162 } else { 3163 btrfs_set_extent_refs(leaf, ei, refs); 3164 btrfs_mark_buffer_dirty(leaf); 3165 } 3166 if (found_extent) { 3167 ret = remove_extent_backref(trans, path, iref, 3168 refs_to_drop, is_data, 3169 &last_ref); 3170 if (ret) { 3171 btrfs_abort_transaction(trans, ret); 3172 goto out; 3173 } 3174 } 3175 } else { 3176 /* In this branch refs == 1 */ 3177 if (found_extent) { 3178 if (is_data && refs_to_drop != 3179 extent_data_ref_count(path, iref)) { 3180 btrfs_crit(info, 3181 "invalid refs_to_drop, current refs %u refs_to_drop %u", 3182 extent_data_ref_count(path, iref), 3183 refs_to_drop); 3184 btrfs_abort_transaction(trans, -EUCLEAN); 3185 goto 
err_dump; 3186 } 3187 if (iref) { 3188 if (path->slots[0] != extent_slot) { 3189 btrfs_crit(info, 3190"invalid iref, extent item key (%llu %u %llu) doesn't have wanted iref", 3191 key.objectid, key.type, 3192 key.offset); 3193 btrfs_abort_transaction(trans, -EUCLEAN); 3194 goto err_dump; 3195 } 3196 } else { 3197 /* 3198 * No inline ref, so we must be at a SHARED_* item, 3199 * and since it's a single ref it must be: 3200 * | extent_slot ||extent_slot + 1| 3201 * [ EXTENT/METADATA_ITEM ][ SHARED_* ITEM ] 3202 */ 3203 if (path->slots[0] != extent_slot + 1) { 3204 btrfs_crit(info, 3205 "invalid SHARED_* item, previous item is not EXTENT/METADATA_ITEM"); 3206 btrfs_abort_transaction(trans, -EUCLEAN); 3207 goto err_dump; 3208 } 3209 path->slots[0] = extent_slot; 3210 num_to_del = 2; 3211 } 3212 } 3213 3214 last_ref = 1; 3215 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 3216 num_to_del); 3217 if (ret) { 3218 btrfs_abort_transaction(trans, ret); 3219 goto out; 3220 } 3221 btrfs_release_path(path); 3222 3223 if (is_data) { 3224 ret = btrfs_del_csums(trans, info->csum_root, bytenr, 3225 num_bytes); 3226 if (ret) { 3227 btrfs_abort_transaction(trans, ret); 3228 goto out; 3229 } 3230 } 3231 3232 ret = add_to_free_space_tree(trans, bytenr, num_bytes); 3233 if (ret) { 3234 btrfs_abort_transaction(trans, ret); 3235 goto out; 3236 } 3237 3238 ret = btrfs_update_block_group(trans, bytenr, num_bytes, 0); 3239 if (ret) { 3240 btrfs_abort_transaction(trans, ret); 3241 goto out; 3242 } 3243 } 3244 btrfs_release_path(path); 3245 3246out: 3247 btrfs_free_path(path); 3248 return ret; 3249err_dump: 3250 /* 3251 * Leaf dump can take up a lot of log buffer, so we only do a full leaf 3252 * dump for debug builds. 3253 */ 3254 if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) { 3255 btrfs_crit(info, "path->slots[0]=%d extent_slot=%d", 3256 path->slots[0], extent_slot); 3257 btrfs_print_leaf(path->nodes[0]); 3258 } 3259 3260 btrfs_free_path(path); 3261 return -EUCLEAN; 3262} 3263 3264/* 3265 * When we free a block, it is possible (and likely) that we free the last 3266 * delayed ref for that extent as well. This searches the delayed ref tree for 3267 * a given extent, and if there are no other delayed refs to be processed, it 3268 * removes it from the tree. 3269 */ 3270static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, 3271 u64 bytenr) 3272{ 3273 struct btrfs_delayed_ref_head *head; 3274 struct btrfs_delayed_ref_root *delayed_refs; 3275 int ret = 0; 3276 3277 delayed_refs = &trans->transaction->delayed_refs; 3278 spin_lock(&delayed_refs->lock); 3279 head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); 3280 if (!head) 3281 goto out_delayed_unlock; 3282 3283 spin_lock(&head->lock); 3284 if (!RB_EMPTY_ROOT(&head->ref_tree.rb_root)) 3285 goto out; 3286 3287 if (cleanup_extent_op(head) != NULL) 3288 goto out; 3289 3290 /* 3291 * Waiting for the lock here would deadlock.
If someone else has it 3292 * locked they are already in the process of dropping it anyway 3293 */ 3294 if (!mutex_trylock(&head->mutex)) 3295 goto out; 3296 3297 btrfs_delete_ref_head(delayed_refs, head); 3298 head->processing = 0; 3299 3300 spin_unlock(&head->lock); 3301 spin_unlock(&delayed_refs->lock); 3302 3303 BUG_ON(head->extent_op); 3304 if (head->must_insert_reserved) 3305 ret = 1; 3306 3307 btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head); 3308 mutex_unlock(&head->mutex); 3309 btrfs_put_delayed_ref_head(head); 3310 return ret; 3311out: 3312 spin_unlock(&head->lock); 3313 3314out_delayed_unlock: 3315 spin_unlock(&delayed_refs->lock); 3316 return 0; 3317} 3318 3319void btrfs_free_tree_block(struct btrfs_trans_handle *trans, 3320 struct btrfs_root *root, 3321 struct extent_buffer *buf, 3322 u64 parent, int last_ref) 3323{ 3324 struct btrfs_fs_info *fs_info = root->fs_info; 3325 struct btrfs_ref generic_ref = { 0 }; 3326 int ret; 3327 3328 btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF, 3329 buf->start, buf->len, parent); 3330 btrfs_init_tree_ref(&generic_ref, btrfs_header_level(buf), 3331 root->root_key.objectid); 3332 3333 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { 3334 btrfs_ref_tree_mod(fs_info, &generic_ref); 3335 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL); 3336 BUG_ON(ret); /* -ENOMEM */ 3337 } 3338 3339 if (last_ref && btrfs_header_generation(buf) == trans->transid) { 3340 struct btrfs_block_group *cache; 3341 3342 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { 3343 ret = check_ref_cleanup(trans, buf->start); 3344 if (!ret) 3345 goto out; 3346 } 3347 3348 cache = btrfs_lookup_block_group(fs_info, buf->start); 3349 3350 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 3351 pin_down_extent(trans, cache, buf->start, buf->len, 1); 3352 btrfs_put_block_group(cache); 3353 goto out; 3354 } 3355 3356 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 3357 3358 btrfs_add_free_space(cache, buf->start, buf->len); 3359 btrfs_free_reserved_bytes(cache, buf->len, 0); 3360 btrfs_put_block_group(cache); 3361 trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len); 3362 } 3363out: 3364 if (last_ref) { 3365 /* 3366 * Deleting the buffer, clear the corrupt flag since it doesn't 3367 * matter anymore. 3368 */ 3369 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags); 3370 } 3371} 3372 3373/* Can return -ENOMEM */ 3374int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref) 3375{ 3376 struct btrfs_fs_info *fs_info = trans->fs_info; 3377 int ret; 3378 3379 if (btrfs_is_testing(fs_info)) 3380 return 0; 3381 3382 /* 3383 * tree log blocks never actually go into the extent allocation 3384 * tree, just update pinning info and exit early. 
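 * The block is only pinned here; the space is reclaimed later when the
 * pinned extents are unpinned (see btrfs_finish_extent_commit() above).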
3385 */ 3386 if ((ref->type == BTRFS_REF_METADATA && 3387 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) || 3388 (ref->type == BTRFS_REF_DATA && 3389 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) { 3390 /* unlocks the pinned mutex */ 3391 btrfs_pin_extent(trans, ref->bytenr, ref->len, 1); 3392 ret = 0; 3393 } else if (ref->type == BTRFS_REF_METADATA) { 3394 ret = btrfs_add_delayed_tree_ref(trans, ref, NULL); 3395 } else { 3396 ret = btrfs_add_delayed_data_ref(trans, ref, 0); 3397 } 3398 3399 if (!((ref->type == BTRFS_REF_METADATA && 3400 ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID) || 3401 (ref->type == BTRFS_REF_DATA && 3402 ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID))) 3403 btrfs_ref_tree_mod(fs_info, ref); 3404 3405 return ret; 3406} 3407 3408enum btrfs_loop_type { 3409 LOOP_CACHING_NOWAIT, 3410 LOOP_CACHING_WAIT, 3411 LOOP_ALLOC_CHUNK, 3412 LOOP_NO_EMPTY_SIZE, 3413}; 3414 3415static inline void 3416btrfs_lock_block_group(struct btrfs_block_group *cache, 3417 int delalloc) 3418{ 3419 if (delalloc) 3420 down_read(&cache->data_rwsem); 3421} 3422 3423static inline void btrfs_grab_block_group(struct btrfs_block_group *cache, 3424 int delalloc) 3425{ 3426 btrfs_get_block_group(cache); 3427 if (delalloc) 3428 down_read(&cache->data_rwsem); 3429} 3430 3431static struct btrfs_block_group *btrfs_lock_cluster( 3432 struct btrfs_block_group *block_group, 3433 struct btrfs_free_cluster *cluster, 3434 int delalloc) 3435 __acquires(&cluster->refill_lock) 3436{ 3437 struct btrfs_block_group *used_bg = NULL; 3438 3439 spin_lock(&cluster->refill_lock); 3440 while (1) { 3441 used_bg = cluster->block_group; 3442 if (!used_bg) 3443 return NULL; 3444 3445 if (used_bg == block_group) 3446 return used_bg; 3447 3448 btrfs_get_block_group(used_bg); 3449 3450 if (!delalloc) 3451 return used_bg; 3452 3453 if (down_read_trylock(&used_bg->data_rwsem)) 3454 return used_bg; 3455 3456 spin_unlock(&cluster->refill_lock); 3457 3458 /* We should only have one-level nested. */ 3459 down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING); 3460 3461 spin_lock(&cluster->refill_lock); 3462 if (used_bg == cluster->block_group) 3463 return used_bg; 3464 3465 up_read(&used_bg->data_rwsem); 3466 btrfs_put_block_group(used_bg); 3467 } 3468} 3469 3470static inline void 3471btrfs_release_block_group(struct btrfs_block_group *cache, 3472 int delalloc) 3473{ 3474 if (delalloc) 3475 up_read(&cache->data_rwsem); 3476 btrfs_put_block_group(cache); 3477} 3478 3479enum btrfs_extent_allocation_policy { 3480 BTRFS_EXTENT_ALLOC_CLUSTERED, 3481}; 3482 3483/* 3484 * Structure used internally for find_free_extent() function. Wraps needed 3485 * parameters. 3486 */ 3487struct find_free_extent_ctl { 3488 /* Basic allocation info */ 3489 u64 num_bytes; 3490 u64 empty_size; 3491 u64 flags; 3492 int delalloc; 3493 3494 /* Where to start the search inside the bg */ 3495 u64 search_start; 3496 3497 /* For clustered allocation */ 3498 u64 empty_cluster; 3499 struct btrfs_free_cluster *last_ptr; 3500 bool use_cluster; 3501 3502 bool have_caching_bg; 3503 bool orig_have_caching_bg; 3504 3505 /* RAID index, converted from flags */ 3506 int index; 3507 3508 /* 3509 * Current loop number, check find_free_extent_update_loop() for details 3510 */ 3511 int loop; 3512 3513 /* 3514 * Whether we're refilling a cluster, if true we need to re-search 3515 * current block group but don't try to refill the cluster again. 
3516 */ 3517 bool retry_clustered; 3518 3519 /* 3520 * Whether we're updating the free space cache; if true we need to re-search 3521 * the current block group but don't try updating the free space cache again. 3522 */ 3523 bool retry_unclustered; 3524 3525 /* If current block group is cached */ 3526 int cached; 3527 3528 /* Max contiguous hole found */ 3529 u64 max_extent_size; 3530 3531 /* Total free space from free space cache, not always contiguous */ 3532 u64 total_free_space; 3533 3534 /* Found result */ 3535 u64 found_offset; 3536 3537 /* Hint where to start looking for free space */ 3538 u64 hint_byte; 3539 3540 /* Allocation policy */ 3541 enum btrfs_extent_allocation_policy policy; 3542}; 3543 3544 3545/* 3546 * Helper function for find_free_extent(). 3547 * 3548 * Return -ENOENT to inform caller that we need to fall back to unclustered mode. 3549 * Return -EAGAIN to inform caller that we need to re-search this block group. 3550 * Return >0 to inform caller that we found nothing. 3551 * Return 0 means we have found a location and set ffe_ctl->found_offset. 3552 */ 3553static int find_free_extent_clustered(struct btrfs_block_group *bg, 3554 struct find_free_extent_ctl *ffe_ctl, 3555 struct btrfs_block_group **cluster_bg_ret) 3556{ 3557 struct btrfs_block_group *cluster_bg; 3558 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr; 3559 u64 aligned_cluster; 3560 u64 offset; 3561 int ret; 3562 3563 cluster_bg = btrfs_lock_cluster(bg, last_ptr, ffe_ctl->delalloc); 3564 if (!cluster_bg) 3565 goto refill_cluster; 3566 if (cluster_bg != bg && (cluster_bg->ro || 3567 !block_group_bits(cluster_bg, ffe_ctl->flags))) 3568 goto release_cluster; 3569 3570 offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr, 3571 ffe_ctl->num_bytes, cluster_bg->start, 3572 &ffe_ctl->max_extent_size); 3573 if (offset) { 3574 /* We have a block, we're done */ 3575 spin_unlock(&last_ptr->refill_lock); 3576 trace_btrfs_reserve_extent_cluster(cluster_bg, 3577 ffe_ctl->search_start, ffe_ctl->num_bytes); 3578 *cluster_bg_ret = cluster_bg; 3579 ffe_ctl->found_offset = offset; 3580 return 0; 3581 } 3582 WARN_ON(last_ptr->block_group != cluster_bg); 3583 3584release_cluster: 3585 /* 3586 * If we are on LOOP_NO_EMPTY_SIZE, we can't set up a new cluster, so 3587 * let's just skip it and let the allocator find whatever block it can 3588 * find. If we reach this point, we will have tried the cluster 3589 * allocator plenty of times and not have found anything, so we are 3590 * likely way too fragmented for the clustering stuff to find anything. 3591 * 3592 * However, if the cluster is taken from the current block group, 3593 * release the cluster first, so that we stand a better chance of 3594 * succeeding in the unclustered allocation.
3595 */ 3596 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE && cluster_bg != bg) { 3597 spin_unlock(&last_ptr->refill_lock); 3598 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc); 3599 return -ENOENT; 3600 } 3601 3602 /* This cluster didn't work out, free it and start over */ 3603 btrfs_return_cluster_to_free_space(NULL, last_ptr); 3604 3605 if (cluster_bg != bg) 3606 btrfs_release_block_group(cluster_bg, ffe_ctl->delalloc); 3607 3608refill_cluster: 3609 if (ffe_ctl->loop >= LOOP_NO_EMPTY_SIZE) { 3610 spin_unlock(&last_ptr->refill_lock); 3611 return -ENOENT; 3612 } 3613 3614 aligned_cluster = max_t(u64, 3615 ffe_ctl->empty_cluster + ffe_ctl->empty_size, 3616 bg->full_stripe_len); 3617 ret = btrfs_find_space_cluster(bg, last_ptr, ffe_ctl->search_start, 3618 ffe_ctl->num_bytes, aligned_cluster); 3619 if (ret == 0) { 3620 /* Now pull our allocation out of this cluster */ 3621 offset = btrfs_alloc_from_cluster(bg, last_ptr, 3622 ffe_ctl->num_bytes, ffe_ctl->search_start, 3623 &ffe_ctl->max_extent_size); 3624 if (offset) { 3625 /* We found one, proceed */ 3626 spin_unlock(&last_ptr->refill_lock); 3627 trace_btrfs_reserve_extent_cluster(bg, 3628 ffe_ctl->search_start, 3629 ffe_ctl->num_bytes); 3630 ffe_ctl->found_offset = offset; 3631 return 0; 3632 } 3633 } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT && 3634 !ffe_ctl->retry_clustered) { 3635 spin_unlock(&last_ptr->refill_lock); 3636 3637 ffe_ctl->retry_clustered = true; 3638 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes + 3639 ffe_ctl->empty_cluster + ffe_ctl->empty_size); 3640 return -EAGAIN; 3641 } 3642 /* 3643 * At this point we either didn't find a cluster or we weren't able to 3644 * allocate a block from our cluster. Free the cluster we've been 3645 * trying to use, and go to the next block group. 3646 */ 3647 btrfs_return_cluster_to_free_space(NULL, last_ptr); 3648 spin_unlock(&last_ptr->refill_lock); 3649 return 1; 3650} 3651 3652/* 3653 * Return >0 to inform caller that we found nothing. 3654 * Return 0 when we found a free extent and set ffe_ctl->found_offset. 3655 * Return -EAGAIN to inform caller that we need to re-search this block group. 3656 */ 3657static int find_free_extent_unclustered(struct btrfs_block_group *bg, 3658 struct find_free_extent_ctl *ffe_ctl) 3659{ 3660 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr; 3661 u64 offset; 3662 3663 /* 3664 * We are doing an unclustered allocation, set the fragmented flag so 3665 * we don't bother trying to set up a cluster again until we get more 3666 * space.
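 * The fragmented flag is cleared again in unpin_extent_range() once enough
 * pinned space has been returned to the block group.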
3667 */ 3668 if (unlikely(last_ptr)) { 3669 spin_lock(&last_ptr->lock); 3670 last_ptr->fragmented = 1; 3671 spin_unlock(&last_ptr->lock); 3672 } 3673 if (ffe_ctl->cached) { 3674 struct btrfs_free_space_ctl *free_space_ctl; 3675 3676 free_space_ctl = bg->free_space_ctl; 3677 spin_lock(&free_space_ctl->tree_lock); 3678 if (free_space_ctl->free_space < 3679 ffe_ctl->num_bytes + ffe_ctl->empty_cluster + 3680 ffe_ctl->empty_size) { 3681 ffe_ctl->total_free_space = max_t(u64, 3682 ffe_ctl->total_free_space, 3683 free_space_ctl->free_space); 3684 spin_unlock(&free_space_ctl->tree_lock); 3685 return 1; 3686 } 3687 spin_unlock(&free_space_ctl->tree_lock); 3688 } 3689 3690 offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start, 3691 ffe_ctl->num_bytes, ffe_ctl->empty_size, 3692 &ffe_ctl->max_extent_size); 3693 3694 /* 3695 * If we didn't find a chunk, and we haven't failed on this block group 3696 * before, and this block group is in the middle of caching and we are 3697 * ok with waiting, then go ahead and wait for progress to be made, and 3698 * set @retry_unclustered to true. 3699 * 3700 * If @retry_unclustered is true then we've already waited on this 3701 * block group once and should move on to the next block group. 3702 */ 3703 if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached && 3704 ffe_ctl->loop > LOOP_CACHING_NOWAIT) { 3705 btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes + 3706 ffe_ctl->empty_size); 3707 ffe_ctl->retry_unclustered = true; 3708 return -EAGAIN; 3709 } else if (!offset) { 3710 return 1; 3711 } 3712 ffe_ctl->found_offset = offset; 3713 return 0; 3714} 3715 3716static int do_allocation_clustered(struct btrfs_block_group *block_group, 3717 struct find_free_extent_ctl *ffe_ctl, 3718 struct btrfs_block_group **bg_ret) 3719{ 3720 int ret; 3721 3722 /* We want to try and use the cluster allocator, so lets look there */ 3723 if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) { 3724 ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret); 3725 if (ret >= 0 || ret == -EAGAIN) 3726 return ret; 3727 /* ret == -ENOENT case falls through */ 3728 } 3729 3730 return find_free_extent_unclustered(block_group, ffe_ctl); 3731} 3732 3733static int do_allocation(struct btrfs_block_group *block_group, 3734 struct find_free_extent_ctl *ffe_ctl, 3735 struct btrfs_block_group **bg_ret) 3736{ 3737 switch (ffe_ctl->policy) { 3738 case BTRFS_EXTENT_ALLOC_CLUSTERED: 3739 return do_allocation_clustered(block_group, ffe_ctl, bg_ret); 3740 default: 3741 BUG(); 3742 } 3743} 3744 3745static void release_block_group(struct btrfs_block_group *block_group, 3746 struct find_free_extent_ctl *ffe_ctl, 3747 int delalloc) 3748{ 3749 switch (ffe_ctl->policy) { 3750 case BTRFS_EXTENT_ALLOC_CLUSTERED: 3751 ffe_ctl->retry_clustered = false; 3752 ffe_ctl->retry_unclustered = false; 3753 break; 3754 default: 3755 BUG(); 3756 } 3757 3758 BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) != 3759 ffe_ctl->index); 3760 btrfs_release_block_group(block_group, delalloc); 3761} 3762 3763static void found_extent_clustered(struct find_free_extent_ctl *ffe_ctl, 3764 struct btrfs_key *ins) 3765{ 3766 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr; 3767 3768 if (!ffe_ctl->use_cluster && last_ptr) { 3769 spin_lock(&last_ptr->lock); 3770 last_ptr->window_start = ins->objectid; 3771 spin_unlock(&last_ptr->lock); 3772 } 3773} 3774 3775static void found_extent(struct find_free_extent_ctl *ffe_ctl, 3776 struct btrfs_key *ins) 3777{ 3778 switch (ffe_ctl->policy) { 3779 case 
BTRFS_EXTENT_ALLOC_CLUSTERED: 3780 found_extent_clustered(ffe_ctl, ins); 3781 break; 3782 default: 3783 BUG(); 3784 } 3785} 3786 3787static int chunk_allocation_failed(struct find_free_extent_ctl *ffe_ctl) 3788{ 3789 switch (ffe_ctl->policy) { 3790 case BTRFS_EXTENT_ALLOC_CLUSTERED: 3791 /* 3792 * If we can't allocate a new chunk we've already looped through 3793 * at least once, move on to the NO_EMPTY_SIZE case. 3794 */ 3795 ffe_ctl->loop = LOOP_NO_EMPTY_SIZE; 3796 return 0; 3797 default: 3798 BUG(); 3799 } 3800} 3801 3802/* 3803 * Return >0 means caller needs to re-search for free extent 3804 * Return 0 means we have the needed free extent. 3805 * Return <0 means we failed to locate any free extent. 3806 */ 3807static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info, 3808 struct btrfs_key *ins, 3809 struct find_free_extent_ctl *ffe_ctl, 3810 bool full_search) 3811{ 3812 struct btrfs_root *root = fs_info->extent_root; 3813 int ret; 3814 3815 if ((ffe_ctl->loop == LOOP_CACHING_NOWAIT) && 3816 ffe_ctl->have_caching_bg && !ffe_ctl->orig_have_caching_bg) 3817 ffe_ctl->orig_have_caching_bg = true; 3818 3819 if (!ins->objectid && ffe_ctl->loop >= LOOP_CACHING_WAIT && 3820 ffe_ctl->have_caching_bg) 3821 return 1; 3822 3823 if (!ins->objectid && ++(ffe_ctl->index) < BTRFS_NR_RAID_TYPES) 3824 return 1; 3825 3826 if (ins->objectid) { 3827 found_extent(ffe_ctl, ins); 3828 return 0; 3829 } 3830 3831 /* 3832 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking 3833 * caching kthreads as we move along 3834 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching 3835 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again 3836 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 3837 * again 3838 */ 3839 if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) { 3840 ffe_ctl->index = 0; 3841 if (ffe_ctl->loop == LOOP_CACHING_NOWAIT) { 3842 /* 3843 * We want to skip the LOOP_CACHING_WAIT step if we 3844 * don't have any uncached bgs and we've already done a 3845 * full search through. 3846 */ 3847 if (ffe_ctl->orig_have_caching_bg || !full_search) 3848 ffe_ctl->loop = LOOP_CACHING_WAIT; 3849 else 3850 ffe_ctl->loop = LOOP_ALLOC_CHUNK; 3851 } else { 3852 ffe_ctl->loop++; 3853 } 3854 3855 if (ffe_ctl->loop == LOOP_ALLOC_CHUNK) { 3856 struct btrfs_trans_handle *trans; 3857 int exist = 0; 3858 3859 trans = current->journal_info; 3860 if (trans) 3861 exist = 1; 3862 else 3863 trans = btrfs_join_transaction(root); 3864 3865 if (IS_ERR(trans)) { 3866 ret = PTR_ERR(trans); 3867 return ret; 3868 } 3869 3870 ret = btrfs_chunk_alloc(trans, ffe_ctl->flags, 3871 CHUNK_ALLOC_FORCE); 3872 3873 /* Do not bail out on ENOSPC since we can do more. */ 3874 if (ret == -ENOSPC) 3875 ret = chunk_allocation_failed(ffe_ctl); 3876 else if (ret < 0) 3877 btrfs_abort_transaction(trans, ret); 3878 else 3879 ret = 0; 3880 if (!exist) 3881 btrfs_end_transaction(trans); 3882 if (ret) 3883 return ret; 3884 } 3885 3886 if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) { 3887 if (ffe_ctl->policy != BTRFS_EXTENT_ALLOC_CLUSTERED) 3888 return -ENOSPC; 3889 3890 /* 3891 * Don't loop again if we already have no empty_size and 3892 * no empty_cluster. 
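 * Otherwise clear both below and make one final pass that no longer asks
 * for any slack beyond num_bytes.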
3893 */ 3894 if (ffe_ctl->empty_size == 0 && 3895 ffe_ctl->empty_cluster == 0) 3896 return -ENOSPC; 3897 ffe_ctl->empty_size = 0; 3898 ffe_ctl->empty_cluster = 0; 3899 } 3900 return 1; 3901 } 3902 return -ENOSPC; 3903} 3904 3905static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info, 3906 struct find_free_extent_ctl *ffe_ctl, 3907 struct btrfs_space_info *space_info, 3908 struct btrfs_key *ins) 3909{ 3910 /* 3911 * If our free space is heavily fragmented we may not be able to make 3912 * big contiguous allocations, so instead of doing the expensive search 3913 * for free space, simply return ENOSPC with our max_extent_size so we 3914 * can go ahead and search for a more manageable chunk. 3915 * 3916 * If our max_extent_size is large enough for our allocation, simply 3917 * disable clustering since we will likely not be able to find enough 3918 * space to create a cluster and induce latency trying. 3919 */ 3920 if (space_info->max_extent_size) { 3921 spin_lock(&space_info->lock); 3922 if (space_info->max_extent_size && 3923 ffe_ctl->num_bytes > space_info->max_extent_size) { 3924 ins->offset = space_info->max_extent_size; 3925 spin_unlock(&space_info->lock); 3926 return -ENOSPC; 3927 } else if (space_info->max_extent_size) { 3928 ffe_ctl->use_cluster = false; 3929 } 3930 spin_unlock(&space_info->lock); 3931 } 3932 3933 ffe_ctl->last_ptr = fetch_cluster_info(fs_info, space_info, 3934 &ffe_ctl->empty_cluster); 3935 if (ffe_ctl->last_ptr) { 3936 struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr; 3937 3938 spin_lock(&last_ptr->lock); 3939 if (last_ptr->block_group) 3940 ffe_ctl->hint_byte = last_ptr->window_start; 3941 if (last_ptr->fragmented) { 3942 /* 3943 * We still set window_start so we can keep track of the 3944 * last place we found an allocation to try and save 3945 * some time. 3946 */ 3947 ffe_ctl->hint_byte = last_ptr->window_start; 3948 ffe_ctl->use_cluster = false; 3949 } 3950 spin_unlock(&last_ptr->lock); 3951 } 3952 3953 return 0; 3954} 3955 3956static int prepare_allocation(struct btrfs_fs_info *fs_info, 3957 struct find_free_extent_ctl *ffe_ctl, 3958 struct btrfs_space_info *space_info, 3959 struct btrfs_key *ins) 3960{ 3961 switch (ffe_ctl->policy) { 3962 case BTRFS_EXTENT_ALLOC_CLUSTERED: 3963 return prepare_allocation_clustered(fs_info, ffe_ctl, 3964 space_info, ins); 3965 default: 3966 BUG(); 3967 } 3968} 3969 3970/* 3971 * walks the btree of allocated extents and finds a hole of a given size. 3972 * The key ins is changed to record the hole: 3973 * ins->objectid == start position 3974 * ins->flags = BTRFS_EXTENT_ITEM_KEY 3975 * ins->offset == the size of the hole. 3976 * Any available blocks before search_start are skipped. 3977 * 3978 * If there is no suitable free space, we record the size of the largest 3979 * free space extent the search came across.
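 *
 * For example (illustrative numbers only): a search for a 1MiB hole that is
 * satisfied at logical byte offset 1GiB ends with ins->objectid == 1GiB and
 * ins->offset == 1MiB; on -ENOSPC, ins->offset instead reports the largest
 * contiguous free space the search came across (max_extent_size).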
3980 * 3981 * The overall logic and call chain: 3982 * 3983 * find_free_extent() 3984 * |- Iterate through all block groups 3985 * | |- Get a valid block group 3986 * | |- Try to do clustered allocation in that block group 3987 * | |- Try to do unclustered allocation in that block group 3988 * | |- Check if the result is valid 3989 * | | |- If valid, then exit 3990 * | |- Jump to next block group 3991 * | 3992 * |- Push harder to find free extents 3993 * |- If not found, re-iterate all block groups 3994 */ 3995static noinline int find_free_extent(struct btrfs_root *root, 3996 u64 ram_bytes, u64 num_bytes, u64 empty_size, 3997 u64 hint_byte_orig, struct btrfs_key *ins, 3998 u64 flags, int delalloc) 3999{ 4000 struct btrfs_fs_info *fs_info = root->fs_info; 4001 int ret = 0; 4002 int cache_block_group_error = 0; 4003 struct btrfs_block_group *block_group = NULL; 4004 struct find_free_extent_ctl ffe_ctl = {0}; 4005 struct btrfs_space_info *space_info; 4006 bool full_search = false; 4007 4008 WARN_ON(num_bytes < fs_info->sectorsize); 4009 4010 ffe_ctl.num_bytes = num_bytes; 4011 ffe_ctl.empty_size = empty_size; 4012 ffe_ctl.flags = flags; 4013 ffe_ctl.search_start = 0; 4014 ffe_ctl.delalloc = delalloc; 4015 ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags); 4016 ffe_ctl.have_caching_bg = false; 4017 ffe_ctl.orig_have_caching_bg = false; 4018 ffe_ctl.found_offset = 0; 4019 ffe_ctl.hint_byte = hint_byte_orig; 4020 ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED; 4021 4022 /* For clustered allocation */ 4023 ffe_ctl.retry_clustered = false; 4024 ffe_ctl.retry_unclustered = false; 4025 ffe_ctl.last_ptr = NULL; 4026 ffe_ctl.use_cluster = true; 4027 4028 ins->type = BTRFS_EXTENT_ITEM_KEY; 4029 ins->objectid = 0; 4030 ins->offset = 0; 4031 4032 trace_find_free_extent(root, num_bytes, empty_size, flags); 4033 4034 space_info = btrfs_find_space_info(fs_info, flags); 4035 if (!space_info) { 4036 btrfs_err(fs_info, "No space info for %llu", flags); 4037 return -ENOSPC; 4038 } 4039 4040 ret = prepare_allocation(fs_info, &ffe_ctl, space_info, ins); 4041 if (ret < 0) 4042 return ret; 4043 4044 ffe_ctl.search_start = max(ffe_ctl.search_start, 4045 first_logical_byte(fs_info, 0)); 4046 ffe_ctl.search_start = max(ffe_ctl.search_start, ffe_ctl.hint_byte); 4047 if (ffe_ctl.search_start == ffe_ctl.hint_byte) { 4048 block_group = btrfs_lookup_block_group(fs_info, 4049 ffe_ctl.search_start); 4050 /* 4051 * we don't want to use the block group if it doesn't match our 4052 * allocation bits, or if its not cached. 4053 * 4054 * However if we are re-searching with an ideal block group 4055 * picked out then we don't care that the block group is cached. 
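 *
 * If the hinted block group can't be used here we simply fall through
 * to the normal search over each raid index below.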
4056 */ 4057 if (block_group && block_group_bits(block_group, flags) && 4058 block_group->cached != BTRFS_CACHE_NO) { 4059 down_read(&space_info->groups_sem); 4060 if (list_empty(&block_group->list) || 4061 block_group->ro) { 4062 /* 4063 * someone is removing this block group, 4064 * we can't jump into the have_block_group 4065 * target because our list pointers are not 4066 * valid 4067 */ 4068 btrfs_put_block_group(block_group); 4069 up_read(&space_info->groups_sem); 4070 } else { 4071 ffe_ctl.index = btrfs_bg_flags_to_raid_index( 4072 block_group->flags); 4073 btrfs_lock_block_group(block_group, delalloc); 4074 goto have_block_group; 4075 } 4076 } else if (block_group) { 4077 btrfs_put_block_group(block_group); 4078 } 4079 } 4080search: 4081 ffe_ctl.have_caching_bg = false; 4082 if (ffe_ctl.index == btrfs_bg_flags_to_raid_index(flags) || 4083 ffe_ctl.index == 0) 4084 full_search = true; 4085 down_read(&space_info->groups_sem); 4086 list_for_each_entry(block_group, 4087 &space_info->block_groups[ffe_ctl.index], list) { 4088 struct btrfs_block_group *bg_ret; 4089 4090 /* If the block group is read-only, we can skip it entirely. */ 4091 if (unlikely(block_group->ro)) 4092 continue; 4093 4094 btrfs_grab_block_group(block_group, delalloc); 4095 ffe_ctl.search_start = block_group->start; 4096 4097 /* 4098 * this can happen if we end up cycling through all the 4099 * raid types, but we want to make sure we only allocate 4100 * for the proper type. 4101 */ 4102 if (!block_group_bits(block_group, flags)) { 4103 u64 extra = BTRFS_BLOCK_GROUP_DUP | 4104 BTRFS_BLOCK_GROUP_RAID1_MASK | 4105 BTRFS_BLOCK_GROUP_RAID56_MASK | 4106 BTRFS_BLOCK_GROUP_RAID10; 4107 4108 /* 4109 * if they asked for extra copies and this block group 4110 * doesn't provide them, bail. This does allow us to 4111 * fill raid0 from raid1. 4112 */ 4113 if ((flags & extra) && !(block_group->flags & extra)) 4114 goto loop; 4115 4116 /* 4117 * This block group has different flags than we want. 4118 * It's possible that we have MIXED_GROUP flag but no 4119 * block group is mixed. Just skip such block group. 4120 */ 4121 btrfs_release_block_group(block_group, delalloc); 4122 continue; 4123 } 4124 4125have_block_group: 4126 ffe_ctl.cached = btrfs_block_group_done(block_group); 4127 if (unlikely(!ffe_ctl.cached)) { 4128 ffe_ctl.have_caching_bg = true; 4129 ret = btrfs_cache_block_group(block_group, 0); 4130 4131 /* 4132 * If we get ENOMEM here or something else we want to 4133 * try other block groups, because it may not be fatal. 4134 * However if we can't find anything else we need to 4135 * save our return here so that we return the actual 4136 * error that caused problems, not ENOSPC. 
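 *
 * cache_block_group_error is only reported back to the caller if the
 * whole search otherwise ends in -ENOSPC (see the end of
 * find_free_extent()).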
4137 */ 4138 if (ret < 0) { 4139 if (!cache_block_group_error) 4140 cache_block_group_error = ret; 4141 ret = 0; 4142 goto loop; 4143 } 4144 ret = 0; 4145 } 4146 4147 if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) { 4148 if (!cache_block_group_error) 4149 cache_block_group_error = -EIO; 4150 goto loop; 4151 } 4152 4153 bg_ret = NULL; 4154 ret = do_allocation(block_group, &ffe_ctl, &bg_ret); 4155 if (ret == 0) { 4156 if (bg_ret && bg_ret != block_group) { 4157 btrfs_release_block_group(block_group, delalloc); 4158 block_group = bg_ret; 4159 } 4160 } else if (ret == -EAGAIN) { 4161 goto have_block_group; 4162 } else if (ret > 0) { 4163 goto loop; 4164 } 4165 4166 /* Checks */ 4167 ffe_ctl.search_start = round_up(ffe_ctl.found_offset, 4168 fs_info->stripesize); 4169 4170 /* move on to the next group */ 4171 if (ffe_ctl.search_start + num_bytes > 4172 block_group->start + block_group->length) { 4173 btrfs_add_free_space(block_group, ffe_ctl.found_offset, 4174 num_bytes); 4175 goto loop; 4176 } 4177 4178 if (ffe_ctl.found_offset < ffe_ctl.search_start) 4179 btrfs_add_free_space(block_group, ffe_ctl.found_offset, 4180 ffe_ctl.search_start - ffe_ctl.found_offset); 4181 4182 ret = btrfs_add_reserved_bytes(block_group, ram_bytes, 4183 num_bytes, delalloc); 4184 if (ret == -EAGAIN) { 4185 btrfs_add_free_space(block_group, ffe_ctl.found_offset, 4186 num_bytes); 4187 goto loop; 4188 } 4189 btrfs_inc_block_group_reservations(block_group); 4190 4191 /* we are all good, lets return */ 4192 ins->objectid = ffe_ctl.search_start; 4193 ins->offset = num_bytes; 4194 4195 trace_btrfs_reserve_extent(block_group, ffe_ctl.search_start, 4196 num_bytes); 4197 btrfs_release_block_group(block_group, delalloc); 4198 break; 4199loop: 4200 release_block_group(block_group, &ffe_ctl, delalloc); 4201 cond_resched(); 4202 } 4203 up_read(&space_info->groups_sem); 4204 4205 ret = find_free_extent_update_loop(fs_info, ins, &ffe_ctl, full_search); 4206 if (ret > 0) 4207 goto search; 4208 4209 if (ret == -ENOSPC && !cache_block_group_error) { 4210 /* 4211 * Use ffe_ctl->total_free_space as fallback if we can't find 4212 * any contiguous hole. 4213 */ 4214 if (!ffe_ctl.max_extent_size) 4215 ffe_ctl.max_extent_size = ffe_ctl.total_free_space; 4216 spin_lock(&space_info->lock); 4217 space_info->max_extent_size = ffe_ctl.max_extent_size; 4218 spin_unlock(&space_info->lock); 4219 ins->offset = ffe_ctl.max_extent_size; 4220 } else if (ret == -ENOSPC) { 4221 ret = cache_block_group_error; 4222 } 4223 return ret; 4224} 4225 4226/* 4227 * btrfs_reserve_extent - entry point to the extent allocator. Tries to find a 4228 * hole that is at least as big as @num_bytes. 4229 * 4230 * @root - The root that will contain this extent 4231 * 4232 * @ram_bytes - The amount of space in ram that @num_bytes take. This 4233 * is used for accounting purposes. This value differs 4234 * from @num_bytes only in the case of compressed extents. 4235 * 4236 * @num_bytes - Number of bytes to allocate on-disk. 4237 * 4238 * @min_alloc_size - Indicates the minimum amount of space that the 4239 * allocator should try to satisfy. In some cases 4240 * @num_bytes may be larger than what is required and if 4241 * the filesystem is fragmented then allocation fails. 4242 * However, the presence of @min_alloc_size gives a 4243 * chance to try and satisfy the smaller allocation. 4244 * 4245 * @empty_size - A hint that you plan on doing more COW. This is the 4246 * size in bytes the allocator should try to find free 4247 * next to the block it returns. 
This is just a hint and 4248 * may be ignored by the allocator. 4249 * 4250 * @hint_byte - Hint to the allocator to start searching above the byte 4251 * address passed. It might be ignored. 4252 * 4253 * @ins - This key is modified to record the found hole. It will 4254 * have the following values: 4255 * ins->objectid == start position 4256 * ins->flags = BTRFS_EXTENT_ITEM_KEY 4257 * ins->offset == the size of the hole. 4258 * 4259 * @is_data - Boolean flag indicating whether an extent is 4260 * allocated for data (true) or metadata (false) 4261 * 4262 * @delalloc - Boolean flag indicating whether this allocation is for 4263 * delalloc or not. If 'true' data_rwsem of block groups 4264 * is going to be acquired. 4265 * 4266 * 4267 * Returns 0 when an allocation succeeded or < 0 when an error occurred. In 4268 * case -ENOSPC is returned then @ins->offset will contain the size of the 4269 * largest available hole the allocator managed to find. 4270 */ 4271int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, 4272 u64 num_bytes, u64 min_alloc_size, 4273 u64 empty_size, u64 hint_byte, 4274 struct btrfs_key *ins, int is_data, int delalloc) 4275{ 4276 struct btrfs_fs_info *fs_info = root->fs_info; 4277 bool final_tried = num_bytes == min_alloc_size; 4278 u64 flags; 4279 int ret; 4280 4281 flags = get_alloc_profile_by_root(root, is_data); 4282again: 4283 WARN_ON(num_bytes < fs_info->sectorsize); 4284 ret = find_free_extent(root, ram_bytes, num_bytes, empty_size, 4285 hint_byte, ins, flags, delalloc); 4286 if (!ret && !is_data) { 4287 btrfs_dec_block_group_reservations(fs_info, ins->objectid); 4288 } else if (ret == -ENOSPC) { 4289 if (!final_tried && ins->offset) { 4290 num_bytes = min(num_bytes >> 1, ins->offset); 4291 num_bytes = round_down(num_bytes, 4292 fs_info->sectorsize); 4293 num_bytes = max(num_bytes, min_alloc_size); 4294 ram_bytes = num_bytes; 4295 if (num_bytes == min_alloc_size) 4296 final_tried = true; 4297 goto again; 4298 } else if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { 4299 struct btrfs_space_info *sinfo; 4300 4301 sinfo = btrfs_find_space_info(fs_info, flags); 4302 btrfs_err(fs_info, 4303 "allocation failed flags %llu, wanted %llu", 4304 flags, num_bytes); 4305 if (sinfo) 4306 btrfs_dump_space_info(fs_info, sinfo, 4307 num_bytes, 1); 4308 } 4309 } 4310 4311 return ret; 4312} 4313 4314int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, 4315 u64 start, u64 len, int delalloc) 4316{ 4317 struct btrfs_block_group *cache; 4318 4319 cache = btrfs_lookup_block_group(fs_info, start); 4320 if (!cache) { 4321 btrfs_err(fs_info, "Unable to find block group for %llu", 4322 start); 4323 return -ENOSPC; 4324 } 4325 4326 btrfs_add_free_space(cache, start, len); 4327 btrfs_free_reserved_bytes(cache, len, delalloc); 4328 trace_btrfs_reserved_extent_free(fs_info, start, len); 4329 4330 btrfs_put_block_group(cache); 4331 return 0; 4332} 4333 4334int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start, 4335 u64 len) 4336{ 4337 struct btrfs_block_group *cache; 4338 int ret = 0; 4339 4340 cache = btrfs_lookup_block_group(trans->fs_info, start); 4341 if (!cache) { 4342 btrfs_err(trans->fs_info, "unable to find block group for %llu", 4343 start); 4344 return -ENOSPC; 4345 } 4346 4347 ret = pin_down_extent(trans, cache, start, len, 1); 4348 btrfs_put_block_group(cache); 4349 return ret; 4350} 4351 4352static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 4353 u64 parent, u64 root_objectid, 4354 u64 flags, u64 owner, u64 offset, 4355 struct 
btrfs_key *ins, int ref_mod) 4356{ 4357 struct btrfs_fs_info *fs_info = trans->fs_info; 4358 int ret; 4359 struct btrfs_extent_item *extent_item; 4360 struct btrfs_extent_inline_ref *iref; 4361 struct btrfs_path *path; 4362 struct extent_buffer *leaf; 4363 int type; 4364 u32 size; 4365 4366 if (parent > 0) 4367 type = BTRFS_SHARED_DATA_REF_KEY; 4368 else 4369 type = BTRFS_EXTENT_DATA_REF_KEY; 4370 4371 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); 4372 4373 path = btrfs_alloc_path(); 4374 if (!path) 4375 return -ENOMEM; 4376 4377 path->leave_spinning = 1; 4378 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 4379 ins, size); 4380 if (ret) { 4381 btrfs_free_path(path); 4382 return ret; 4383 } 4384 4385 leaf = path->nodes[0]; 4386 extent_item = btrfs_item_ptr(leaf, path->slots[0], 4387 struct btrfs_extent_item); 4388 btrfs_set_extent_refs(leaf, extent_item, ref_mod); 4389 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 4390 btrfs_set_extent_flags(leaf, extent_item, 4391 flags | BTRFS_EXTENT_FLAG_DATA); 4392 4393 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); 4394 btrfs_set_extent_inline_ref_type(leaf, iref, type); 4395 if (parent > 0) { 4396 struct btrfs_shared_data_ref *ref; 4397 ref = (struct btrfs_shared_data_ref *)(iref + 1); 4398 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 4399 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod); 4400 } else { 4401 struct btrfs_extent_data_ref *ref; 4402 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 4403 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid); 4404 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 4405 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 4406 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod); 4407 } 4408 4409 btrfs_mark_buffer_dirty(path->nodes[0]); 4410 btrfs_free_path(path); 4411 4412 ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset); 4413 if (ret) 4414 return ret; 4415 4416 ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, 1); 4417 if (ret) { /* -ENOENT, logic error */ 4418 btrfs_err(fs_info, "update block group failed for %llu %llu", 4419 ins->objectid, ins->offset); 4420 BUG(); 4421 } 4422 trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset); 4423 return ret; 4424} 4425 4426static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 4427 struct btrfs_delayed_ref_node *node, 4428 struct btrfs_delayed_extent_op *extent_op) 4429{ 4430 struct btrfs_fs_info *fs_info = trans->fs_info; 4431 int ret; 4432 struct btrfs_extent_item *extent_item; 4433 struct btrfs_key extent_key; 4434 struct btrfs_tree_block_info *block_info; 4435 struct btrfs_extent_inline_ref *iref; 4436 struct btrfs_path *path; 4437 struct extent_buffer *leaf; 4438 struct btrfs_delayed_tree_ref *ref; 4439 u32 size = sizeof(*extent_item) + sizeof(*iref); 4440 u64 num_bytes; 4441 u64 flags = extent_op->flags_to_set; 4442 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA); 4443 4444 ref = btrfs_delayed_node_to_tree_ref(node); 4445 4446 extent_key.objectid = node->bytenr; 4447 if (skinny_metadata) { 4448 extent_key.offset = ref->level; 4449 extent_key.type = BTRFS_METADATA_ITEM_KEY; 4450 num_bytes = fs_info->nodesize; 4451 } else { 4452 extent_key.offset = node->num_bytes; 4453 extent_key.type = BTRFS_EXTENT_ITEM_KEY; 4454 size += sizeof(*block_info); 4455 num_bytes = node->num_bytes; 4456 } 4457 4458 path = btrfs_alloc_path(); 4459 if (!path) 4460 return -ENOMEM; 4461 4462 
path->leave_spinning = 1; 4463 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 4464 &extent_key, size); 4465 if (ret) { 4466 btrfs_free_path(path); 4467 return ret; 4468 } 4469 4470 leaf = path->nodes[0]; 4471 extent_item = btrfs_item_ptr(leaf, path->slots[0], 4472 struct btrfs_extent_item); 4473 btrfs_set_extent_refs(leaf, extent_item, 1); 4474 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 4475 btrfs_set_extent_flags(leaf, extent_item, 4476 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); 4477 4478 if (skinny_metadata) { 4479 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); 4480 } else { 4481 block_info = (struct btrfs_tree_block_info *)(extent_item + 1); 4482 btrfs_set_tree_block_key(leaf, block_info, &extent_op->key); 4483 btrfs_set_tree_block_level(leaf, block_info, ref->level); 4484 iref = (struct btrfs_extent_inline_ref *)(block_info + 1); 4485 } 4486 4487 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) { 4488 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 4489 btrfs_set_extent_inline_ref_type(leaf, iref, 4490 BTRFS_SHARED_BLOCK_REF_KEY); 4491 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->parent); 4492 } else { 4493 btrfs_set_extent_inline_ref_type(leaf, iref, 4494 BTRFS_TREE_BLOCK_REF_KEY); 4495 btrfs_set_extent_inline_ref_offset(leaf, iref, ref->root); 4496 } 4497 4498 btrfs_mark_buffer_dirty(leaf); 4499 btrfs_free_path(path); 4500 4501 ret = remove_from_free_space_tree(trans, extent_key.objectid, 4502 num_bytes); 4503 if (ret) 4504 return ret; 4505 4506 ret = btrfs_update_block_group(trans, extent_key.objectid, 4507 fs_info->nodesize, 1); 4508 if (ret) { /* -ENOENT, logic error */ 4509 btrfs_err(fs_info, "update block group failed for %llu %llu", 4510 extent_key.objectid, extent_key.offset); 4511 BUG(); 4512 } 4513 4514 trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid, 4515 fs_info->nodesize); 4516 return ret; 4517} 4518 4519int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 4520 struct btrfs_root *root, u64 owner, 4521 u64 offset, u64 ram_bytes, 4522 struct btrfs_key *ins) 4523{ 4524 struct btrfs_ref generic_ref = { 0 }; 4525 4526 BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); 4527 4528 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT, 4529 ins->objectid, ins->offset, 0); 4530 btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset); 4531 btrfs_ref_tree_mod(root->fs_info, &generic_ref); 4532 4533 return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes); 4534} 4535 4536/* 4537 * this is used by the tree logging recovery code. It records that 4538 * an extent has been allocated and makes sure to clear the free 4539 * space cache bits as well 4540 */ 4541int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, 4542 u64 root_objectid, u64 owner, u64 offset, 4543 struct btrfs_key *ins) 4544{ 4545 struct btrfs_fs_info *fs_info = trans->fs_info; 4546 int ret; 4547 struct btrfs_block_group *block_group; 4548 struct btrfs_space_info *space_info; 4549 4550 /* 4551 * Mixed block groups will exclude before processing the log so we only 4552 * need to do the exclude dance if this fs isn't mixed. 
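 *
 * For the non-mixed case, __exclude_logged_extent() below keeps the logged
 * range from being handed back out by the allocator before its extent item
 * is re-inserted.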
4553 */ 4554 if (!btrfs_fs_incompat(fs_info, MIXED_GROUPS)) { 4555 ret = __exclude_logged_extent(fs_info, ins->objectid, 4556 ins->offset); 4557 if (ret) 4558 return ret; 4559 } 4560 4561 block_group = btrfs_lookup_block_group(fs_info, ins->objectid); 4562 if (!block_group) 4563 return -EINVAL; 4564 4565 space_info = block_group->space_info; 4566 spin_lock(&space_info->lock); 4567 spin_lock(&block_group->lock); 4568 space_info->bytes_reserved += ins->offset; 4569 block_group->reserved += ins->offset; 4570 spin_unlock(&block_group->lock); 4571 spin_unlock(&space_info->lock); 4572 4573 ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner, 4574 offset, ins, 1); 4575 if (ret) 4576 btrfs_pin_extent(trans, ins->objectid, ins->offset, 1); 4577 btrfs_put_block_group(block_group); 4578 return ret; 4579} 4580 4581static struct extent_buffer * 4582btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, 4583 u64 bytenr, int level, u64 owner, 4584 enum btrfs_lock_nesting nest) 4585{ 4586 struct btrfs_fs_info *fs_info = root->fs_info; 4587 struct extent_buffer *buf; 4588 4589 buf = btrfs_find_create_tree_block(fs_info, bytenr); 4590 if (IS_ERR(buf)) 4591 return buf; 4592 4593 /* 4594 * Extra safety check in case the extent tree is corrupted and extent 4595 * allocator chooses to use a tree block which is already used and 4596 * locked. 4597 */ 4598 if (buf->lock_owner == current->pid) { 4599 btrfs_err_rl(fs_info, 4600"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected", 4601 buf->start, btrfs_header_owner(buf), current->pid); 4602 free_extent_buffer(buf); 4603 return ERR_PTR(-EUCLEAN); 4604 } 4605 4606 btrfs_set_buffer_lockdep_class(owner, buf, level); 4607 __btrfs_tree_lock(buf, nest); 4608 btrfs_clean_tree_block(buf); 4609 clear_bit(EXTENT_BUFFER_STALE, &buf->bflags); 4610 4611 btrfs_set_lock_blocking_write(buf); 4612 set_extent_buffer_uptodate(buf); 4613 4614 memzero_extent_buffer(buf, 0, sizeof(struct btrfs_header)); 4615 btrfs_set_header_level(buf, level); 4616 btrfs_set_header_bytenr(buf, buf->start); 4617 btrfs_set_header_generation(buf, trans->transid); 4618 btrfs_set_header_backref_rev(buf, BTRFS_MIXED_BACKREF_REV); 4619 btrfs_set_header_owner(buf, owner); 4620 write_extent_buffer_fsid(buf, fs_info->fs_devices->metadata_uuid); 4621 write_extent_buffer_chunk_tree_uuid(buf, fs_info->chunk_tree_uuid); 4622 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 4623 buf->log_index = root->log_transid % 2; 4624 /* 4625 * we allow two log transactions at a time, use different 4626 * EXTENT bit to differentiate dirty pages. 4627 */ 4628 if (buf->log_index == 0) 4629 set_extent_dirty(&root->dirty_log_pages, buf->start, 4630 buf->start + buf->len - 1, GFP_NOFS); 4631 else 4632 set_extent_new(&root->dirty_log_pages, buf->start, 4633 buf->start + buf->len - 1); 4634 } else { 4635 buf->log_index = -1; 4636 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 4637 buf->start + buf->len - 1, GFP_NOFS); 4638 } 4639 trans->dirty = true; 4640 /* this returns a buffer locked for blocking */ 4641 return buf; 4642} 4643 4644/* 4645 * finds a free extent and does all the dirty work required for allocation 4646 * returns the tree buffer or an ERR_PTR on error. 
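 *
 * Roughly, the steps below are:
 *
 *	btrfs_use_block_rsv()         reserve metadata space for the block
 *	btrfs_reserve_extent()        pick a bytenr for the new block
 *	btrfs_init_new_buffer()       set up and lock the extent buffer
 *	btrfs_add_delayed_tree_ref()  queue insertion of the extent item
 *	                              (skipped for the log tree)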
4647 */ 4648struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, 4649 struct btrfs_root *root, 4650 u64 parent, u64 root_objectid, 4651 const struct btrfs_disk_key *key, 4652 int level, u64 hint, 4653 u64 empty_size, 4654 enum btrfs_lock_nesting nest) 4655{ 4656 struct btrfs_fs_info *fs_info = root->fs_info; 4657 struct btrfs_key ins; 4658 struct btrfs_block_rsv *block_rsv; 4659 struct extent_buffer *buf; 4660 struct btrfs_delayed_extent_op *extent_op; 4661 struct btrfs_ref generic_ref = { 0 }; 4662 u64 flags = 0; 4663 int ret; 4664 u32 blocksize = fs_info->nodesize; 4665 bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA); 4666 4667#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 4668 if (btrfs_is_testing(fs_info)) { 4669 buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr, 4670 level, root_objectid, nest); 4671 if (!IS_ERR(buf)) 4672 root->alloc_bytenr += blocksize; 4673 return buf; 4674 } 4675#endif 4676 4677 block_rsv = btrfs_use_block_rsv(trans, root, blocksize); 4678 if (IS_ERR(block_rsv)) 4679 return ERR_CAST(block_rsv); 4680 4681 ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize, 4682 empty_size, hint, &ins, 0, 0); 4683 if (ret) 4684 goto out_unuse; 4685 4686 buf = btrfs_init_new_buffer(trans, root, ins.objectid, level, 4687 root_objectid, nest); 4688 if (IS_ERR(buf)) { 4689 ret = PTR_ERR(buf); 4690 goto out_free_reserved; 4691 } 4692 4693 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 4694 if (parent == 0) 4695 parent = ins.objectid; 4696 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 4697 } else 4698 BUG_ON(parent > 0); 4699 4700 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 4701 extent_op = btrfs_alloc_delayed_extent_op(); 4702 if (!extent_op) { 4703 ret = -ENOMEM; 4704 goto out_free_buf; 4705 } 4706 if (key) 4707 memcpy(&extent_op->key, key, sizeof(extent_op->key)); 4708 else 4709 memset(&extent_op->key, 0, sizeof(extent_op->key)); 4710 extent_op->flags_to_set = flags; 4711 extent_op->update_key = skinny_metadata ? 
false : true; 4712 extent_op->update_flags = true; 4713 extent_op->is_data = false; 4714 extent_op->level = level; 4715 4716 btrfs_init_generic_ref(&generic_ref, BTRFS_ADD_DELAYED_EXTENT, 4717 ins.objectid, ins.offset, parent); 4718 generic_ref.real_root = root->root_key.objectid; 4719 btrfs_init_tree_ref(&generic_ref, level, root_objectid); 4720 btrfs_ref_tree_mod(fs_info, &generic_ref); 4721 ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op); 4722 if (ret) 4723 goto out_free_delayed; 4724 } 4725 return buf; 4726 4727out_free_delayed: 4728 btrfs_free_delayed_extent_op(extent_op); 4729out_free_buf: 4730 btrfs_tree_unlock(buf); 4731 free_extent_buffer(buf); 4732out_free_reserved: 4733 btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0); 4734out_unuse: 4735 btrfs_unuse_block_rsv(fs_info, block_rsv, blocksize); 4736 return ERR_PTR(ret); 4737} 4738 4739struct walk_control { 4740 u64 refs[BTRFS_MAX_LEVEL]; 4741 u64 flags[BTRFS_MAX_LEVEL]; 4742 struct btrfs_key update_progress; 4743 struct btrfs_key drop_progress; 4744 int drop_level; 4745 int stage; 4746 int level; 4747 int shared_level; 4748 int update_ref; 4749 int keep_locks; 4750 int reada_slot; 4751 int reada_count; 4752 int restarted; 4753}; 4754 4755#define DROP_REFERENCE 1 4756#define UPDATE_BACKREF 2 4757 4758static noinline void reada_walk_down(struct btrfs_trans_handle *trans, 4759 struct btrfs_root *root, 4760 struct walk_control *wc, 4761 struct btrfs_path *path) 4762{ 4763 struct btrfs_fs_info *fs_info = root->fs_info; 4764 u64 bytenr; 4765 u64 generation; 4766 u64 refs; 4767 u64 flags; 4768 u32 nritems; 4769 struct btrfs_key key; 4770 struct extent_buffer *eb; 4771 int ret; 4772 int slot; 4773 int nread = 0; 4774 4775 if (path->slots[wc->level] < wc->reada_slot) { 4776 wc->reada_count = wc->reada_count * 2 / 3; 4777 wc->reada_count = max(wc->reada_count, 2); 4778 } else { 4779 wc->reada_count = wc->reada_count * 3 / 2; 4780 wc->reada_count = min_t(int, wc->reada_count, 4781 BTRFS_NODEPTRS_PER_BLOCK(fs_info)); 4782 } 4783 4784 eb = path->nodes[wc->level]; 4785 nritems = btrfs_header_nritems(eb); 4786 4787 for (slot = path->slots[wc->level]; slot < nritems; slot++) { 4788 if (nread >= wc->reada_count) 4789 break; 4790 4791 cond_resched(); 4792 bytenr = btrfs_node_blockptr(eb, slot); 4793 generation = btrfs_node_ptr_generation(eb, slot); 4794 4795 if (slot == path->slots[wc->level]) 4796 goto reada; 4797 4798 if (wc->stage == UPDATE_BACKREF && 4799 generation <= root->root_key.offset) 4800 continue; 4801 4802 /* We don't lock the tree block, it's OK to be racy here */ 4803 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, 4804 wc->level - 1, 1, &refs, 4805 &flags); 4806 /* We don't care about errors in readahead. */ 4807 if (ret < 0) 4808 continue; 4809 BUG_ON(refs == 0); 4810 4811 if (wc->stage == DROP_REFERENCE) { 4812 if (refs == 1) 4813 goto reada; 4814 4815 if (wc->level == 1 && 4816 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 4817 continue; 4818 if (!wc->update_ref || 4819 generation <= root->root_key.offset) 4820 continue; 4821 btrfs_node_key_to_cpu(eb, &key, slot); 4822 ret = btrfs_comp_cpu_keys(&key, 4823 &wc->update_progress); 4824 if (ret < 0) 4825 continue; 4826 } else { 4827 if (wc->level == 1 && 4828 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 4829 continue; 4830 } 4831reada: 4832 readahead_tree_block(fs_info, bytenr); 4833 nread++; 4834 } 4835 wc->reada_slot = slot; 4836} 4837 4838/* 4839 * helper to process tree block while walking down the tree. 
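 *
 * when wc->stage == DROP_REFERENCE and the block is shared (refs > 1),
 * this function returns 1 so the walk stops here and walk_up_proc()
 * drops just this tree's reference instead of descending further.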
4840 * 4841 * when wc->stage == UPDATE_BACKREF, this function updates 4842 * back refs for pointers in the block. 4843 * 4844 * NOTE: return value 1 means we should stop walking down. 4845 */ 4846static noinline int walk_down_proc(struct btrfs_trans_handle *trans, 4847 struct btrfs_root *root, 4848 struct btrfs_path *path, 4849 struct walk_control *wc, int lookup_info) 4850{ 4851 struct btrfs_fs_info *fs_info = root->fs_info; 4852 int level = wc->level; 4853 struct extent_buffer *eb = path->nodes[level]; 4854 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; 4855 int ret; 4856 4857 if (wc->stage == UPDATE_BACKREF && 4858 btrfs_header_owner(eb) != root->root_key.objectid) 4859 return 1; 4860 4861 /* 4862 * when reference count of tree block is 1, it won't increase 4863 * again. once full backref flag is set, we never clear it. 4864 */ 4865 if (lookup_info && 4866 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || 4867 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) { 4868 BUG_ON(!path->locks[level]); 4869 ret = btrfs_lookup_extent_info(trans, fs_info, 4870 eb->start, level, 1, 4871 &wc->refs[level], 4872 &wc->flags[level]); 4873 BUG_ON(ret == -ENOMEM); 4874 if (ret) 4875 return ret; 4876 BUG_ON(wc->refs[level] == 0); 4877 } 4878 4879 if (wc->stage == DROP_REFERENCE) { 4880 if (wc->refs[level] > 1) 4881 return 1; 4882 4883 if (path->locks[level] && !wc->keep_locks) { 4884 btrfs_tree_unlock_rw(eb, path->locks[level]); 4885 path->locks[level] = 0; 4886 } 4887 return 0; 4888 } 4889 4890 /* wc->stage == UPDATE_BACKREF */ 4891 if (!(wc->flags[level] & flag)) { 4892 BUG_ON(!path->locks[level]); 4893 ret = btrfs_inc_ref(trans, root, eb, 1); 4894 BUG_ON(ret); /* -ENOMEM */ 4895 ret = btrfs_dec_ref(trans, root, eb, 0); 4896 BUG_ON(ret); /* -ENOMEM */ 4897 ret = btrfs_set_disk_extent_flags(trans, eb, flag, 4898 btrfs_header_level(eb), 0); 4899 BUG_ON(ret); /* -ENOMEM */ 4900 wc->flags[level] |= flag; 4901 } 4902 4903 /* 4904 * the block is shared by multiple trees, so it's not good to 4905 * keep the tree lock 4906 */ 4907 if (path->locks[level] && level > 0) { 4908 btrfs_tree_unlock_rw(eb, path->locks[level]); 4909 path->locks[level] = 0; 4910 } 4911 return 0; 4912} 4913 4914/* 4915 * This is used to verify a ref exists for this root to deal with a bug where we 4916 * would have a drop_progress key that hadn't been updated properly. 4917 */ 4918static int check_ref_exists(struct btrfs_trans_handle *trans, 4919 struct btrfs_root *root, u64 bytenr, u64 parent, 4920 int level) 4921{ 4922 struct btrfs_path *path; 4923 struct btrfs_extent_inline_ref *iref; 4924 int ret; 4925 4926 path = btrfs_alloc_path(); 4927 if (!path) 4928 return -ENOMEM; 4929 4930 ret = lookup_extent_backref(trans, path, &iref, bytenr, 4931 root->fs_info->nodesize, parent, 4932 root->root_key.objectid, level, 0); 4933 btrfs_free_path(path); 4934 if (ret == -ENOENT) 4935 return 0; 4936 if (ret < 0) 4937 return ret; 4938 return 1; 4939} 4940 4941/* 4942 * helper to process tree block pointer. 4943 * 4944 * when wc->stage == DROP_REFERENCE, this function checks 4945 * reference count of the block pointed to. if the block 4946 * is shared and we need update back refs for the subtree 4947 * rooted at the block, this function changes wc->stage to 4948 * UPDATE_BACKREF. if the block is shared and there is no 4949 * need to update back, this function drops the reference 4950 * to the block. 4951 * 4952 * NOTE: return value 1 means we should stop walking down. 
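 *
 * a rough sketch of the DROP_REFERENCE choices made below:
 *
 *	child refs == 1                   keep walking down into the child
 *	shared, no backref update needed  drop our ref on the child, skip it
 *	shared, backrefs need updating    switch wc->stage to UPDATE_BACKREF
 *	                                  and keep walking down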
4953 */ 4954static noinline int do_walk_down(struct btrfs_trans_handle *trans, 4955 struct btrfs_root *root, 4956 struct btrfs_path *path, 4957 struct walk_control *wc, int *lookup_info) 4958{ 4959 struct btrfs_fs_info *fs_info = root->fs_info; 4960 u64 bytenr; 4961 u64 generation; 4962 u64 parent; 4963 struct btrfs_key key; 4964 struct btrfs_key first_key; 4965 struct btrfs_ref ref = { 0 }; 4966 struct extent_buffer *next; 4967 int level = wc->level; 4968 int reada = 0; 4969 int ret = 0; 4970 bool need_account = false; 4971 4972 generation = btrfs_node_ptr_generation(path->nodes[level], 4973 path->slots[level]); 4974 /* 4975 * if the lower level block was created before the snapshot 4976 * was created, we know there is no need to update back refs 4977 * for the subtree 4978 */ 4979 if (wc->stage == UPDATE_BACKREF && 4980 generation <= root->root_key.offset) { 4981 *lookup_info = 1; 4982 return 1; 4983 } 4984 4985 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); 4986 btrfs_node_key_to_cpu(path->nodes[level], &first_key, 4987 path->slots[level]); 4988 4989 next = find_extent_buffer(fs_info, bytenr); 4990 if (!next) { 4991 next = btrfs_find_create_tree_block(fs_info, bytenr); 4992 if (IS_ERR(next)) 4993 return PTR_ERR(next); 4994 4995 btrfs_set_buffer_lockdep_class(root->root_key.objectid, next, 4996 level - 1); 4997 reada = 1; 4998 } 4999 btrfs_tree_lock(next); 5000 btrfs_set_lock_blocking_write(next); 5001 5002 ret = btrfs_lookup_extent_info(trans, fs_info, bytenr, level - 1, 1, 5003 &wc->refs[level - 1], 5004 &wc->flags[level - 1]); 5005 if (ret < 0) 5006 goto out_unlock; 5007 5008 if (unlikely(wc->refs[level - 1] == 0)) { 5009 btrfs_err(fs_info, "Missing references."); 5010 ret = -EIO; 5011 goto out_unlock; 5012 } 5013 *lookup_info = 0; 5014 5015 if (wc->stage == DROP_REFERENCE) { 5016 if (wc->refs[level - 1] > 1) { 5017 need_account = true; 5018 if (level == 1 && 5019 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 5020 goto skip; 5021 5022 if (!wc->update_ref || 5023 generation <= root->root_key.offset) 5024 goto skip; 5025 5026 btrfs_node_key_to_cpu(path->nodes[level], &key, 5027 path->slots[level]); 5028 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress); 5029 if (ret < 0) 5030 goto skip; 5031 5032 wc->stage = UPDATE_BACKREF; 5033 wc->shared_level = level - 1; 5034 } 5035 } else { 5036 if (level == 1 && 5037 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 5038 goto skip; 5039 } 5040 5041 if (!btrfs_buffer_uptodate(next, generation, 0)) { 5042 btrfs_tree_unlock(next); 5043 free_extent_buffer(next); 5044 next = NULL; 5045 *lookup_info = 1; 5046 } 5047 5048 if (!next) { 5049 if (reada && level == 1) 5050 reada_walk_down(trans, root, wc, path); 5051 next = read_tree_block(fs_info, bytenr, generation, level - 1, 5052 &first_key); 5053 if (IS_ERR(next)) { 5054 return PTR_ERR(next); 5055 } else if (!extent_buffer_uptodate(next)) { 5056 free_extent_buffer(next); 5057 return -EIO; 5058 } 5059 btrfs_tree_lock(next); 5060 btrfs_set_lock_blocking_write(next); 5061 } 5062 5063 level--; 5064 ASSERT(level == btrfs_header_level(next)); 5065 if (level != btrfs_header_level(next)) { 5066 btrfs_err(root->fs_info, "mismatched level"); 5067 ret = -EIO; 5068 goto out_unlock; 5069 } 5070 path->nodes[level] = next; 5071 path->slots[level] = 0; 5072 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 5073 wc->level = level; 5074 if (wc->level == 1) 5075 wc->reada_slot = 0; 5076 return 0; 5077skip: 5078 wc->refs[level - 1] = 0; 5079 wc->flags[level - 1] = 0; 5080 if (wc->stage == 
DROP_REFERENCE) { 5081 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { 5082 parent = path->nodes[level]->start; 5083 } else { 5084 ASSERT(root->root_key.objectid == 5085 btrfs_header_owner(path->nodes[level])); 5086 if (root->root_key.objectid != 5087 btrfs_header_owner(path->nodes[level])) { 5088 btrfs_err(root->fs_info, 5089 "mismatched block owner"); 5090 ret = -EIO; 5091 goto out_unlock; 5092 } 5093 parent = 0; 5094 } 5095 5096 /* 5097 * If we had a drop_progress we need to verify the refs are set 5098 * as expected. If we find our ref then we know that from here 5099 * on out everything should be correct, and we can clear the 5100 * ->restarted flag. 5101 */ 5102 if (wc->restarted) { 5103 ret = check_ref_exists(trans, root, bytenr, parent, 5104 level - 1); 5105 if (ret < 0) 5106 goto out_unlock; 5107 if (ret == 0) 5108 goto no_delete; 5109 ret = 0; 5110 wc->restarted = 0; 5111 } 5112 5113 /* 5114 * Reloc tree doesn't contribute to qgroup numbers, and we have 5115 * already accounted them at merge time (replace_path), 5116 * thus we could skip expensive subtree trace here. 5117 */ 5118 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && 5119 need_account) { 5120 ret = btrfs_qgroup_trace_subtree(trans, next, 5121 generation, level - 1); 5122 if (ret) { 5123 btrfs_err_rl(fs_info, 5124 "Error %d accounting shared subtree. Quota is out of sync, rescan required.", 5125 ret); 5126 } 5127 } 5128 5129 /* 5130 * We need to update the next key in our walk control so we can 5131 * update the drop_progress key accordingly. We don't care if 5132 * find_next_key doesn't find a key because that means we're at 5133 * the end and are going to clean up now. 5134 */ 5135 wc->drop_level = level; 5136 find_next_key(path, level, &wc->drop_progress); 5137 5138 btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr, 5139 fs_info->nodesize, parent); 5140 btrfs_init_tree_ref(&ref, level - 1, root->root_key.objectid); 5141 ret = btrfs_free_extent(trans, &ref); 5142 if (ret) 5143 goto out_unlock; 5144 } 5145no_delete: 5146 *lookup_info = 1; 5147 ret = 1; 5148 5149out_unlock: 5150 btrfs_tree_unlock(next); 5151 free_extent_buffer(next); 5152 5153 return ret; 5154} 5155 5156/* 5157 * helper to process tree block while walking up the tree. 5158 * 5159 * when wc->stage == DROP_REFERENCE, this function drops 5160 * reference count on the block. 5161 * 5162 * when wc->stage == UPDATE_BACKREF, this function changes 5163 * wc->stage back to DROP_REFERENCE if we changed wc->stage 5164 * to UPDATE_BACKREF previously while processing the block. 5165 * 5166 * NOTE: return value 1 means we should stop walking up. 5167 */ 5168static noinline int walk_up_proc(struct btrfs_trans_handle *trans, 5169 struct btrfs_root *root, 5170 struct btrfs_path *path, 5171 struct walk_control *wc) 5172{ 5173 struct btrfs_fs_info *fs_info = root->fs_info; 5174 int ret; 5175 int level = wc->level; 5176 struct extent_buffer *eb = path->nodes[level]; 5177 u64 parent = 0; 5178 5179 if (wc->stage == UPDATE_BACKREF) { 5180 BUG_ON(wc->shared_level < level); 5181 if (level < wc->shared_level) 5182 goto out; 5183 5184 ret = find_next_key(path, level + 1, &wc->update_progress); 5185 if (ret > 0) 5186 wc->update_ref = 0; 5187 5188 wc->stage = DROP_REFERENCE; 5189 wc->shared_level = -1; 5190 path->slots[level] = 0; 5191 5192 /* 5193 * check reference count again if the block isn't locked. 5194 * we should start walking down the tree again if reference 5195 * count is one. 
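 * the block was left unlocked while this tree's backrefs were being
 * updated, so its reference count may have changed in the meantime.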
5196 */ 5197 if (!path->locks[level]) { 5198 BUG_ON(level == 0); 5199 btrfs_tree_lock(eb); 5200 btrfs_set_lock_blocking_write(eb); 5201 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 5202 5203 ret = btrfs_lookup_extent_info(trans, fs_info, 5204 eb->start, level, 1, 5205 &wc->refs[level], 5206 &wc->flags[level]); 5207 if (ret < 0) { 5208 btrfs_tree_unlock_rw(eb, path->locks[level]); 5209 path->locks[level] = 0; 5210 return ret; 5211 } 5212 BUG_ON(wc->refs[level] == 0); 5213 if (wc->refs[level] == 1) { 5214 btrfs_tree_unlock_rw(eb, path->locks[level]); 5215 path->locks[level] = 0; 5216 return 1; 5217 } 5218 } 5219 } 5220 5221 /* wc->stage == DROP_REFERENCE */ 5222 BUG_ON(wc->refs[level] > 1 && !path->locks[level]); 5223 5224 if (wc->refs[level] == 1) { 5225 if (level == 0) { 5226 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 5227 ret = btrfs_dec_ref(trans, root, eb, 1); 5228 else 5229 ret = btrfs_dec_ref(trans, root, eb, 0); 5230 BUG_ON(ret); /* -ENOMEM */ 5231 if (is_fstree(root->root_key.objectid)) { 5232 ret = btrfs_qgroup_trace_leaf_items(trans, eb); 5233 if (ret) { 5234 btrfs_err_rl(fs_info, 5235 "error %d accounting leaf items, quota is out of sync, rescan required", 5236 ret); 5237 } 5238 } 5239 } 5240 /* make block locked assertion in btrfs_clean_tree_block happy */ 5241 if (!path->locks[level] && 5242 btrfs_header_generation(eb) == trans->transid) { 5243 btrfs_tree_lock(eb); 5244 btrfs_set_lock_blocking_write(eb); 5245 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 5246 } 5247 btrfs_clean_tree_block(eb); 5248 } 5249 5250 if (eb == root->node) { 5251 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 5252 parent = eb->start; 5253 else if (root->root_key.objectid != btrfs_header_owner(eb)) 5254 goto owner_mismatch; 5255 } else { 5256 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 5257 parent = path->nodes[level + 1]->start; 5258 else if (root->root_key.objectid != 5259 btrfs_header_owner(path->nodes[level + 1])) 5260 goto owner_mismatch; 5261 } 5262 5263 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); 5264out: 5265 wc->refs[level] = 0; 5266 wc->flags[level] = 0; 5267 return 0; 5268 5269owner_mismatch: 5270 btrfs_err_rl(fs_info, "unexpected tree owner, have %llu expect %llu", 5271 btrfs_header_owner(eb), root->root_key.objectid); 5272 return -EUCLEAN; 5273} 5274 5275static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 5276 struct btrfs_root *root, 5277 struct btrfs_path *path, 5278 struct walk_control *wc) 5279{ 5280 int level = wc->level; 5281 int lookup_info = 1; 5282 int ret; 5283 5284 while (level >= 0) { 5285 ret = walk_down_proc(trans, root, path, wc, lookup_info); 5286 if (ret > 0) 5287 break; 5288 5289 if (level == 0) 5290 break; 5291 5292 if (path->slots[level] >= 5293 btrfs_header_nritems(path->nodes[level])) 5294 break; 5295 5296 ret = do_walk_down(trans, root, path, wc, &lookup_info); 5297 if (ret > 0) { 5298 path->slots[level]++; 5299 continue; 5300 } else if (ret < 0) 5301 return ret; 5302 level = wc->level; 5303 } 5304 return 0; 5305} 5306 5307static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 5308 struct btrfs_root *root, 5309 struct btrfs_path *path, 5310 struct walk_control *wc, int max_level) 5311{ 5312 int level = wc->level; 5313 int ret; 5314 5315 path->slots[level] = btrfs_header_nritems(path->nodes[level]); 5316 while (level < max_level && path->nodes[level]) { 5317 wc->level = level; 5318 if (path->slots[level] + 1 < 5319 btrfs_header_nritems(path->nodes[level])) { 5320 
path->slots[level]++; 5321 return 0; 5322 } else { 5323 ret = walk_up_proc(trans, root, path, wc); 5324 if (ret > 0) 5325 return 0; 5326 if (ret < 0) 5327 return ret; 5328 5329 if (path->locks[level]) { 5330 btrfs_tree_unlock_rw(path->nodes[level], 5331 path->locks[level]); 5332 path->locks[level] = 0; 5333 } 5334 free_extent_buffer(path->nodes[level]); 5335 path->nodes[level] = NULL; 5336 level++; 5337 } 5338 } 5339 return 1; 5340} 5341 5342/* 5343 * drop a subvolume tree. 5344 * 5345 * this function traverses the tree, freeing any blocks that are only 5346 * referenced by the tree. 5347 * 5348 * when a shared tree block is found, this function decreases its 5349 * reference count by one. if update_ref is true, this function 5350 * also makes sure backrefs for the shared block and all lower level 5351 * blocks are properly updated. 5352 * 5353 * If called with for_reloc == 0, may exit early with -EAGAIN 5354 */ 5355int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc) 5356{ 5357 struct btrfs_fs_info *fs_info = root->fs_info; 5358 struct btrfs_path *path; 5359 struct btrfs_trans_handle *trans; 5360 struct btrfs_root *tree_root = fs_info->tree_root; 5361 struct btrfs_root_item *root_item = &root->root_item; 5362 struct walk_control *wc; 5363 struct btrfs_key key; 5364 int err = 0; 5365 int ret; 5366 int level; 5367 bool root_dropped = false; 5368 5369 btrfs_debug(fs_info, "Drop subvolume %llu", root->root_key.objectid); 5370 5371 path = btrfs_alloc_path(); 5372 if (!path) { 5373 err = -ENOMEM; 5374 goto out; 5375 } 5376 5377 wc = kzalloc(sizeof(*wc), GFP_NOFS); 5378 if (!wc) { 5379 btrfs_free_path(path); 5380 err = -ENOMEM; 5381 goto out; 5382 } 5383 5384 /* 5385 * Use join to avoid potential EINTR from transaction start. See 5386 * wait_reserve_ticket and the whole reservation callchain. 5387 */ 5388 if (for_reloc) 5389 trans = btrfs_join_transaction(tree_root); 5390 else 5391 trans = btrfs_start_transaction(tree_root, 0); 5392 if (IS_ERR(trans)) { 5393 err = PTR_ERR(trans); 5394 goto out_free; 5395 } 5396 5397 err = btrfs_run_delayed_items(trans); 5398 if (err) 5399 goto out_end_trans; 5400 5401 /* 5402 * This will help us catch people modifying the fs tree while we're 5403 * dropping it. It is unsafe to mess with the fs tree while it's being 5404 * dropped as we unlock the root node and parent nodes as we walk down 5405 * the tree, assuming nothing will change. If something does change 5406 * then we'll have stale information and drop references to blocks we've 5407 * already dropped.
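 *
 * The walk below resumes from root_item->drop_progress when a previous
 * drop of this root was interrupted, otherwise it starts from the root
 * node.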
5408 */ 5409 set_bit(BTRFS_ROOT_DELETING, &root->state); 5410 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 5411 level = btrfs_header_level(root->node); 5412 path->nodes[level] = btrfs_lock_root_node(root); 5413 btrfs_set_lock_blocking_write(path->nodes[level]); 5414 path->slots[level] = 0; 5415 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 5416 memset(&wc->update_progress, 0, 5417 sizeof(wc->update_progress)); 5418 } else { 5419 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 5420 memcpy(&wc->update_progress, &key, 5421 sizeof(wc->update_progress)); 5422 5423 level = root_item->drop_level; 5424 BUG_ON(level == 0); 5425 path->lowest_level = level; 5426 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5427 path->lowest_level = 0; 5428 if (ret < 0) { 5429 err = ret; 5430 goto out_end_trans; 5431 } 5432 WARN_ON(ret > 0); 5433 5434 /* 5435 * unlock our path, this is safe because only this 5436 * function is allowed to delete this snapshot 5437 */ 5438 btrfs_unlock_up_safe(path, 0); 5439 5440 level = btrfs_header_level(root->node); 5441 while (1) { 5442 btrfs_tree_lock(path->nodes[level]); 5443 btrfs_set_lock_blocking_write(path->nodes[level]); 5444 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 5445 5446 ret = btrfs_lookup_extent_info(trans, fs_info, 5447 path->nodes[level]->start, 5448 level, 1, &wc->refs[level], 5449 &wc->flags[level]); 5450 if (ret < 0) { 5451 err = ret; 5452 goto out_end_trans; 5453 } 5454 BUG_ON(wc->refs[level] == 0); 5455 5456 if (level == root_item->drop_level) 5457 break; 5458 5459 btrfs_tree_unlock(path->nodes[level]); 5460 path->locks[level] = 0; 5461 WARN_ON(wc->refs[level] != 1); 5462 level--; 5463 } 5464 } 5465 5466 wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state); 5467 wc->level = level; 5468 wc->shared_level = -1; 5469 wc->stage = DROP_REFERENCE; 5470 wc->update_ref = update_ref; 5471 wc->keep_locks = 0; 5472 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info); 5473 5474 while (1) { 5475 5476 ret = walk_down_tree(trans, root, path, wc); 5477 if (ret < 0) { 5478 err = ret; 5479 break; 5480 } 5481 5482 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL); 5483 if (ret < 0) { 5484 err = ret; 5485 break; 5486 } 5487 5488 if (ret > 0) { 5489 BUG_ON(wc->stage != DROP_REFERENCE); 5490 break; 5491 } 5492 5493 if (wc->stage == DROP_REFERENCE) { 5494 wc->drop_level = wc->level; 5495 btrfs_node_key_to_cpu(path->nodes[wc->drop_level], 5496 &wc->drop_progress, 5497 path->slots[wc->drop_level]); 5498 } 5499 btrfs_cpu_key_to_disk(&root_item->drop_progress, 5500 &wc->drop_progress); 5501 root_item->drop_level = wc->drop_level; 5502 5503 BUG_ON(wc->level == 0); 5504 if (btrfs_should_end_transaction(trans) || 5505 (!for_reloc && btrfs_need_cleaner_sleep(fs_info))) { 5506 ret = btrfs_update_root(trans, tree_root, 5507 &root->root_key, 5508 root_item); 5509 if (ret) { 5510 btrfs_abort_transaction(trans, ret); 5511 err = ret; 5512 goto out_end_trans; 5513 } 5514 5515 btrfs_end_transaction_throttle(trans); 5516 if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) { 5517 btrfs_debug(fs_info, 5518 "drop snapshot early exit"); 5519 err = -EAGAIN; 5520 goto out_free; 5521 } 5522 5523 /* 5524 * Use join to avoid potential EINTR from transaction 5525 * start. See wait_reserve_ticket and the whole 5526 * reservation callchain. 
5527 */ 5528 if (for_reloc) 5529 trans = btrfs_join_transaction(tree_root); 5530 else 5531 trans = btrfs_start_transaction(tree_root, 0); 5532 if (IS_ERR(trans)) { 5533 err = PTR_ERR(trans); 5534 goto out_free; 5535 } 5536 } 5537 } 5538 btrfs_release_path(path); 5539 if (err) 5540 goto out_end_trans; 5541 5542 ret = btrfs_del_root(trans, &root->root_key); 5543 if (ret) { 5544 btrfs_abort_transaction(trans, ret); 5545 err = ret; 5546 goto out_end_trans; 5547 } 5548 5549 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 5550 ret = btrfs_find_root(tree_root, &root->root_key, path, 5551 NULL, NULL); 5552 if (ret < 0) { 5553 btrfs_abort_transaction(trans, ret); 5554 err = ret; 5555 goto out_end_trans; 5556 } else if (ret > 0) { 5557 /* if we fail to delete the orphan item this time 5558 * around, it'll get picked up the next time. 5559 * 5560 * The most common failure here is just -ENOENT. 5561 */ 5562 btrfs_del_orphan_item(trans, tree_root, 5563 root->root_key.objectid); 5564 } 5565 } 5566 5567 /* 5568 * This subvolume is going to be completely dropped, and won't be 5569 * recorded as dirty roots, thus pertrans meta rsv will not be freed at 5570 * commit transaction time. So free it here manually. 5571 */ 5572 btrfs_qgroup_convert_reserved_meta(root, INT_MAX); 5573 btrfs_qgroup_free_meta_all_pertrans(root); 5574 5575 if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) 5576 btrfs_add_dropped_root(trans, root); 5577 else 5578 btrfs_put_root(root); 5579 root_dropped = true; 5580out_end_trans: 5581 btrfs_end_transaction_throttle(trans); 5582out_free: 5583 kfree(wc); 5584 btrfs_free_path(path); 5585out: 5586 /* 5587 * So if we need to stop dropping the snapshot for whatever reason we 5588 * need to make sure to add it back to the dead root list so that we 5589 * keep trying to do the work later. This also cleans up roots if we 5590 * don't have it in the radix (like when we recover after a power fail 5591 * or unmount) so we don't leak memory. 5592 */ 5593 if (!for_reloc && !root_dropped) 5594 btrfs_add_dead_root(root); 5595 return err; 5596} 5597 5598/* 5599 * drop subtree rooted at tree block 'node'. 
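 *
 * both 'node' and 'parent' must be locked by the caller, e.g. (purely
 * illustrative, with a hypothetical reloc_root/trans):
 *
 *	btrfs_tree_lock(parent);
 *	btrfs_tree_lock(node);
 *	ret = btrfs_drop_subtree(trans, reloc_root, node, parent);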
5600 * 5601 * NOTE: this function will unlock and release tree block 'node' 5602 * only used by relocation code 5603 */ 5604int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 5605 struct btrfs_root *root, 5606 struct extent_buffer *node, 5607 struct extent_buffer *parent) 5608{ 5609 struct btrfs_fs_info *fs_info = root->fs_info; 5610 struct btrfs_path *path; 5611 struct walk_control *wc; 5612 int level; 5613 int parent_level; 5614 int ret = 0; 5615 int wret; 5616 5617 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 5618 5619 path = btrfs_alloc_path(); 5620 if (!path) 5621 return -ENOMEM; 5622 5623 wc = kzalloc(sizeof(*wc), GFP_NOFS); 5624 if (!wc) { 5625 btrfs_free_path(path); 5626 return -ENOMEM; 5627 } 5628 5629 btrfs_assert_tree_locked(parent); 5630 parent_level = btrfs_header_level(parent); 5631 atomic_inc(&parent->refs); 5632 path->nodes[parent_level] = parent; 5633 path->slots[parent_level] = btrfs_header_nritems(parent); 5634 5635 btrfs_assert_tree_locked(node); 5636 level = btrfs_header_level(node); 5637 path->nodes[level] = node; 5638 path->slots[level] = 0; 5639 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 5640 5641 wc->refs[parent_level] = 1; 5642 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; 5643 wc->level = level; 5644 wc->shared_level = -1; 5645 wc->stage = DROP_REFERENCE; 5646 wc->update_ref = 0; 5647 wc->keep_locks = 1; 5648 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(fs_info); 5649 5650 while (1) { 5651 wret = walk_down_tree(trans, root, path, wc); 5652 if (wret < 0) { 5653 ret = wret; 5654 break; 5655 } 5656 5657 wret = walk_up_tree(trans, root, path, wc, parent_level); 5658 if (wret < 0) 5659 ret = wret; 5660 if (wret != 0) 5661 break; 5662 } 5663 5664 kfree(wc); 5665 btrfs_free_path(path); 5666 return ret; 5667} 5668 5669/* 5670 * helper to account the unused space of all the readonly block group in the 5671 * space_info. takes mirrors into account. 5672 */ 5673u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) 5674{ 5675 struct btrfs_block_group *block_group; 5676 u64 free_bytes = 0; 5677 int factor; 5678 5679 /* It's df, we don't care if it's racy */ 5680 if (list_empty(&sinfo->ro_bgs)) 5681 return 0; 5682 5683 spin_lock(&sinfo->lock); 5684 list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) { 5685 spin_lock(&block_group->lock); 5686 5687 if (!block_group->ro) { 5688 spin_unlock(&block_group->lock); 5689 continue; 5690 } 5691 5692 factor = btrfs_bg_type_to_factor(block_group->flags); 5693 free_bytes += (block_group->length - 5694 block_group->used) * factor; 5695 5696 spin_unlock(&block_group->lock); 5697 } 5698 spin_unlock(&sinfo->lock); 5699 5700 return free_bytes; 5701} 5702 5703int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, 5704 u64 start, u64 end) 5705{ 5706 return unpin_extent_range(fs_info, start, end, false); 5707} 5708 5709/* 5710 * It used to be that old block groups would be left around forever. 5711 * Iterating over them would be enough to trim unused space. Since we 5712 * now automatically remove them, we also need to iterate over unallocated 5713 * space. 5714 * 5715 * We don't want a transaction for this since the discard may take a 5716 * substantial amount of time. We don't require that a transaction be 5717 * running, but we do need to take a running transaction into account 5718 * to ensure that we're not discarding chunks that were released or 5719 * allocated in the current transaction. 
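 *
 * Unallocated ranges are found by scanning device->alloc_state for space
 * that carries neither CHUNK_TRIMMED nor CHUNK_ALLOCATED, and each range
 * that gets discarded is then marked CHUNK_TRIMMED so it is not trimmed
 * again.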
5720 * 5721 * Holding the chunks lock will prevent other threads from allocating 5722 * or releasing chunks, but it won't prevent a running transaction 5723 * from committing and releasing the memory that the pending chunks 5724 * list head uses. For that, we need to take a reference to the 5725 * transaction and hold the commit root sem. We only need to hold 5726 * it while performing the free space search since we have already 5727 * held back allocations. 5728 */ 5729static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed) 5730{ 5731 u64 start = SZ_1M, len = 0, end = 0; 5732 int ret; 5733 5734 *trimmed = 0; 5735 5736 /* Discard not supported = nothing to do. */ 5737 if (!blk_queue_discard(bdev_get_queue(device->bdev))) 5738 return 0; 5739 5740 /* Not writable = nothing to do. */ 5741 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) 5742 return 0; 5743 5744 /* No free space = nothing to do. */ 5745 if (device->total_bytes <= device->bytes_used) 5746 return 0; 5747 5748 ret = 0; 5749 5750 while (1) { 5751 struct btrfs_fs_info *fs_info = device->fs_info; 5752 u64 bytes; 5753 5754 ret = mutex_lock_interruptible(&fs_info->chunk_mutex); 5755 if (ret) 5756 break; 5757 5758 find_first_clear_extent_bit(&device->alloc_state, start, 5759 &start, &end, 5760 CHUNK_TRIMMED | CHUNK_ALLOCATED); 5761 5762 /* Check if there are any CHUNK_* bits left */ 5763 if (start > device->total_bytes) { 5764 WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); 5765 btrfs_warn_in_rcu(fs_info, 5766"ignoring attempt to trim beyond device size: offset %llu length %llu device %s device size %llu", 5767 start, end - start + 1, 5768 rcu_str_deref(device->name), 5769 device->total_bytes); 5770 mutex_unlock(&fs_info->chunk_mutex); 5771 ret = 0; 5772 break; 5773 } 5774 5775 /* Ensure we skip the reserved area in the first 1M */ 5776 start = max_t(u64, start, SZ_1M); 5777 5778 /* 5779 * If find_first_clear_extent_bit find a range that spans the 5780 * end of the device it will set end to -1, in this case it's up 5781 * to the caller to trim the value to the size of the device. 5782 */ 5783 end = min(end, device->total_bytes - 1); 5784 5785 len = end - start + 1; 5786 5787 /* We didn't find any extents */ 5788 if (!len) { 5789 mutex_unlock(&fs_info->chunk_mutex); 5790 ret = 0; 5791 break; 5792 } 5793 5794 ret = btrfs_issue_discard(device->bdev, start, len, 5795 &bytes); 5796 if (!ret) 5797 set_extent_bits(&device->alloc_state, start, 5798 start + bytes - 1, 5799 CHUNK_TRIMMED); 5800 mutex_unlock(&fs_info->chunk_mutex); 5801 5802 if (ret) 5803 break; 5804 5805 start += len; 5806 *trimmed += bytes; 5807 5808 if (fatal_signal_pending(current)) { 5809 ret = -ERESTARTSYS; 5810 break; 5811 } 5812 5813 cond_resched(); 5814 } 5815 5816 return ret; 5817} 5818 5819/* 5820 * Trim the whole filesystem by: 5821 * 1) trimming the free space in each block group 5822 * 2) trimming the unallocated space on each device 5823 * 5824 * This will also continue trimming even if a block group or device encounters 5825 * an error. The return value will be the last error, or 0 if nothing bad 5826 * happens. 
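 *
 * On return, range->len is overwritten with the total number of bytes that
 * were actually trimmed across block groups and devices.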
5827 */ 5828int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) 5829{ 5830 struct btrfs_block_group *cache = NULL; 5831 struct btrfs_device *device; 5832 struct list_head *devices; 5833 u64 group_trimmed; 5834 u64 range_end = U64_MAX; 5835 u64 start; 5836 u64 end; 5837 u64 trimmed = 0; 5838 u64 bg_failed = 0; 5839 u64 dev_failed = 0; 5840 int bg_ret = 0; 5841 int dev_ret = 0; 5842 int ret = 0; 5843 5844 /* 5845 * Check range overflow if range->len is set. 5846 * The default range->len is U64_MAX. 5847 */ 5848 if (range->len != U64_MAX && 5849 check_add_overflow(range->start, range->len, &range_end)) 5850 return -EINVAL; 5851 5852 cache = btrfs_lookup_first_block_group(fs_info, range->start); 5853 for (; cache; cache = btrfs_next_block_group(cache)) { 5854 if (cache->start >= range_end) { 5855 btrfs_put_block_group(cache); 5856 break; 5857 } 5858 5859 start = max(range->start, cache->start); 5860 end = min(range_end, cache->start + cache->length); 5861 5862 if (end - start >= range->minlen) { 5863 if (!btrfs_block_group_done(cache)) { 5864 ret = btrfs_cache_block_group(cache, 0); 5865 if (ret) { 5866 bg_failed++; 5867 bg_ret = ret; 5868 continue; 5869 } 5870 ret = btrfs_wait_block_group_cache_done(cache); 5871 if (ret) { 5872 bg_failed++; 5873 bg_ret = ret; 5874 continue; 5875 } 5876 } 5877 ret = btrfs_trim_block_group(cache, 5878 &group_trimmed, 5879 start, 5880 end, 5881 range->minlen); 5882 5883 trimmed += group_trimmed; 5884 if (ret) { 5885 bg_failed++; 5886 bg_ret = ret; 5887 continue; 5888 } 5889 } 5890 } 5891 5892 if (bg_failed) 5893 btrfs_warn(fs_info, 5894 "failed to trim %llu block group(s), last error %d", 5895 bg_failed, bg_ret); 5896 mutex_lock(&fs_info->fs_devices->device_list_mutex); 5897 devices = &fs_info->fs_devices->devices; 5898 list_for_each_entry(device, devices, dev_list) { 5899 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) 5900 continue; 5901 5902 ret = btrfs_trim_free_extents(device, &group_trimmed); 5903 if (ret) { 5904 dev_failed++; 5905 dev_ret = ret; 5906 break; 5907 } 5908 5909 trimmed += group_trimmed; 5910 } 5911 mutex_unlock(&fs_info->fs_devices->device_list_mutex); 5912 5913 if (dev_failed) 5914 btrfs_warn(fs_info, 5915 "failed to trim %llu device(s), last error %d", 5916 dev_failed, dev_ret); 5917 range->len = trimmed; 5918 if (bg_ret) 5919 return bg_ret; 5920 return dev_ret; 5921} 5922
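/*
 * Illustrative only: a FITRIM-style caller hands btrfs_trim_fs() a
 * struct fstrim_range and reads the trimmed byte count back out of it,
 * roughly like:
 *
 *	struct fstrim_range range = {
 *		.start = 0,
 *		.len = U64_MAX,
 *		.minlen = SZ_1M,	// hypothetical minimum extent length
 *	};
 *	int ret = btrfs_trim_fs(fs_info, &range);
 *	// on success range.len now holds the total bytes trimmed
 */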