// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ratelimit.h>
#include <linux/error-injection.h>
#include <linux/sched/mm.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"
#include "disk-io.h"
#include "extent_io.h"
#include "inode-map.h"
#include "volumes.h"
#include "space-info.h"
#include "delalloc-space.h"
#include "block-group.h"
#include "discard.h"

#define BITS_PER_BITMAP		(PAGE_SIZE * 8UL)
#define MAX_CACHE_BYTES_PER_GIG	SZ_64K
#define FORCE_EXTENT_THRESHOLD	SZ_1M

struct btrfs_trim_range {
	u64 start;
	u64 bytes;
	struct list_head list;
};

static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
				struct btrfs_free_space *bitmap_info);
static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info);
static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info);
static int btrfs_wait_cache_io_root(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_io_ctl *io_ctl,
				    struct btrfs_path *path);

static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
					       struct btrfs_path *path,
					       u64 offset)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_key key;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct inode *inode = NULL;
	unsigned nofs_flag;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret > 0) {
		btrfs_release_path(path);
		return ERR_PTR(-ENOENT);
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_free_space_key(leaf, header, &disk_key);
	btrfs_disk_key_to_cpu(&location, &disk_key);
	btrfs_release_path(path);

	/*
	 * We are often under a trans handle at this point, so we need to make
	 * sure NOFS is set to keep us from deadlocking.
	 */
	nofs_flag = memalloc_nofs_save();
	inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path);
	btrfs_release_path(path);
	memalloc_nofs_restore(nofs_flag);
	if (IS_ERR(inode))
		return inode;

	mapping_set_gfp_mask(inode->i_mapping,
			mapping_gfp_constraint(inode->i_mapping,
			~(__GFP_FS | __GFP_HIGHMEM)));

	return inode;
}

struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
		struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct inode *inode = NULL;
	u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	spin_lock(&block_group->lock);
	if (block_group->inode)
		inode = igrab(block_group->inode);
	spin_unlock(&block_group->lock);
	if (inode)
		return inode;

	inode = __lookup_free_space_inode(fs_info->tree_root, path,
					  block_group->start);
	if (IS_ERR(inode))
		return inode;

	spin_lock(&block_group->lock);
	if (!((BTRFS_I(inode)->flags & flags) == flags)) {
		btrfs_info(fs_info, "Old style space inode found, converting.");
		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
			BTRFS_INODE_NODATACOW;
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
	}

	if (!block_group->iref) {
		block_group->inode = igrab(inode);
		block_group->iref = 1;
	}
	spin_unlock(&block_group->lock);

	return inode;
}

static int __create_free_space_inode(struct btrfs_root *root,
				     struct btrfs_trans_handle *trans,
				     struct btrfs_path *path,
				     u64 ino, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_disk_key disk_key;
	struct btrfs_free_space_header *header;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
	int ret;

	ret = btrfs_insert_empty_inode(trans, root, path, ino);
	if (ret)
		return ret;

	/* We inline crc's for the free disk space cache */
	if (ino != BTRFS_FREE_INO_OBJECTID)
		flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;

	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	btrfs_item_key(leaf, &disk_key, path->slots[0]);
	memzero_extent_buffer(leaf, (unsigned long)inode_item,
			      sizeof(*inode_item));
	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
	btrfs_set_inode_size(leaf, inode_item, 0);
	btrfs_set_inode_nbytes(leaf, inode_item, 0);
	btrfs_set_inode_uid(leaf, inode_item, 0);
	btrfs_set_inode_gid(leaf, inode_item, 0);
	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, inode_item, flags);
	btrfs_set_inode_nlink(leaf, inode_item, 1);
	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
	btrfs_set_inode_block_group(leaf, inode_item, offset);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;
	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(struct btrfs_free_space_header));
	if (ret < 0) {
		btrfs_release_path(path);
		return ret;
	}

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	memzero_extent_buffer(leaf, (unsigned long)header, sizeof(*header));
	btrfs_set_free_space_key(leaf, header, &disk_key);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;
}
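
/*
 * Reserve a new objectid in the tree root and create the free space cache
 * inode for the given block group with it.
 */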
int create_free_space_inode(struct btrfs_trans_handle *trans,
			    struct btrfs_block_group *block_group,
			    struct btrfs_path *path)
{
	int ret;
	u64 ino;

	ret = btrfs_find_free_objectid(trans->fs_info->tree_root, &ino);
	if (ret < 0)
		return ret;

	return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
					 ino, block_group->start);
}

int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
				       struct btrfs_block_rsv *rsv)
{
	u64 needed_bytes;
	int ret;

	/* 1 for slack space, 1 for updating the inode */
	needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
		btrfs_calc_metadata_size(fs_info, 1);

	spin_lock(&rsv->lock);
	if (rsv->reserved < needed_bytes)
		ret = -ENOSPC;
	else
		ret = 0;
	spin_unlock(&rsv->lock);
	return ret;
}

int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	bool locked = false;

	if (block_group) {
		struct btrfs_path *path = btrfs_alloc_path();

		if (!path) {
			ret = -ENOMEM;
			goto fail;
		}
		locked = true;
		mutex_lock(&trans->transaction->cache_write_mutex);
		if (!list_empty(&block_group->io_list)) {
			list_del_init(&block_group->io_list);

			btrfs_wait_cache_io(trans, block_group, path);
			btrfs_put_block_group(block_group);
		}

		/*
		 * Now that we've truncated the cache away, it's no longer
		 * set up or written.
		 */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
	}

	btrfs_i_size_write(BTRFS_I(inode), 0);
	truncate_pagecache(inode, 0);

	/*
	 * We skip the throttling logic for free space cache inodes, so we don't
	 * need to check for -EAGAIN.
	 */
	ret = btrfs_truncate_inode_items(trans, root, inode,
					 0, BTRFS_EXTENT_DATA_KEY);
	if (ret)
		goto fail;

	ret = btrfs_update_inode(trans, root, inode);

fail:
	if (locked)
		mutex_unlock(&trans->transaction->cache_write_mutex);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	return ret;
}

static void readahead_cache(struct inode *inode)
{
	struct file_ra_state *ra;
	unsigned long last_index;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return;

	file_ra_state_init(ra, inode->i_mapping);
	last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;

	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);

	kfree(ra);
}

static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
		       int write)
{
	int num_pages;
	int check_crcs = 0;

	num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);

	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FREE_INO_OBJECTID)
		check_crcs = 1;

	/* Make sure we can fit our crcs and generation into the first page */
	if (write && check_crcs &&
	    (num_pages * sizeof(u32) + sizeof(u64)) > PAGE_SIZE)
		return -ENOSPC;

	memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));

	io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
	if (!io_ctl->pages)
		return -ENOMEM;

	io_ctl->num_pages = num_pages;
	io_ctl->fs_info = btrfs_sb(inode->i_sb);
	io_ctl->check_crcs = check_crcs;
	io_ctl->inode = inode;

	return 0;
}
ALLOW_ERROR_INJECTION(io_ctl_init, ERRNO);

static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
{
	kfree(io_ctl->pages);
	io_ctl->pages = NULL;
}

static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
{
	if (io_ctl->cur) {
		io_ctl->cur = NULL;
		io_ctl->orig = NULL;
	}
}

static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
{
	ASSERT(io_ctl->index < io_ctl->num_pages);
	io_ctl->page = io_ctl->pages[io_ctl->index++];
	io_ctl->cur = page_address(io_ctl->page);
	io_ctl->orig = io_ctl->cur;
	io_ctl->size = PAGE_SIZE;
	if (clear)
		clear_page(io_ctl->cur);
}

static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
{
	int i;

	io_ctl_unmap_page(io_ctl);

	for (i = 0; i < io_ctl->num_pages; i++) {
		if (io_ctl->pages[i]) {
			ClearPageChecked(io_ctl->pages[i]);
			unlock_page(io_ctl->pages[i]);
			put_page(io_ctl->pages[i]);
		}
	}
}

static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
{
	struct page *page;
	struct inode *inode = io_ctl->inode;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
	int i;

	for (i = 0; i < io_ctl->num_pages; i++) {
		page = find_or_create_page(inode->i_mapping, i, mask);
		if (!page) {
			io_ctl_drop_pages(io_ctl);
			return -ENOMEM;
		}
		io_ctl->pages[i] = page;
		if (uptodate && !PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (page->mapping != inode->i_mapping) {
				btrfs_err(BTRFS_I(inode)->root->fs_info,
					  "free space cache page truncated");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
			if (!PageUptodate(page)) {
				btrfs_err(BTRFS_I(inode)->root->fs_info,
					  "error reading free space cache");
				io_ctl_drop_pages(io_ctl);
				return -EIO;
			}
		}
	}

	for (i = 0; i < io_ctl->num_pages; i++) {
		clear_page_dirty_for_io(io_ctl->pages[i]);
		set_page_extent_mapped(io_ctl->pages[i]);
	}

	return 0;
}
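
/*
 * Note: when crcs are enabled, the first page of the cache file starts with
 * one u32 checksum per page followed by the u64 generation; the writer and
 * reader below skip over that area before touching the generation field.
 */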
static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
	io_ctl_map_page(io_ctl, 1);

	/*
	 * Skip the csum areas. If we don't check crcs then we just have a
	 * 64bit chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
		io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	put_unaligned_le64(generation, io_ctl->cur);
	io_ctl->cur += sizeof(u64);
}

static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
{
	u64 cache_gen;

	/*
	 * Skip the crc area. If we don't check crcs then we just have a 64bit
	 * chunk at the front of the first page.
	 */
	if (io_ctl->check_crcs) {
		io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
		io_ctl->size -= sizeof(u64) +
			(sizeof(u32) * io_ctl->num_pages);
	} else {
		io_ctl->cur += sizeof(u64);
		io_ctl->size -= sizeof(u64) * 2;
	}

	cache_gen = get_unaligned_le64(io_ctl->cur);
	if (cache_gen != generation) {
		btrfs_err_rl(io_ctl->fs_info,
			"space cache generation (%llu) does not match inode (%llu)",
			     cache_gen, generation);
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}
	io_ctl->cur += sizeof(u64);
	return 0;
}

static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
{
	u32 *tmp;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_unmap_page(io_ctl);
		return;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
	btrfs_crc32c_final(crc, (u8 *)&crc);
	io_ctl_unmap_page(io_ctl);
	tmp = page_address(io_ctl->pages[0]);
	tmp += index;
	*tmp = crc;
}

static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
{
	u32 *tmp, val;
	u32 crc = ~(u32)0;
	unsigned offset = 0;

	if (!io_ctl->check_crcs) {
		io_ctl_map_page(io_ctl, 0);
		return 0;
	}

	if (index == 0)
		offset = sizeof(u32) * io_ctl->num_pages;

	tmp = page_address(io_ctl->pages[0]);
	tmp += index;
	val = *tmp;

	io_ctl_map_page(io_ctl, 0);
	crc = btrfs_crc32c(crc, io_ctl->orig + offset, PAGE_SIZE - offset);
	btrfs_crc32c_final(crc, (u8 *)&crc);
	if (val != crc) {
		btrfs_err_rl(io_ctl->fs_info,
			"csum mismatch on free space cache");
		io_ctl_unmap_page(io_ctl);
		return -EIO;
	}

	return 0;
}

static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
			    void *bitmap)
{
	struct btrfs_free_space_entry *entry;

	if (!io_ctl->cur)
		return -ENOSPC;

	entry = io_ctl->cur;
	put_unaligned_le64(offset, &entry->offset);
	put_unaligned_le64(bytes, &entry->bytes);
	entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
		BTRFS_FREE_SPACE_EXTENT;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_set_crc(io_ctl, io_ctl->index - 1);

	/* No more pages to map */
	if (io_ctl->index >= io_ctl->num_pages)
		return 0;

	/* map the next page */
	io_ctl_map_page(io_ctl, 1);
	return 0;
}

static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
{
	if (!io_ctl->cur)
		return -ENOSPC;

	/*
	 * If we aren't at the start of the current page, unmap this one and
	 * map the next one if there is any left.
	 */
	if (io_ctl->cur != io_ctl->orig) {
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
		if (io_ctl->index >= io_ctl->num_pages)
			return -ENOSPC;
		io_ctl_map_page(io_ctl, 0);
	}

	copy_page(io_ctl->cur, bitmap);
	io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	if (io_ctl->index < io_ctl->num_pages)
		io_ctl_map_page(io_ctl, 0);
	return 0;
}

static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
{
	/*
	 * If we're not on the boundary we know we've modified the page and we
	 * need to crc the page.
	 */
	if (io_ctl->cur != io_ctl->orig)
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	else
		io_ctl_unmap_page(io_ctl);

	while (io_ctl->index < io_ctl->num_pages) {
		io_ctl_map_page(io_ctl, 1);
		io_ctl_set_crc(io_ctl, io_ctl->index - 1);
	}
}

static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
			     struct btrfs_free_space *entry, u8 *type)
{
	struct btrfs_free_space_entry *e;
	int ret;

	if (!io_ctl->cur) {
		ret = io_ctl_check_crc(io_ctl, io_ctl->index);
		if (ret)
			return ret;
	}

	e = io_ctl->cur;
	entry->offset = get_unaligned_le64(&e->offset);
	entry->bytes = get_unaligned_le64(&e->bytes);
	*type = e->type;
	io_ctl->cur += sizeof(struct btrfs_free_space_entry);
	io_ctl->size -= sizeof(struct btrfs_free_space_entry);

	if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
		return 0;

	io_ctl_unmap_page(io_ctl);

	return 0;
}

static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
			      struct btrfs_free_space *entry)
{
	int ret;

	ret = io_ctl_check_crc(io_ctl, io_ctl->index);
	if (ret)
		return ret;

	copy_page(entry->bitmap, io_ctl->cur);
	io_ctl_unmap_page(io_ctl);

	return 0;
}

/*
 * Since we attach pinned extents after the fact we can have contiguous sections
 * of free space that are split up in entries. This poses a problem with the
 * tree logging stuff since it could have allocated across what appears to be 2
 * entries since we would have merged the entries when adding the pinned extents
 * back to the free space cache. So run through the space cache that we just
 * loaded and merge contiguous entries. This will make the log replay stuff not
 * blow up and it will make for nicer allocator behavior.
 */
static void merge_space_tree(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_free_space *e, *prev = NULL;
	struct rb_node *n;

again:
	spin_lock(&ctl->tree_lock);
	for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
		e = rb_entry(n, struct btrfs_free_space, offset_index);
		if (!prev)
			goto next;
		if (e->bitmap || prev->bitmap)
			goto next;
		if (prev->offset + prev->bytes == e->offset) {
			unlink_free_space(ctl, prev);
			unlink_free_space(ctl, e);
			prev->bytes += e->bytes;
			kmem_cache_free(btrfs_free_space_cachep, e);
			link_free_space(ctl, prev);
			prev = NULL;
			spin_unlock(&ctl->tree_lock);
			goto again;
		}
next:
		prev = e;
	}
	spin_unlock(&ctl->tree_lock);
}

static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
				   struct btrfs_free_space_ctl *ctl,
				   struct btrfs_path *path, u64 offset)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	struct btrfs_io_ctl io_ctl;
	struct btrfs_key key;
	struct btrfs_free_space *e, *n;
	LIST_HEAD(bitmaps);
	u64 num_entries;
	u64 num_bitmaps;
	u64 generation;
	u8 type;
	int ret = 0;

	/* Nothing in the space cache, goodbye */
	if (!i_size_read(inode))
		return 0;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return 0;
	else if (ret > 0) {
		btrfs_release_path(path);
		return 0;
	}

	ret = -1;

	leaf = path->nodes[0];
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	num_entries = btrfs_free_space_entries(leaf, header);
	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
	generation = btrfs_free_space_generation(leaf, header);
	btrfs_release_path(path);

	if (!BTRFS_I(inode)->generation) {
		btrfs_info(fs_info,
			   "the free space cache file (%llu) is invalid, skip it",
			   offset);
		return 0;
	}

	if (BTRFS_I(inode)->generation != generation) {
		btrfs_err(fs_info,
			  "free space inode generation (%llu) did not match free space cache generation (%llu)",
			  BTRFS_I(inode)->generation, generation);
		return 0;
	}

	if (!num_entries)
		return 0;

	ret = io_ctl_init(&io_ctl, inode, 0);
	if (ret)
		return ret;

	readahead_cache(inode);

	ret = io_ctl_prepare_pages(&io_ctl, true);
	if (ret)
		goto out;

	ret = io_ctl_check_crc(&io_ctl, 0);
	if (ret)
		goto free_cache;

	ret = io_ctl_check_generation(&io_ctl, generation);
	if (ret)
		goto free_cache;

	while (num_entries) {
		e = kmem_cache_zalloc(btrfs_free_space_cachep,
				      GFP_NOFS);
		if (!e) {
			ret = -ENOMEM;
			goto free_cache;
		}

		ret = io_ctl_read_entry(&io_ctl, e, &type);
		if (ret) {
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		/*
		 * Sync discard ensures that the free space cache is always
		 * trimmed. So when reading this in, the state should reflect
		 * that. We also do this for async as a stop gap for lack of
		 * persistence.
		 */
		if (btrfs_test_opt(fs_info, DISCARD_SYNC) ||
		    btrfs_test_opt(fs_info, DISCARD_ASYNC))
			e->trim_state = BTRFS_TRIM_STATE_TRIMMED;

		if (!e->bytes) {
			ret = -1;
			kmem_cache_free(btrfs_free_space_cachep, e);
			goto free_cache;
		}

		if (type == BTRFS_FREE_SPACE_EXTENT) {
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			spin_unlock(&ctl->tree_lock);
			if (ret) {
				btrfs_err(fs_info,
					"Duplicate entries in free space cache, dumping");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
		} else {
			ASSERT(num_bitmaps);
			num_bitmaps--;
			e->bitmap = kmem_cache_zalloc(
					btrfs_free_space_bitmap_cachep, GFP_NOFS);
			if (!e->bitmap) {
				ret = -ENOMEM;
				kmem_cache_free(
					btrfs_free_space_cachep, e);
				goto free_cache;
			}
			spin_lock(&ctl->tree_lock);
			ret = link_free_space(ctl, e);
			if (ret) {
				spin_unlock(&ctl->tree_lock);
				btrfs_err(fs_info,
					"Duplicate entries in free space cache, dumping");
				kmem_cache_free(btrfs_free_space_cachep, e);
				goto free_cache;
			}
			ctl->total_bitmaps++;
			ctl->op->recalc_thresholds(ctl);
			spin_unlock(&ctl->tree_lock);
			list_add_tail(&e->list, &bitmaps);
		}

		num_entries--;
	}

	io_ctl_unmap_page(&io_ctl);

	/*
	 * We add the bitmaps at the end of the entries in order that
	 * the bitmap entries are added to the cache.
	 */
	list_for_each_entry_safe(e, n, &bitmaps, list) {
		list_del_init(&e->list);
		ret = io_ctl_read_bitmap(&io_ctl, e);
		if (ret)
			goto free_cache;
		e->bitmap_extents = count_bitmap_extents(ctl, e);
		if (!btrfs_free_space_trimmed(e)) {
			ctl->discardable_extents[BTRFS_STAT_CURR] +=
				e->bitmap_extents;
			ctl->discardable_bytes[BTRFS_STAT_CURR] += e->bytes;
		}
	}

	io_ctl_drop_pages(&io_ctl);
	merge_space_tree(ctl);
	ret = 1;
out:
	btrfs_discard_update_discardable(ctl->private, ctl);
	io_ctl_free(&io_ctl);
	return ret;
free_cache:
	io_ctl_drop_pages(&io_ctl);
	__btrfs_remove_free_space_cache(ctl);
	goto out;
}

int load_free_space_cache(struct btrfs_block_group *block_group)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	struct btrfs_path *path;
	int ret = 0;
	bool matched;
	u64 used = block_group->used;

	/*
	 * If this block group has been marked to be cleared for one reason or
	 * another then we can't trust the on disk cache, so just return.
	 */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	path = btrfs_alloc_path();
	if (!path)
		return 0;
	path->search_commit_root = 1;
	path->skip_locking = 1;

	/*
	 * We must pass a path with search_commit_root set to btrfs_iget in
	 * order to avoid a deadlock when allocating extents for the tree root.
	 *
	 * When we are COWing an extent buffer from the tree root, when looking
	 * for a free extent, at extent-tree.c:find_free_extent(), we can find
	 * a block group without its free space cache loaded. When we find one
	 * we must load its space cache which requires reading its free space
	 * cache's inode item from the root tree. If this inode item is located
	 * in the same leaf that we started COWing before, then we end up in
	 * deadlock on the extent buffer (trying to read lock it when we
	 * previously write locked it).
	 *
	 * It's safe to read the inode item using the commit root because
	 * block groups, once loaded, stay in memory forever (until they are
	 * removed) as well as their space caches once loaded. New block groups
	 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
	 * we will never try to read their inode item while the fs is mounted.
	 */
	inode = lookup_free_space_inode(block_group, path);
	if (IS_ERR(inode)) {
		btrfs_free_path(path);
		return 0;
	}

	/* We may have converted the inode and made the cache invalid. */
	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
		spin_unlock(&block_group->lock);
		btrfs_free_path(path);
		goto out;
	}
	spin_unlock(&block_group->lock);

	ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
				      path, block_group->start);
	btrfs_free_path(path);
	if (ret <= 0)
		goto out;

	spin_lock(&ctl->tree_lock);
	matched = (ctl->free_space == (block_group->length - used -
				       block_group->bytes_super));
	spin_unlock(&ctl->tree_lock);

	if (!matched) {
		__btrfs_remove_free_space_cache(ctl);
		btrfs_warn(fs_info,
			   "block group %llu has wrong amount of free space",
			   block_group->start);
		ret = -1;
	}
out:
	if (ret < 0) {
		/* This cache is bogus, make sure it gets cleared */
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_CLEAR;
		spin_unlock(&block_group->lock);
		ret = 0;

		btrfs_warn(fs_info,
			   "failed to load free space cache for block group %llu, rebuilding it now",
			   block_group->start);
	}

	iput(inode);
	return ret;
}

static noinline_for_stack
int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
			      struct btrfs_free_space_ctl *ctl,
			      struct btrfs_block_group *block_group,
			      int *entries, int *bitmaps,
			      struct list_head *bitmap_list)
{
	int ret;
	struct btrfs_free_cluster *cluster = NULL;
	struct btrfs_free_cluster *cluster_locked = NULL;
	struct rb_node *node = rb_first(&ctl->free_space_offset);
	struct btrfs_trim_range *trim_entry;

	/* Get the cluster for this block_group if it exists */
	if (block_group && !list_empty(&block_group->cluster_list)) {
		cluster = list_entry(block_group->cluster_list.next,
				     struct btrfs_free_cluster,
				     block_group_list);
	}

	if (!node && cluster) {
		cluster_locked = cluster;
		spin_lock(&cluster_locked->lock);
		node = rb_first(&cluster->root);
		cluster = NULL;
	}

	/* Write out the extent entries */
	while (node) {
		struct btrfs_free_space *e;

		e = rb_entry(node, struct btrfs_free_space, offset_index);
		*entries += 1;

		ret = io_ctl_add_entry(io_ctl, e->offset, e->bytes,
				       e->bitmap);
		if (ret)
			goto fail;

		if (e->bitmap) {
			list_add_tail(&e->list, bitmap_list);
			*bitmaps += 1;
		}
		node = rb_next(node);
		if (!node && cluster) {
			node = rb_first(&cluster->root);
			cluster_locked = cluster;
			spin_lock(&cluster_locked->lock);
			cluster = NULL;
		}
	}
	if (cluster_locked) {
		spin_unlock(&cluster_locked->lock);
		cluster_locked = NULL;
	}

	/*
	 * Make sure we don't miss any range that was removed from our rbtree
	 * because trimming is running. Otherwise after a umount+mount (or crash
	 * after committing the transaction) we would leak free space and get
	 * an inconsistent free space cache report from fsck.
	 */
	list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) {
		ret = io_ctl_add_entry(io_ctl, trim_entry->start,
				       trim_entry->bytes, NULL);
		if (ret)
			goto fail;
		*entries += 1;
	}

	return 0;
fail:
	if (cluster_locked)
		spin_unlock(&cluster_locked->lock);
	return -ENOSPC;
}

static noinline_for_stack int
update_cache_item(struct btrfs_trans_handle *trans,
		  struct btrfs_root *root,
		  struct inode *inode,
		  struct btrfs_path *path, u64 offset,
		  int entries, int bitmaps)
{
	struct btrfs_key key;
	struct btrfs_free_space_header *header;
	struct extent_buffer *leaf;
	int ret;

	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
	key.offset = offset;
	key.type = 0;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0) {
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
				 EXTENT_DELALLOC, 0, 0, NULL);
		goto fail;
	}
	leaf = path->nodes[0];
	if (ret > 0) {
		struct btrfs_key found_key;
		ASSERT(path->slots[0]);
		path->slots[0]--;
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
		    found_key.offset != offset) {
			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
					 inode->i_size - 1, EXTENT_DELALLOC, 0,
					 0, NULL);
			btrfs_release_path(path);
			goto fail;
		}
	}

	BTRFS_I(inode)->generation = trans->transid;
	header = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_free_space_header);
	btrfs_set_free_space_entries(leaf, header, entries);
	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
	btrfs_set_free_space_generation(leaf, header, trans->transid);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	return 0;

fail:
	return -1;
}

static noinline_for_stack int write_pinned_extent_entries(
			    struct btrfs_trans_handle *trans,
			    struct btrfs_block_group *block_group,
			    struct btrfs_io_ctl *io_ctl,
			    int *entries)
{
	u64 start, extent_start, extent_end, len;
	struct extent_io_tree *unpin = NULL;
	int ret;

	if (!block_group)
		return 0;

	/*
	 * We want to add any pinned extents to our free space cache
	 * so we don't leak the space
	 *
	 * We shouldn't have switched the pinned extents yet so this is the
	 * right one
	 */
	unpin = &trans->transaction->pinned_extents;

	start = block_group->start;

	while (start < block_group->start + block_group->length) {
		ret = find_first_extent_bit(unpin, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY, NULL);
		if (ret)
			return 0;

		/* This pinned extent is out of our range */
		if (extent_start >= block_group->start + block_group->length)
			return 0;

		extent_start = max(extent_start, start);
		extent_end = min(block_group->start + block_group->length,
				 extent_end + 1);
		len = extent_end - extent_start;

		*entries += 1;
		ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL);
		if (ret)
			return -ENOSPC;

		start = extent_end;
	}

	return 0;
}

static noinline_for_stack int
write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
{
	struct btrfs_free_space *entry, *next;
	int ret;

	/* Write out the bitmaps */
	list_for_each_entry_safe(entry, next, bitmap_list, list) {
		ret = io_ctl_add_bitmap(io_ctl, entry->bitmap);
		if (ret)
			return -ENOSPC;
		list_del_init(&entry->list);
	}

	return 0;
}

static int flush_dirty_cache(struct inode *inode)
{
	int ret;

	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
	if (ret)
		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
				 EXTENT_DELALLOC, 0, 0, NULL);

	return ret;
}

static void noinline_for_stack
cleanup_bitmap_list(struct list_head *bitmap_list)
{
	struct btrfs_free_space *entry, *next;

	list_for_each_entry_safe(entry, next, bitmap_list, list)
		list_del_init(&entry->list);
}

static void noinline_for_stack
cleanup_write_cache_enospc(struct inode *inode,
			   struct btrfs_io_ctl *io_ctl,
			   struct extent_state **cached_state)
{
	io_ctl_drop_pages(io_ctl);
	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, cached_state);
}

static int __btrfs_wait_cache_io(struct btrfs_root *root,
				 struct btrfs_trans_handle *trans,
				 struct btrfs_block_group *block_group,
				 struct btrfs_io_ctl *io_ctl,
				 struct btrfs_path *path, u64 offset)
{
	int ret;
	struct inode *inode = io_ctl->inode;

	if (!inode)
		return 0;

	/* Flush the dirty pages in the cache file. */
	ret = flush_dirty_cache(inode);
	if (ret)
		goto out;

	/* Update the cache item to tell everyone this cache file is valid. */
	ret = update_cache_item(trans, root, inode, path, offset,
				io_ctl->entries, io_ctl->bitmaps);
out:
	if (ret) {
		invalidate_inode_pages2(inode->i_mapping);
		BTRFS_I(inode)->generation = 0;
		if (block_group)
			btrfs_debug(root->fs_info,
	  "failed to write free space cache for block group %llu error %d",
				  block_group->start, ret);
	}
	btrfs_update_inode(trans, root, inode);

	if (block_group) {
		/* the dirty list is protected by the dirty_bgs_lock */
		spin_lock(&trans->transaction->dirty_bgs_lock);

		/* the disk_cache_state is protected by the block group lock */
		spin_lock(&block_group->lock);

		/*
		 * only mark this as written if we didn't get put back on
		 * the dirty list while waiting for IO. Otherwise our
		 * cache state won't be right, and we won't get written again
		 */
		if (!ret && list_empty(&block_group->dirty_list))
			block_group->disk_cache_state = BTRFS_DC_WRITTEN;
		else if (ret)
			block_group->disk_cache_state = BTRFS_DC_ERROR;

		spin_unlock(&block_group->lock);
		spin_unlock(&trans->transaction->dirty_bgs_lock);
		io_ctl->inode = NULL;
		iput(inode);
	}

	return ret;

}

static int btrfs_wait_cache_io_root(struct btrfs_root *root,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_io_ctl *io_ctl,
				    struct btrfs_path *path)
{
	return __btrfs_wait_cache_io(root, trans, NULL, io_ctl, path, 0);
}

int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
			struct btrfs_block_group *block_group,
			struct btrfs_path *path)
{
	return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
				     block_group, &block_group->io_ctl,
				     path, block_group->start);
}

/**
 * __btrfs_write_out_cache - write out cached info to an inode
 * @root - the root the inode belongs to
 * @ctl - the free space cache we are going to write out
 * @block_group - the block_group for this cache if it belongs to a block_group
 * @trans - the trans handle
 *
 * This function writes out a free space cache struct to disk for quick recovery
 * on mount. This will return 0 if it was successful in writing the cache out,
 * or an errno if it was not.
 */
static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
				   struct btrfs_free_space_ctl *ctl,
				   struct btrfs_block_group *block_group,
				   struct btrfs_io_ctl *io_ctl,
				   struct btrfs_trans_handle *trans)
{
	struct extent_state *cached_state = NULL;
	LIST_HEAD(bitmap_list);
	int entries = 0;
	int bitmaps = 0;
	int ret;
	int must_iput = 0;

	if (!i_size_read(inode))
		return -EIO;

	WARN_ON(io_ctl->pages);
	ret = io_ctl_init(io_ctl, inode, 1);
	if (ret)
		return ret;

	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) {
		down_write(&block_group->data_rwsem);
		spin_lock(&block_group->lock);
		if (block_group->delalloc_bytes) {
			block_group->disk_cache_state = BTRFS_DC_WRITTEN;
			spin_unlock(&block_group->lock);
			up_write(&block_group->data_rwsem);
			BTRFS_I(inode)->generation = 0;
			ret = 0;
			must_iput = 1;
			goto out;
		}
		spin_unlock(&block_group->lock);
	}

	/* Lock all pages first so we can lock the extent safely. */
	ret = io_ctl_prepare_pages(io_ctl, false);
	if (ret)
		goto out_unlock;

	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
			 &cached_state);

	io_ctl_set_generation(io_ctl, trans->transid);

	mutex_lock(&ctl->cache_writeout_mutex);
	/* Write out the extent entries in the free space cache */
	spin_lock(&ctl->tree_lock);
	ret = write_cache_extent_entries(io_ctl, ctl,
					 block_group, &entries, &bitmaps,
					 &bitmap_list);
	if (ret)
		goto out_nospc_locked;

	/*
	 * Some spaces that are freed in the current transaction are pinned,
	 * they will be added into free space cache after the transaction is
	 * committed, we shouldn't lose them.
	 *
	 * If this changes while we are working we'll get added back to
	 * the dirty list and redo it. No locking needed
	 */
	ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries);
	if (ret)
		goto out_nospc_locked;

	/*
	 * At last, we write out all the bitmaps and keep cache_writeout_mutex
	 * locked while doing it because a concurrent trim can be manipulating
	 * or freeing the bitmap.
	 */
	ret = write_bitmap_entries(io_ctl, &bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);
	if (ret)
		goto out_nospc;

	/* Zero out the rest of the pages just to make sure */
	io_ctl_zero_remaining_pages(io_ctl);

	/* Everything is written out, now we dirty the pages in the file. */
	ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
				io_ctl->num_pages, 0, i_size_read(inode),
				&cached_state);
	if (ret)
		goto out_nospc;

	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);
	/*
	 * Release the pages and unlock the extent, we will flush
	 * them out later
	 */
	io_ctl_drop_pages(io_ctl);
	io_ctl_free(io_ctl);

	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
			     i_size_read(inode) - 1, &cached_state);

	/*
	 * At this point the pages are under IO and we're happy. The caller is
	 * responsible for waiting on them and updating the cache and the inode.
	 */
	io_ctl->entries = entries;
	io_ctl->bitmaps = bitmaps;

	ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
	if (ret)
		goto out;

	return 0;

out_nospc_locked:
	cleanup_bitmap_list(&bitmap_list);
	spin_unlock(&ctl->tree_lock);
	mutex_unlock(&ctl->cache_writeout_mutex);

out_nospc:
	cleanup_write_cache_enospc(inode, io_ctl, &cached_state);

out_unlock:
	if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
		up_write(&block_group->data_rwsem);

out:
	io_ctl->inode = NULL;
	io_ctl_free(io_ctl);
	if (ret) {
		invalidate_inode_pages2(inode->i_mapping);
		BTRFS_I(inode)->generation = 0;
	}
	btrfs_update_inode(trans, root, inode);
	if (must_iput)
		iput(inode);
	return ret;
}

int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
			  struct btrfs_block_group *block_group,
			  struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
	struct inode *inode;
	int ret = 0;

	spin_lock(&block_group->lock);
	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
		spin_unlock(&block_group->lock);
		return 0;
	}
	spin_unlock(&block_group->lock);

	inode = lookup_free_space_inode(block_group, path);
	if (IS_ERR(inode))
		return 0;

	ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
				block_group, &block_group->io_ctl, trans);
	if (ret) {
		btrfs_debug(fs_info,
	  "failed to write free space cache for block group %llu error %d",
			  block_group->start, ret);
		spin_lock(&block_group->lock);
		block_group->disk_cache_state = BTRFS_DC_ERROR;
		spin_unlock(&block_group->lock);

		block_group->io_ctl.inode = NULL;
		iput(inode);
	}

	/*
	 * if ret == 0 the caller is expected to call btrfs_wait_cache_io
	 * to wait for IO and put the inode
	 */

	return ret;
}
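
/*
 * Each bitmap entry covers BITS_PER_BITMAP * ctl->unit bytes of the block
 * group; the helpers below convert between byte offsets and bit positions
 * inside such a bitmap.
 */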
static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
					  u64 offset)
{
	ASSERT(offset >= bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div_u64(offset, unit));
}

static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
{
	return (unsigned long)(div_u64(bytes, unit));
}

static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
	bitmap_start = offset - ctl->start;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += ctl->start;

	return bitmap_start;
}

static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset. If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to have
			 * the quickest allocation time, and allocating from an
			 * extent is faster than allocating from a bitmap. So
			 * if we're inserting a bitmap and we find an entry at
			 * this offset, we want to go right, or after this entry
			 * logically. If we are inserting an extent and we've
			 * found a bitmap, we want to go left, or before
			 * logically.
			 */
			if (bitmap) {
				if (info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_right;
			} else {
				if (!info->bitmap) {
					WARN_ON_ONCE(1);
					return -EEXIST;
				}
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we just
 * want a section that has at least bytes size and comes at or after the given
 * offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_free_space_ctl *ctl,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = ctl->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = rb_prev(&entry->offset_index);
			if (n) {
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap &&
				    prev->offset + prev->bytes > offset)
					entry = prev;
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					offset_index);
			ASSERT(entry->offset <= offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap &&
			    prev->offset + prev->bytes > offset)
				return prev;
		}
		if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    ctl->unit > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

static inline void
__unlink_free_space(struct btrfs_free_space_ctl *ctl,
		    struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &ctl->free_space_offset);
	ctl->free_extents--;

	if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
		ctl->discardable_extents[BTRFS_STAT_CURR]--;
		ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes;
	}
}

static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info)
{
	__unlink_free_space(ctl, info);
	ctl->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_free_space_ctl *ctl,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	ASSERT(info->bytes || info->bitmap);
	ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	if (!info->bitmap && !btrfs_free_space_trimmed(info)) {
		ctl->discardable_extents[BTRFS_STAT_CURR]++;
		ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
	}

	ctl->free_space += info->bytes;
	ctl->free_extents++;
	return ret;
}

static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
	struct btrfs_block_group *block_group = ctl->private;
	u64 max_bytes;
	u64 bitmap_bytes;
	u64 extent_bytes;
	u64 size = block_group->length;
	u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
	u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);

	max_bitmaps = max_t(u64, max_bitmaps, 1);

	ASSERT(ctl->total_bitmaps <= max_bitmaps);

	/*
	 * We are trying to keep the total amount of memory used per 1GiB of
	 * space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation
	 * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of
	 * bitmaps, we may end up using more memory than this.
	 */
	if (size < SZ_1G)
		max_bytes = MAX_CACHE_BYTES_PER_GIG;
	else
		max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G);

	bitmap_bytes = ctl->total_bitmaps * ctl->unit;

	/*
	 * we want the extent entry threshold to always be at most 1/2 the max
	 * bytes we can have, or whatever is less than that.
	 */
	extent_bytes = max_bytes - bitmap_bytes;
	extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);

	ctl->extents_thresh =
		div_u64(extent_bytes, sizeof(struct btrfs_free_space));
}

static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
				       struct btrfs_free_space *info,
				       u64 offset, u64 bytes)
{
	unsigned long start, count, end;
	int extent_delta = -1;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	end = start + count;
	ASSERT(end <= BITS_PER_BITMAP);

	bitmap_clear(info->bitmap, start, count);

	info->bytes -= bytes;
	if (info->max_extent_size > ctl->unit)
		info->max_extent_size = 0;

	if (start && test_bit(start - 1, info->bitmap))
		extent_delta++;

	if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
		extent_delta++;

	info->bitmap_extents += extent_delta;
	if (!btrfs_free_space_trimmed(info)) {
		ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
	}
}

static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	__bitmap_clear_bits(ctl, info, offset, bytes);
	ctl->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, count, end;
	int extent_delta = 1;

	start = offset_to_bit(info->offset, ctl->unit, offset);
	count = bytes_to_bits(bytes, ctl->unit);
	end = start + count;
	ASSERT(end <= BITS_PER_BITMAP);

	bitmap_set(info->bitmap, start, count);

	info->bytes += bytes;
	ctl->free_space += bytes;

	if (start && test_bit(start - 1, info->bitmap))
		extent_delta--;

	if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap))
		extent_delta--;

	info->bitmap_extents += extent_delta;
	if (!btrfs_free_space_trimmed(info)) {
		ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta;
		ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes;
	}
}

/*
 * If we can not find suitable extent, we will use bytes to record
 * the size of the max extent.
 */
static int search_bitmap(struct btrfs_free_space_ctl *ctl,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes, bool for_alloc)
{
	unsigned long found_bits = 0;
	unsigned long max_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;
	unsigned long extent_bits;

	/*
	 * Skip searching the bitmap if we don't have a contiguous section that
	 * is large enough for this allocation.
	 */
	if (for_alloc &&
	    bitmap_info->max_extent_size &&
	    bitmap_info->max_extent_size < *bytes) {
		*bytes = bitmap_info->max_extent_size;
		return -1;
	}

	i = offset_to_bit(bitmap_info->offset, ctl->unit,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, ctl->unit);

	for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
		if (for_alloc && bits == 1) {
			found_bits = 1;
			break;
		}
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		extent_bits = next_zero - i;
		if (extent_bits >= bits) {
			found_bits = extent_bits;
			break;
		} else if (extent_bits > max_bits) {
			max_bits = extent_bits;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * ctl->unit) + bitmap_info->offset;
		*bytes = (u64)(found_bits) * ctl->unit;
		return 0;
	}

	*bytes = (u64)(max_bits) * ctl->unit;
	bitmap_info->max_extent_size = *bytes;
	return -1;
}

static inline u64 get_max_extent_size(struct btrfs_free_space *entry)
{
	if (entry->bitmap)
		return entry->max_extent_size;
	return entry->bytes;
}

/* Cache the size of the max extent in bytes */
static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
		unsigned long align, u64 *max_extent_size)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	u64 tmp;
	u64 align_off;
	int ret;

	if (!ctl->free_space_offset.rb_node)
		goto out;

	entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
	if (!entry)
		goto out;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes) {
			*max_extent_size = max(get_max_extent_size(entry),
					       *max_extent_size);
			continue;
		}

		/* make sure the space returned is big enough
		 * to match our requested alignment
		 */
		if (*bytes >= align) {
			tmp = entry->offset - ctl->start + align - 1;
			tmp = div64_u64(tmp, align);
			tmp = tmp * align + ctl->start;
			align_off = tmp - entry->offset;
		} else {
			align_off = 0;
			tmp = entry->offset;
		}

		if (entry->bytes < *bytes + align_off) {
			*max_extent_size = max(get_max_extent_size(entry),
					       *max_extent_size);
			continue;
		}

		if (entry->bitmap) {
			u64 size = *bytes;

			ret = search_bitmap(ctl, entry, &tmp, &size, true);
			if (!ret) {
				*offset = tmp;
				*bytes = size;
				return entry;
			} else {
				*max_extent_size =
					max(get_max_extent_size(entry),
					    *max_extent_size);
			}
			continue;
		}

		*offset = tmp;
		*bytes = entry->bytes - align_off;
		return entry;
	}
out:
	return NULL;
}

static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl,
				struct btrfs_free_space *bitmap_info)
{
	struct btrfs_block_group *block_group = ctl->private;
	u64 bytes = bitmap_info->bytes;
	unsigned int rs, re;
	int count = 0;

	if (!block_group || !bytes)
		return count;

	bitmap_for_each_set_region(bitmap_info->bitmap, rs, re, 0,
				   BITS_PER_BITMAP) {
		bytes -= (rs - re) * ctl->unit;
		count++;

		if (!bytes)
			break;
	}

	return count;
}

static void
add_new_bitmap(struct btrfs_free_space_ctl *ctl,
	       struct btrfs_free_space *info, u64 offset)
{
	info->offset = offset_to_bitmap(ctl, offset);
	info->bytes = 0;
	info->bitmap_extents = 0;
	INIT_LIST_HEAD(&info->list);
	link_free_space(ctl, info);
	ctl->total_bitmaps++;

	ctl->op->recalc_thresholds(ctl);
}

static void free_bitmap(struct btrfs_free_space_ctl *ctl,
			struct btrfs_free_space *bitmap_info)
{
	/*
	 * Normally when this is called, the bitmap is completely empty. However,
	 * if we are blowing up the free space cache for one reason or another
	 * via __btrfs_remove_free_space_cache(), then it may not be freed and
	 * we may leave stats on the table.
	 */
	if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) {
		ctl->discardable_extents[BTRFS_STAT_CURR] -=
			bitmap_info->bitmap_extents;
		ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes;

	}
	unlink_free_space(ctl, bitmap_info);
	kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap);
	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
	ctl->total_bitmaps--;
	ctl->op->recalc_thresholds(ctl);
}

static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
			      struct btrfs_free_space *bitmap_info,
			      u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;

	/*
	 * We need to search for bits in this bitmap. We could only cover some
	 * of the extent in this bitmap thanks to how we add space, so we need
	 * to search for as much of it as we can and clear that amount, and then
	 * go searching for the next bit.
	 */
	search_start = *offset;
	search_bytes = ctl->unit;
	search_bytes = min(search_bytes, end - search_start + 1);
	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
			    false);
	if (ret < 0 || search_start != *offset)
		return -EINVAL;

	/* We may have found more bits than what we need */
	search_bytes = min(search_bytes, *bytes);

	/* Cannot clear past the end of the bitmap */
	search_bytes = min(search_bytes, end - search_start + 1);

	bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes);
	*offset += search_bytes;
	*bytes -= search_bytes;

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes)
			free_bitmap(ctl, bitmap_info);

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let the
		 * extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = ctl->unit;
		ret = search_bitmap(ctl, bitmap_info, &search_start,
				    &search_bytes, false);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes)
		free_bitmap(ctl, bitmap_info);

	return 0;
}

static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
			       struct btrfs_free_space *info, u64 offset,
			       u64 bytes, enum btrfs_trim_state trim_state)
{
	u64 bytes_to_set = 0;
	u64 end;

	/*
	 * This is a tradeoff to make bitmap trim state minimal. We mark the
	 * whole bitmap untrimmed if at any point we add untrimmed regions.
	 */
	if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) {
		if (btrfs_free_space_trimmed(info)) {
			ctl->discardable_extents[BTRFS_STAT_CURR] +=
				info->bitmap_extents;
			ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes;
		}
		info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
	}

	end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);

	bytes_to_set = min(end - offset, bytes);

	bitmap_set_bits(ctl, info, offset, bytes_to_set);

	/*
	 * We set some bytes, we have no idea what the max extent size is
	 * anymore.
	 */
	info->max_extent_size = 0;

	return bytes_to_set;

}

static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
		       struct btrfs_free_space *info)
{
	struct btrfs_block_group *block_group = ctl->private;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	bool forced = false;

#ifdef CONFIG_BTRFS_DEBUG
	if (btrfs_should_fragment_free_space(block_group))
		forced = true;
#endif

	/* This is a way to reclaim large regions from the bitmaps. */
	if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD)
		return false;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (!forced && ctl->free_extents < ctl->extents_thresh) {
		/*
		 * If this block group has some small extents we don't want to
		 * use up all of our free slots in the cache with them, we want
		 * to reserve them for larger extents. However, if we have
		 * plenty of cache left then go ahead and add them, no sense in
		 * adding the overhead of a bitmap if we don't have to.
		 */
		if (info->bytes <= fs_info->sectorsize * 8) {
			if (ctl->free_extents * 3 <= ctl->extents_thresh)
				return false;
		} else {
			return false;
		}
	}

	/*
	 * The original block groups from mkfs can be really small, like 8
	 * megabytes, so don't bother with a bitmap for those entries. However
	 * some block groups can be smaller than what a bitmap would cover but
	 * are still large enough that they could overflow the 32k memory
	 * limit, so still allow those block groups to have a bitmap entry.
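	 * (As a purely illustrative example with a 4K sectorsize, a bitmap
	 * spans 32768 * 4K = 128M, so the check below rejects bitmaps for any
	 * block group smaller than 64M, i.e. half a bitmap's coverage.)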
2134 */ 2135 if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length) 2136 return false; 2137 2138 return true; 2139} 2140 2141static const struct btrfs_free_space_op free_space_op = { 2142 .recalc_thresholds = recalculate_thresholds, 2143 .use_bitmap = use_bitmap, 2144}; 2145 2146static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, 2147 struct btrfs_free_space *info) 2148{ 2149 struct btrfs_free_space *bitmap_info; 2150 struct btrfs_block_group *block_group = NULL; 2151 int added = 0; 2152 u64 bytes, offset, bytes_added; 2153 enum btrfs_trim_state trim_state; 2154 int ret; 2155 2156 bytes = info->bytes; 2157 offset = info->offset; 2158 trim_state = info->trim_state; 2159 2160 if (!ctl->op->use_bitmap(ctl, info)) 2161 return 0; 2162 2163 if (ctl->op == &free_space_op) 2164 block_group = ctl->private; 2165again: 2166 /* 2167 * Since we link bitmaps right into the cluster we need to see if we 2168 * have a cluster here, and if so and it has our bitmap we need to add 2169 * the free space to that bitmap. 2170 */ 2171 if (block_group && !list_empty(&block_group->cluster_list)) { 2172 struct btrfs_free_cluster *cluster; 2173 struct rb_node *node; 2174 struct btrfs_free_space *entry; 2175 2176 cluster = list_entry(block_group->cluster_list.next, 2177 struct btrfs_free_cluster, 2178 block_group_list); 2179 spin_lock(&cluster->lock); 2180 node = rb_first(&cluster->root); 2181 if (!node) { 2182 spin_unlock(&cluster->lock); 2183 goto no_cluster_bitmap; 2184 } 2185 2186 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2187 if (!entry->bitmap) { 2188 spin_unlock(&cluster->lock); 2189 goto no_cluster_bitmap; 2190 } 2191 2192 if (entry->offset == offset_to_bitmap(ctl, offset)) { 2193 bytes_added = add_bytes_to_bitmap(ctl, entry, offset, 2194 bytes, trim_state); 2195 bytes -= bytes_added; 2196 offset += bytes_added; 2197 } 2198 spin_unlock(&cluster->lock); 2199 if (!bytes) { 2200 ret = 1; 2201 goto out; 2202 } 2203 } 2204 2205no_cluster_bitmap: 2206 bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2207 1, 0); 2208 if (!bitmap_info) { 2209 ASSERT(added == 0); 2210 goto new_bitmap; 2211 } 2212 2213 bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes, 2214 trim_state); 2215 bytes -= bytes_added; 2216 offset += bytes_added; 2217 added = 0; 2218 2219 if (!bytes) { 2220 ret = 1; 2221 goto out; 2222 } else 2223 goto again; 2224 2225new_bitmap: 2226 if (info && info->bitmap) { 2227 add_new_bitmap(ctl, info, offset); 2228 added = 1; 2229 info = NULL; 2230 goto again; 2231 } else { 2232 spin_unlock(&ctl->tree_lock); 2233 2234 /* no pre-allocated info, allocate a new one */ 2235 if (!info) { 2236 info = kmem_cache_zalloc(btrfs_free_space_cachep, 2237 GFP_NOFS); 2238 if (!info) { 2239 spin_lock(&ctl->tree_lock); 2240 ret = -ENOMEM; 2241 goto out; 2242 } 2243 } 2244 2245 /* allocate the bitmap */ 2246 info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, 2247 GFP_NOFS); 2248 info->trim_state = BTRFS_TRIM_STATE_TRIMMED; 2249 spin_lock(&ctl->tree_lock); 2250 if (!info->bitmap) { 2251 ret = -ENOMEM; 2252 goto out; 2253 } 2254 goto again; 2255 } 2256 2257out: 2258 if (info) { 2259 if (info->bitmap) 2260 kmem_cache_free(btrfs_free_space_bitmap_cachep, 2261 info->bitmap); 2262 kmem_cache_free(btrfs_free_space_cachep, info); 2263 } 2264 2265 return ret; 2266} 2267 2268/* 2269 * Free space merging rules: 2270 * 1) Merge trimmed areas together 2271 * 2) Let untrimmed areas coalesce with trimmed areas 2272 * 3) Always pull neighboring regions from 
bitmaps
 *
 * The above rules are for when we merge free space based on btrfs_trim_state.
 * Rules 2 and 3 are subtle because they are suboptimal, but are done for the
 * same reason: to promote larger extent regions, which makes life easier for
 * find_free_extent(). Rule 2 enables coalescing on the common path, which is
 * free space being returned from btrfs_finish_extent_commit(); without it, a
 * newly freed (and therefore untrimmed) region would never aggregate with the
 * trimmed regions already in the rb_tree. Rule 3 is purely to obtain larger
 * extents and provide find_free_extent() with the largest extents possible,
 * in the hope that they can be reused.
 */
static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
				 struct btrfs_free_space *info, bool update_stat)
{
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *right_info;
	bool merged = false;
	u64 offset = info->offset;
	u64 bytes = info->bytes;
	const bool is_trimmed = btrfs_free_space_trimmed(info);

	/*
	 * First we want to see if there is free space adjacent to the range we
	 * are adding; if there is, remove that struct and extend this one to
	 * cover the entire range.
	 */
	right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else if (!right_info)
		left_info = tree_search_offset(ctl, offset - 1, 0, 0);

	/* See try_merge_free_space() comment. */
	if (right_info && !right_info->bitmap &&
	    (!is_trimmed || btrfs_free_space_trimmed(right_info))) {
		if (update_stat)
			unlink_free_space(ctl, right_info);
		else
			__unlink_free_space(ctl, right_info);
		info->bytes += right_info->bytes;
		kmem_cache_free(btrfs_free_space_cachep, right_info);
		merged = true;
	}

	/* See try_merge_free_space() comment. */
	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset &&
	    (!is_trimmed || btrfs_free_space_trimmed(left_info))) {
		if (update_stat)
			unlink_free_space(ctl, left_info);
		else
			__unlink_free_space(ctl, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kmem_cache_free(btrfs_free_space_cachep, left_info);
		merged = true;
	}

	return merged;
}

static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl,
				     struct btrfs_free_space *info,
				     bool update_stat)
{
	struct btrfs_free_space *bitmap;
	unsigned long i;
	unsigned long j;
	const u64 end = info->offset + info->bytes;
	const u64 bitmap_offset = offset_to_bitmap(ctl, end);
	u64 bytes;

	bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0);
	if (!bitmap)
		return false;

	i = offset_to_bit(bitmap->offset, ctl->unit, end);
	j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i);
	if (j == i)
		return false;
	bytes = (j - i) * ctl->unit;
	info->bytes += bytes;

	/* See try_merge_free_space() comment.
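	 * Stealing from a bitmap that is not yet trimmed taints the merged
	 * extent as untrimmed, matching rule 2 above.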
*/ 2358 if (!btrfs_free_space_trimmed(bitmap)) 2359 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2360 2361 if (update_stat) 2362 bitmap_clear_bits(ctl, bitmap, end, bytes); 2363 else 2364 __bitmap_clear_bits(ctl, bitmap, end, bytes); 2365 2366 if (!bitmap->bytes) 2367 free_bitmap(ctl, bitmap); 2368 2369 return true; 2370} 2371 2372static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, 2373 struct btrfs_free_space *info, 2374 bool update_stat) 2375{ 2376 struct btrfs_free_space *bitmap; 2377 u64 bitmap_offset; 2378 unsigned long i; 2379 unsigned long j; 2380 unsigned long prev_j; 2381 u64 bytes; 2382 2383 bitmap_offset = offset_to_bitmap(ctl, info->offset); 2384 /* If we're on a boundary, try the previous logical bitmap. */ 2385 if (bitmap_offset == info->offset) { 2386 if (info->offset == 0) 2387 return false; 2388 bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); 2389 } 2390 2391 bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); 2392 if (!bitmap) 2393 return false; 2394 2395 i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; 2396 j = 0; 2397 prev_j = (unsigned long)-1; 2398 for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { 2399 if (j > i) 2400 break; 2401 prev_j = j; 2402 } 2403 if (prev_j == i) 2404 return false; 2405 2406 if (prev_j == (unsigned long)-1) 2407 bytes = (i + 1) * ctl->unit; 2408 else 2409 bytes = (i - prev_j) * ctl->unit; 2410 2411 info->offset -= bytes; 2412 info->bytes += bytes; 2413 2414 /* See try_merge_free_space() comment. */ 2415 if (!btrfs_free_space_trimmed(bitmap)) 2416 info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2417 2418 if (update_stat) 2419 bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2420 else 2421 __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); 2422 2423 if (!bitmap->bytes) 2424 free_bitmap(ctl, bitmap); 2425 2426 return true; 2427} 2428 2429/* 2430 * We prefer always to allocate from extent entries, both for clustered and 2431 * non-clustered allocation requests. So when attempting to add a new extent 2432 * entry, try to see if there's adjacent free space in bitmap entries, and if 2433 * there is, migrate that space from the bitmaps to the extent. 2434 * Like this we get better chances of satisfying space allocation requests 2435 * because we attempt to satisfy them based on a single cache entry, and never 2436 * on 2 or more entries - even if the entries represent a contiguous free space 2437 * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry 2438 * ends). 2439 */ 2440static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, 2441 struct btrfs_free_space *info, 2442 bool update_stat) 2443{ 2444 /* 2445 * Only work with disconnected entries, as we can change their offset, 2446 * and must be extent entries. 
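	 * (An entry that was still linked in the offset-ordered rbtree could
	 * not have its ->offset changed without re-inserting it, hence the
	 * RB_EMPTY_NODE() assertion below.)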
2447 */ 2448 ASSERT(!info->bitmap); 2449 ASSERT(RB_EMPTY_NODE(&info->offset_index)); 2450 2451 if (ctl->total_bitmaps > 0) { 2452 bool stole_end; 2453 bool stole_front = false; 2454 2455 stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); 2456 if (ctl->total_bitmaps > 0) 2457 stole_front = steal_from_bitmap_to_front(ctl, info, 2458 update_stat); 2459 2460 if (stole_end || stole_front) 2461 try_merge_free_space(ctl, info, update_stat); 2462 } 2463} 2464 2465int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, 2466 struct btrfs_free_space_ctl *ctl, 2467 u64 offset, u64 bytes, 2468 enum btrfs_trim_state trim_state) 2469{ 2470 struct btrfs_block_group *block_group = ctl->private; 2471 struct btrfs_free_space *info; 2472 int ret = 0; 2473 u64 filter_bytes = bytes; 2474 2475 info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); 2476 if (!info) 2477 return -ENOMEM; 2478 2479 info->offset = offset; 2480 info->bytes = bytes; 2481 info->trim_state = trim_state; 2482 RB_CLEAR_NODE(&info->offset_index); 2483 2484 spin_lock(&ctl->tree_lock); 2485 2486 if (try_merge_free_space(ctl, info, true)) 2487 goto link; 2488 2489 /* 2490 * There was no extent directly to the left or right of this new 2491 * extent then we know we're going to have to allocate a new extent, so 2492 * before we do that see if we need to drop this into a bitmap 2493 */ 2494 ret = insert_into_bitmap(ctl, info); 2495 if (ret < 0) { 2496 goto out; 2497 } else if (ret) { 2498 ret = 0; 2499 goto out; 2500 } 2501link: 2502 /* 2503 * Only steal free space from adjacent bitmaps if we're sure we're not 2504 * going to add the new free space to existing bitmap entries - because 2505 * that would mean unnecessary work that would be reverted. Therefore 2506 * attempt to steal space from bitmaps if we're adding an extent entry. 2507 */ 2508 steal_from_bitmap(ctl, info, true); 2509 2510 filter_bytes = max(filter_bytes, info->bytes); 2511 2512 ret = link_free_space(ctl, info); 2513 if (ret) 2514 kmem_cache_free(btrfs_free_space_cachep, info); 2515out: 2516 btrfs_discard_update_discardable(block_group, ctl); 2517 spin_unlock(&ctl->tree_lock); 2518 2519 if (ret) { 2520 btrfs_crit(fs_info, "unable to add free space :%d", ret); 2521 ASSERT(ret != -EEXIST); 2522 } 2523 2524 if (trim_state != BTRFS_TRIM_STATE_TRIMMED) { 2525 btrfs_discard_check_filter(block_group, filter_bytes); 2526 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group); 2527 } 2528 2529 return ret; 2530} 2531 2532int btrfs_add_free_space(struct btrfs_block_group *block_group, 2533 u64 bytenr, u64 size) 2534{ 2535 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2536 2537 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC)) 2538 trim_state = BTRFS_TRIM_STATE_TRIMMED; 2539 2540 return __btrfs_add_free_space(block_group->fs_info, 2541 block_group->free_space_ctl, 2542 bytenr, size, trim_state); 2543} 2544 2545/* 2546 * This is a subtle distinction because when adding free space back in general, 2547 * we want it to be added as untrimmed for async. But in the case where we add 2548 * it on loading of a block group, we want to consider it trimmed. 
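 * (Presumably this avoids queueing freshly loaded block groups for another
 * discard pass when async discard is enabled; note that, unlike
 * btrfs_add_free_space() above, DISCARD_ASYNC alone is enough to mark the
 * space trimmed here.)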
2549 */ 2550int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group, 2551 u64 bytenr, u64 size) 2552{ 2553 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2554 2555 if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) || 2556 btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC)) 2557 trim_state = BTRFS_TRIM_STATE_TRIMMED; 2558 2559 return __btrfs_add_free_space(block_group->fs_info, 2560 block_group->free_space_ctl, 2561 bytenr, size, trim_state); 2562} 2563 2564int btrfs_remove_free_space(struct btrfs_block_group *block_group, 2565 u64 offset, u64 bytes) 2566{ 2567 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2568 struct btrfs_free_space *info; 2569 int ret; 2570 bool re_search = false; 2571 2572 spin_lock(&ctl->tree_lock); 2573 2574again: 2575 ret = 0; 2576 if (!bytes) 2577 goto out_lock; 2578 2579 info = tree_search_offset(ctl, offset, 0, 0); 2580 if (!info) { 2581 /* 2582 * oops didn't find an extent that matched the space we wanted 2583 * to remove, look for a bitmap instead 2584 */ 2585 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 2586 1, 0); 2587 if (!info) { 2588 /* 2589 * If we found a partial bit of our free space in a 2590 * bitmap but then couldn't find the other part this may 2591 * be a problem, so WARN about it. 2592 */ 2593 WARN_ON(re_search); 2594 goto out_lock; 2595 } 2596 } 2597 2598 re_search = false; 2599 if (!info->bitmap) { 2600 unlink_free_space(ctl, info); 2601 if (offset == info->offset) { 2602 u64 to_free = min(bytes, info->bytes); 2603 2604 info->bytes -= to_free; 2605 info->offset += to_free; 2606 if (info->bytes) { 2607 ret = link_free_space(ctl, info); 2608 WARN_ON(ret); 2609 } else { 2610 kmem_cache_free(btrfs_free_space_cachep, info); 2611 } 2612 2613 offset += to_free; 2614 bytes -= to_free; 2615 goto again; 2616 } else { 2617 u64 old_end = info->bytes + info->offset; 2618 2619 info->bytes = offset - info->offset; 2620 ret = link_free_space(ctl, info); 2621 WARN_ON(ret); 2622 if (ret) 2623 goto out_lock; 2624 2625 /* Not enough bytes in this entry to satisfy us */ 2626 if (old_end < offset + bytes) { 2627 bytes -= old_end - offset; 2628 offset = old_end; 2629 goto again; 2630 } else if (old_end == offset + bytes) { 2631 /* all done */ 2632 goto out_lock; 2633 } 2634 spin_unlock(&ctl->tree_lock); 2635 2636 ret = __btrfs_add_free_space(block_group->fs_info, ctl, 2637 offset + bytes, 2638 old_end - (offset + bytes), 2639 info->trim_state); 2640 WARN_ON(ret); 2641 goto out; 2642 } 2643 } 2644 2645 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 2646 if (ret == -EAGAIN) { 2647 re_search = true; 2648 goto again; 2649 } 2650out_lock: 2651 btrfs_discard_update_discardable(block_group, ctl); 2652 spin_unlock(&ctl->tree_lock); 2653out: 2654 return ret; 2655} 2656 2657void btrfs_dump_free_space(struct btrfs_block_group *block_group, 2658 u64 bytes) 2659{ 2660 struct btrfs_fs_info *fs_info = block_group->fs_info; 2661 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2662 struct btrfs_free_space *info; 2663 struct rb_node *n; 2664 int count = 0; 2665 2666 spin_lock(&ctl->tree_lock); 2667 for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) { 2668 info = rb_entry(n, struct btrfs_free_space, offset_index); 2669 if (info->bytes >= bytes && !block_group->ro) 2670 count++; 2671 btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s", 2672 info->offset, info->bytes, 2673 (info->bitmap) ? 
"yes" : "no"); 2674 } 2675 spin_unlock(&ctl->tree_lock); 2676 btrfs_info(fs_info, "block group has cluster?: %s", 2677 list_empty(&block_group->cluster_list) ? "no" : "yes"); 2678 btrfs_info(fs_info, 2679 "%d blocks of free space at or bigger than bytes is", count); 2680} 2681 2682void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group) 2683{ 2684 struct btrfs_fs_info *fs_info = block_group->fs_info; 2685 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2686 2687 spin_lock_init(&ctl->tree_lock); 2688 ctl->unit = fs_info->sectorsize; 2689 ctl->start = block_group->start; 2690 ctl->private = block_group; 2691 ctl->op = &free_space_op; 2692 INIT_LIST_HEAD(&ctl->trimming_ranges); 2693 mutex_init(&ctl->cache_writeout_mutex); 2694 2695 /* 2696 * we only want to have 32k of ram per block group for keeping 2697 * track of free space, and if we pass 1/2 of that we want to 2698 * start converting things over to using bitmaps 2699 */ 2700 ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); 2701} 2702 2703/* 2704 * for a given cluster, put all of its extents back into the free 2705 * space cache. If the block group passed doesn't match the block group 2706 * pointed to by the cluster, someone else raced in and freed the 2707 * cluster already. In that case, we just return without changing anything 2708 */ 2709static void __btrfs_return_cluster_to_free_space( 2710 struct btrfs_block_group *block_group, 2711 struct btrfs_free_cluster *cluster) 2712{ 2713 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2714 struct btrfs_free_space *entry; 2715 struct rb_node *node; 2716 2717 spin_lock(&cluster->lock); 2718 if (cluster->block_group != block_group) { 2719 spin_unlock(&cluster->lock); 2720 return; 2721 } 2722 2723 cluster->block_group = NULL; 2724 cluster->window_start = 0; 2725 list_del_init(&cluster->block_group_list); 2726 2727 node = rb_first(&cluster->root); 2728 while (node) { 2729 bool bitmap; 2730 2731 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2732 node = rb_next(&entry->offset_index); 2733 rb_erase(&entry->offset_index, &cluster->root); 2734 RB_CLEAR_NODE(&entry->offset_index); 2735 2736 bitmap = (entry->bitmap != NULL); 2737 if (!bitmap) { 2738 /* Merging treats extents as if they were new */ 2739 if (!btrfs_free_space_trimmed(entry)) { 2740 ctl->discardable_extents[BTRFS_STAT_CURR]--; 2741 ctl->discardable_bytes[BTRFS_STAT_CURR] -= 2742 entry->bytes; 2743 } 2744 2745 try_merge_free_space(ctl, entry, false); 2746 steal_from_bitmap(ctl, entry, false); 2747 2748 /* As we insert directly, update these statistics */ 2749 if (!btrfs_free_space_trimmed(entry)) { 2750 ctl->discardable_extents[BTRFS_STAT_CURR]++; 2751 ctl->discardable_bytes[BTRFS_STAT_CURR] += 2752 entry->bytes; 2753 } 2754 } 2755 tree_insert_offset(&ctl->free_space_offset, 2756 entry->offset, &entry->offset_index, bitmap); 2757 } 2758 cluster->root = RB_ROOT; 2759 spin_unlock(&cluster->lock); 2760 btrfs_put_block_group(block_group); 2761} 2762 2763static void __btrfs_remove_free_space_cache_locked( 2764 struct btrfs_free_space_ctl *ctl) 2765{ 2766 struct btrfs_free_space *info; 2767 struct rb_node *node; 2768 2769 while ((node = rb_last(&ctl->free_space_offset)) != NULL) { 2770 info = rb_entry(node, struct btrfs_free_space, offset_index); 2771 if (!info->bitmap) { 2772 unlink_free_space(ctl, info); 2773 kmem_cache_free(btrfs_free_space_cachep, info); 2774 } else { 2775 free_bitmap(ctl, info); 2776 } 2777 2778 cond_resched_lock(&ctl->tree_lock); 2779 } 
2780} 2781 2782void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) 2783{ 2784 spin_lock(&ctl->tree_lock); 2785 __btrfs_remove_free_space_cache_locked(ctl); 2786 if (ctl->private) 2787 btrfs_discard_update_discardable(ctl->private, ctl); 2788 spin_unlock(&ctl->tree_lock); 2789} 2790 2791void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group) 2792{ 2793 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2794 struct btrfs_free_cluster *cluster; 2795 struct list_head *head; 2796 2797 spin_lock(&ctl->tree_lock); 2798 while ((head = block_group->cluster_list.next) != 2799 &block_group->cluster_list) { 2800 cluster = list_entry(head, struct btrfs_free_cluster, 2801 block_group_list); 2802 2803 WARN_ON(cluster->block_group != block_group); 2804 __btrfs_return_cluster_to_free_space(block_group, cluster); 2805 2806 cond_resched_lock(&ctl->tree_lock); 2807 } 2808 __btrfs_remove_free_space_cache_locked(ctl); 2809 btrfs_discard_update_discardable(block_group, ctl); 2810 spin_unlock(&ctl->tree_lock); 2811 2812} 2813 2814/** 2815 * btrfs_is_free_space_trimmed - see if everything is trimmed 2816 * @block_group: block_group of interest 2817 * 2818 * Walk @block_group's free space rb_tree to determine if everything is trimmed. 2819 */ 2820bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group) 2821{ 2822 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2823 struct btrfs_free_space *info; 2824 struct rb_node *node; 2825 bool ret = true; 2826 2827 spin_lock(&ctl->tree_lock); 2828 node = rb_first(&ctl->free_space_offset); 2829 2830 while (node) { 2831 info = rb_entry(node, struct btrfs_free_space, offset_index); 2832 2833 if (!btrfs_free_space_trimmed(info)) { 2834 ret = false; 2835 break; 2836 } 2837 2838 node = rb_next(node); 2839 } 2840 2841 spin_unlock(&ctl->tree_lock); 2842 return ret; 2843} 2844 2845u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group, 2846 u64 offset, u64 bytes, u64 empty_size, 2847 u64 *max_extent_size) 2848{ 2849 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2850 struct btrfs_discard_ctl *discard_ctl = 2851 &block_group->fs_info->discard_ctl; 2852 struct btrfs_free_space *entry = NULL; 2853 u64 bytes_search = bytes + empty_size; 2854 u64 ret = 0; 2855 u64 align_gap = 0; 2856 u64 align_gap_len = 0; 2857 enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 2858 2859 spin_lock(&ctl->tree_lock); 2860 entry = find_free_space(ctl, &offset, &bytes_search, 2861 block_group->full_stripe_len, max_extent_size); 2862 if (!entry) 2863 goto out; 2864 2865 ret = offset; 2866 if (entry->bitmap) { 2867 bitmap_clear_bits(ctl, entry, offset, bytes); 2868 2869 if (!btrfs_free_space_trimmed(entry)) 2870 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 2871 2872 if (!entry->bytes) 2873 free_bitmap(ctl, entry); 2874 } else { 2875 unlink_free_space(ctl, entry); 2876 align_gap_len = offset - entry->offset; 2877 align_gap = entry->offset; 2878 align_gap_trim_state = entry->trim_state; 2879 2880 if (!btrfs_free_space_trimmed(entry)) 2881 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 2882 2883 entry->offset = offset + bytes; 2884 WARN_ON(entry->bytes < bytes + align_gap_len); 2885 2886 entry->bytes -= bytes + align_gap_len; 2887 if (!entry->bytes) 2888 kmem_cache_free(btrfs_free_space_cachep, entry); 2889 else 2890 link_free_space(ctl, entry); 2891 } 2892out: 2893 btrfs_discard_update_discardable(block_group, ctl); 2894 spin_unlock(&ctl->tree_lock); 2895 2896 if 
(align_gap_len) 2897 __btrfs_add_free_space(block_group->fs_info, ctl, 2898 align_gap, align_gap_len, 2899 align_gap_trim_state); 2900 return ret; 2901} 2902 2903/* 2904 * given a cluster, put all of its extents back into the free space 2905 * cache. If a block group is passed, this function will only free 2906 * a cluster that belongs to the passed block group. 2907 * 2908 * Otherwise, it'll get a reference on the block group pointed to by the 2909 * cluster and remove the cluster from it. 2910 */ 2911void btrfs_return_cluster_to_free_space( 2912 struct btrfs_block_group *block_group, 2913 struct btrfs_free_cluster *cluster) 2914{ 2915 struct btrfs_free_space_ctl *ctl; 2916 2917 /* first, get a safe pointer to the block group */ 2918 spin_lock(&cluster->lock); 2919 if (!block_group) { 2920 block_group = cluster->block_group; 2921 if (!block_group) { 2922 spin_unlock(&cluster->lock); 2923 return; 2924 } 2925 } else if (cluster->block_group != block_group) { 2926 /* someone else has already freed it don't redo their work */ 2927 spin_unlock(&cluster->lock); 2928 return; 2929 } 2930 btrfs_get_block_group(block_group); 2931 spin_unlock(&cluster->lock); 2932 2933 ctl = block_group->free_space_ctl; 2934 2935 /* now return any extents the cluster had on it */ 2936 spin_lock(&ctl->tree_lock); 2937 __btrfs_return_cluster_to_free_space(block_group, cluster); 2938 spin_unlock(&ctl->tree_lock); 2939 2940 btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group); 2941 2942 /* finally drop our ref */ 2943 btrfs_put_block_group(block_group); 2944} 2945 2946static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group, 2947 struct btrfs_free_cluster *cluster, 2948 struct btrfs_free_space *entry, 2949 u64 bytes, u64 min_start, 2950 u64 *max_extent_size) 2951{ 2952 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2953 int err; 2954 u64 search_start = cluster->window_start; 2955 u64 search_bytes = bytes; 2956 u64 ret = 0; 2957 2958 search_start = min_start; 2959 search_bytes = bytes; 2960 2961 err = search_bitmap(ctl, entry, &search_start, &search_bytes, true); 2962 if (err) { 2963 *max_extent_size = max(get_max_extent_size(entry), 2964 *max_extent_size); 2965 return 0; 2966 } 2967 2968 ret = search_start; 2969 __bitmap_clear_bits(ctl, entry, ret, bytes); 2970 2971 return ret; 2972} 2973 2974/* 2975 * given a cluster, try to allocate 'bytes' from it, returns 0 2976 * if it couldn't find anything suitably large, or a logical disk offset 2977 * if things worked out 2978 */ 2979u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group, 2980 struct btrfs_free_cluster *cluster, u64 bytes, 2981 u64 min_start, u64 *max_extent_size) 2982{ 2983 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2984 struct btrfs_discard_ctl *discard_ctl = 2985 &block_group->fs_info->discard_ctl; 2986 struct btrfs_free_space *entry = NULL; 2987 struct rb_node *node; 2988 u64 ret = 0; 2989 2990 spin_lock(&cluster->lock); 2991 if (bytes > cluster->max_size) 2992 goto out; 2993 2994 if (cluster->block_group != block_group) 2995 goto out; 2996 2997 node = rb_first(&cluster->root); 2998 if (!node) 2999 goto out; 3000 3001 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3002 while (1) { 3003 if (entry->bytes < bytes) 3004 *max_extent_size = max(get_max_extent_size(entry), 3005 *max_extent_size); 3006 3007 if (entry->bytes < bytes || 3008 (!entry->bitmap && entry->offset < min_start)) { 3009 node = rb_next(&entry->offset_index); 3010 if (!node) 3011 break; 3012 
entry = rb_entry(node, struct btrfs_free_space, 3013 offset_index); 3014 continue; 3015 } 3016 3017 if (entry->bitmap) { 3018 ret = btrfs_alloc_from_bitmap(block_group, 3019 cluster, entry, bytes, 3020 cluster->window_start, 3021 max_extent_size); 3022 if (ret == 0) { 3023 node = rb_next(&entry->offset_index); 3024 if (!node) 3025 break; 3026 entry = rb_entry(node, struct btrfs_free_space, 3027 offset_index); 3028 continue; 3029 } 3030 cluster->window_start += bytes; 3031 } else { 3032 ret = entry->offset; 3033 3034 entry->offset += bytes; 3035 entry->bytes -= bytes; 3036 } 3037 3038 break; 3039 } 3040out: 3041 spin_unlock(&cluster->lock); 3042 3043 if (!ret) 3044 return 0; 3045 3046 spin_lock(&ctl->tree_lock); 3047 3048 if (!btrfs_free_space_trimmed(entry)) 3049 atomic64_add(bytes, &discard_ctl->discard_bytes_saved); 3050 3051 ctl->free_space -= bytes; 3052 if (!entry->bitmap && !btrfs_free_space_trimmed(entry)) 3053 ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; 3054 3055 spin_lock(&cluster->lock); 3056 if (entry->bytes == 0) { 3057 rb_erase(&entry->offset_index, &cluster->root); 3058 ctl->free_extents--; 3059 if (entry->bitmap) { 3060 kmem_cache_free(btrfs_free_space_bitmap_cachep, 3061 entry->bitmap); 3062 ctl->total_bitmaps--; 3063 ctl->op->recalc_thresholds(ctl); 3064 } else if (!btrfs_free_space_trimmed(entry)) { 3065 ctl->discardable_extents[BTRFS_STAT_CURR]--; 3066 } 3067 kmem_cache_free(btrfs_free_space_cachep, entry); 3068 } 3069 3070 spin_unlock(&cluster->lock); 3071 spin_unlock(&ctl->tree_lock); 3072 3073 return ret; 3074} 3075 3076static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group, 3077 struct btrfs_free_space *entry, 3078 struct btrfs_free_cluster *cluster, 3079 u64 offset, u64 bytes, 3080 u64 cont1_bytes, u64 min_bytes) 3081{ 3082 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3083 unsigned long next_zero; 3084 unsigned long i; 3085 unsigned long want_bits; 3086 unsigned long min_bits; 3087 unsigned long found_bits; 3088 unsigned long max_bits = 0; 3089 unsigned long start = 0; 3090 unsigned long total_found = 0; 3091 int ret; 3092 3093 i = offset_to_bit(entry->offset, ctl->unit, 3094 max_t(u64, offset, entry->offset)); 3095 want_bits = bytes_to_bits(bytes, ctl->unit); 3096 min_bits = bytes_to_bits(min_bytes, ctl->unit); 3097 3098 /* 3099 * Don't bother looking for a cluster in this bitmap if it's heavily 3100 * fragmented. 
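	 * (max_extent_size is a cached value left behind by an earlier failed
	 * search, including the loop below; it is cleared whenever new bits
	 * are set, so when it is non-zero it can only overestimate the current
	 * largest run and bailing out on it is safe.)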
3101 */ 3102 if (entry->max_extent_size && 3103 entry->max_extent_size < cont1_bytes) 3104 return -ENOSPC; 3105again: 3106 found_bits = 0; 3107 for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) { 3108 next_zero = find_next_zero_bit(entry->bitmap, 3109 BITS_PER_BITMAP, i); 3110 if (next_zero - i >= min_bits) { 3111 found_bits = next_zero - i; 3112 if (found_bits > max_bits) 3113 max_bits = found_bits; 3114 break; 3115 } 3116 if (next_zero - i > max_bits) 3117 max_bits = next_zero - i; 3118 i = next_zero; 3119 } 3120 3121 if (!found_bits) { 3122 entry->max_extent_size = (u64)max_bits * ctl->unit; 3123 return -ENOSPC; 3124 } 3125 3126 if (!total_found) { 3127 start = i; 3128 cluster->max_size = 0; 3129 } 3130 3131 total_found += found_bits; 3132 3133 if (cluster->max_size < found_bits * ctl->unit) 3134 cluster->max_size = found_bits * ctl->unit; 3135 3136 if (total_found < want_bits || cluster->max_size < cont1_bytes) { 3137 i = next_zero + 1; 3138 goto again; 3139 } 3140 3141 cluster->window_start = start * ctl->unit + entry->offset; 3142 rb_erase(&entry->offset_index, &ctl->free_space_offset); 3143 ret = tree_insert_offset(&cluster->root, entry->offset, 3144 &entry->offset_index, 1); 3145 ASSERT(!ret); /* -EEXIST; Logic error */ 3146 3147 trace_btrfs_setup_cluster(block_group, cluster, 3148 total_found * ctl->unit, 1); 3149 return 0; 3150} 3151 3152/* 3153 * This searches the block group for just extents to fill the cluster with. 3154 * Try to find a cluster with at least bytes total bytes, at least one 3155 * extent of cont1_bytes, and other clusters of at least min_bytes. 3156 */ 3157static noinline int 3158setup_cluster_no_bitmap(struct btrfs_block_group *block_group, 3159 struct btrfs_free_cluster *cluster, 3160 struct list_head *bitmaps, u64 offset, u64 bytes, 3161 u64 cont1_bytes, u64 min_bytes) 3162{ 3163 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3164 struct btrfs_free_space *first = NULL; 3165 struct btrfs_free_space *entry = NULL; 3166 struct btrfs_free_space *last; 3167 struct rb_node *node; 3168 u64 window_free; 3169 u64 max_extent; 3170 u64 total_size = 0; 3171 3172 entry = tree_search_offset(ctl, offset, 0, 1); 3173 if (!entry) 3174 return -ENOSPC; 3175 3176 /* 3177 * We don't want bitmaps, so just move along until we find a normal 3178 * extent entry. 
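	 * Any bitmap entries we walk past are stashed on the @bitmaps list so
	 * that setup_cluster_bitmap() can fall back to them if no window of
	 * plain extent entries works out.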
3179 */ 3180 while (entry->bitmap || entry->bytes < min_bytes) { 3181 if (entry->bitmap && list_empty(&entry->list)) 3182 list_add_tail(&entry->list, bitmaps); 3183 node = rb_next(&entry->offset_index); 3184 if (!node) 3185 return -ENOSPC; 3186 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3187 } 3188 3189 window_free = entry->bytes; 3190 max_extent = entry->bytes; 3191 first = entry; 3192 last = entry; 3193 3194 for (node = rb_next(&entry->offset_index); node; 3195 node = rb_next(&entry->offset_index)) { 3196 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3197 3198 if (entry->bitmap) { 3199 if (list_empty(&entry->list)) 3200 list_add_tail(&entry->list, bitmaps); 3201 continue; 3202 } 3203 3204 if (entry->bytes < min_bytes) 3205 continue; 3206 3207 last = entry; 3208 window_free += entry->bytes; 3209 if (entry->bytes > max_extent) 3210 max_extent = entry->bytes; 3211 } 3212 3213 if (window_free < bytes || max_extent < cont1_bytes) 3214 return -ENOSPC; 3215 3216 cluster->window_start = first->offset; 3217 3218 node = &first->offset_index; 3219 3220 /* 3221 * now we've found our entries, pull them out of the free space 3222 * cache and put them into the cluster rbtree 3223 */ 3224 do { 3225 int ret; 3226 3227 entry = rb_entry(node, struct btrfs_free_space, offset_index); 3228 node = rb_next(&entry->offset_index); 3229 if (entry->bitmap || entry->bytes < min_bytes) 3230 continue; 3231 3232 rb_erase(&entry->offset_index, &ctl->free_space_offset); 3233 ret = tree_insert_offset(&cluster->root, entry->offset, 3234 &entry->offset_index, 0); 3235 total_size += entry->bytes; 3236 ASSERT(!ret); /* -EEXIST; Logic error */ 3237 } while (node && entry != last); 3238 3239 cluster->max_size = max_extent; 3240 trace_btrfs_setup_cluster(block_group, cluster, total_size, 0); 3241 return 0; 3242} 3243 3244/* 3245 * This specifically looks for bitmaps that may work in the cluster, we assume 3246 * that we have already failed to find extents that will work. 3247 */ 3248static noinline int 3249setup_cluster_bitmap(struct btrfs_block_group *block_group, 3250 struct btrfs_free_cluster *cluster, 3251 struct list_head *bitmaps, u64 offset, u64 bytes, 3252 u64 cont1_bytes, u64 min_bytes) 3253{ 3254 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3255 struct btrfs_free_space *entry = NULL; 3256 int ret = -ENOSPC; 3257 u64 bitmap_offset = offset_to_bitmap(ctl, offset); 3258 3259 if (ctl->total_bitmaps == 0) 3260 return -ENOSPC; 3261 3262 /* 3263 * The bitmap that covers offset won't be in the list unless offset 3264 * is just its start offset. 3265 */ 3266 if (!list_empty(bitmaps)) 3267 entry = list_first_entry(bitmaps, struct btrfs_free_space, list); 3268 3269 if (!entry || entry->offset != bitmap_offset) { 3270 entry = tree_search_offset(ctl, bitmap_offset, 1, 0); 3271 if (entry && list_empty(&entry->list)) 3272 list_add(&entry->list, bitmaps); 3273 } 3274 3275 list_for_each_entry(entry, bitmaps, list) { 3276 if (entry->bytes < bytes) 3277 continue; 3278 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, 3279 bytes, cont1_bytes, min_bytes); 3280 if (!ret) 3281 return 0; 3282 } 3283 3284 /* 3285 * The bitmaps list has all the bitmaps that record free space 3286 * starting after offset, so no more search is required. 3287 */ 3288 return -ENOSPC; 3289} 3290 3291/* 3292 * here we try to find a cluster of blocks in a block group. The goal 3293 * is to find at least bytes+empty_size. 3294 * We might not find them all in one contiguous area. 
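 * As a purely illustrative example for a data block group: with bytes = 1M
 * and empty_size = 3M, the code below picks cont1_bytes = max(1M, 4M >> 2)
 * = 1M and min_bytes = sectorsize, so the cluster window must total 4M of
 * free space and contain at least one 1M contiguous extent.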
3295 * 3296 * returns zero and sets up cluster if things worked out, otherwise 3297 * it returns -enospc 3298 */ 3299int btrfs_find_space_cluster(struct btrfs_block_group *block_group, 3300 struct btrfs_free_cluster *cluster, 3301 u64 offset, u64 bytes, u64 empty_size) 3302{ 3303 struct btrfs_fs_info *fs_info = block_group->fs_info; 3304 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3305 struct btrfs_free_space *entry, *tmp; 3306 LIST_HEAD(bitmaps); 3307 u64 min_bytes; 3308 u64 cont1_bytes; 3309 int ret; 3310 3311 /* 3312 * Choose the minimum extent size we'll require for this 3313 * cluster. For SSD_SPREAD, don't allow any fragmentation. 3314 * For metadata, allow allocates with smaller extents. For 3315 * data, keep it dense. 3316 */ 3317 if (btrfs_test_opt(fs_info, SSD_SPREAD)) { 3318 cont1_bytes = min_bytes = bytes + empty_size; 3319 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 3320 cont1_bytes = bytes; 3321 min_bytes = fs_info->sectorsize; 3322 } else { 3323 cont1_bytes = max(bytes, (bytes + empty_size) >> 2); 3324 min_bytes = fs_info->sectorsize; 3325 } 3326 3327 spin_lock(&ctl->tree_lock); 3328 3329 /* 3330 * If we know we don't have enough space to make a cluster don't even 3331 * bother doing all the work to try and find one. 3332 */ 3333 if (ctl->free_space < bytes) { 3334 spin_unlock(&ctl->tree_lock); 3335 return -ENOSPC; 3336 } 3337 3338 spin_lock(&cluster->lock); 3339 3340 /* someone already found a cluster, hooray */ 3341 if (cluster->block_group) { 3342 ret = 0; 3343 goto out; 3344 } 3345 3346 trace_btrfs_find_cluster(block_group, offset, bytes, empty_size, 3347 min_bytes); 3348 3349 ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset, 3350 bytes + empty_size, 3351 cont1_bytes, min_bytes); 3352 if (ret) 3353 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps, 3354 offset, bytes + empty_size, 3355 cont1_bytes, min_bytes); 3356 3357 /* Clear our temporary list */ 3358 list_for_each_entry_safe(entry, tmp, &bitmaps, list) 3359 list_del_init(&entry->list); 3360 3361 if (!ret) { 3362 btrfs_get_block_group(block_group); 3363 list_add_tail(&cluster->block_group_list, 3364 &block_group->cluster_list); 3365 cluster->block_group = block_group; 3366 } else { 3367 trace_btrfs_failed_cluster_setup(block_group); 3368 } 3369out: 3370 spin_unlock(&cluster->lock); 3371 spin_unlock(&ctl->tree_lock); 3372 3373 return ret; 3374} 3375 3376/* 3377 * simple code to zero out a cluster 3378 */ 3379void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 3380{ 3381 spin_lock_init(&cluster->lock); 3382 spin_lock_init(&cluster->refill_lock); 3383 cluster->root = RB_ROOT; 3384 cluster->max_size = 0; 3385 cluster->fragmented = false; 3386 INIT_LIST_HEAD(&cluster->block_group_list); 3387 cluster->block_group = NULL; 3388} 3389 3390static int do_trimming(struct btrfs_block_group *block_group, 3391 u64 *total_trimmed, u64 start, u64 bytes, 3392 u64 reserved_start, u64 reserved_bytes, 3393 enum btrfs_trim_state reserved_trim_state, 3394 struct btrfs_trim_range *trim_entry) 3395{ 3396 struct btrfs_space_info *space_info = block_group->space_info; 3397 struct btrfs_fs_info *fs_info = block_group->fs_info; 3398 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3399 int ret; 3400 int update = 0; 3401 const u64 end = start + bytes; 3402 const u64 reserved_end = reserved_start + reserved_bytes; 3403 enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3404 u64 trimmed = 0; 3405 3406 spin_lock(&space_info->lock); 3407 
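	/* space_info->lock nests outside block_group->lock, as in the update below */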
spin_lock(&block_group->lock); 3408 if (!block_group->ro) { 3409 block_group->reserved += reserved_bytes; 3410 space_info->bytes_reserved += reserved_bytes; 3411 update = 1; 3412 } 3413 spin_unlock(&block_group->lock); 3414 spin_unlock(&space_info->lock); 3415 3416 ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); 3417 if (!ret) { 3418 *total_trimmed += trimmed; 3419 trim_state = BTRFS_TRIM_STATE_TRIMMED; 3420 } 3421 3422 mutex_lock(&ctl->cache_writeout_mutex); 3423 if (reserved_start < start) 3424 __btrfs_add_free_space(fs_info, ctl, reserved_start, 3425 start - reserved_start, 3426 reserved_trim_state); 3427 if (start + bytes < reserved_start + reserved_bytes) 3428 __btrfs_add_free_space(fs_info, ctl, end, reserved_end - end, 3429 reserved_trim_state); 3430 __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state); 3431 list_del(&trim_entry->list); 3432 mutex_unlock(&ctl->cache_writeout_mutex); 3433 3434 if (update) { 3435 spin_lock(&space_info->lock); 3436 spin_lock(&block_group->lock); 3437 if (block_group->ro) 3438 space_info->bytes_readonly += reserved_bytes; 3439 block_group->reserved -= reserved_bytes; 3440 space_info->bytes_reserved -= reserved_bytes; 3441 spin_unlock(&block_group->lock); 3442 spin_unlock(&space_info->lock); 3443 } 3444 3445 return ret; 3446} 3447 3448/* 3449 * If @async is set, then we will trim 1 region and return. 3450 */ 3451static int trim_no_bitmap(struct btrfs_block_group *block_group, 3452 u64 *total_trimmed, u64 start, u64 end, u64 minlen, 3453 bool async) 3454{ 3455 struct btrfs_discard_ctl *discard_ctl = 3456 &block_group->fs_info->discard_ctl; 3457 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3458 struct btrfs_free_space *entry; 3459 struct rb_node *node; 3460 int ret = 0; 3461 u64 extent_start; 3462 u64 extent_bytes; 3463 enum btrfs_trim_state extent_trim_state; 3464 u64 bytes; 3465 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); 3466 3467 while (start < end) { 3468 struct btrfs_trim_range trim_entry; 3469 3470 mutex_lock(&ctl->cache_writeout_mutex); 3471 spin_lock(&ctl->tree_lock); 3472 3473 if (ctl->free_space < minlen) 3474 goto out_unlock; 3475 3476 entry = tree_search_offset(ctl, start, 0, 1); 3477 if (!entry) 3478 goto out_unlock; 3479 3480 /* Skip bitmaps and if async, already trimmed entries */ 3481 while (entry->bitmap || 3482 (async && btrfs_free_space_trimmed(entry))) { 3483 node = rb_next(&entry->offset_index); 3484 if (!node) 3485 goto out_unlock; 3486 entry = rb_entry(node, struct btrfs_free_space, 3487 offset_index); 3488 } 3489 3490 if (entry->offset >= end) 3491 goto out_unlock; 3492 3493 extent_start = entry->offset; 3494 extent_bytes = entry->bytes; 3495 extent_trim_state = entry->trim_state; 3496 if (async) { 3497 start = entry->offset; 3498 bytes = entry->bytes; 3499 if (bytes < minlen) { 3500 spin_unlock(&ctl->tree_lock); 3501 mutex_unlock(&ctl->cache_writeout_mutex); 3502 goto next; 3503 } 3504 unlink_free_space(ctl, entry); 3505 /* 3506 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X. 3507 * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim 3508 * X when we come back around. So trim it now. 
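			 * (Illustrative numbers, assuming a 64M max_discard_size
			 * and the usual 32K BTRFS_ASYNC_DISCARD_MIN_FILTER: a
			 * 64M+16K extent is discarded in one go here, while a
			 * 64M+48K extent is split into 64M now plus 48K relinked
			 * for a later pass.)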
3509 */ 3510 if (max_discard_size && 3511 bytes >= (max_discard_size + 3512 BTRFS_ASYNC_DISCARD_MIN_FILTER)) { 3513 bytes = max_discard_size; 3514 extent_bytes = max_discard_size; 3515 entry->offset += max_discard_size; 3516 entry->bytes -= max_discard_size; 3517 link_free_space(ctl, entry); 3518 } else { 3519 kmem_cache_free(btrfs_free_space_cachep, entry); 3520 } 3521 } else { 3522 start = max(start, extent_start); 3523 bytes = min(extent_start + extent_bytes, end) - start; 3524 if (bytes < minlen) { 3525 spin_unlock(&ctl->tree_lock); 3526 mutex_unlock(&ctl->cache_writeout_mutex); 3527 goto next; 3528 } 3529 3530 unlink_free_space(ctl, entry); 3531 kmem_cache_free(btrfs_free_space_cachep, entry); 3532 } 3533 3534 spin_unlock(&ctl->tree_lock); 3535 trim_entry.start = extent_start; 3536 trim_entry.bytes = extent_bytes; 3537 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3538 mutex_unlock(&ctl->cache_writeout_mutex); 3539 3540 ret = do_trimming(block_group, total_trimmed, start, bytes, 3541 extent_start, extent_bytes, extent_trim_state, 3542 &trim_entry); 3543 if (ret) { 3544 block_group->discard_cursor = start + bytes; 3545 break; 3546 } 3547next: 3548 start += bytes; 3549 block_group->discard_cursor = start; 3550 if (async && *total_trimmed) 3551 break; 3552 3553 if (fatal_signal_pending(current)) { 3554 ret = -ERESTARTSYS; 3555 break; 3556 } 3557 3558 cond_resched(); 3559 } 3560 3561 return ret; 3562 3563out_unlock: 3564 block_group->discard_cursor = btrfs_block_group_end(block_group); 3565 spin_unlock(&ctl->tree_lock); 3566 mutex_unlock(&ctl->cache_writeout_mutex); 3567 3568 return ret; 3569} 3570 3571/* 3572 * If we break out of trimming a bitmap prematurely, we should reset the 3573 * trimming bit. In a rather contrieved case, it's possible to race here so 3574 * reset the state to BTRFS_TRIM_STATE_UNTRIMMED. 3575 * 3576 * start = start of bitmap 3577 * end = near end of bitmap 3578 * 3579 * Thread 1: Thread 2: 3580 * trim_bitmaps(start) 3581 * trim_bitmaps(end) 3582 * end_trimming_bitmap() 3583 * reset_trimming_bitmap() 3584 */ 3585static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset) 3586{ 3587 struct btrfs_free_space *entry; 3588 3589 spin_lock(&ctl->tree_lock); 3590 entry = tree_search_offset(ctl, offset, 1, 0); 3591 if (entry) { 3592 if (btrfs_free_space_trimmed(entry)) { 3593 ctl->discardable_extents[BTRFS_STAT_CURR] += 3594 entry->bitmap_extents; 3595 ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes; 3596 } 3597 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3598 } 3599 3600 spin_unlock(&ctl->tree_lock); 3601} 3602 3603static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl, 3604 struct btrfs_free_space *entry) 3605{ 3606 if (btrfs_free_space_trimming_bitmap(entry)) { 3607 entry->trim_state = BTRFS_TRIM_STATE_TRIMMED; 3608 ctl->discardable_extents[BTRFS_STAT_CURR] -= 3609 entry->bitmap_extents; 3610 ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes; 3611 } 3612} 3613 3614/* 3615 * If @async is set, then we will trim 1 region and return. 
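 * Progress is recorded in block_group->discard_cursor either way, so an async
 * caller can come back later and resume from where this call stopped.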
3616 */ 3617static int trim_bitmaps(struct btrfs_block_group *block_group, 3618 u64 *total_trimmed, u64 start, u64 end, u64 minlen, 3619 u64 maxlen, bool async) 3620{ 3621 struct btrfs_discard_ctl *discard_ctl = 3622 &block_group->fs_info->discard_ctl; 3623 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3624 struct btrfs_free_space *entry; 3625 int ret = 0; 3626 int ret2; 3627 u64 bytes; 3628 u64 offset = offset_to_bitmap(ctl, start); 3629 const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); 3630 3631 while (offset < end) { 3632 bool next_bitmap = false; 3633 struct btrfs_trim_range trim_entry; 3634 3635 mutex_lock(&ctl->cache_writeout_mutex); 3636 spin_lock(&ctl->tree_lock); 3637 3638 if (ctl->free_space < minlen) { 3639 block_group->discard_cursor = 3640 btrfs_block_group_end(block_group); 3641 spin_unlock(&ctl->tree_lock); 3642 mutex_unlock(&ctl->cache_writeout_mutex); 3643 break; 3644 } 3645 3646 entry = tree_search_offset(ctl, offset, 1, 0); 3647 /* 3648 * Bitmaps are marked trimmed lossily now to prevent constant 3649 * discarding of the same bitmap (the reason why we are bound 3650 * by the filters). So, retrim the block group bitmaps when we 3651 * are preparing to punt to the unused_bgs list. This uses 3652 * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED 3653 * which is the only discard index which sets minlen to 0. 3654 */ 3655 if (!entry || (async && minlen && start == offset && 3656 btrfs_free_space_trimmed(entry))) { 3657 spin_unlock(&ctl->tree_lock); 3658 mutex_unlock(&ctl->cache_writeout_mutex); 3659 next_bitmap = true; 3660 goto next; 3661 } 3662 3663 /* 3664 * Async discard bitmap trimming begins at by setting the start 3665 * to be key.objectid and the offset_to_bitmap() aligns to the 3666 * start of the bitmap. This lets us know we are fully 3667 * scanning the bitmap rather than only some portion of it. 3668 */ 3669 if (start == offset) 3670 entry->trim_state = BTRFS_TRIM_STATE_TRIMMING; 3671 3672 bytes = minlen; 3673 ret2 = search_bitmap(ctl, entry, &start, &bytes, false); 3674 if (ret2 || start >= end) { 3675 /* 3676 * We lossily consider a bitmap trimmed if we only skip 3677 * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER. 3678 */ 3679 if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER) 3680 end_trimming_bitmap(ctl, entry); 3681 else 3682 entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; 3683 spin_unlock(&ctl->tree_lock); 3684 mutex_unlock(&ctl->cache_writeout_mutex); 3685 next_bitmap = true; 3686 goto next; 3687 } 3688 3689 /* 3690 * We already trimmed a region, but are using the locking above 3691 * to reset the trim_state. 3692 */ 3693 if (async && *total_trimmed) { 3694 spin_unlock(&ctl->tree_lock); 3695 mutex_unlock(&ctl->cache_writeout_mutex); 3696 goto out; 3697 } 3698 3699 bytes = min(bytes, end - start); 3700 if (bytes < minlen || (async && maxlen && bytes > maxlen)) { 3701 spin_unlock(&ctl->tree_lock); 3702 mutex_unlock(&ctl->cache_writeout_mutex); 3703 goto next; 3704 } 3705 3706 /* 3707 * Let bytes = BTRFS_MAX_DISCARD_SIZE + X. 3708 * If X < @minlen, we won't trim X when we come back around. 3709 * So trim it now. We differ here from trimming extents as we 3710 * don't keep individual state per bit. 
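		 * A single trim_state covers the whole bitmap, so if we bail
		 * out part way through (error or fatal signal) the bitmap is
		 * flipped back to untrimmed via reset_trimming_bitmap().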
3711 */ 3712 if (async && 3713 max_discard_size && 3714 bytes > (max_discard_size + minlen)) 3715 bytes = max_discard_size; 3716 3717 bitmap_clear_bits(ctl, entry, start, bytes); 3718 if (entry->bytes == 0) 3719 free_bitmap(ctl, entry); 3720 3721 spin_unlock(&ctl->tree_lock); 3722 trim_entry.start = start; 3723 trim_entry.bytes = bytes; 3724 list_add_tail(&trim_entry.list, &ctl->trimming_ranges); 3725 mutex_unlock(&ctl->cache_writeout_mutex); 3726 3727 ret = do_trimming(block_group, total_trimmed, start, bytes, 3728 start, bytes, 0, &trim_entry); 3729 if (ret) { 3730 reset_trimming_bitmap(ctl, offset); 3731 block_group->discard_cursor = 3732 btrfs_block_group_end(block_group); 3733 break; 3734 } 3735next: 3736 if (next_bitmap) { 3737 offset += BITS_PER_BITMAP * ctl->unit; 3738 start = offset; 3739 } else { 3740 start += bytes; 3741 } 3742 block_group->discard_cursor = start; 3743 3744 if (fatal_signal_pending(current)) { 3745 if (start != offset) 3746 reset_trimming_bitmap(ctl, offset); 3747 ret = -ERESTARTSYS; 3748 break; 3749 } 3750 3751 cond_resched(); 3752 } 3753 3754 if (offset >= end) 3755 block_group->discard_cursor = end; 3756 3757out: 3758 return ret; 3759} 3760 3761int btrfs_trim_block_group(struct btrfs_block_group *block_group, 3762 u64 *trimmed, u64 start, u64 end, u64 minlen) 3763{ 3764 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 3765 int ret; 3766 u64 rem = 0; 3767 3768 *trimmed = 0; 3769 3770 spin_lock(&block_group->lock); 3771 if (block_group->removed) { 3772 spin_unlock(&block_group->lock); 3773 return 0; 3774 } 3775 btrfs_freeze_block_group(block_group); 3776 spin_unlock(&block_group->lock); 3777 3778 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false); 3779 if (ret) 3780 goto out; 3781 3782 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false); 3783 div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem); 3784 /* If we ended in the middle of a bitmap, reset the trimming flag */ 3785 if (rem) 3786 reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end)); 3787out: 3788 btrfs_unfreeze_block_group(block_group); 3789 return ret; 3790} 3791 3792int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group, 3793 u64 *trimmed, u64 start, u64 end, u64 minlen, 3794 bool async) 3795{ 3796 int ret; 3797 3798 *trimmed = 0; 3799 3800 spin_lock(&block_group->lock); 3801 if (block_group->removed) { 3802 spin_unlock(&block_group->lock); 3803 return 0; 3804 } 3805 btrfs_freeze_block_group(block_group); 3806 spin_unlock(&block_group->lock); 3807 3808 ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async); 3809 btrfs_unfreeze_block_group(block_group); 3810 3811 return ret; 3812} 3813 3814int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group, 3815 u64 *trimmed, u64 start, u64 end, u64 minlen, 3816 u64 maxlen, bool async) 3817{ 3818 int ret; 3819 3820 *trimmed = 0; 3821 3822 spin_lock(&block_group->lock); 3823 if (block_group->removed) { 3824 spin_unlock(&block_group->lock); 3825 return 0; 3826 } 3827 btrfs_freeze_block_group(block_group); 3828 spin_unlock(&block_group->lock); 3829 3830 ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen, 3831 async); 3832 3833 btrfs_unfreeze_block_group(block_group); 3834 3835 return ret; 3836} 3837 3838/* 3839 * Find the left-most item in the cache tree, and then return the 3840 * smallest inode number in the item. 3841 * 3842 * Note: the returned inode number may not be the smallest one in 3843 * the tree, if the left-most item is a bitmap. 
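 * For example, a bitmap entry starting at offset 0 whose lowest set bit maps
 * to inode 5000 sorts before a plain extent entry recording inode 4096 as
 * free, because entries are ordered by their starting offset; in that case
 * 5000 is returned even though 4096 would be smaller.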
3844 */ 3845u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root) 3846{ 3847 struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl; 3848 struct btrfs_free_space *entry = NULL; 3849 u64 ino = 0; 3850 3851 spin_lock(&ctl->tree_lock); 3852 3853 if (RB_EMPTY_ROOT(&ctl->free_space_offset)) 3854 goto out; 3855 3856 entry = rb_entry(rb_first(&ctl->free_space_offset), 3857 struct btrfs_free_space, offset_index); 3858 3859 if (!entry->bitmap) { 3860 ino = entry->offset; 3861 3862 unlink_free_space(ctl, entry); 3863 entry->offset++; 3864 entry->bytes--; 3865 if (!entry->bytes) 3866 kmem_cache_free(btrfs_free_space_cachep, entry); 3867 else 3868 link_free_space(ctl, entry); 3869 } else { 3870 u64 offset = 0; 3871 u64 count = 1; 3872 int ret; 3873 3874 ret = search_bitmap(ctl, entry, &offset, &count, true); 3875 /* Logic error; Should be empty if it can't find anything */ 3876 ASSERT(!ret); 3877 3878 ino = offset; 3879 bitmap_clear_bits(ctl, entry, offset, 1); 3880 if (entry->bytes == 0) 3881 free_bitmap(ctl, entry); 3882 } 3883out: 3884 spin_unlock(&ctl->tree_lock); 3885 3886 return ino; 3887} 3888 3889struct inode *lookup_free_ino_inode(struct btrfs_root *root, 3890 struct btrfs_path *path) 3891{ 3892 struct inode *inode = NULL; 3893 3894 spin_lock(&root->ino_cache_lock); 3895 if (root->ino_cache_inode) 3896 inode = igrab(root->ino_cache_inode); 3897 spin_unlock(&root->ino_cache_lock); 3898 if (inode) 3899 return inode; 3900 3901 inode = __lookup_free_space_inode(root, path, 0); 3902 if (IS_ERR(inode)) 3903 return inode; 3904 3905 spin_lock(&root->ino_cache_lock); 3906 if (!btrfs_fs_closing(root->fs_info)) 3907 root->ino_cache_inode = igrab(inode); 3908 spin_unlock(&root->ino_cache_lock); 3909 3910 return inode; 3911} 3912 3913int create_free_ino_inode(struct btrfs_root *root, 3914 struct btrfs_trans_handle *trans, 3915 struct btrfs_path *path) 3916{ 3917 return __create_free_space_inode(root, trans, path, 3918 BTRFS_FREE_INO_OBJECTID, 0); 3919} 3920 3921int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root) 3922{ 3923 struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; 3924 struct btrfs_path *path; 3925 struct inode *inode; 3926 int ret = 0; 3927 u64 root_gen = btrfs_root_generation(&root->root_item); 3928 3929 if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE)) 3930 return 0; 3931 3932 /* 3933 * If we're unmounting then just return, since this does a search on the 3934 * normal root and not the commit root and we could deadlock. 

int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
{
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	struct btrfs_path *path;
	struct inode *inode;
	int ret = 0;
	u64 root_gen = btrfs_root_generation(&root->root_item);

	if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
		return 0;

	/*
	 * If we're unmounting then just return, since this does a search on
	 * the normal root and not the commit root, and we could deadlock.
	 */
	if (btrfs_fs_closing(fs_info))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return 0;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode))
		goto out;

	if (root_gen != BTRFS_I(inode)->generation)
		goto out_put;

	ret = __load_free_space_cache(root, inode, ctl, path, 0);

	if (ret < 0)
		btrfs_err(fs_info,
			"failed to load free ino cache for root %llu",
			root->root_key.objectid);
out_put:
	iput(inode);
out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_write_out_ino_cache(struct btrfs_root *root,
			      struct btrfs_trans_handle *trans,
			      struct btrfs_path *path,
			      struct inode *inode)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
	int ret;
	struct btrfs_io_ctl io_ctl;
	bool release_metadata = true;

	if (!btrfs_test_opt(fs_info, INODE_MAP_CACHE))
		return 0;

	memset(&io_ctl, 0, sizeof(io_ctl));
	ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans);
	if (!ret) {
		/*
		 * At this point writepages() didn't error out, so our metadata
		 * reservation is released when the writeback finishes, at
		 * inode.c:btrfs_finish_ordered_io(), regardless of whether it
		 * finishes with or without an error.
		 */
		release_metadata = false;
		ret = btrfs_wait_cache_io_root(root, trans, &io_ctl, path);
	}

	if (ret) {
		if (release_metadata)
			btrfs_delalloc_release_metadata(BTRFS_I(inode),
					inode->i_size, true);
		btrfs_debug(fs_info,
			  "failed to write free ino cache for root %llu error %d",
			  root->root_key.objectid, ret);
	}

	return ret;
}
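
/*
 * Illustrative sketch only, not part of the original file: how a commit-time
 * caller might flush the free-ino cache using the two helpers above.  The
 * helper name is made up and error handling is simplified; the real flow
 * lives in the inode-map code, which also creates the cache inode when it
 * does not exist yet.
 */
static int __maybe_unused example_flush_ino_cache(struct btrfs_root *root,
						  struct btrfs_trans_handle *trans)
{
	struct btrfs_path *path;
	struct inode *inode;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	inode = lookup_free_ino_inode(root, path);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto out;
	}

	ret = btrfs_write_out_ino_cache(root, trans, path, inode);
	iput(inode);
out:
	btrfs_free_path(path);
	return ret;
}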

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
 * Use this if you need to make a bitmap or extent entry specifically.  It
 * doesn't do any of the merging that add_free_space does; it behaves a lot
 * like the free space cache loading code does, so you can end up with really
 * odd configurations.
 */
int test_add_free_space_entry(struct btrfs_block_group *cache,
			      u64 offset, u64 bytes, bool bitmap)
{
	struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
	struct btrfs_free_space *info = NULL, *bitmap_info;
	void *map = NULL;
	enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED;
	u64 bytes_added;
	int ret;

again:
	if (!info) {
		info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
		if (!info)
			return -ENOMEM;
	}

	if (!bitmap) {
		spin_lock(&ctl->tree_lock);
		info->offset = offset;
		info->bytes = bytes;
		info->max_extent_size = 0;
		ret = link_free_space(ctl, info);
		spin_unlock(&ctl->tree_lock);
		if (ret)
			kmem_cache_free(btrfs_free_space_cachep, info);
		return ret;
	}

	if (!map) {
		map = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS);
		if (!map) {
			kmem_cache_free(btrfs_free_space_cachep, info);
			return -ENOMEM;
		}
	}

	spin_lock(&ctl->tree_lock);
	bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					 1, 0);
	if (!bitmap_info) {
		info->bitmap = map;
		map = NULL;
		add_new_bitmap(ctl, info, offset);
		bitmap_info = info;
		info = NULL;
	}

	bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes,
					  trim_state);

	bytes -= bytes_added;
	offset += bytes_added;
	spin_unlock(&ctl->tree_lock);

	if (bytes)
		goto again;

	if (info)
		kmem_cache_free(btrfs_free_space_cachep, info);
	if (map)
		kmem_cache_free(btrfs_free_space_bitmap_cachep, map);
	return 0;
}
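
/*
 * Illustrative sketch only, not part of the original file: a sanity test
 * would typically seed the cache with test_add_free_space_entry() and then
 * probe it with test_check_exists() (defined just below).  The helper name
 * and the offsets/sizes are arbitrary values picked for the example.
 */
/* Forward declaration matching the definition below. */
int test_check_exists(struct btrfs_block_group *cache, u64 offset, u64 bytes);

static int __maybe_unused example_seed_and_check(struct btrfs_block_group *cache)
{
	int ret;

	/* One 1M extent entry at offset 0 and one 4K bit in a bitmap at 4M. */
	ret = test_add_free_space_entry(cache, 0, SZ_1M, false);
	if (ret)
		return ret;
	ret = test_add_free_space_entry(cache, SZ_4M, SZ_4K, true);
	if (ret)
		return ret;

	/* Both ranges should now be visible to test_check_exists(). */
	if (!test_check_exists(cache, 0, SZ_1M) ||
	    !test_check_exists(cache, SZ_4M, SZ_4K))
		return -EINVAL;

	return 0;
}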

/*
 * Checks to see if the given range is in the free space cache.  This is really
 * just used to check the absence of space, so if there is free space in the
 * range at all we will return 1.
 */
int test_check_exists(struct btrfs_block_group *cache,
		      u64 offset, u64 bytes)
{
	struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
	struct btrfs_free_space *info;
	int ret = 0;

	spin_lock(&ctl->tree_lock);
	info = tree_search_offset(ctl, offset, 0, 0);
	if (!info) {
		info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
					  1, 0);
		if (!info)
			goto out;
	}

have_info:
	if (info->bitmap) {
		u64 bit_off, bit_bytes;
		struct rb_node *n;
		struct btrfs_free_space *tmp;

		bit_off = offset;
		bit_bytes = ctl->unit;
		ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
		if (!ret) {
			if (bit_off == offset) {
				ret = 1;
				goto out;
			} else if (bit_off > offset &&
				   offset + bytes > bit_off) {
				ret = 1;
				goto out;
			}
		}

		n = rb_prev(&info->offset_index);
		while (n) {
			tmp = rb_entry(n, struct btrfs_free_space,
				       offset_index);
			if (tmp->offset + tmp->bytes < offset)
				break;
			if (offset + bytes < tmp->offset) {
				n = rb_prev(&tmp->offset_index);
				continue;
			}
			info = tmp;
			goto have_info;
		}

		n = rb_next(&info->offset_index);
		while (n) {
			tmp = rb_entry(n, struct btrfs_free_space,
				       offset_index);
			if (offset + bytes < tmp->offset)
				break;
			if (tmp->offset + tmp->bytes < offset) {
				n = rb_next(&tmp->offset_index);
				continue;
			}
			info = tmp;
			goto have_info;
		}

		ret = 0;
		goto out;
	}

	if (info->offset == offset) {
		ret = 1;
		goto out;
	}

	if (offset > info->offset && offset < info->offset + info->bytes)
		ret = 1;
out:
	spin_unlock(&ctl->tree_lock);
	return ret;
}
#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */