1// SPDX-License-Identifier: GPL-2.0 2 3#include <linux/jiffies.h> 4#include <linux/kernel.h> 5#include <linux/ktime.h> 6#include <linux/list.h> 7#include <linux/math64.h> 8#include <linux/sizes.h> 9#include <linux/workqueue.h> 10#include "ctree.h" 11#include "block-group.h" 12#include "discard.h" 13#include "free-space-cache.h" 14 15/* 16 * This contains the logic to handle async discard. 17 * 18 * Async discard manages trimming of free space outside of transaction commit. 19 * Discarding is done by managing the block_groups on a LRU list based on free 20 * space recency. Two passes are used to first prioritize discarding extents 21 * and then allow for trimming in the bitmap the best opportunity to coalesce. 22 * The block_groups are maintained on multiple lists to allow for multiple 23 * passes with different discard filter requirements. A delayed work item is 24 * used to manage discarding with timeout determined by a max of the delay 25 * incurred by the iops rate limit, the byte rate limit, and the max delay of 26 * BTRFS_DISCARD_MAX_DELAY. 27 * 28 * Note, this only keeps track of block_groups that are explicitly for data. 29 * Mixed block_groups are not supported. 30 * 31 * The first list is special to manage discarding of fully free block groups. 32 * This is necessary because we issue a final trim for a full free block group 33 * after forgetting it. When a block group becomes unused, instead of directly 34 * being added to the unused_bgs list, we add it to this first list. Then 35 * from there, if it becomes fully discarded, we place it onto the unused_bgs 36 * list. 37 * 38 * The in-memory free space cache serves as the backing state for discard. 39 * Consequently this means there is no persistence. We opt to load all the 40 * block groups in as not discarded, so the mount case degenerates to the 41 * crashing case. 42 * 43 * As the free space cache uses bitmaps, there exists a tradeoff between 44 * ease/efficiency for find_free_extent() and the accuracy of discard state. 45 * Here we opt to let untrimmed regions merge with everything while only letting 46 * trimmed regions merge with other trimmed regions. This can cause 47 * overtrimming, but the coalescing benefit seems to be worth it. Additionally, 48 * bitmap state is tracked as a whole. If we're able to fully trim a bitmap, 49 * the trimmed flag is set on the bitmap. Otherwise, if an allocation comes in, 50 * this resets the state and we will retry trimming the whole bitmap. This is a 51 * tradeoff between discard state accuracy and the cost of accounting. 52 */ 53 54/* This is an initial delay to give some chance for block reuse */ 55#define BTRFS_DISCARD_DELAY (120ULL * NSEC_PER_SEC) 56#define BTRFS_DISCARD_UNUSED_DELAY (10ULL * NSEC_PER_SEC) 57 58/* Target completion latency of discarding all discardable extents */ 59#define BTRFS_DISCARD_TARGET_MSEC (6 * 60 * 60UL * MSEC_PER_SEC) 60#define BTRFS_DISCARD_MIN_DELAY_MSEC (1UL) 61#define BTRFS_DISCARD_MAX_DELAY_MSEC (1000UL) 62#define BTRFS_DISCARD_MAX_IOPS (10U) 63 64/* Montonically decreasing minimum length filters after index 0 */ 65static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = { 66 0, 67 BTRFS_ASYNC_DISCARD_MAX_FILTER, 68 BTRFS_ASYNC_DISCARD_MIN_FILTER 69}; 70 71static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl, 72 struct btrfs_block_group *block_group) 73{ 74 return &discard_ctl->discard_list[block_group->discard_index]; 75} 76 77static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl, 78 struct btrfs_block_group *block_group) 79{ 80 if (!btrfs_run_discard_work(discard_ctl)) 81 return; 82 83 if (list_empty(&block_group->discard_list) || 84 block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) { 85 if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) 86 block_group->discard_index = BTRFS_DISCARD_INDEX_START; 87 block_group->discard_eligible_time = (ktime_get_ns() + 88 BTRFS_DISCARD_DELAY); 89 block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR; 90 } 91 92 list_move_tail(&block_group->discard_list, 93 get_discard_list(discard_ctl, block_group)); 94} 95 96static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl, 97 struct btrfs_block_group *block_group) 98{ 99 if (!btrfs_is_block_group_data_only(block_group)) 100 return; 101 102 spin_lock(&discard_ctl->lock); 103 __add_to_discard_list(discard_ctl, block_group); 104 spin_unlock(&discard_ctl->lock); 105} 106 107static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl, 108 struct btrfs_block_group *block_group) 109{ 110 spin_lock(&discard_ctl->lock); 111 112 if (!btrfs_run_discard_work(discard_ctl)) { 113 spin_unlock(&discard_ctl->lock); 114 return; 115 } 116 117 list_del_init(&block_group->discard_list); 118 119 block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED; 120 block_group->discard_eligible_time = (ktime_get_ns() + 121 BTRFS_DISCARD_UNUSED_DELAY); 122 block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR; 123 list_add_tail(&block_group->discard_list, 124 &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]); 125 126 spin_unlock(&discard_ctl->lock); 127} 128 129static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl, 130 struct btrfs_block_group *block_group) 131{ 132 bool running = false; 133 134 spin_lock(&discard_ctl->lock); 135 136 if (block_group == discard_ctl->block_group) { 137 running = true; 138 discard_ctl->block_group = NULL; 139 } 140 141 block_group->discard_eligible_time = 0; 142 list_del_init(&block_group->discard_list); 143 144 spin_unlock(&discard_ctl->lock); 145 146 return running; 147} 148 149/** 150 * find_next_block_group - find block_group that's up next for discarding 151 * @discard_ctl: discard control 152 * @now: current time 153 * 154 * Iterate over the discard lists to find the next block_group up for 155 * discarding checking the discard_eligible_time of block_group. 156 */ 157static struct btrfs_block_group *find_next_block_group( 158 struct btrfs_discard_ctl *discard_ctl, 159 u64 now) 160{ 161 struct btrfs_block_group *ret_block_group = NULL, *block_group; 162 int i; 163 164 for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) { 165 struct list_head *discard_list = &discard_ctl->discard_list[i]; 166 167 if (!list_empty(discard_list)) { 168 block_group = list_first_entry(discard_list, 169 struct btrfs_block_group, 170 discard_list); 171 172 if (!ret_block_group) 173 ret_block_group = block_group; 174 175 if (ret_block_group->discard_eligible_time < now) 176 break; 177 178 if (ret_block_group->discard_eligible_time > 179 block_group->discard_eligible_time) 180 ret_block_group = block_group; 181 } 182 } 183 184 return ret_block_group; 185} 186 187/** 188 * peek_discard_list - wrap find_next_block_group() 189 * @discard_ctl: discard control 190 * @discard_state: the discard_state of the block_group after state management 191 * @discard_index: the discard_index of the block_group after state management 192 * 193 * This wraps find_next_block_group() and sets the block_group to be in use. 194 * discard_state's control flow is managed here. Variables related to 195 * discard_state are reset here as needed (eg discard_cursor). @discard_state 196 * and @discard_index are remembered as it may change while we're discarding, 197 * but we want the discard to execute in the context determined here. 198 */ 199static struct btrfs_block_group *peek_discard_list( 200 struct btrfs_discard_ctl *discard_ctl, 201 enum btrfs_discard_state *discard_state, 202 int *discard_index, u64 now) 203{ 204 struct btrfs_block_group *block_group; 205 206 spin_lock(&discard_ctl->lock); 207again: 208 block_group = find_next_block_group(discard_ctl, now); 209 210 if (block_group && now >= block_group->discard_eligible_time) { 211 if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED && 212 block_group->used != 0) { 213 if (btrfs_is_block_group_data_only(block_group)) 214 __add_to_discard_list(discard_ctl, block_group); 215 else 216 list_del_init(&block_group->discard_list); 217 goto again; 218 } 219 if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) { 220 block_group->discard_cursor = block_group->start; 221 block_group->discard_state = BTRFS_DISCARD_EXTENTS; 222 } 223 discard_ctl->block_group = block_group; 224 } 225 if (block_group) { 226 *discard_state = block_group->discard_state; 227 *discard_index = block_group->discard_index; 228 } 229 spin_unlock(&discard_ctl->lock); 230 231 return block_group; 232} 233 234/** 235 * btrfs_discard_check_filter - updates a block groups filters 236 * @block_group: block group of interest 237 * @bytes: recently freed region size after coalescing 238 * 239 * Async discard maintains multiple lists with progressively smaller filters 240 * to prioritize discarding based on size. Should a free space that matches 241 * a larger filter be returned to the free_space_cache, prioritize that discard 242 * by moving @block_group to the proper filter. 243 */ 244void btrfs_discard_check_filter(struct btrfs_block_group *block_group, 245 u64 bytes) 246{ 247 struct btrfs_discard_ctl *discard_ctl; 248 249 if (!block_group || 250 !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC)) 251 return; 252 253 discard_ctl = &block_group->fs_info->discard_ctl; 254 255 if (block_group->discard_index > BTRFS_DISCARD_INDEX_START && 256 bytes >= discard_minlen[block_group->discard_index - 1]) { 257 int i; 258 259 remove_from_discard_list(discard_ctl, block_group); 260 261 for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS; 262 i++) { 263 if (bytes >= discard_minlen[i]) { 264 block_group->discard_index = i; 265 add_to_discard_list(discard_ctl, block_group); 266 break; 267 } 268 } 269 } 270} 271 272/** 273 * btrfs_update_discard_index - moves a block group along the discard lists 274 * @discard_ctl: discard control 275 * @block_group: block_group of interest 276 * 277 * Increment @block_group's discard_index. If it falls of the list, let it be. 278 * Otherwise add it back to the appropriate list. 279 */ 280static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl, 281 struct btrfs_block_group *block_group) 282{ 283 block_group->discard_index++; 284 if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) { 285 block_group->discard_index = 1; 286 return; 287 } 288 289 add_to_discard_list(discard_ctl, block_group); 290} 291 292/** 293 * btrfs_discard_cancel_work - remove a block_group from the discard lists 294 * @discard_ctl: discard control 295 * @block_group: block_group of interest 296 * 297 * This removes @block_group from the discard lists. If necessary, it waits on 298 * the current work and then reschedules the delayed work. 299 */ 300void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl, 301 struct btrfs_block_group *block_group) 302{ 303 if (remove_from_discard_list(discard_ctl, block_group)) { 304 cancel_delayed_work_sync(&discard_ctl->work); 305 btrfs_discard_schedule_work(discard_ctl, true); 306 } 307} 308 309/** 310 * btrfs_discard_queue_work - handles queuing the block_groups 311 * @discard_ctl: discard control 312 * @block_group: block_group of interest 313 * 314 * This maintains the LRU order of the discard lists. 315 */ 316void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl, 317 struct btrfs_block_group *block_group) 318{ 319 if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC)) 320 return; 321 322 if (block_group->used == 0) 323 add_to_discard_unused_list(discard_ctl, block_group); 324 else 325 add_to_discard_list(discard_ctl, block_group); 326 327 if (!delayed_work_pending(&discard_ctl->work)) 328 btrfs_discard_schedule_work(discard_ctl, false); 329} 330 331static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, 332 u64 now, bool override) 333{ 334 struct btrfs_block_group *block_group; 335 336 if (!btrfs_run_discard_work(discard_ctl)) 337 return; 338 if (!override && delayed_work_pending(&discard_ctl->work)) 339 return; 340 341 block_group = find_next_block_group(discard_ctl, now); 342 if (block_group) { 343 unsigned long delay = discard_ctl->delay; 344 u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit); 345 346 /* 347 * A single delayed workqueue item is responsible for 348 * discarding, so we can manage the bytes rate limit by keeping 349 * track of the previous discard. 350 */ 351 if (kbps_limit && discard_ctl->prev_discard) { 352 u64 bps_limit = ((u64)kbps_limit) * SZ_1K; 353 u64 bps_delay = div64_u64(discard_ctl->prev_discard * 354 MSEC_PER_SEC, bps_limit); 355 356 delay = max(delay, msecs_to_jiffies(bps_delay)); 357 } 358 359 /* 360 * This timeout is to hopefully prevent immediate discarding 361 * in a recently allocated block group. 362 */ 363 if (now < block_group->discard_eligible_time) { 364 u64 bg_timeout = block_group->discard_eligible_time - now; 365 366 delay = max(delay, nsecs_to_jiffies(bg_timeout)); 367 } 368 369 mod_delayed_work(discard_ctl->discard_workers, 370 &discard_ctl->work, delay); 371 } 372} 373 374/* 375 * btrfs_discard_schedule_work - responsible for scheduling the discard work 376 * @discard_ctl: discard control 377 * @override: override the current timer 378 * 379 * Discards are issued by a delayed workqueue item. @override is used to 380 * update the current delay as the baseline delay interval is reevaluated on 381 * transaction commit. This is also maxed with any other rate limit. 382 */ 383void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl, 384 bool override) 385{ 386 const u64 now = ktime_get_ns(); 387 388 spin_lock(&discard_ctl->lock); 389 __btrfs_discard_schedule_work(discard_ctl, now, override); 390 spin_unlock(&discard_ctl->lock); 391} 392 393/** 394 * btrfs_finish_discard_pass - determine next step of a block_group 395 * @discard_ctl: discard control 396 * @block_group: block_group of interest 397 * 398 * This determines the next step for a block group after it's finished going 399 * through a pass on a discard list. If it is unused and fully trimmed, we can 400 * mark it unused and send it to the unused_bgs path. Otherwise, pass it onto 401 * the appropriate filter list or let it fall off. 402 */ 403static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl, 404 struct btrfs_block_group *block_group) 405{ 406 remove_from_discard_list(discard_ctl, block_group); 407 408 if (block_group->used == 0) { 409 if (btrfs_is_free_space_trimmed(block_group)) 410 btrfs_mark_bg_unused(block_group); 411 else 412 add_to_discard_unused_list(discard_ctl, block_group); 413 } else { 414 btrfs_update_discard_index(discard_ctl, block_group); 415 } 416} 417 418/** 419 * btrfs_discard_workfn - discard work function 420 * @work: work 421 * 422 * This finds the next block_group to start discarding and then discards a 423 * single region. It does this in a two-pass fashion: first extents and second 424 * bitmaps. Completely discarded block groups are sent to the unused_bgs path. 425 */ 426static void btrfs_discard_workfn(struct work_struct *work) 427{ 428 struct btrfs_discard_ctl *discard_ctl; 429 struct btrfs_block_group *block_group; 430 enum btrfs_discard_state discard_state; 431 int discard_index = 0; 432 u64 trimmed = 0; 433 u64 minlen = 0; 434 u64 now = ktime_get_ns(); 435 436 discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work); 437 438 block_group = peek_discard_list(discard_ctl, &discard_state, 439 &discard_index, now); 440 if (!block_group || !btrfs_run_discard_work(discard_ctl)) 441 return; 442 if (now < block_group->discard_eligible_time) { 443 btrfs_discard_schedule_work(discard_ctl, false); 444 return; 445 } 446 447 /* Perform discarding */ 448 minlen = discard_minlen[discard_index]; 449 450 if (discard_state == BTRFS_DISCARD_BITMAPS) { 451 u64 maxlen = 0; 452 453 /* 454 * Use the previous levels minimum discard length as the max 455 * length filter. In the case something is added to make a 456 * region go beyond the max filter, the entire bitmap is set 457 * back to BTRFS_TRIM_STATE_UNTRIMMED. 458 */ 459 if (discard_index != BTRFS_DISCARD_INDEX_UNUSED) 460 maxlen = discard_minlen[discard_index - 1]; 461 462 btrfs_trim_block_group_bitmaps(block_group, &trimmed, 463 block_group->discard_cursor, 464 btrfs_block_group_end(block_group), 465 minlen, maxlen, true); 466 discard_ctl->discard_bitmap_bytes += trimmed; 467 } else { 468 btrfs_trim_block_group_extents(block_group, &trimmed, 469 block_group->discard_cursor, 470 btrfs_block_group_end(block_group), 471 minlen, true); 472 discard_ctl->discard_extent_bytes += trimmed; 473 } 474 475 discard_ctl->prev_discard = trimmed; 476 477 /* Determine next steps for a block_group */ 478 if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) { 479 if (discard_state == BTRFS_DISCARD_BITMAPS) { 480 btrfs_finish_discard_pass(discard_ctl, block_group); 481 } else { 482 block_group->discard_cursor = block_group->start; 483 spin_lock(&discard_ctl->lock); 484 if (block_group->discard_state != 485 BTRFS_DISCARD_RESET_CURSOR) 486 block_group->discard_state = 487 BTRFS_DISCARD_BITMAPS; 488 spin_unlock(&discard_ctl->lock); 489 } 490 } 491 492 spin_lock(&discard_ctl->lock); 493 discard_ctl->block_group = NULL; 494 __btrfs_discard_schedule_work(discard_ctl, now, false); 495 spin_unlock(&discard_ctl->lock); 496} 497 498/** 499 * btrfs_run_discard_work - determines if async discard should be running 500 * @discard_ctl: discard control 501 * 502 * Checks if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set. 503 */ 504bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl) 505{ 506 struct btrfs_fs_info *fs_info = container_of(discard_ctl, 507 struct btrfs_fs_info, 508 discard_ctl); 509 510 return (!(fs_info->sb->s_flags & SB_RDONLY) && 511 test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags)); 512} 513 514/** 515 * btrfs_discard_calc_delay - recalculate the base delay 516 * @discard_ctl: discard control 517 * 518 * Recalculate the base delay which is based off the total number of 519 * discardable_extents. Clamp this between the lower_limit (iops_limit or 1ms) 520 * and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC). 521 */ 522void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl) 523{ 524 s32 discardable_extents; 525 s64 discardable_bytes; 526 u32 iops_limit; 527 unsigned long delay; 528 unsigned long lower_limit = BTRFS_DISCARD_MIN_DELAY_MSEC; 529 530 discardable_extents = atomic_read(&discard_ctl->discardable_extents); 531 if (!discardable_extents) 532 return; 533 534 spin_lock(&discard_ctl->lock); 535 536 /* 537 * The following is to fix a potential -1 discrepenancy that we're not 538 * sure how to reproduce. But given that this is the only place that 539 * utilizes these numbers and this is only called by from 540 * btrfs_finish_extent_commit() which is synchronized, we can correct 541 * here. 542 */ 543 if (discardable_extents < 0) 544 atomic_add(-discardable_extents, 545 &discard_ctl->discardable_extents); 546 547 discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes); 548 if (discardable_bytes < 0) 549 atomic64_add(-discardable_bytes, 550 &discard_ctl->discardable_bytes); 551 552 if (discardable_extents <= 0) { 553 spin_unlock(&discard_ctl->lock); 554 return; 555 } 556 557 iops_limit = READ_ONCE(discard_ctl->iops_limit); 558 if (iops_limit) 559 lower_limit = max_t(unsigned long, lower_limit, 560 MSEC_PER_SEC / iops_limit); 561 562 delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents; 563 delay = clamp(delay, lower_limit, BTRFS_DISCARD_MAX_DELAY_MSEC); 564 discard_ctl->delay = msecs_to_jiffies(delay); 565 566 spin_unlock(&discard_ctl->lock); 567} 568 569/** 570 * btrfs_discard_update_discardable - propagate discard counters 571 * @block_group: block_group of interest 572 * @ctl: free_space_ctl of @block_group 573 * 574 * This propagates deltas of counters up to the discard_ctl. It maintains a 575 * current counter and a previous counter passing the delta up to the global 576 * stat. Then the current counter value becomes the previous counter value. 577 */ 578void btrfs_discard_update_discardable(struct btrfs_block_group *block_group, 579 struct btrfs_free_space_ctl *ctl) 580{ 581 struct btrfs_discard_ctl *discard_ctl; 582 s32 extents_delta; 583 s64 bytes_delta; 584 585 if (!block_group || 586 !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) || 587 !btrfs_is_block_group_data_only(block_group)) 588 return; 589 590 discard_ctl = &block_group->fs_info->discard_ctl; 591 592 extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] - 593 ctl->discardable_extents[BTRFS_STAT_PREV]; 594 if (extents_delta) { 595 atomic_add(extents_delta, &discard_ctl->discardable_extents); 596 ctl->discardable_extents[BTRFS_STAT_PREV] = 597 ctl->discardable_extents[BTRFS_STAT_CURR]; 598 } 599 600 bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] - 601 ctl->discardable_bytes[BTRFS_STAT_PREV]; 602 if (bytes_delta) { 603 atomic64_add(bytes_delta, &discard_ctl->discardable_bytes); 604 ctl->discardable_bytes[BTRFS_STAT_PREV] = 605 ctl->discardable_bytes[BTRFS_STAT_CURR]; 606 } 607} 608 609/** 610 * btrfs_discard_punt_unused_bgs_list - punt unused_bgs list to discard lists 611 * @fs_info: fs_info of interest 612 * 613 * The unused_bgs list needs to be punted to the discard lists because the 614 * order of operations is changed. In the normal sychronous discard path, the 615 * block groups are trimmed via a single large trim in transaction commit. This 616 * is ultimately what we are trying to avoid with asynchronous discard. Thus, 617 * it must be done before going down the unused_bgs path. 618 */ 619void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info) 620{ 621 struct btrfs_block_group *block_group, *next; 622 623 spin_lock(&fs_info->unused_bgs_lock); 624 /* We enabled async discard, so punt all to the queue */ 625 list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs, 626 bg_list) { 627 list_del_init(&block_group->bg_list); 628 btrfs_put_block_group(block_group); 629 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group); 630 } 631 spin_unlock(&fs_info->unused_bgs_lock); 632} 633 634/** 635 * btrfs_discard_purge_list - purge discard lists 636 * @discard_ctl: discard control 637 * 638 * If we are disabling async discard, we may have intercepted block groups that 639 * are completely free and ready for the unused_bgs path. As discarding will 640 * now happen in transaction commit or not at all, we can safely mark the 641 * corresponding block groups as unused and they will be sent on their merry 642 * way to the unused_bgs list. 643 */ 644static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl) 645{ 646 struct btrfs_block_group *block_group, *next; 647 int i; 648 649 spin_lock(&discard_ctl->lock); 650 for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) { 651 list_for_each_entry_safe(block_group, next, 652 &discard_ctl->discard_list[i], 653 discard_list) { 654 list_del_init(&block_group->discard_list); 655 spin_unlock(&discard_ctl->lock); 656 if (block_group->used == 0) 657 btrfs_mark_bg_unused(block_group); 658 spin_lock(&discard_ctl->lock); 659 } 660 } 661 spin_unlock(&discard_ctl->lock); 662} 663 664void btrfs_discard_resume(struct btrfs_fs_info *fs_info) 665{ 666 if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) { 667 btrfs_discard_cleanup(fs_info); 668 return; 669 } 670 671 btrfs_discard_punt_unused_bgs_list(fs_info); 672 673 set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags); 674} 675 676void btrfs_discard_stop(struct btrfs_fs_info *fs_info) 677{ 678 clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags); 679} 680 681void btrfs_discard_init(struct btrfs_fs_info *fs_info) 682{ 683 struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl; 684 int i; 685 686 spin_lock_init(&discard_ctl->lock); 687 INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn); 688 689 for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) 690 INIT_LIST_HEAD(&discard_ctl->discard_list[i]); 691 692 discard_ctl->prev_discard = 0; 693 atomic_set(&discard_ctl->discardable_extents, 0); 694 atomic64_set(&discard_ctl->discardable_bytes, 0); 695 discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE; 696 discard_ctl->delay = BTRFS_DISCARD_MAX_DELAY_MSEC; 697 discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS; 698 discard_ctl->kbps_limit = 0; 699 discard_ctl->discard_extent_bytes = 0; 700 discard_ctl->discard_bitmap_bytes = 0; 701 atomic64_set(&discard_ctl->discard_bytes_saved, 0); 702} 703 704void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info) 705{ 706 btrfs_discard_stop(fs_info); 707 cancel_delayed_work_sync(&fs_info->discard_ctl.work); 708 btrfs_discard_purge_list(&fs_info->discard_ctl); 709} 710