162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci
362306a36Sopenharmony_ci#include <linux/jiffies.h>
462306a36Sopenharmony_ci#include <linux/kernel.h>
562306a36Sopenharmony_ci#include <linux/ktime.h>
662306a36Sopenharmony_ci#include <linux/list.h>
762306a36Sopenharmony_ci#include <linux/math64.h>
862306a36Sopenharmony_ci#include <linux/sizes.h>
962306a36Sopenharmony_ci#include <linux/workqueue.h>
1062306a36Sopenharmony_ci#include "ctree.h"
1162306a36Sopenharmony_ci#include "block-group.h"
1262306a36Sopenharmony_ci#include "discard.h"
1362306a36Sopenharmony_ci#include "free-space-cache.h"
1462306a36Sopenharmony_ci#include "fs.h"
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_ci/*
1762306a36Sopenharmony_ci * This contains the logic to handle async discard.
1862306a36Sopenharmony_ci *
1962306a36Sopenharmony_ci * Async discard manages trimming of free space outside of transaction commit.
2062306a36Sopenharmony_ci * Discarding is done by managing the block_groups on a LRU list based on free
2162306a36Sopenharmony_ci * space recency.  Two passes are used to first prioritize discarding extents
2262306a36Sopenharmony_ci * and then allow for trimming in the bitmap the best opportunity to coalesce.
2362306a36Sopenharmony_ci * The block_groups are maintained on multiple lists to allow for multiple
2462306a36Sopenharmony_ci * passes with different discard filter requirements.  A delayed work item is
2562306a36Sopenharmony_ci * used to manage discarding with timeout determined by a max of the delay
2662306a36Sopenharmony_ci * incurred by the iops rate limit, the byte rate limit, and the max delay of
2762306a36Sopenharmony_ci * BTRFS_DISCARD_MAX_DELAY.
2862306a36Sopenharmony_ci *
2962306a36Sopenharmony_ci * Note, this only keeps track of block_groups that are explicitly for data.
3062306a36Sopenharmony_ci * Mixed block_groups are not supported.
3162306a36Sopenharmony_ci *
3262306a36Sopenharmony_ci * The first list is special to manage discarding of fully free block groups.
3362306a36Sopenharmony_ci * This is necessary because we issue a final trim for a full free block group
3462306a36Sopenharmony_ci * after forgetting it.  When a block group becomes unused, instead of directly
3562306a36Sopenharmony_ci * being added to the unused_bgs list, we add it to this first list.  Then
3662306a36Sopenharmony_ci * from there, if it becomes fully discarded, we place it onto the unused_bgs
3762306a36Sopenharmony_ci * list.
3862306a36Sopenharmony_ci *
3962306a36Sopenharmony_ci * The in-memory free space cache serves as the backing state for discard.
4062306a36Sopenharmony_ci * Consequently this means there is no persistence.  We opt to load all the
4162306a36Sopenharmony_ci * block groups in as not discarded, so the mount case degenerates to the
4262306a36Sopenharmony_ci * crashing case.
4362306a36Sopenharmony_ci *
4462306a36Sopenharmony_ci * As the free space cache uses bitmaps, there exists a tradeoff between
4562306a36Sopenharmony_ci * ease/efficiency for find_free_extent() and the accuracy of discard state.
4662306a36Sopenharmony_ci * Here we opt to let untrimmed regions merge with everything while only letting
4762306a36Sopenharmony_ci * trimmed regions merge with other trimmed regions.  This can cause
4862306a36Sopenharmony_ci * overtrimming, but the coalescing benefit seems to be worth it.  Additionally,
4962306a36Sopenharmony_ci * bitmap state is tracked as a whole.  If we're able to fully trim a bitmap,
5062306a36Sopenharmony_ci * the trimmed flag is set on the bitmap.  Otherwise, if an allocation comes in,
5162306a36Sopenharmony_ci * this resets the state and we will retry trimming the whole bitmap.  This is a
5262306a36Sopenharmony_ci * tradeoff between discard state accuracy and the cost of accounting.
5362306a36Sopenharmony_ci */
5462306a36Sopenharmony_ci
/*
 * Initial delays, in nanoseconds, added to the current time when a block
 * group is queued.  They give some chance for block reuse before discarding.
 * The shorter one is applied to block groups queued on the unused list.
 */
#define BTRFS_DISCARD_DELAY		(120ULL * NSEC_PER_SEC)
#define BTRFS_DISCARD_UNUSED_DELAY	(10ULL * NSEC_PER_SEC)

/* Limits, in milliseconds, for the base delay between two discards */
#define BTRFS_DISCARD_MIN_DELAY_MSEC	(1UL)
#define BTRFS_DISCARD_MAX_DELAY_MSEC	(1000UL)
#define BTRFS_DISCARD_MAX_IOPS		(1000U)

/*
 * Minimum length filter of each discard list, indexed by a block group's
 * discard_index.  Monotonically decreasing minimum length filters after
 * index 0.
 *
 * NOTE(review): index 0 is presumably BTRFS_DISCARD_INDEX_UNUSED, i.e. the
 * list of fully free block groups which are discarded without a filter -
 * confirm against discard.h.
 */
static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
	0,
	BTRFS_ASYNC_DISCARD_MAX_FILTER,
	BTRFS_ASYNC_DISCARD_MIN_FILTER
};
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_cistatic struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
7162306a36Sopenharmony_ci					  struct btrfs_block_group *block_group)
7262306a36Sopenharmony_ci{
7362306a36Sopenharmony_ci	return &discard_ctl->discard_list[block_group->discard_index];
7462306a36Sopenharmony_ci}
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci/*
7762306a36Sopenharmony_ci * Determine if async discard should be running.
7862306a36Sopenharmony_ci *
7962306a36Sopenharmony_ci * @discard_ctl: discard control
8062306a36Sopenharmony_ci *
8162306a36Sopenharmony_ci * Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
8262306a36Sopenharmony_ci */
8362306a36Sopenharmony_cistatic bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
8462306a36Sopenharmony_ci{
8562306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = container_of(discard_ctl,
8662306a36Sopenharmony_ci						     struct btrfs_fs_info,
8762306a36Sopenharmony_ci						     discard_ctl);
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci	return (!(fs_info->sb->s_flags & SB_RDONLY) &&
9062306a36Sopenharmony_ci		test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
9162306a36Sopenharmony_ci}
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_cistatic void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
9462306a36Sopenharmony_ci				  struct btrfs_block_group *block_group)
9562306a36Sopenharmony_ci{
9662306a36Sopenharmony_ci	lockdep_assert_held(&discard_ctl->lock);
9762306a36Sopenharmony_ci	if (!btrfs_run_discard_work(discard_ctl))
9862306a36Sopenharmony_ci		return;
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	if (list_empty(&block_group->discard_list) ||
10162306a36Sopenharmony_ci	    block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
10262306a36Sopenharmony_ci		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
10362306a36Sopenharmony_ci			block_group->discard_index = BTRFS_DISCARD_INDEX_START;
10462306a36Sopenharmony_ci		block_group->discard_eligible_time = (ktime_get_ns() +
10562306a36Sopenharmony_ci						      BTRFS_DISCARD_DELAY);
10662306a36Sopenharmony_ci		block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
10762306a36Sopenharmony_ci	}
10862306a36Sopenharmony_ci	if (list_empty(&block_group->discard_list))
10962306a36Sopenharmony_ci		btrfs_get_block_group(block_group);
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci	list_move_tail(&block_group->discard_list,
11262306a36Sopenharmony_ci		       get_discard_list(discard_ctl, block_group));
11362306a36Sopenharmony_ci}
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_cistatic void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
11662306a36Sopenharmony_ci				struct btrfs_block_group *block_group)
11762306a36Sopenharmony_ci{
11862306a36Sopenharmony_ci	if (!btrfs_is_block_group_data_only(block_group))
11962306a36Sopenharmony_ci		return;
12062306a36Sopenharmony_ci
12162306a36Sopenharmony_ci	spin_lock(&discard_ctl->lock);
12262306a36Sopenharmony_ci	__add_to_discard_list(discard_ctl, block_group);
12362306a36Sopenharmony_ci	spin_unlock(&discard_ctl->lock);
12462306a36Sopenharmony_ci}
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_cistatic void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
12762306a36Sopenharmony_ci				       struct btrfs_block_group *block_group)
12862306a36Sopenharmony_ci{
12962306a36Sopenharmony_ci	bool queued;
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ci	spin_lock(&discard_ctl->lock);
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci	queued = !list_empty(&block_group->discard_list);
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci	if (!btrfs_run_discard_work(discard_ctl)) {
13662306a36Sopenharmony_ci		spin_unlock(&discard_ctl->lock);
13762306a36Sopenharmony_ci		return;
13862306a36Sopenharmony_ci	}
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci	list_del_init(&block_group->discard_list);
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci	block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
14362306a36Sopenharmony_ci	block_group->discard_eligible_time = (ktime_get_ns() +
14462306a36Sopenharmony_ci					      BTRFS_DISCARD_UNUSED_DELAY);
14562306a36Sopenharmony_ci	block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
14662306a36Sopenharmony_ci	if (!queued)
14762306a36Sopenharmony_ci		btrfs_get_block_group(block_group);
14862306a36Sopenharmony_ci	list_add_tail(&block_group->discard_list,
14962306a36Sopenharmony_ci		      &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	spin_unlock(&discard_ctl->lock);
15262306a36Sopenharmony_ci}
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_cistatic bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
15562306a36Sopenharmony_ci				     struct btrfs_block_group *block_group)
15662306a36Sopenharmony_ci{
15762306a36Sopenharmony_ci	bool running = false;
15862306a36Sopenharmony_ci	bool queued = false;
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	spin_lock(&discard_ctl->lock);
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci	if (block_group == discard_ctl->block_group) {
16362306a36Sopenharmony_ci		running = true;
16462306a36Sopenharmony_ci		discard_ctl->block_group = NULL;
16562306a36Sopenharmony_ci	}
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_ci	block_group->discard_eligible_time = 0;
16862306a36Sopenharmony_ci	queued = !list_empty(&block_group->discard_list);
16962306a36Sopenharmony_ci	list_del_init(&block_group->discard_list);
17062306a36Sopenharmony_ci	/*
17162306a36Sopenharmony_ci	 * If the block group is currently running in the discard workfn, we
17262306a36Sopenharmony_ci	 * don't want to deref it, since it's still being used by the workfn.
17362306a36Sopenharmony_ci	 * The workfn will notice this case and deref the block group when it is
17462306a36Sopenharmony_ci	 * finished.
17562306a36Sopenharmony_ci	 */
17662306a36Sopenharmony_ci	if (queued && !running)
17762306a36Sopenharmony_ci		btrfs_put_block_group(block_group);
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci	spin_unlock(&discard_ctl->lock);
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	return running;
18262306a36Sopenharmony_ci}
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci/*
18562306a36Sopenharmony_ci * Find block_group that's up next for discarding.
18662306a36Sopenharmony_ci *
18762306a36Sopenharmony_ci * @discard_ctl:  discard control
18862306a36Sopenharmony_ci * @now:          current time
18962306a36Sopenharmony_ci *
19062306a36Sopenharmony_ci * Iterate over the discard lists to find the next block_group up for
19162306a36Sopenharmony_ci * discarding checking the discard_eligible_time of block_group.
19262306a36Sopenharmony_ci */
19362306a36Sopenharmony_cistatic struct btrfs_block_group *find_next_block_group(
19462306a36Sopenharmony_ci					struct btrfs_discard_ctl *discard_ctl,
19562306a36Sopenharmony_ci					u64 now)
19662306a36Sopenharmony_ci{
19762306a36Sopenharmony_ci	struct btrfs_block_group *ret_block_group = NULL, *block_group;
19862306a36Sopenharmony_ci	int i;
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
20162306a36Sopenharmony_ci		struct list_head *discard_list = &discard_ctl->discard_list[i];
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci		if (!list_empty(discard_list)) {
20462306a36Sopenharmony_ci			block_group = list_first_entry(discard_list,
20562306a36Sopenharmony_ci						       struct btrfs_block_group,
20662306a36Sopenharmony_ci						       discard_list);
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci			if (!ret_block_group)
20962306a36Sopenharmony_ci				ret_block_group = block_group;
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci			if (ret_block_group->discard_eligible_time < now)
21262306a36Sopenharmony_ci				break;
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci			if (ret_block_group->discard_eligible_time >
21562306a36Sopenharmony_ci			    block_group->discard_eligible_time)
21662306a36Sopenharmony_ci				ret_block_group = block_group;
21762306a36Sopenharmony_ci		}
21862306a36Sopenharmony_ci	}
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci	return ret_block_group;
22162306a36Sopenharmony_ci}
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_ci/*
22462306a36Sopenharmony_ci * Look up next block group and set it for use.
22562306a36Sopenharmony_ci *
22662306a36Sopenharmony_ci * @discard_ctl:   discard control
22762306a36Sopenharmony_ci * @discard_state: the discard_state of the block_group after state management
22862306a36Sopenharmony_ci * @discard_index: the discard_index of the block_group after state management
22962306a36Sopenharmony_ci * @now:           time when discard was invoked, in ns
23062306a36Sopenharmony_ci *
23162306a36Sopenharmony_ci * Wrap find_next_block_group() and set the block_group to be in use.
23262306a36Sopenharmony_ci * @discard_state's control flow is managed here.  Variables related to
23362306a36Sopenharmony_ci * @discard_state are reset here as needed (eg. @discard_cursor).  @discard_state
23462306a36Sopenharmony_ci * and @discard_index are remembered as it may change while we're discarding,
23562306a36Sopenharmony_ci * but we want the discard to execute in the context determined here.
23662306a36Sopenharmony_ci */
23762306a36Sopenharmony_cistatic struct btrfs_block_group *peek_discard_list(
23862306a36Sopenharmony_ci					struct btrfs_discard_ctl *discard_ctl,
23962306a36Sopenharmony_ci					enum btrfs_discard_state *discard_state,
24062306a36Sopenharmony_ci					int *discard_index, u64 now)
24162306a36Sopenharmony_ci{
24262306a36Sopenharmony_ci	struct btrfs_block_group *block_group;
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_ci	spin_lock(&discard_ctl->lock);
24562306a36Sopenharmony_ciagain:
24662306a36Sopenharmony_ci	block_group = find_next_block_group(discard_ctl, now);
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_ci	if (block_group && now >= block_group->discard_eligible_time) {
24962306a36Sopenharmony_ci		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
25062306a36Sopenharmony_ci		    block_group->used != 0) {
25162306a36Sopenharmony_ci			if (btrfs_is_block_group_data_only(block_group)) {
25262306a36Sopenharmony_ci				__add_to_discard_list(discard_ctl, block_group);
25362306a36Sopenharmony_ci			} else {
25462306a36Sopenharmony_ci				list_del_init(&block_group->discard_list);
25562306a36Sopenharmony_ci				btrfs_put_block_group(block_group);
25662306a36Sopenharmony_ci			}
25762306a36Sopenharmony_ci			goto again;
25862306a36Sopenharmony_ci		}
25962306a36Sopenharmony_ci		if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
26062306a36Sopenharmony_ci			block_group->discard_cursor = block_group->start;
26162306a36Sopenharmony_ci			block_group->discard_state = BTRFS_DISCARD_EXTENTS;
26262306a36Sopenharmony_ci		}
26362306a36Sopenharmony_ci		discard_ctl->block_group = block_group;
26462306a36Sopenharmony_ci	}
26562306a36Sopenharmony_ci	if (block_group) {
26662306a36Sopenharmony_ci		*discard_state = block_group->discard_state;
26762306a36Sopenharmony_ci		*discard_index = block_group->discard_index;
26862306a36Sopenharmony_ci	}
26962306a36Sopenharmony_ci	spin_unlock(&discard_ctl->lock);
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci	return block_group;
27262306a36Sopenharmony_ci}
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci/*
27562306a36Sopenharmony_ci * Update a block group's filters.
27662306a36Sopenharmony_ci *
27762306a36Sopenharmony_ci * @block_group:  block group of interest
27862306a36Sopenharmony_ci * @bytes:        recently freed region size after coalescing
27962306a36Sopenharmony_ci *
28062306a36Sopenharmony_ci * Async discard maintains multiple lists with progressively smaller filters
28162306a36Sopenharmony_ci * to prioritize discarding based on size.  Should a free space that matches
28262306a36Sopenharmony_ci * a larger filter be returned to the free_space_cache, prioritize that discard
28362306a36Sopenharmony_ci * by moving @block_group to the proper filter.
28462306a36Sopenharmony_ci */
28562306a36Sopenharmony_civoid btrfs_discard_check_filter(struct btrfs_block_group *block_group,
28662306a36Sopenharmony_ci				u64 bytes)
28762306a36Sopenharmony_ci{
28862306a36Sopenharmony_ci	struct btrfs_discard_ctl *discard_ctl;
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ci	if (!block_group ||
29162306a36Sopenharmony_ci	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
29262306a36Sopenharmony_ci		return;
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_ci	discard_ctl = &block_group->fs_info->discard_ctl;
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci	if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
29762306a36Sopenharmony_ci	    bytes >= discard_minlen[block_group->discard_index - 1]) {
29862306a36Sopenharmony_ci		int i;
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci		remove_from_discard_list(discard_ctl, block_group);
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ci		for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
30362306a36Sopenharmony_ci		     i++) {
30462306a36Sopenharmony_ci			if (bytes >= discard_minlen[i]) {
30562306a36Sopenharmony_ci				block_group->discard_index = i;
30662306a36Sopenharmony_ci				add_to_discard_list(discard_ctl, block_group);
30762306a36Sopenharmony_ci				break;
30862306a36Sopenharmony_ci			}
30962306a36Sopenharmony_ci		}
31062306a36Sopenharmony_ci	}
31162306a36Sopenharmony_ci}
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci/*
31462306a36Sopenharmony_ci * Move a block group along the discard lists.
31562306a36Sopenharmony_ci *
31662306a36Sopenharmony_ci * @discard_ctl: discard control
31762306a36Sopenharmony_ci * @block_group: block_group of interest
31862306a36Sopenharmony_ci *
31962306a36Sopenharmony_ci * Increment @block_group's discard_index.  If it falls of the list, let it be.
32062306a36Sopenharmony_ci * Otherwise add it back to the appropriate list.
32162306a36Sopenharmony_ci */
32262306a36Sopenharmony_cistatic void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
32362306a36Sopenharmony_ci				       struct btrfs_block_group *block_group)
32462306a36Sopenharmony_ci{
32562306a36Sopenharmony_ci	block_group->discard_index++;
32662306a36Sopenharmony_ci	if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
32762306a36Sopenharmony_ci		block_group->discard_index = 1;
32862306a36Sopenharmony_ci		return;
32962306a36Sopenharmony_ci	}
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_ci	add_to_discard_list(discard_ctl, block_group);
33262306a36Sopenharmony_ci}
33362306a36Sopenharmony_ci
33462306a36Sopenharmony_ci/*
33562306a36Sopenharmony_ci * Remove a block_group from the discard lists.
33662306a36Sopenharmony_ci *
33762306a36Sopenharmony_ci * @discard_ctl: discard control
33862306a36Sopenharmony_ci * @block_group: block_group of interest
33962306a36Sopenharmony_ci *
34062306a36Sopenharmony_ci * Remove @block_group from the discard lists.  If necessary, wait on the
34162306a36Sopenharmony_ci * current work and then reschedule the delayed work.
34262306a36Sopenharmony_ci */
34362306a36Sopenharmony_civoid btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
34462306a36Sopenharmony_ci			       struct btrfs_block_group *block_group)
34562306a36Sopenharmony_ci{
34662306a36Sopenharmony_ci	if (remove_from_discard_list(discard_ctl, block_group)) {
34762306a36Sopenharmony_ci		cancel_delayed_work_sync(&discard_ctl->work);
34862306a36Sopenharmony_ci		btrfs_discard_schedule_work(discard_ctl, true);
34962306a36Sopenharmony_ci	}
35062306a36Sopenharmony_ci}
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_ci/*
35362306a36Sopenharmony_ci * Handles queuing the block_groups.
35462306a36Sopenharmony_ci *
35562306a36Sopenharmony_ci * @discard_ctl: discard control
35662306a36Sopenharmony_ci * @block_group: block_group of interest
35762306a36Sopenharmony_ci *
35862306a36Sopenharmony_ci * Maintain the LRU order of the discard lists.
35962306a36Sopenharmony_ci */
36062306a36Sopenharmony_civoid btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
36162306a36Sopenharmony_ci			      struct btrfs_block_group *block_group)
36262306a36Sopenharmony_ci{
36362306a36Sopenharmony_ci	if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
36462306a36Sopenharmony_ci		return;
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_ci	if (block_group->used == 0)
36762306a36Sopenharmony_ci		add_to_discard_unused_list(discard_ctl, block_group);
36862306a36Sopenharmony_ci	else
36962306a36Sopenharmony_ci		add_to_discard_list(discard_ctl, block_group);
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_ci	if (!delayed_work_pending(&discard_ctl->work))
37262306a36Sopenharmony_ci		btrfs_discard_schedule_work(discard_ctl, false);
37362306a36Sopenharmony_ci}
37462306a36Sopenharmony_ci
37562306a36Sopenharmony_cistatic void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
37662306a36Sopenharmony_ci					  u64 now, bool override)
37762306a36Sopenharmony_ci{
37862306a36Sopenharmony_ci	struct btrfs_block_group *block_group;
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_ci	if (!btrfs_run_discard_work(discard_ctl))
38162306a36Sopenharmony_ci		return;
38262306a36Sopenharmony_ci	if (!override && delayed_work_pending(&discard_ctl->work))
38362306a36Sopenharmony_ci		return;
38462306a36Sopenharmony_ci
38562306a36Sopenharmony_ci	block_group = find_next_block_group(discard_ctl, now);
38662306a36Sopenharmony_ci	if (block_group) {
38762306a36Sopenharmony_ci		u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC;
38862306a36Sopenharmony_ci		u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_ci		/*
39162306a36Sopenharmony_ci		 * A single delayed workqueue item is responsible for
39262306a36Sopenharmony_ci		 * discarding, so we can manage the bytes rate limit by keeping
39362306a36Sopenharmony_ci		 * track of the previous discard.
39462306a36Sopenharmony_ci		 */
39562306a36Sopenharmony_ci		if (kbps_limit && discard_ctl->prev_discard) {
39662306a36Sopenharmony_ci			u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
39762306a36Sopenharmony_ci			u64 bps_delay = div64_u64(discard_ctl->prev_discard *
39862306a36Sopenharmony_ci						  NSEC_PER_SEC, bps_limit);
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_ci			delay = max(delay, bps_delay);
40162306a36Sopenharmony_ci		}
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci		/*
40462306a36Sopenharmony_ci		 * This timeout is to hopefully prevent immediate discarding
40562306a36Sopenharmony_ci		 * in a recently allocated block group.
40662306a36Sopenharmony_ci		 */
40762306a36Sopenharmony_ci		if (now < block_group->discard_eligible_time) {
40862306a36Sopenharmony_ci			u64 bg_timeout = block_group->discard_eligible_time - now;
40962306a36Sopenharmony_ci
41062306a36Sopenharmony_ci			delay = max(delay, bg_timeout);
41162306a36Sopenharmony_ci		}
41262306a36Sopenharmony_ci
41362306a36Sopenharmony_ci		if (override && discard_ctl->prev_discard) {
41462306a36Sopenharmony_ci			u64 elapsed = now - discard_ctl->prev_discard_time;
41562306a36Sopenharmony_ci
41662306a36Sopenharmony_ci			if (delay > elapsed)
41762306a36Sopenharmony_ci				delay -= elapsed;
41862306a36Sopenharmony_ci			else
41962306a36Sopenharmony_ci				delay = 0;
42062306a36Sopenharmony_ci		}
42162306a36Sopenharmony_ci
42262306a36Sopenharmony_ci		mod_delayed_work(discard_ctl->discard_workers,
42362306a36Sopenharmony_ci				 &discard_ctl->work, nsecs_to_jiffies(delay));
42462306a36Sopenharmony_ci	}
42562306a36Sopenharmony_ci}
42662306a36Sopenharmony_ci
42762306a36Sopenharmony_ci/*
42862306a36Sopenharmony_ci * Responsible for scheduling the discard work.
42962306a36Sopenharmony_ci *
43062306a36Sopenharmony_ci * @discard_ctl:  discard control
43162306a36Sopenharmony_ci * @override:     override the current timer
43262306a36Sopenharmony_ci *
43362306a36Sopenharmony_ci * Discards are issued by a delayed workqueue item.  @override is used to
43462306a36Sopenharmony_ci * update the current delay as the baseline delay interval is reevaluated on
43562306a36Sopenharmony_ci * transaction commit.  This is also maxed with any other rate limit.
43662306a36Sopenharmony_ci */
43762306a36Sopenharmony_civoid btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
43862306a36Sopenharmony_ci				 bool override)
43962306a36Sopenharmony_ci{
44062306a36Sopenharmony_ci	const u64 now = ktime_get_ns();
44162306a36Sopenharmony_ci
44262306a36Sopenharmony_ci	spin_lock(&discard_ctl->lock);
44362306a36Sopenharmony_ci	__btrfs_discard_schedule_work(discard_ctl, now, override);
44462306a36Sopenharmony_ci	spin_unlock(&discard_ctl->lock);
44562306a36Sopenharmony_ci}
44662306a36Sopenharmony_ci
44762306a36Sopenharmony_ci/*
44862306a36Sopenharmony_ci * Determine next step of a block_group.
44962306a36Sopenharmony_ci *
45062306a36Sopenharmony_ci * @discard_ctl: discard control
45162306a36Sopenharmony_ci * @block_group: block_group of interest
45262306a36Sopenharmony_ci *
45362306a36Sopenharmony_ci * Determine the next step for a block group after it's finished going through
45462306a36Sopenharmony_ci * a pass on a discard list.  If it is unused and fully trimmed, we can mark it
45562306a36Sopenharmony_ci * unused and send it to the unused_bgs path.  Otherwise, pass it onto the
45662306a36Sopenharmony_ci * appropriate filter list or let it fall off.
45762306a36Sopenharmony_ci */
45862306a36Sopenharmony_cistatic void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
45962306a36Sopenharmony_ci				      struct btrfs_block_group *block_group)
46062306a36Sopenharmony_ci{
46162306a36Sopenharmony_ci	remove_from_discard_list(discard_ctl, block_group);
46262306a36Sopenharmony_ci
46362306a36Sopenharmony_ci	if (block_group->used == 0) {
46462306a36Sopenharmony_ci		if (btrfs_is_free_space_trimmed(block_group))
46562306a36Sopenharmony_ci			btrfs_mark_bg_unused(block_group);
46662306a36Sopenharmony_ci		else
46762306a36Sopenharmony_ci			add_to_discard_unused_list(discard_ctl, block_group);
46862306a36Sopenharmony_ci	} else {
46962306a36Sopenharmony_ci		btrfs_update_discard_index(discard_ctl, block_group);
47062306a36Sopenharmony_ci	}
47162306a36Sopenharmony_ci}
47262306a36Sopenharmony_ci
/*
 * Discard work queue callback
 *
 * @work: work
 *
 * Find the next block_group to start discarding and then discard a single
 * region.  It does this in a two-pass fashion: first extents and second
 * bitmaps.  Completely discarded block groups are sent to the unused_bgs path.
 *
 * After the trim, the amount trimmed and the time are recorded in
 * @discard_ctl for the rate limiting done by __btrfs_discard_schedule_work(),
 * and the work is scheduled again.
 */
static void btrfs_discard_workfn(struct work_struct *work)
{
	struct btrfs_discard_ctl *discard_ctl;
	struct btrfs_block_group *block_group;
	enum btrfs_discard_state discard_state;
	int discard_index = 0;
	u64 trimmed = 0;
	u64 minlen = 0;
	u64 now = ktime_get_ns();

	discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);

	/*
	 * discard_state and discard_index are snapshots taken under the lock;
	 * the block group's own fields may change while we are trimming.
	 */
	block_group = peek_discard_list(discard_ctl, &discard_state,
					&discard_index, now);
	/* Nothing queued, or discard must not run (see btrfs_run_discard_work) */
	if (!block_group || !btrfs_run_discard_work(discard_ctl))
		return;
	/* The next block group is not eligible yet, try again later */
	if (now < block_group->discard_eligible_time) {
		btrfs_discard_schedule_work(discard_ctl, false);
		return;
	}

	/* Perform discarding */
	minlen = discard_minlen[discard_index];

	if (discard_state == BTRFS_DISCARD_BITMAPS) {
		u64 maxlen = 0;

		/*
		 * Use the previous levels minimum discard length as the max
		 * length filter.  In the case something is added to make a
		 * region go beyond the max filter, the entire bitmap is set
		 * back to BTRFS_TRIM_STATE_UNTRIMMED.
		 */
		if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
			maxlen = discard_minlen[discard_index - 1];

		btrfs_trim_block_group_bitmaps(block_group, &trimmed,
				       block_group->discard_cursor,
				       btrfs_block_group_end(block_group),
				       minlen, maxlen, true);
		discard_ctl->discard_bitmap_bytes += trimmed;
	} else {
		btrfs_trim_block_group_extents(block_group, &trimmed,
				       block_group->discard_cursor,
				       btrfs_block_group_end(block_group),
				       minlen, true);
		discard_ctl->discard_extent_bytes += trimmed;
	}

	/* Determine next steps for a block_group */
	if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
		if (discard_state == BTRFS_DISCARD_BITMAPS) {
			/* Both passes are done for this list */
			btrfs_finish_discard_pass(discard_ctl, block_group);
		} else {
			/*
			 * The extent pass is done: rewind the cursor and move
			 * on to the bitmap pass, unless a cursor reset was
			 * requested in the meantime.
			 */
			block_group->discard_cursor = block_group->start;
			spin_lock(&discard_ctl->lock);
			if (block_group->discard_state !=
			    BTRFS_DISCARD_RESET_CURSOR)
				block_group->discard_state =
							BTRFS_DISCARD_BITMAPS;
			spin_unlock(&discard_ctl->lock);
		}
	}

	now = ktime_get_ns();
	spin_lock(&discard_ctl->lock);
	/* Remembered for the rate limiting of the next scheduling */
	discard_ctl->prev_discard = trimmed;
	discard_ctl->prev_discard_time = now;
	/*
	 * If the block group was removed from the discard list while it was
	 * running in this workfn, then we didn't deref it, since this function
	 * still owned that reference. But we set the discard_ctl->block_group
	 * back to NULL, so we can use that condition to know that now we need
	 * to deref the block_group.
	 */
	if (discard_ctl->block_group == NULL)
		btrfs_put_block_group(block_group);
	discard_ctl->block_group = NULL;
	__btrfs_discard_schedule_work(discard_ctl, now, false);
	spin_unlock(&discard_ctl->lock);
}
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci/*
56562306a36Sopenharmony_ci * Recalculate the base delay.
56662306a36Sopenharmony_ci *
56762306a36Sopenharmony_ci * @discard_ctl: discard control
56862306a36Sopenharmony_ci *
56962306a36Sopenharmony_ci * Recalculate the base delay which is based off the total number of
57062306a36Sopenharmony_ci * discardable_extents.  Clamp this between the lower_limit (iops_limit or 1ms)
57162306a36Sopenharmony_ci * and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC).
57262306a36Sopenharmony_ci */
57362306a36Sopenharmony_civoid btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
57462306a36Sopenharmony_ci{
57562306a36Sopenharmony_ci	s32 discardable_extents;
57662306a36Sopenharmony_ci	s64 discardable_bytes;
57762306a36Sopenharmony_ci	u32 iops_limit;
57862306a36Sopenharmony_ci	unsigned long min_delay = BTRFS_DISCARD_MIN_DELAY_MSEC;
57962306a36Sopenharmony_ci	unsigned long delay;
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_ci	discardable_extents = atomic_read(&discard_ctl->discardable_extents);
58262306a36Sopenharmony_ci	if (!discardable_extents)
58362306a36Sopenharmony_ci		return;
58462306a36Sopenharmony_ci
58562306a36Sopenharmony_ci	spin_lock(&discard_ctl->lock);
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_ci	/*
58862306a36Sopenharmony_ci	 * The following is to fix a potential -1 discrepancy that we're not
58962306a36Sopenharmony_ci	 * sure how to reproduce. But given that this is the only place that
59062306a36Sopenharmony_ci	 * utilizes these numbers and this is only called by from
59162306a36Sopenharmony_ci	 * btrfs_finish_extent_commit() which is synchronized, we can correct
59262306a36Sopenharmony_ci	 * here.
59362306a36Sopenharmony_ci	 */
59462306a36Sopenharmony_ci	if (discardable_extents < 0)
59562306a36Sopenharmony_ci		atomic_add(-discardable_extents,
59662306a36Sopenharmony_ci			   &discard_ctl->discardable_extents);
59762306a36Sopenharmony_ci
59862306a36Sopenharmony_ci	discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
59962306a36Sopenharmony_ci	if (discardable_bytes < 0)
60062306a36Sopenharmony_ci		atomic64_add(-discardable_bytes,
60162306a36Sopenharmony_ci			     &discard_ctl->discardable_bytes);
60262306a36Sopenharmony_ci
60362306a36Sopenharmony_ci	if (discardable_extents <= 0) {
60462306a36Sopenharmony_ci		spin_unlock(&discard_ctl->lock);
60562306a36Sopenharmony_ci		return;
60662306a36Sopenharmony_ci	}
60762306a36Sopenharmony_ci
60862306a36Sopenharmony_ci	iops_limit = READ_ONCE(discard_ctl->iops_limit);
60962306a36Sopenharmony_ci
61062306a36Sopenharmony_ci	if (iops_limit) {
61162306a36Sopenharmony_ci		delay = MSEC_PER_SEC / iops_limit;
61262306a36Sopenharmony_ci	} else {
61362306a36Sopenharmony_ci		/*
61462306a36Sopenharmony_ci		 * Unset iops_limit means go as fast as possible, so allow a
61562306a36Sopenharmony_ci		 * delay of 0.
61662306a36Sopenharmony_ci		 */
61762306a36Sopenharmony_ci		delay = 0;
61862306a36Sopenharmony_ci		min_delay = 0;
61962306a36Sopenharmony_ci	}
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci	delay = clamp(delay, min_delay, BTRFS_DISCARD_MAX_DELAY_MSEC);
62262306a36Sopenharmony_ci	discard_ctl->delay_ms = delay;
62362306a36Sopenharmony_ci
62462306a36Sopenharmony_ci	spin_unlock(&discard_ctl->lock);
62562306a36Sopenharmony_ci}
62662306a36Sopenharmony_ci
62762306a36Sopenharmony_ci/*
62862306a36Sopenharmony_ci * Propagate discard counters.
62962306a36Sopenharmony_ci *
63062306a36Sopenharmony_ci * @block_group: block_group of interest
63162306a36Sopenharmony_ci *
63262306a36Sopenharmony_ci * Propagate deltas of counters up to the discard_ctl.  It maintains a current
63362306a36Sopenharmony_ci * counter and a previous counter passing the delta up to the global stat.
63462306a36Sopenharmony_ci * Then the current counter value becomes the previous counter value.
63562306a36Sopenharmony_ci */
63662306a36Sopenharmony_civoid btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
63762306a36Sopenharmony_ci{
63862306a36Sopenharmony_ci	struct btrfs_free_space_ctl *ctl;
63962306a36Sopenharmony_ci	struct btrfs_discard_ctl *discard_ctl;
64062306a36Sopenharmony_ci	s32 extents_delta;
64162306a36Sopenharmony_ci	s64 bytes_delta;
64262306a36Sopenharmony_ci
64362306a36Sopenharmony_ci	if (!block_group ||
64462306a36Sopenharmony_ci	    !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
64562306a36Sopenharmony_ci	    !btrfs_is_block_group_data_only(block_group))
64662306a36Sopenharmony_ci		return;
64762306a36Sopenharmony_ci
64862306a36Sopenharmony_ci	ctl = block_group->free_space_ctl;
64962306a36Sopenharmony_ci	discard_ctl = &block_group->fs_info->discard_ctl;
65062306a36Sopenharmony_ci
65162306a36Sopenharmony_ci	lockdep_assert_held(&ctl->tree_lock);
65262306a36Sopenharmony_ci	extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
65362306a36Sopenharmony_ci			ctl->discardable_extents[BTRFS_STAT_PREV];
65462306a36Sopenharmony_ci	if (extents_delta) {
65562306a36Sopenharmony_ci		atomic_add(extents_delta, &discard_ctl->discardable_extents);
65662306a36Sopenharmony_ci		ctl->discardable_extents[BTRFS_STAT_PREV] =
65762306a36Sopenharmony_ci			ctl->discardable_extents[BTRFS_STAT_CURR];
65862306a36Sopenharmony_ci	}
65962306a36Sopenharmony_ci
66062306a36Sopenharmony_ci	bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
66162306a36Sopenharmony_ci		      ctl->discardable_bytes[BTRFS_STAT_PREV];
66262306a36Sopenharmony_ci	if (bytes_delta) {
66362306a36Sopenharmony_ci		atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
66462306a36Sopenharmony_ci		ctl->discardable_bytes[BTRFS_STAT_PREV] =
66562306a36Sopenharmony_ci			ctl->discardable_bytes[BTRFS_STAT_CURR];
66662306a36Sopenharmony_ci	}
66762306a36Sopenharmony_ci}
66862306a36Sopenharmony_ci
66962306a36Sopenharmony_ci/*
67062306a36Sopenharmony_ci * Punt unused_bgs list to discard lists.
67162306a36Sopenharmony_ci *
67262306a36Sopenharmony_ci * @fs_info: fs_info of interest
67362306a36Sopenharmony_ci *
67462306a36Sopenharmony_ci * The unused_bgs list needs to be punted to the discard lists because the
67562306a36Sopenharmony_ci * order of operations is changed.  In the normal synchronous discard path, the
67662306a36Sopenharmony_ci * block groups are trimmed via a single large trim in transaction commit.  This
67762306a36Sopenharmony_ci * is ultimately what we are trying to avoid with asynchronous discard.  Thus,
67862306a36Sopenharmony_ci * it must be done before going down the unused_bgs path.
67962306a36Sopenharmony_ci */
68062306a36Sopenharmony_civoid btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
68162306a36Sopenharmony_ci{
68262306a36Sopenharmony_ci	struct btrfs_block_group *block_group, *next;
68362306a36Sopenharmony_ci
68462306a36Sopenharmony_ci	spin_lock(&fs_info->unused_bgs_lock);
68562306a36Sopenharmony_ci	/* We enabled async discard, so punt all to the queue */
68662306a36Sopenharmony_ci	list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
68762306a36Sopenharmony_ci				 bg_list) {
68862306a36Sopenharmony_ci		list_del_init(&block_group->bg_list);
68962306a36Sopenharmony_ci		btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
69062306a36Sopenharmony_ci		/*
69162306a36Sopenharmony_ci		 * This put is for the get done by btrfs_mark_bg_unused.
69262306a36Sopenharmony_ci		 * Queueing discard incremented it for discard's reference.
69362306a36Sopenharmony_ci		 */
69462306a36Sopenharmony_ci		btrfs_put_block_group(block_group);
69562306a36Sopenharmony_ci	}
69662306a36Sopenharmony_ci	spin_unlock(&fs_info->unused_bgs_lock);
69762306a36Sopenharmony_ci}
69862306a36Sopenharmony_ci
69962306a36Sopenharmony_ci/*
70062306a36Sopenharmony_ci * Purge discard lists.
70162306a36Sopenharmony_ci *
70262306a36Sopenharmony_ci * @discard_ctl: discard control
70362306a36Sopenharmony_ci *
70462306a36Sopenharmony_ci * If we are disabling async discard, we may have intercepted block groups that
70562306a36Sopenharmony_ci * are completely free and ready for the unused_bgs path.  As discarding will
70662306a36Sopenharmony_ci * now happen in transaction commit or not at all, we can safely mark the
70762306a36Sopenharmony_ci * corresponding block groups as unused and they will be sent on their merry
70862306a36Sopenharmony_ci * way to the unused_bgs list.
70962306a36Sopenharmony_ci */
71062306a36Sopenharmony_cistatic void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
71162306a36Sopenharmony_ci{
71262306a36Sopenharmony_ci	struct btrfs_block_group *block_group, *next;
71362306a36Sopenharmony_ci	int i;
71462306a36Sopenharmony_ci
71562306a36Sopenharmony_ci	spin_lock(&discard_ctl->lock);
71662306a36Sopenharmony_ci	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
71762306a36Sopenharmony_ci		list_for_each_entry_safe(block_group, next,
71862306a36Sopenharmony_ci					 &discard_ctl->discard_list[i],
71962306a36Sopenharmony_ci					 discard_list) {
72062306a36Sopenharmony_ci			list_del_init(&block_group->discard_list);
72162306a36Sopenharmony_ci			spin_unlock(&discard_ctl->lock);
72262306a36Sopenharmony_ci			if (block_group->used == 0)
72362306a36Sopenharmony_ci				btrfs_mark_bg_unused(block_group);
72462306a36Sopenharmony_ci			spin_lock(&discard_ctl->lock);
72562306a36Sopenharmony_ci			btrfs_put_block_group(block_group);
72662306a36Sopenharmony_ci		}
72762306a36Sopenharmony_ci	}
72862306a36Sopenharmony_ci	spin_unlock(&discard_ctl->lock);
72962306a36Sopenharmony_ci}
73062306a36Sopenharmony_ci
73162306a36Sopenharmony_civoid btrfs_discard_resume(struct btrfs_fs_info *fs_info)
73262306a36Sopenharmony_ci{
73362306a36Sopenharmony_ci	if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
73462306a36Sopenharmony_ci		btrfs_discard_cleanup(fs_info);
73562306a36Sopenharmony_ci		return;
73662306a36Sopenharmony_ci	}
73762306a36Sopenharmony_ci
73862306a36Sopenharmony_ci	btrfs_discard_punt_unused_bgs_list(fs_info);
73962306a36Sopenharmony_ci
74062306a36Sopenharmony_ci	set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
74162306a36Sopenharmony_ci}
74262306a36Sopenharmony_ci
74362306a36Sopenharmony_civoid btrfs_discard_stop(struct btrfs_fs_info *fs_info)
74462306a36Sopenharmony_ci{
74562306a36Sopenharmony_ci	clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
74662306a36Sopenharmony_ci}
74762306a36Sopenharmony_ci
74862306a36Sopenharmony_civoid btrfs_discard_init(struct btrfs_fs_info *fs_info)
74962306a36Sopenharmony_ci{
75062306a36Sopenharmony_ci	struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
75162306a36Sopenharmony_ci	int i;
75262306a36Sopenharmony_ci
75362306a36Sopenharmony_ci	spin_lock_init(&discard_ctl->lock);
75462306a36Sopenharmony_ci	INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);
75562306a36Sopenharmony_ci
75662306a36Sopenharmony_ci	for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
75762306a36Sopenharmony_ci		INIT_LIST_HEAD(&discard_ctl->discard_list[i]);
75862306a36Sopenharmony_ci
75962306a36Sopenharmony_ci	discard_ctl->prev_discard = 0;
76062306a36Sopenharmony_ci	discard_ctl->prev_discard_time = 0;
76162306a36Sopenharmony_ci	atomic_set(&discard_ctl->discardable_extents, 0);
76262306a36Sopenharmony_ci	atomic64_set(&discard_ctl->discardable_bytes, 0);
76362306a36Sopenharmony_ci	discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
76462306a36Sopenharmony_ci	discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC;
76562306a36Sopenharmony_ci	discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
76662306a36Sopenharmony_ci	discard_ctl->kbps_limit = 0;
76762306a36Sopenharmony_ci	discard_ctl->discard_extent_bytes = 0;
76862306a36Sopenharmony_ci	discard_ctl->discard_bitmap_bytes = 0;
76962306a36Sopenharmony_ci	atomic64_set(&discard_ctl->discard_bytes_saved, 0);
77062306a36Sopenharmony_ci}
77162306a36Sopenharmony_ci
77262306a36Sopenharmony_civoid btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
77362306a36Sopenharmony_ci{
77462306a36Sopenharmony_ci	btrfs_discard_stop(fs_info);
77562306a36Sopenharmony_ci	cancel_delayed_work_sync(&fs_info->discard_ctl.work);
77662306a36Sopenharmony_ci	btrfs_discard_purge_list(&fs_info->discard_ctl);
77762306a36Sopenharmony_ci}
778