18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci 38c2ecf20Sopenharmony_ci#include "misc.h" 48c2ecf20Sopenharmony_ci#include "ctree.h" 58c2ecf20Sopenharmony_ci#include "space-info.h" 68c2ecf20Sopenharmony_ci#include "sysfs.h" 78c2ecf20Sopenharmony_ci#include "volumes.h" 88c2ecf20Sopenharmony_ci#include "free-space-cache.h" 98c2ecf20Sopenharmony_ci#include "ordered-data.h" 108c2ecf20Sopenharmony_ci#include "transaction.h" 118c2ecf20Sopenharmony_ci#include "block-group.h" 128c2ecf20Sopenharmony_ci 138c2ecf20Sopenharmony_ci/* 148c2ecf20Sopenharmony_ci * HOW DOES SPACE RESERVATION WORK 158c2ecf20Sopenharmony_ci * 168c2ecf20Sopenharmony_ci * If you want to know about delalloc specifically, there is a separate comment 178c2ecf20Sopenharmony_ci * for that with the delalloc code. This comment is about how the whole system 188c2ecf20Sopenharmony_ci * works generally. 198c2ecf20Sopenharmony_ci * 208c2ecf20Sopenharmony_ci * BASIC CONCEPTS 218c2ecf20Sopenharmony_ci * 228c2ecf20Sopenharmony_ci * 1) space_info. This is the ultimate arbiter of how much space we can use. 238c2ecf20Sopenharmony_ci * There's a description of the bytes_ fields with the struct declaration, 248c2ecf20Sopenharmony_ci * refer to that for specifics on each field. Suffice it to say that for 258c2ecf20Sopenharmony_ci * reservations we care about total_bytes - SUM(space_info->bytes_) when 268c2ecf20Sopenharmony_ci * determining if there is space to make an allocation. There is a space_info 278c2ecf20Sopenharmony_ci * for METADATA, SYSTEM, and DATA areas. 288c2ecf20Sopenharmony_ci * 298c2ecf20Sopenharmony_ci * 2) block_rsv's. These are basically buckets for every different type of 308c2ecf20Sopenharmony_ci * metadata reservation we have. You can see the comment in the block_rsv 318c2ecf20Sopenharmony_ci * code on the rules for each type, but generally block_rsv->reserved is how 328c2ecf20Sopenharmony_ci * much space is accounted for in space_info->bytes_may_use. 
*
 *   3) btrfs_calc*_size.  These are the worst case calculations we use based
 *   on the number of items we will want to modify.  We have one for changing
 *   items, and one for inserting new items.  Generally we use these helpers to
 *   determine the size of the block reserves, and then use the actual bytes
 *   values to adjust the space_info counters.
 *
 * MAKING RESERVATIONS, THE NORMAL CASE
 *
 *   We call into either btrfs_reserve_data_bytes() or
 *   btrfs_reserve_metadata_bytes(), depending on which we're looking for, with
 *   num_bytes we want to reserve.
 *
 *   ->reserve
 *     space_info->bytes_may_use += num_bytes
 *
 *   ->extent allocation
 *     Call btrfs_add_reserved_bytes() which does
 *     space_info->bytes_may_use -= num_bytes
 *     space_info->bytes_reserved += extent_bytes
 *
 *   ->insert reference
 *     Call btrfs_update_block_group() which does
 *     space_info->bytes_reserved -= extent_bytes
 *     space_info->bytes_used += extent_bytes
 *
 * MAKING RESERVATIONS, FLUSHING NORMALLY (non-priority)
 *
 *   Assume we are unable to simply make the reservation because we do not have
 *   enough space
 *
 *   -> __reserve_bytes
 *     create a reserve_ticket with ->bytes set to our reservation, add it to
 *     the tail of space_info->tickets, kick async flush thread
 *
 *   ->handle_reserve_ticket
 *     wait on ticket->wait for ->bytes to be reduced to 0, or ->error to be set
 *     on the ticket.
 *
 *   -> btrfs_async_reclaim_metadata_space/btrfs_async_reclaim_data_space
 *     Flushes various things attempting to free up space.
 *
 *   -> btrfs_try_granting_tickets()
 *     This is called by anything that either subtracts space from
 *     space_info->bytes_may_use, ->bytes_pinned, etc, or adds to the
 *     space_info->total_bytes.  This loops through the ->priority_tickets and
 *     then the ->tickets list checking to see if the reservation can be
 *     completed.  If it can the space is added to space_info->bytes_may_use and
 *     the ticket is woken up.
 *
 *   -> ticket wakeup
 *     Check if ->bytes == 0; if so we got our reservation and we can carry
 *     on, if not return the appropriate error (ENOSPC, but can be EINTR if we
 *     were interrupted.)
 *
 * MAKING RESERVATIONS, FLUSHING HIGH PRIORITY
 *
 *   Same as the above, except we add ourselves to the
 *   space_info->priority_tickets, and we do not use ticket->wait, we simply
 *   call flush_space() ourselves for the states that are safe for us to call
 *   without deadlocking and hope for the best.
*
 * THE FLUSHING STATES
 *
 *   Generally speaking we will have two cases for each state, a "nice" state
 *   and an "ALL THE THINGS" state.  In btrfs we delay a lot of work in order to
 *   reduce the locking overhead on the various trees, and even to keep from
 *   doing any work at all in the case of delayed refs.  Each of these delayed
 *   things however hold reservations, and so letting them run allows us to
 *   reclaim space so we can make new reservations.
 *
 *   FLUSH_DELAYED_ITEMS
 *     Every inode has a delayed item to update the inode.  Take a simple write
 *     for example, we would update the inode item at write time to update the
 *     mtime, and then again at finish_ordered_io() time in order to update the
 *     isize or bytes.  We keep these delayed items to coalesce these operations
 *     into a single operation done on demand.  These are an easy way to reclaim
 *     metadata space.
 *
 *   FLUSH_DELALLOC
 *     Look at the delalloc comment to get an idea of how much space is reserved
 *     for delayed allocation.  We can reclaim some of this space simply by
 *     running delalloc, but usually we need to wait for ordered extents to
 *     reclaim the bulk of this space.
*
 *   FLUSH_DELAYED_REFS
 *     We have a block reserve for the outstanding delayed refs space, and every
 *     delayed ref operation holds a reservation.  Running these is a quick way
 *     to reclaim space, but we want to hold this until the end because COW can
 *     churn a lot and we can avoid making some extent tree modifications if we
 *     are able to delay for as long as possible.
 *
 *   ALLOC_CHUNK
 *     We will skip this the first time through space reservation, because of
 *     overcommit and we don't want to have a lot of useless metadata space when
 *     our worst case reservations will likely never come true.
 *
 *   RUN_DELAYED_IPUTS
 *     If we're freeing inodes we're likely freeing checksums, file extent
 *     items, and extent tree items.  Loads of space could be freed up by these
 *     operations, however they won't be usable until the transaction commits.
 *
 *   COMMIT_TRANS
 *     may_commit_transaction() is the ultimate arbiter on whether we commit the
 *     transaction or not.  In order to avoid constantly churning we do all the
 *     above flushing first and then commit the transaction as the last resort.
 *     However we need to take into account things like pinned space that would
 *     be freed, plus any delayed work we may not have gotten rid of in the case
 *     of metadata.
1428c2ecf20Sopenharmony_ci * 1438c2ecf20Sopenharmony_ci * OVERCOMMIT 1448c2ecf20Sopenharmony_ci * 1458c2ecf20Sopenharmony_ci * Because we hold so many reservations for metadata we will allow you to 1468c2ecf20Sopenharmony_ci * reserve more space than is currently free in the currently allocate 1478c2ecf20Sopenharmony_ci * metadata space. This only happens with metadata, data does not allow 1488c2ecf20Sopenharmony_ci * overcommitting. 1498c2ecf20Sopenharmony_ci * 1508c2ecf20Sopenharmony_ci * You can see the current logic for when we allow overcommit in 1518c2ecf20Sopenharmony_ci * btrfs_can_overcommit(), but it only applies to unallocated space. If there 1528c2ecf20Sopenharmony_ci * is no unallocated space to be had, all reservations are kept within the 1538c2ecf20Sopenharmony_ci * free space in the allocated metadata chunks. 1548c2ecf20Sopenharmony_ci * 1558c2ecf20Sopenharmony_ci * Because of overcommitting, you generally want to use the 1568c2ecf20Sopenharmony_ci * btrfs_can_overcommit() logic for metadata allocations, as it does the right 1578c2ecf20Sopenharmony_ci * thing with or without extra unallocated space. 1588c2ecf20Sopenharmony_ci */ 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ciu64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info, 1618c2ecf20Sopenharmony_ci bool may_use_included) 1628c2ecf20Sopenharmony_ci{ 1638c2ecf20Sopenharmony_ci ASSERT(s_info); 1648c2ecf20Sopenharmony_ci return s_info->bytes_used + s_info->bytes_reserved + 1658c2ecf20Sopenharmony_ci s_info->bytes_pinned + s_info->bytes_readonly + 1668c2ecf20Sopenharmony_ci (may_use_included ? s_info->bytes_may_use : 0); 1678c2ecf20Sopenharmony_ci} 1688c2ecf20Sopenharmony_ci 1698c2ecf20Sopenharmony_ci/* 1708c2ecf20Sopenharmony_ci * after adding space to the filesystem, we need to clear the full flags 1718c2ecf20Sopenharmony_ci * on all the space infos. 
1728c2ecf20Sopenharmony_ci */ 1738c2ecf20Sopenharmony_civoid btrfs_clear_space_info_full(struct btrfs_fs_info *info) 1748c2ecf20Sopenharmony_ci{ 1758c2ecf20Sopenharmony_ci struct list_head *head = &info->space_info; 1768c2ecf20Sopenharmony_ci struct btrfs_space_info *found; 1778c2ecf20Sopenharmony_ci 1788c2ecf20Sopenharmony_ci list_for_each_entry(found, head, list) 1798c2ecf20Sopenharmony_ci found->full = 0; 1808c2ecf20Sopenharmony_ci} 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_cistatic int create_space_info(struct btrfs_fs_info *info, u64 flags) 1838c2ecf20Sopenharmony_ci{ 1848c2ecf20Sopenharmony_ci 1858c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info; 1868c2ecf20Sopenharmony_ci int i; 1878c2ecf20Sopenharmony_ci int ret; 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ci space_info = kzalloc(sizeof(*space_info), GFP_NOFS); 1908c2ecf20Sopenharmony_ci if (!space_info) 1918c2ecf20Sopenharmony_ci return -ENOMEM; 1928c2ecf20Sopenharmony_ci 1938c2ecf20Sopenharmony_ci ret = percpu_counter_init(&space_info->total_bytes_pinned, 0, 1948c2ecf20Sopenharmony_ci GFP_KERNEL); 1958c2ecf20Sopenharmony_ci if (ret) { 1968c2ecf20Sopenharmony_ci kfree(space_info); 1978c2ecf20Sopenharmony_ci return ret; 1988c2ecf20Sopenharmony_ci } 1998c2ecf20Sopenharmony_ci 2008c2ecf20Sopenharmony_ci for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) 2018c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&space_info->block_groups[i]); 2028c2ecf20Sopenharmony_ci init_rwsem(&space_info->groups_sem); 2038c2ecf20Sopenharmony_ci spin_lock_init(&space_info->lock); 2048c2ecf20Sopenharmony_ci space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK; 2058c2ecf20Sopenharmony_ci space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; 2068c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&space_info->ro_bgs); 2078c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&space_info->tickets); 2088c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&space_info->priority_tickets); 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ci ret = 
btrfs_sysfs_add_space_info_type(info, space_info); 2118c2ecf20Sopenharmony_ci if (ret) 2128c2ecf20Sopenharmony_ci return ret; 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_ci list_add(&space_info->list, &info->space_info); 2158c2ecf20Sopenharmony_ci if (flags & BTRFS_BLOCK_GROUP_DATA) 2168c2ecf20Sopenharmony_ci info->data_sinfo = space_info; 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci return ret; 2198c2ecf20Sopenharmony_ci} 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ciint btrfs_init_space_info(struct btrfs_fs_info *fs_info) 2228c2ecf20Sopenharmony_ci{ 2238c2ecf20Sopenharmony_ci struct btrfs_super_block *disk_super; 2248c2ecf20Sopenharmony_ci u64 features; 2258c2ecf20Sopenharmony_ci u64 flags; 2268c2ecf20Sopenharmony_ci int mixed = 0; 2278c2ecf20Sopenharmony_ci int ret; 2288c2ecf20Sopenharmony_ci 2298c2ecf20Sopenharmony_ci disk_super = fs_info->super_copy; 2308c2ecf20Sopenharmony_ci if (!btrfs_super_root(disk_super)) 2318c2ecf20Sopenharmony_ci return -EINVAL; 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_ci features = btrfs_super_incompat_flags(disk_super); 2348c2ecf20Sopenharmony_ci if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) 2358c2ecf20Sopenharmony_ci mixed = 1; 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_ci flags = BTRFS_BLOCK_GROUP_SYSTEM; 2388c2ecf20Sopenharmony_ci ret = create_space_info(fs_info, flags); 2398c2ecf20Sopenharmony_ci if (ret) 2408c2ecf20Sopenharmony_ci goto out; 2418c2ecf20Sopenharmony_ci 2428c2ecf20Sopenharmony_ci if (mixed) { 2438c2ecf20Sopenharmony_ci flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; 2448c2ecf20Sopenharmony_ci ret = create_space_info(fs_info, flags); 2458c2ecf20Sopenharmony_ci } else { 2468c2ecf20Sopenharmony_ci flags = BTRFS_BLOCK_GROUP_METADATA; 2478c2ecf20Sopenharmony_ci ret = create_space_info(fs_info, flags); 2488c2ecf20Sopenharmony_ci if (ret) 2498c2ecf20Sopenharmony_ci goto out; 2508c2ecf20Sopenharmony_ci 2518c2ecf20Sopenharmony_ci flags = BTRFS_BLOCK_GROUP_DATA; 
2528c2ecf20Sopenharmony_ci ret = create_space_info(fs_info, flags); 2538c2ecf20Sopenharmony_ci } 2548c2ecf20Sopenharmony_ciout: 2558c2ecf20Sopenharmony_ci return ret; 2568c2ecf20Sopenharmony_ci} 2578c2ecf20Sopenharmony_ci 2588c2ecf20Sopenharmony_civoid btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags, 2598c2ecf20Sopenharmony_ci u64 total_bytes, u64 bytes_used, 2608c2ecf20Sopenharmony_ci u64 bytes_readonly, 2618c2ecf20Sopenharmony_ci struct btrfs_space_info **space_info) 2628c2ecf20Sopenharmony_ci{ 2638c2ecf20Sopenharmony_ci struct btrfs_space_info *found; 2648c2ecf20Sopenharmony_ci int factor; 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_ci factor = btrfs_bg_type_to_factor(flags); 2678c2ecf20Sopenharmony_ci 2688c2ecf20Sopenharmony_ci found = btrfs_find_space_info(info, flags); 2698c2ecf20Sopenharmony_ci ASSERT(found); 2708c2ecf20Sopenharmony_ci spin_lock(&found->lock); 2718c2ecf20Sopenharmony_ci found->total_bytes += total_bytes; 2728c2ecf20Sopenharmony_ci found->disk_total += total_bytes * factor; 2738c2ecf20Sopenharmony_ci found->bytes_used += bytes_used; 2748c2ecf20Sopenharmony_ci found->disk_used += bytes_used * factor; 2758c2ecf20Sopenharmony_ci found->bytes_readonly += bytes_readonly; 2768c2ecf20Sopenharmony_ci if (total_bytes > 0) 2778c2ecf20Sopenharmony_ci found->full = 0; 2788c2ecf20Sopenharmony_ci btrfs_try_granting_tickets(info, found); 2798c2ecf20Sopenharmony_ci spin_unlock(&found->lock); 2808c2ecf20Sopenharmony_ci *space_info = found; 2818c2ecf20Sopenharmony_ci} 2828c2ecf20Sopenharmony_ci 2838c2ecf20Sopenharmony_cistruct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info, 2848c2ecf20Sopenharmony_ci u64 flags) 2858c2ecf20Sopenharmony_ci{ 2868c2ecf20Sopenharmony_ci struct list_head *head = &info->space_info; 2878c2ecf20Sopenharmony_ci struct btrfs_space_info *found; 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci flags &= BTRFS_BLOCK_GROUP_TYPE_MASK; 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_ci 
list_for_each_entry(found, head, list) { 2928c2ecf20Sopenharmony_ci if (found->flags & flags) 2938c2ecf20Sopenharmony_ci return found; 2948c2ecf20Sopenharmony_ci } 2958c2ecf20Sopenharmony_ci return NULL; 2968c2ecf20Sopenharmony_ci} 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_cistatic u64 calc_available_free_space(struct btrfs_fs_info *fs_info, 2998c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info, 3008c2ecf20Sopenharmony_ci enum btrfs_reserve_flush_enum flush) 3018c2ecf20Sopenharmony_ci{ 3028c2ecf20Sopenharmony_ci u64 profile; 3038c2ecf20Sopenharmony_ci u64 avail; 3048c2ecf20Sopenharmony_ci int factor; 3058c2ecf20Sopenharmony_ci 3068c2ecf20Sopenharmony_ci if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM) 3078c2ecf20Sopenharmony_ci profile = btrfs_system_alloc_profile(fs_info); 3088c2ecf20Sopenharmony_ci else 3098c2ecf20Sopenharmony_ci profile = btrfs_metadata_alloc_profile(fs_info); 3108c2ecf20Sopenharmony_ci 3118c2ecf20Sopenharmony_ci avail = atomic64_read(&fs_info->free_chunk_space); 3128c2ecf20Sopenharmony_ci 3138c2ecf20Sopenharmony_ci /* 3148c2ecf20Sopenharmony_ci * If we have dup, raid1 or raid10 then only half of the free 3158c2ecf20Sopenharmony_ci * space is actually usable. For raid56, the space info used 3168c2ecf20Sopenharmony_ci * doesn't include the parity drive, so we don't have to 3178c2ecf20Sopenharmony_ci * change the math 3188c2ecf20Sopenharmony_ci */ 3198c2ecf20Sopenharmony_ci factor = btrfs_bg_type_to_factor(profile); 3208c2ecf20Sopenharmony_ci avail = div_u64(avail, factor); 3218c2ecf20Sopenharmony_ci 3228c2ecf20Sopenharmony_ci /* 3238c2ecf20Sopenharmony_ci * If we aren't flushing all things, let us overcommit up to 3248c2ecf20Sopenharmony_ci * 1/2th of the space. If we can flush, don't let us overcommit 3258c2ecf20Sopenharmony_ci * too much, let it overcommit up to 1/8 of the space. 
3268c2ecf20Sopenharmony_ci */ 3278c2ecf20Sopenharmony_ci if (flush == BTRFS_RESERVE_FLUSH_ALL) 3288c2ecf20Sopenharmony_ci avail >>= 3; 3298c2ecf20Sopenharmony_ci else 3308c2ecf20Sopenharmony_ci avail >>= 1; 3318c2ecf20Sopenharmony_ci return avail; 3328c2ecf20Sopenharmony_ci} 3338c2ecf20Sopenharmony_ci 3348c2ecf20Sopenharmony_ciint btrfs_can_overcommit(struct btrfs_fs_info *fs_info, 3358c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info, u64 bytes, 3368c2ecf20Sopenharmony_ci enum btrfs_reserve_flush_enum flush) 3378c2ecf20Sopenharmony_ci{ 3388c2ecf20Sopenharmony_ci u64 avail; 3398c2ecf20Sopenharmony_ci u64 used; 3408c2ecf20Sopenharmony_ci 3418c2ecf20Sopenharmony_ci /* Don't overcommit when in mixed mode */ 3428c2ecf20Sopenharmony_ci if (space_info->flags & BTRFS_BLOCK_GROUP_DATA) 3438c2ecf20Sopenharmony_ci return 0; 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ci used = btrfs_space_info_used(space_info, true); 3468c2ecf20Sopenharmony_ci avail = calc_available_free_space(fs_info, space_info, flush); 3478c2ecf20Sopenharmony_ci 3488c2ecf20Sopenharmony_ci if (used + bytes < space_info->total_bytes + avail) 3498c2ecf20Sopenharmony_ci return 1; 3508c2ecf20Sopenharmony_ci return 0; 3518c2ecf20Sopenharmony_ci} 3528c2ecf20Sopenharmony_ci 3538c2ecf20Sopenharmony_cistatic void remove_ticket(struct btrfs_space_info *space_info, 3548c2ecf20Sopenharmony_ci struct reserve_ticket *ticket) 3558c2ecf20Sopenharmony_ci{ 3568c2ecf20Sopenharmony_ci if (!list_empty(&ticket->list)) { 3578c2ecf20Sopenharmony_ci list_del_init(&ticket->list); 3588c2ecf20Sopenharmony_ci ASSERT(space_info->reclaim_size >= ticket->bytes); 3598c2ecf20Sopenharmony_ci space_info->reclaim_size -= ticket->bytes; 3608c2ecf20Sopenharmony_ci } 3618c2ecf20Sopenharmony_ci} 3628c2ecf20Sopenharmony_ci 3638c2ecf20Sopenharmony_ci/* 3648c2ecf20Sopenharmony_ci * This is for space we already have accounted in space_info->bytes_may_use, so 3658c2ecf20Sopenharmony_ci * basically when we're returning space from 
block_rsv's. 3668c2ecf20Sopenharmony_ci */ 3678c2ecf20Sopenharmony_civoid btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info, 3688c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info) 3698c2ecf20Sopenharmony_ci{ 3708c2ecf20Sopenharmony_ci struct list_head *head; 3718c2ecf20Sopenharmony_ci enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH; 3728c2ecf20Sopenharmony_ci 3738c2ecf20Sopenharmony_ci lockdep_assert_held(&space_info->lock); 3748c2ecf20Sopenharmony_ci 3758c2ecf20Sopenharmony_ci head = &space_info->priority_tickets; 3768c2ecf20Sopenharmony_ciagain: 3778c2ecf20Sopenharmony_ci while (!list_empty(head)) { 3788c2ecf20Sopenharmony_ci struct reserve_ticket *ticket; 3798c2ecf20Sopenharmony_ci u64 used = btrfs_space_info_used(space_info, true); 3808c2ecf20Sopenharmony_ci 3818c2ecf20Sopenharmony_ci ticket = list_first_entry(head, struct reserve_ticket, list); 3828c2ecf20Sopenharmony_ci 3838c2ecf20Sopenharmony_ci /* Check and see if our ticket can be satisified now. */ 3848c2ecf20Sopenharmony_ci if ((used + ticket->bytes <= space_info->total_bytes) || 3858c2ecf20Sopenharmony_ci btrfs_can_overcommit(fs_info, space_info, ticket->bytes, 3868c2ecf20Sopenharmony_ci flush)) { 3878c2ecf20Sopenharmony_ci btrfs_space_info_update_bytes_may_use(fs_info, 3888c2ecf20Sopenharmony_ci space_info, 3898c2ecf20Sopenharmony_ci ticket->bytes); 3908c2ecf20Sopenharmony_ci remove_ticket(space_info, ticket); 3918c2ecf20Sopenharmony_ci ticket->bytes = 0; 3928c2ecf20Sopenharmony_ci space_info->tickets_id++; 3938c2ecf20Sopenharmony_ci wake_up(&ticket->wait); 3948c2ecf20Sopenharmony_ci } else { 3958c2ecf20Sopenharmony_ci break; 3968c2ecf20Sopenharmony_ci } 3978c2ecf20Sopenharmony_ci } 3988c2ecf20Sopenharmony_ci 3998c2ecf20Sopenharmony_ci if (head == &space_info->priority_tickets) { 4008c2ecf20Sopenharmony_ci head = &space_info->tickets; 4018c2ecf20Sopenharmony_ci flush = BTRFS_RESERVE_FLUSH_ALL; 4028c2ecf20Sopenharmony_ci goto again; 4038c2ecf20Sopenharmony_ci } 
4048c2ecf20Sopenharmony_ci} 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_ci#define DUMP_BLOCK_RSV(fs_info, rsv_name) \ 4078c2ecf20Sopenharmony_cido { \ 4088c2ecf20Sopenharmony_ci struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name; \ 4098c2ecf20Sopenharmony_ci spin_lock(&__rsv->lock); \ 4108c2ecf20Sopenharmony_ci btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu", \ 4118c2ecf20Sopenharmony_ci __rsv->size, __rsv->reserved); \ 4128c2ecf20Sopenharmony_ci spin_unlock(&__rsv->lock); \ 4138c2ecf20Sopenharmony_ci} while (0) 4148c2ecf20Sopenharmony_ci 4158c2ecf20Sopenharmony_cistatic void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info, 4168c2ecf20Sopenharmony_ci struct btrfs_space_info *info) 4178c2ecf20Sopenharmony_ci{ 4188c2ecf20Sopenharmony_ci lockdep_assert_held(&info->lock); 4198c2ecf20Sopenharmony_ci 4208c2ecf20Sopenharmony_ci /* The free space could be negative in case of overcommit */ 4218c2ecf20Sopenharmony_ci btrfs_info(fs_info, "space_info %llu has %lld free, is %sfull", 4228c2ecf20Sopenharmony_ci info->flags, 4238c2ecf20Sopenharmony_ci (s64)(info->total_bytes - btrfs_space_info_used(info, true)), 4248c2ecf20Sopenharmony_ci info->full ? 
"" : "not "); 4258c2ecf20Sopenharmony_ci btrfs_info(fs_info, 4268c2ecf20Sopenharmony_ci "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu", 4278c2ecf20Sopenharmony_ci info->total_bytes, info->bytes_used, info->bytes_pinned, 4288c2ecf20Sopenharmony_ci info->bytes_reserved, info->bytes_may_use, 4298c2ecf20Sopenharmony_ci info->bytes_readonly); 4308c2ecf20Sopenharmony_ci 4318c2ecf20Sopenharmony_ci DUMP_BLOCK_RSV(fs_info, global_block_rsv); 4328c2ecf20Sopenharmony_ci DUMP_BLOCK_RSV(fs_info, trans_block_rsv); 4338c2ecf20Sopenharmony_ci DUMP_BLOCK_RSV(fs_info, chunk_block_rsv); 4348c2ecf20Sopenharmony_ci DUMP_BLOCK_RSV(fs_info, delayed_block_rsv); 4358c2ecf20Sopenharmony_ci DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv); 4368c2ecf20Sopenharmony_ci 4378c2ecf20Sopenharmony_ci} 4388c2ecf20Sopenharmony_ci 4398c2ecf20Sopenharmony_civoid btrfs_dump_space_info(struct btrfs_fs_info *fs_info, 4408c2ecf20Sopenharmony_ci struct btrfs_space_info *info, u64 bytes, 4418c2ecf20Sopenharmony_ci int dump_block_groups) 4428c2ecf20Sopenharmony_ci{ 4438c2ecf20Sopenharmony_ci struct btrfs_block_group *cache; 4448c2ecf20Sopenharmony_ci int index = 0; 4458c2ecf20Sopenharmony_ci 4468c2ecf20Sopenharmony_ci spin_lock(&info->lock); 4478c2ecf20Sopenharmony_ci __btrfs_dump_space_info(fs_info, info); 4488c2ecf20Sopenharmony_ci spin_unlock(&info->lock); 4498c2ecf20Sopenharmony_ci 4508c2ecf20Sopenharmony_ci if (!dump_block_groups) 4518c2ecf20Sopenharmony_ci return; 4528c2ecf20Sopenharmony_ci 4538c2ecf20Sopenharmony_ci down_read(&info->groups_sem); 4548c2ecf20Sopenharmony_ciagain: 4558c2ecf20Sopenharmony_ci list_for_each_entry(cache, &info->block_groups[index], list) { 4568c2ecf20Sopenharmony_ci spin_lock(&cache->lock); 4578c2ecf20Sopenharmony_ci btrfs_info(fs_info, 4588c2ecf20Sopenharmony_ci "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s", 4598c2ecf20Sopenharmony_ci cache->start, cache->length, cache->used, cache->pinned, 
4608c2ecf20Sopenharmony_ci cache->reserved, cache->ro ? "[readonly]" : ""); 4618c2ecf20Sopenharmony_ci spin_unlock(&cache->lock); 4628c2ecf20Sopenharmony_ci btrfs_dump_free_space(cache, bytes); 4638c2ecf20Sopenharmony_ci } 4648c2ecf20Sopenharmony_ci if (++index < BTRFS_NR_RAID_TYPES) 4658c2ecf20Sopenharmony_ci goto again; 4668c2ecf20Sopenharmony_ci up_read(&info->groups_sem); 4678c2ecf20Sopenharmony_ci} 4688c2ecf20Sopenharmony_ci 4698c2ecf20Sopenharmony_cistatic inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info, 4708c2ecf20Sopenharmony_ci u64 to_reclaim) 4718c2ecf20Sopenharmony_ci{ 4728c2ecf20Sopenharmony_ci u64 bytes; 4738c2ecf20Sopenharmony_ci u64 nr; 4748c2ecf20Sopenharmony_ci 4758c2ecf20Sopenharmony_ci bytes = btrfs_calc_insert_metadata_size(fs_info, 1); 4768c2ecf20Sopenharmony_ci nr = div64_u64(to_reclaim, bytes); 4778c2ecf20Sopenharmony_ci if (!nr) 4788c2ecf20Sopenharmony_ci nr = 1; 4798c2ecf20Sopenharmony_ci return nr; 4808c2ecf20Sopenharmony_ci} 4818c2ecf20Sopenharmony_ci 4828c2ecf20Sopenharmony_ci#define EXTENT_SIZE_PER_ITEM SZ_256K 4838c2ecf20Sopenharmony_ci 4848c2ecf20Sopenharmony_ci/* 4858c2ecf20Sopenharmony_ci * shrink metadata reservation for delalloc 4868c2ecf20Sopenharmony_ci */ 4878c2ecf20Sopenharmony_cistatic void shrink_delalloc(struct btrfs_fs_info *fs_info, 4888c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info, 4898c2ecf20Sopenharmony_ci u64 to_reclaim, bool wait_ordered) 4908c2ecf20Sopenharmony_ci{ 4918c2ecf20Sopenharmony_ci struct btrfs_trans_handle *trans; 4928c2ecf20Sopenharmony_ci u64 delalloc_bytes; 4938c2ecf20Sopenharmony_ci u64 dio_bytes; 4948c2ecf20Sopenharmony_ci u64 items; 4958c2ecf20Sopenharmony_ci long time_left; 4968c2ecf20Sopenharmony_ci int loops; 4978c2ecf20Sopenharmony_ci 4988c2ecf20Sopenharmony_ci /* Calc the number of the pages we need flush for space reservation */ 4998c2ecf20Sopenharmony_ci if (to_reclaim == U64_MAX) { 5008c2ecf20Sopenharmony_ci items = U64_MAX; 5018c2ecf20Sopenharmony_ci } else { 
5028c2ecf20Sopenharmony_ci /* 5038c2ecf20Sopenharmony_ci * to_reclaim is set to however much metadata we need to 5048c2ecf20Sopenharmony_ci * reclaim, but reclaiming that much data doesn't really track 5058c2ecf20Sopenharmony_ci * exactly, so increase the amount to reclaim by 2x in order to 5068c2ecf20Sopenharmony_ci * make sure we're flushing enough delalloc to hopefully reclaim 5078c2ecf20Sopenharmony_ci * some metadata reservations. 5088c2ecf20Sopenharmony_ci */ 5098c2ecf20Sopenharmony_ci items = calc_reclaim_items_nr(fs_info, to_reclaim) * 2; 5108c2ecf20Sopenharmony_ci to_reclaim = items * EXTENT_SIZE_PER_ITEM; 5118c2ecf20Sopenharmony_ci } 5128c2ecf20Sopenharmony_ci 5138c2ecf20Sopenharmony_ci trans = (struct btrfs_trans_handle *)current->journal_info; 5148c2ecf20Sopenharmony_ci 5158c2ecf20Sopenharmony_ci delalloc_bytes = percpu_counter_sum_positive( 5168c2ecf20Sopenharmony_ci &fs_info->delalloc_bytes); 5178c2ecf20Sopenharmony_ci dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes); 5188c2ecf20Sopenharmony_ci if (delalloc_bytes == 0 && dio_bytes == 0) { 5198c2ecf20Sopenharmony_ci if (trans) 5208c2ecf20Sopenharmony_ci return; 5218c2ecf20Sopenharmony_ci if (wait_ordered) 5228c2ecf20Sopenharmony_ci btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1); 5238c2ecf20Sopenharmony_ci return; 5248c2ecf20Sopenharmony_ci } 5258c2ecf20Sopenharmony_ci 5268c2ecf20Sopenharmony_ci /* 5278c2ecf20Sopenharmony_ci * If we are doing more ordered than delalloc we need to just wait on 5288c2ecf20Sopenharmony_ci * ordered extents, otherwise we'll waste time trying to flush delalloc 5298c2ecf20Sopenharmony_ci * that likely won't give us the space back we need. 
5308c2ecf20Sopenharmony_ci */ 5318c2ecf20Sopenharmony_ci if (dio_bytes > delalloc_bytes) 5328c2ecf20Sopenharmony_ci wait_ordered = true; 5338c2ecf20Sopenharmony_ci 5348c2ecf20Sopenharmony_ci loops = 0; 5358c2ecf20Sopenharmony_ci while ((delalloc_bytes || dio_bytes) && loops < 3) { 5368c2ecf20Sopenharmony_ci u64 nr_pages = min(delalloc_bytes, to_reclaim) >> PAGE_SHIFT; 5378c2ecf20Sopenharmony_ci 5388c2ecf20Sopenharmony_ci btrfs_start_delalloc_roots(fs_info, nr_pages, true); 5398c2ecf20Sopenharmony_ci 5408c2ecf20Sopenharmony_ci loops++; 5418c2ecf20Sopenharmony_ci if (wait_ordered && !trans) { 5428c2ecf20Sopenharmony_ci btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1); 5438c2ecf20Sopenharmony_ci } else { 5448c2ecf20Sopenharmony_ci time_left = schedule_timeout_killable(1); 5458c2ecf20Sopenharmony_ci if (time_left) 5468c2ecf20Sopenharmony_ci break; 5478c2ecf20Sopenharmony_ci } 5488c2ecf20Sopenharmony_ci 5498c2ecf20Sopenharmony_ci spin_lock(&space_info->lock); 5508c2ecf20Sopenharmony_ci if (list_empty(&space_info->tickets) && 5518c2ecf20Sopenharmony_ci list_empty(&space_info->priority_tickets)) { 5528c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 5538c2ecf20Sopenharmony_ci break; 5548c2ecf20Sopenharmony_ci } 5558c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 5568c2ecf20Sopenharmony_ci 5578c2ecf20Sopenharmony_ci delalloc_bytes = percpu_counter_sum_positive( 5588c2ecf20Sopenharmony_ci &fs_info->delalloc_bytes); 5598c2ecf20Sopenharmony_ci dio_bytes = percpu_counter_sum_positive(&fs_info->dio_bytes); 5608c2ecf20Sopenharmony_ci } 5618c2ecf20Sopenharmony_ci} 5628c2ecf20Sopenharmony_ci 5638c2ecf20Sopenharmony_ci/** 5648c2ecf20Sopenharmony_ci * maybe_commit_transaction - possibly commit the transaction if its ok to 5658c2ecf20Sopenharmony_ci * @root - the root we're allocating for 5668c2ecf20Sopenharmony_ci * @bytes - the number of bytes we want to reserve 5678c2ecf20Sopenharmony_ci * @force - force the commit 5688c2ecf20Sopenharmony_ci * 
5698c2ecf20Sopenharmony_ci * This will check to make sure that committing the transaction will actually 5708c2ecf20Sopenharmony_ci * get us somewhere and then commit the transaction if it does. Otherwise it 5718c2ecf20Sopenharmony_ci * will return -ENOSPC. 5728c2ecf20Sopenharmony_ci */ 5738c2ecf20Sopenharmony_cistatic int may_commit_transaction(struct btrfs_fs_info *fs_info, 5748c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info) 5758c2ecf20Sopenharmony_ci{ 5768c2ecf20Sopenharmony_ci struct reserve_ticket *ticket = NULL; 5778c2ecf20Sopenharmony_ci struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv; 5788c2ecf20Sopenharmony_ci struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; 5798c2ecf20Sopenharmony_ci struct btrfs_block_rsv *trans_rsv = &fs_info->trans_block_rsv; 5808c2ecf20Sopenharmony_ci struct btrfs_trans_handle *trans; 5818c2ecf20Sopenharmony_ci u64 reclaim_bytes = 0; 5828c2ecf20Sopenharmony_ci u64 bytes_needed = 0; 5838c2ecf20Sopenharmony_ci u64 cur_free_bytes = 0; 5848c2ecf20Sopenharmony_ci 5858c2ecf20Sopenharmony_ci trans = (struct btrfs_trans_handle *)current->journal_info; 5868c2ecf20Sopenharmony_ci if (trans) 5878c2ecf20Sopenharmony_ci return -EAGAIN; 5888c2ecf20Sopenharmony_ci 5898c2ecf20Sopenharmony_ci spin_lock(&space_info->lock); 5908c2ecf20Sopenharmony_ci cur_free_bytes = btrfs_space_info_used(space_info, true); 5918c2ecf20Sopenharmony_ci if (cur_free_bytes < space_info->total_bytes) 5928c2ecf20Sopenharmony_ci cur_free_bytes = space_info->total_bytes - cur_free_bytes; 5938c2ecf20Sopenharmony_ci else 5948c2ecf20Sopenharmony_ci cur_free_bytes = 0; 5958c2ecf20Sopenharmony_ci 5968c2ecf20Sopenharmony_ci if (!list_empty(&space_info->priority_tickets)) 5978c2ecf20Sopenharmony_ci ticket = list_first_entry(&space_info->priority_tickets, 5988c2ecf20Sopenharmony_ci struct reserve_ticket, list); 5998c2ecf20Sopenharmony_ci else if (!list_empty(&space_info->tickets)) 6008c2ecf20Sopenharmony_ci ticket = 
list_first_entry(&space_info->tickets, 6018c2ecf20Sopenharmony_ci struct reserve_ticket, list); 6028c2ecf20Sopenharmony_ci if (ticket) 6038c2ecf20Sopenharmony_ci bytes_needed = ticket->bytes; 6048c2ecf20Sopenharmony_ci 6058c2ecf20Sopenharmony_ci if (bytes_needed > cur_free_bytes) 6068c2ecf20Sopenharmony_ci bytes_needed -= cur_free_bytes; 6078c2ecf20Sopenharmony_ci else 6088c2ecf20Sopenharmony_ci bytes_needed = 0; 6098c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 6108c2ecf20Sopenharmony_ci 6118c2ecf20Sopenharmony_ci if (!bytes_needed) 6128c2ecf20Sopenharmony_ci return 0; 6138c2ecf20Sopenharmony_ci 6148c2ecf20Sopenharmony_ci trans = btrfs_join_transaction(fs_info->extent_root); 6158c2ecf20Sopenharmony_ci if (IS_ERR(trans)) 6168c2ecf20Sopenharmony_ci return PTR_ERR(trans); 6178c2ecf20Sopenharmony_ci 6188c2ecf20Sopenharmony_ci /* 6198c2ecf20Sopenharmony_ci * See if there is enough pinned space to make this reservation, or if 6208c2ecf20Sopenharmony_ci * we have block groups that are going to be freed, allowing us to 6218c2ecf20Sopenharmony_ci * possibly do a chunk allocation the next loop through. 6228c2ecf20Sopenharmony_ci */ 6238c2ecf20Sopenharmony_ci if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags) || 6248c2ecf20Sopenharmony_ci __percpu_counter_compare(&space_info->total_bytes_pinned, 6258c2ecf20Sopenharmony_ci bytes_needed, 6268c2ecf20Sopenharmony_ci BTRFS_TOTAL_BYTES_PINNED_BATCH) >= 0) 6278c2ecf20Sopenharmony_ci goto commit; 6288c2ecf20Sopenharmony_ci 6298c2ecf20Sopenharmony_ci /* 6308c2ecf20Sopenharmony_ci * See if there is some space in the delayed insertion reserve for this 6318c2ecf20Sopenharmony_ci * reservation. If the space_info's don't match (like for DATA or 6328c2ecf20Sopenharmony_ci * SYSTEM) then just go enospc, reclaiming this space won't recover any 6338c2ecf20Sopenharmony_ci * space to satisfy those reservations. 
6348c2ecf20Sopenharmony_ci */ 6358c2ecf20Sopenharmony_ci if (space_info != delayed_rsv->space_info) 6368c2ecf20Sopenharmony_ci goto enospc; 6378c2ecf20Sopenharmony_ci 6388c2ecf20Sopenharmony_ci spin_lock(&delayed_rsv->lock); 6398c2ecf20Sopenharmony_ci reclaim_bytes += delayed_rsv->reserved; 6408c2ecf20Sopenharmony_ci spin_unlock(&delayed_rsv->lock); 6418c2ecf20Sopenharmony_ci 6428c2ecf20Sopenharmony_ci spin_lock(&delayed_refs_rsv->lock); 6438c2ecf20Sopenharmony_ci reclaim_bytes += delayed_refs_rsv->reserved; 6448c2ecf20Sopenharmony_ci spin_unlock(&delayed_refs_rsv->lock); 6458c2ecf20Sopenharmony_ci 6468c2ecf20Sopenharmony_ci spin_lock(&trans_rsv->lock); 6478c2ecf20Sopenharmony_ci reclaim_bytes += trans_rsv->reserved; 6488c2ecf20Sopenharmony_ci spin_unlock(&trans_rsv->lock); 6498c2ecf20Sopenharmony_ci 6508c2ecf20Sopenharmony_ci if (reclaim_bytes >= bytes_needed) 6518c2ecf20Sopenharmony_ci goto commit; 6528c2ecf20Sopenharmony_ci bytes_needed -= reclaim_bytes; 6538c2ecf20Sopenharmony_ci 6548c2ecf20Sopenharmony_ci if (__percpu_counter_compare(&space_info->total_bytes_pinned, 6558c2ecf20Sopenharmony_ci bytes_needed, 6568c2ecf20Sopenharmony_ci BTRFS_TOTAL_BYTES_PINNED_BATCH) < 0) 6578c2ecf20Sopenharmony_ci goto enospc; 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_cicommit: 6608c2ecf20Sopenharmony_ci return btrfs_commit_transaction(trans); 6618c2ecf20Sopenharmony_cienospc: 6628c2ecf20Sopenharmony_ci btrfs_end_transaction(trans); 6638c2ecf20Sopenharmony_ci return -ENOSPC; 6648c2ecf20Sopenharmony_ci} 6658c2ecf20Sopenharmony_ci 6668c2ecf20Sopenharmony_ci/* 6678c2ecf20Sopenharmony_ci * Try to flush some data based on policy set by @state. This is only advisory 6688c2ecf20Sopenharmony_ci * and may fail for various reasons. The caller is supposed to examine the 6698c2ecf20Sopenharmony_ci * state of @space_info to detect the outcome. 
6708c2ecf20Sopenharmony_ci */ 6718c2ecf20Sopenharmony_cistatic void flush_space(struct btrfs_fs_info *fs_info, 6728c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info, u64 num_bytes, 6738c2ecf20Sopenharmony_ci int state) 6748c2ecf20Sopenharmony_ci{ 6758c2ecf20Sopenharmony_ci struct btrfs_root *root = fs_info->extent_root; 6768c2ecf20Sopenharmony_ci struct btrfs_trans_handle *trans; 6778c2ecf20Sopenharmony_ci int nr; 6788c2ecf20Sopenharmony_ci int ret = 0; 6798c2ecf20Sopenharmony_ci 6808c2ecf20Sopenharmony_ci switch (state) { 6818c2ecf20Sopenharmony_ci case FLUSH_DELAYED_ITEMS_NR: 6828c2ecf20Sopenharmony_ci case FLUSH_DELAYED_ITEMS: 6838c2ecf20Sopenharmony_ci if (state == FLUSH_DELAYED_ITEMS_NR) 6848c2ecf20Sopenharmony_ci nr = calc_reclaim_items_nr(fs_info, num_bytes) * 2; 6858c2ecf20Sopenharmony_ci else 6868c2ecf20Sopenharmony_ci nr = -1; 6878c2ecf20Sopenharmony_ci 6888c2ecf20Sopenharmony_ci trans = btrfs_join_transaction(root); 6898c2ecf20Sopenharmony_ci if (IS_ERR(trans)) { 6908c2ecf20Sopenharmony_ci ret = PTR_ERR(trans); 6918c2ecf20Sopenharmony_ci break; 6928c2ecf20Sopenharmony_ci } 6938c2ecf20Sopenharmony_ci ret = btrfs_run_delayed_items_nr(trans, nr); 6948c2ecf20Sopenharmony_ci btrfs_end_transaction(trans); 6958c2ecf20Sopenharmony_ci break; 6968c2ecf20Sopenharmony_ci case FLUSH_DELALLOC: 6978c2ecf20Sopenharmony_ci case FLUSH_DELALLOC_WAIT: 6988c2ecf20Sopenharmony_ci shrink_delalloc(fs_info, space_info, num_bytes, 6998c2ecf20Sopenharmony_ci state == FLUSH_DELALLOC_WAIT); 7008c2ecf20Sopenharmony_ci break; 7018c2ecf20Sopenharmony_ci case FLUSH_DELAYED_REFS_NR: 7028c2ecf20Sopenharmony_ci case FLUSH_DELAYED_REFS: 7038c2ecf20Sopenharmony_ci trans = btrfs_join_transaction(root); 7048c2ecf20Sopenharmony_ci if (IS_ERR(trans)) { 7058c2ecf20Sopenharmony_ci ret = PTR_ERR(trans); 7068c2ecf20Sopenharmony_ci break; 7078c2ecf20Sopenharmony_ci } 7088c2ecf20Sopenharmony_ci if (state == FLUSH_DELAYED_REFS_NR) 7098c2ecf20Sopenharmony_ci nr = calc_reclaim_items_nr(fs_info, 
num_bytes); 7108c2ecf20Sopenharmony_ci else 7118c2ecf20Sopenharmony_ci nr = 0; 7128c2ecf20Sopenharmony_ci btrfs_run_delayed_refs(trans, nr); 7138c2ecf20Sopenharmony_ci btrfs_end_transaction(trans); 7148c2ecf20Sopenharmony_ci break; 7158c2ecf20Sopenharmony_ci case ALLOC_CHUNK: 7168c2ecf20Sopenharmony_ci case ALLOC_CHUNK_FORCE: 7178c2ecf20Sopenharmony_ci trans = btrfs_join_transaction(root); 7188c2ecf20Sopenharmony_ci if (IS_ERR(trans)) { 7198c2ecf20Sopenharmony_ci ret = PTR_ERR(trans); 7208c2ecf20Sopenharmony_ci break; 7218c2ecf20Sopenharmony_ci } 7228c2ecf20Sopenharmony_ci ret = btrfs_chunk_alloc(trans, 7238c2ecf20Sopenharmony_ci btrfs_get_alloc_profile(fs_info, space_info->flags), 7248c2ecf20Sopenharmony_ci (state == ALLOC_CHUNK) ? CHUNK_ALLOC_NO_FORCE : 7258c2ecf20Sopenharmony_ci CHUNK_ALLOC_FORCE); 7268c2ecf20Sopenharmony_ci btrfs_end_transaction(trans); 7278c2ecf20Sopenharmony_ci if (ret > 0 || ret == -ENOSPC) 7288c2ecf20Sopenharmony_ci ret = 0; 7298c2ecf20Sopenharmony_ci break; 7308c2ecf20Sopenharmony_ci case RUN_DELAYED_IPUTS: 7318c2ecf20Sopenharmony_ci /* 7328c2ecf20Sopenharmony_ci * If we have pending delayed iputs then we could free up a 7338c2ecf20Sopenharmony_ci * bunch of pinned space, so make sure we run the iputs before 7348c2ecf20Sopenharmony_ci * we do our pinned bytes check below. 
7358c2ecf20Sopenharmony_ci */ 7368c2ecf20Sopenharmony_ci btrfs_run_delayed_iputs(fs_info); 7378c2ecf20Sopenharmony_ci btrfs_wait_on_delayed_iputs(fs_info); 7388c2ecf20Sopenharmony_ci break; 7398c2ecf20Sopenharmony_ci case COMMIT_TRANS: 7408c2ecf20Sopenharmony_ci ret = may_commit_transaction(fs_info, space_info); 7418c2ecf20Sopenharmony_ci break; 7428c2ecf20Sopenharmony_ci default: 7438c2ecf20Sopenharmony_ci ret = -ENOSPC; 7448c2ecf20Sopenharmony_ci break; 7458c2ecf20Sopenharmony_ci } 7468c2ecf20Sopenharmony_ci 7478c2ecf20Sopenharmony_ci trace_btrfs_flush_space(fs_info, space_info->flags, num_bytes, state, 7488c2ecf20Sopenharmony_ci ret); 7498c2ecf20Sopenharmony_ci return; 7508c2ecf20Sopenharmony_ci} 7518c2ecf20Sopenharmony_ci 7528c2ecf20Sopenharmony_cistatic inline u64 7538c2ecf20Sopenharmony_cibtrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, 7548c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info) 7558c2ecf20Sopenharmony_ci{ 7568c2ecf20Sopenharmony_ci u64 used; 7578c2ecf20Sopenharmony_ci u64 avail; 7588c2ecf20Sopenharmony_ci u64 expected; 7598c2ecf20Sopenharmony_ci u64 to_reclaim = space_info->reclaim_size; 7608c2ecf20Sopenharmony_ci 7618c2ecf20Sopenharmony_ci lockdep_assert_held(&space_info->lock); 7628c2ecf20Sopenharmony_ci 7638c2ecf20Sopenharmony_ci avail = calc_available_free_space(fs_info, space_info, 7648c2ecf20Sopenharmony_ci BTRFS_RESERVE_FLUSH_ALL); 7658c2ecf20Sopenharmony_ci used = btrfs_space_info_used(space_info, true); 7668c2ecf20Sopenharmony_ci 7678c2ecf20Sopenharmony_ci /* 7688c2ecf20Sopenharmony_ci * We may be flushing because suddenly we have less space than we had 7698c2ecf20Sopenharmony_ci * before, and now we're well over-committed based on our current free 7708c2ecf20Sopenharmony_ci * space. If that's the case add in our overage so we make sure to put 7718c2ecf20Sopenharmony_ci * appropriate pressure on the flushing state machine. 
7728c2ecf20Sopenharmony_ci */ 7738c2ecf20Sopenharmony_ci if (space_info->total_bytes + avail < used) 7748c2ecf20Sopenharmony_ci to_reclaim += used - (space_info->total_bytes + avail); 7758c2ecf20Sopenharmony_ci 7768c2ecf20Sopenharmony_ci if (to_reclaim) 7778c2ecf20Sopenharmony_ci return to_reclaim; 7788c2ecf20Sopenharmony_ci 7798c2ecf20Sopenharmony_ci to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); 7808c2ecf20Sopenharmony_ci if (btrfs_can_overcommit(fs_info, space_info, to_reclaim, 7818c2ecf20Sopenharmony_ci BTRFS_RESERVE_FLUSH_ALL)) 7828c2ecf20Sopenharmony_ci return 0; 7838c2ecf20Sopenharmony_ci 7848c2ecf20Sopenharmony_ci used = btrfs_space_info_used(space_info, true); 7858c2ecf20Sopenharmony_ci 7868c2ecf20Sopenharmony_ci if (btrfs_can_overcommit(fs_info, space_info, SZ_1M, 7878c2ecf20Sopenharmony_ci BTRFS_RESERVE_FLUSH_ALL)) 7888c2ecf20Sopenharmony_ci expected = div_factor_fine(space_info->total_bytes, 95); 7898c2ecf20Sopenharmony_ci else 7908c2ecf20Sopenharmony_ci expected = div_factor_fine(space_info->total_bytes, 90); 7918c2ecf20Sopenharmony_ci 7928c2ecf20Sopenharmony_ci if (used > expected) 7938c2ecf20Sopenharmony_ci to_reclaim = used - expected; 7948c2ecf20Sopenharmony_ci else 7958c2ecf20Sopenharmony_ci to_reclaim = 0; 7968c2ecf20Sopenharmony_ci to_reclaim = min(to_reclaim, space_info->bytes_may_use + 7978c2ecf20Sopenharmony_ci space_info->bytes_reserved); 7988c2ecf20Sopenharmony_ci return to_reclaim; 7998c2ecf20Sopenharmony_ci} 8008c2ecf20Sopenharmony_ci 8018c2ecf20Sopenharmony_cistatic inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info, 8028c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info, 8038c2ecf20Sopenharmony_ci u64 used) 8048c2ecf20Sopenharmony_ci{ 8058c2ecf20Sopenharmony_ci u64 thresh = div_factor_fine(space_info->total_bytes, 98); 8068c2ecf20Sopenharmony_ci 8078c2ecf20Sopenharmony_ci /* If we're just plain full then async reclaim just slows us down. 
*/ 8088c2ecf20Sopenharmony_ci if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh) 8098c2ecf20Sopenharmony_ci return 0; 8108c2ecf20Sopenharmony_ci 8118c2ecf20Sopenharmony_ci if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info)) 8128c2ecf20Sopenharmony_ci return 0; 8138c2ecf20Sopenharmony_ci 8148c2ecf20Sopenharmony_ci return (used >= thresh && !btrfs_fs_closing(fs_info) && 8158c2ecf20Sopenharmony_ci !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state)); 8168c2ecf20Sopenharmony_ci} 8178c2ecf20Sopenharmony_ci 8188c2ecf20Sopenharmony_cistatic bool steal_from_global_rsv(struct btrfs_fs_info *fs_info, 8198c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info, 8208c2ecf20Sopenharmony_ci struct reserve_ticket *ticket) 8218c2ecf20Sopenharmony_ci{ 8228c2ecf20Sopenharmony_ci struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; 8238c2ecf20Sopenharmony_ci u64 min_bytes; 8248c2ecf20Sopenharmony_ci 8258c2ecf20Sopenharmony_ci if (global_rsv->space_info != space_info) 8268c2ecf20Sopenharmony_ci return false; 8278c2ecf20Sopenharmony_ci 8288c2ecf20Sopenharmony_ci spin_lock(&global_rsv->lock); 8298c2ecf20Sopenharmony_ci min_bytes = div_factor(global_rsv->size, 1); 8308c2ecf20Sopenharmony_ci if (global_rsv->reserved < min_bytes + ticket->bytes) { 8318c2ecf20Sopenharmony_ci spin_unlock(&global_rsv->lock); 8328c2ecf20Sopenharmony_ci return false; 8338c2ecf20Sopenharmony_ci } 8348c2ecf20Sopenharmony_ci global_rsv->reserved -= ticket->bytes; 8358c2ecf20Sopenharmony_ci remove_ticket(space_info, ticket); 8368c2ecf20Sopenharmony_ci ticket->bytes = 0; 8378c2ecf20Sopenharmony_ci wake_up(&ticket->wait); 8388c2ecf20Sopenharmony_ci space_info->tickets_id++; 8398c2ecf20Sopenharmony_ci if (global_rsv->reserved < global_rsv->size) 8408c2ecf20Sopenharmony_ci global_rsv->full = 0; 8418c2ecf20Sopenharmony_ci spin_unlock(&global_rsv->lock); 8428c2ecf20Sopenharmony_ci 8438c2ecf20Sopenharmony_ci return true; 8448c2ecf20Sopenharmony_ci} 8458c2ecf20Sopenharmony_ci 
8468c2ecf20Sopenharmony_ci/* 8478c2ecf20Sopenharmony_ci * maybe_fail_all_tickets - we've exhausted our flushing, start failing tickets 8488c2ecf20Sopenharmony_ci * @fs_info - fs_info for this fs 8498c2ecf20Sopenharmony_ci * @space_info - the space info we were flushing 8508c2ecf20Sopenharmony_ci * 8518c2ecf20Sopenharmony_ci * We call this when we've exhausted our flushing ability and haven't made 8528c2ecf20Sopenharmony_ci * progress in satisfying tickets. The reservation code handles tickets in 8538c2ecf20Sopenharmony_ci * order, so if there is a large ticket first and then smaller ones we could 8548c2ecf20Sopenharmony_ci * very well satisfy the smaller tickets. This will attempt to wake up any 8558c2ecf20Sopenharmony_ci * tickets in the list to catch this case. 8568c2ecf20Sopenharmony_ci * 8578c2ecf20Sopenharmony_ci * This function returns true if it was able to make progress by clearing out 8588c2ecf20Sopenharmony_ci * other tickets, or if it stumbles across a ticket that was smaller than the 8598c2ecf20Sopenharmony_ci * first ticket. 
8608c2ecf20Sopenharmony_ci */ 8618c2ecf20Sopenharmony_cistatic bool maybe_fail_all_tickets(struct btrfs_fs_info *fs_info, 8628c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info) 8638c2ecf20Sopenharmony_ci{ 8648c2ecf20Sopenharmony_ci struct reserve_ticket *ticket; 8658c2ecf20Sopenharmony_ci u64 tickets_id = space_info->tickets_id; 8668c2ecf20Sopenharmony_ci u64 first_ticket_bytes = 0; 8678c2ecf20Sopenharmony_ci 8688c2ecf20Sopenharmony_ci if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) { 8698c2ecf20Sopenharmony_ci btrfs_info(fs_info, "cannot satisfy tickets, dumping space info"); 8708c2ecf20Sopenharmony_ci __btrfs_dump_space_info(fs_info, space_info); 8718c2ecf20Sopenharmony_ci } 8728c2ecf20Sopenharmony_ci 8738c2ecf20Sopenharmony_ci while (!list_empty(&space_info->tickets) && 8748c2ecf20Sopenharmony_ci tickets_id == space_info->tickets_id) { 8758c2ecf20Sopenharmony_ci ticket = list_first_entry(&space_info->tickets, 8768c2ecf20Sopenharmony_ci struct reserve_ticket, list); 8778c2ecf20Sopenharmony_ci 8788c2ecf20Sopenharmony_ci if (ticket->steal && 8798c2ecf20Sopenharmony_ci steal_from_global_rsv(fs_info, space_info, ticket)) 8808c2ecf20Sopenharmony_ci return true; 8818c2ecf20Sopenharmony_ci 8828c2ecf20Sopenharmony_ci /* 8838c2ecf20Sopenharmony_ci * may_commit_transaction will avoid committing the transaction 8848c2ecf20Sopenharmony_ci * if it doesn't feel like the space reclaimed by the commit 8858c2ecf20Sopenharmony_ci * would result in the ticket succeeding. However if we have a 8868c2ecf20Sopenharmony_ci * smaller ticket in the queue it may be small enough to be 8878c2ecf20Sopenharmony_ci * satisified by committing the transaction, so if any 8888c2ecf20Sopenharmony_ci * subsequent ticket is smaller than the first ticket go ahead 8898c2ecf20Sopenharmony_ci * and send us back for another loop through the enospc flushing 8908c2ecf20Sopenharmony_ci * code. 
8918c2ecf20Sopenharmony_ci */ 8928c2ecf20Sopenharmony_ci if (first_ticket_bytes == 0) 8938c2ecf20Sopenharmony_ci first_ticket_bytes = ticket->bytes; 8948c2ecf20Sopenharmony_ci else if (first_ticket_bytes > ticket->bytes) 8958c2ecf20Sopenharmony_ci return true; 8968c2ecf20Sopenharmony_ci 8978c2ecf20Sopenharmony_ci if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) 8988c2ecf20Sopenharmony_ci btrfs_info(fs_info, "failing ticket with %llu bytes", 8998c2ecf20Sopenharmony_ci ticket->bytes); 9008c2ecf20Sopenharmony_ci 9018c2ecf20Sopenharmony_ci remove_ticket(space_info, ticket); 9028c2ecf20Sopenharmony_ci ticket->error = -ENOSPC; 9038c2ecf20Sopenharmony_ci wake_up(&ticket->wait); 9048c2ecf20Sopenharmony_ci 9058c2ecf20Sopenharmony_ci /* 9068c2ecf20Sopenharmony_ci * We're just throwing tickets away, so more flushing may not 9078c2ecf20Sopenharmony_ci * trip over btrfs_try_granting_tickets, so we need to call it 9088c2ecf20Sopenharmony_ci * here to see if we can make progress with the next ticket in 9098c2ecf20Sopenharmony_ci * the list. 9108c2ecf20Sopenharmony_ci */ 9118c2ecf20Sopenharmony_ci btrfs_try_granting_tickets(fs_info, space_info); 9128c2ecf20Sopenharmony_ci } 9138c2ecf20Sopenharmony_ci return (tickets_id != space_info->tickets_id); 9148c2ecf20Sopenharmony_ci} 9158c2ecf20Sopenharmony_ci 9168c2ecf20Sopenharmony_ci/* 9178c2ecf20Sopenharmony_ci * This is for normal flushers, we can wait all goddamned day if we want to. We 9188c2ecf20Sopenharmony_ci * will loop and continuously try to flush as long as we are making progress. 9198c2ecf20Sopenharmony_ci * We count progress as clearing off tickets each time we have to loop. 
9208c2ecf20Sopenharmony_ci */ 9218c2ecf20Sopenharmony_cistatic void btrfs_async_reclaim_metadata_space(struct work_struct *work) 9228c2ecf20Sopenharmony_ci{ 9238c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info; 9248c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info; 9258c2ecf20Sopenharmony_ci u64 to_reclaim; 9268c2ecf20Sopenharmony_ci int flush_state; 9278c2ecf20Sopenharmony_ci int commit_cycles = 0; 9288c2ecf20Sopenharmony_ci u64 last_tickets_id; 9298c2ecf20Sopenharmony_ci 9308c2ecf20Sopenharmony_ci fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work); 9318c2ecf20Sopenharmony_ci space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 9328c2ecf20Sopenharmony_ci 9338c2ecf20Sopenharmony_ci spin_lock(&space_info->lock); 9348c2ecf20Sopenharmony_ci to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info); 9358c2ecf20Sopenharmony_ci if (!to_reclaim) { 9368c2ecf20Sopenharmony_ci space_info->flush = 0; 9378c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 9388c2ecf20Sopenharmony_ci return; 9398c2ecf20Sopenharmony_ci } 9408c2ecf20Sopenharmony_ci last_tickets_id = space_info->tickets_id; 9418c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 9428c2ecf20Sopenharmony_ci 9438c2ecf20Sopenharmony_ci flush_state = FLUSH_DELAYED_ITEMS_NR; 9448c2ecf20Sopenharmony_ci do { 9458c2ecf20Sopenharmony_ci flush_space(fs_info, space_info, to_reclaim, flush_state); 9468c2ecf20Sopenharmony_ci spin_lock(&space_info->lock); 9478c2ecf20Sopenharmony_ci if (list_empty(&space_info->tickets)) { 9488c2ecf20Sopenharmony_ci space_info->flush = 0; 9498c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 9508c2ecf20Sopenharmony_ci return; 9518c2ecf20Sopenharmony_ci } 9528c2ecf20Sopenharmony_ci to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, 9538c2ecf20Sopenharmony_ci space_info); 9548c2ecf20Sopenharmony_ci if (last_tickets_id == space_info->tickets_id) { 9558c2ecf20Sopenharmony_ci flush_state++; 9568c2ecf20Sopenharmony_ci } else { 
9578c2ecf20Sopenharmony_ci last_tickets_id = space_info->tickets_id; 9588c2ecf20Sopenharmony_ci flush_state = FLUSH_DELAYED_ITEMS_NR; 9598c2ecf20Sopenharmony_ci if (commit_cycles) 9608c2ecf20Sopenharmony_ci commit_cycles--; 9618c2ecf20Sopenharmony_ci } 9628c2ecf20Sopenharmony_ci 9638c2ecf20Sopenharmony_ci /* 9648c2ecf20Sopenharmony_ci * We don't want to force a chunk allocation until we've tried 9658c2ecf20Sopenharmony_ci * pretty hard to reclaim space. Think of the case where we 9668c2ecf20Sopenharmony_ci * freed up a bunch of space and so have a lot of pinned space 9678c2ecf20Sopenharmony_ci * to reclaim. We would rather use that than possibly create a 9688c2ecf20Sopenharmony_ci * underutilized metadata chunk. So if this is our first run 9698c2ecf20Sopenharmony_ci * through the flushing state machine skip ALLOC_CHUNK_FORCE and 9708c2ecf20Sopenharmony_ci * commit the transaction. If nothing has changed the next go 9718c2ecf20Sopenharmony_ci * around then we can force a chunk allocation. 
9728c2ecf20Sopenharmony_ci */ 9738c2ecf20Sopenharmony_ci if (flush_state == ALLOC_CHUNK_FORCE && !commit_cycles) 9748c2ecf20Sopenharmony_ci flush_state++; 9758c2ecf20Sopenharmony_ci 9768c2ecf20Sopenharmony_ci if (flush_state > COMMIT_TRANS) { 9778c2ecf20Sopenharmony_ci commit_cycles++; 9788c2ecf20Sopenharmony_ci if (commit_cycles > 2) { 9798c2ecf20Sopenharmony_ci if (maybe_fail_all_tickets(fs_info, space_info)) { 9808c2ecf20Sopenharmony_ci flush_state = FLUSH_DELAYED_ITEMS_NR; 9818c2ecf20Sopenharmony_ci commit_cycles--; 9828c2ecf20Sopenharmony_ci } else { 9838c2ecf20Sopenharmony_ci space_info->flush = 0; 9848c2ecf20Sopenharmony_ci } 9858c2ecf20Sopenharmony_ci } else { 9868c2ecf20Sopenharmony_ci flush_state = FLUSH_DELAYED_ITEMS_NR; 9878c2ecf20Sopenharmony_ci } 9888c2ecf20Sopenharmony_ci } 9898c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 9908c2ecf20Sopenharmony_ci } while (flush_state <= COMMIT_TRANS); 9918c2ecf20Sopenharmony_ci} 9928c2ecf20Sopenharmony_ci 9938c2ecf20Sopenharmony_ci/* 9948c2ecf20Sopenharmony_ci * FLUSH_DELALLOC_WAIT: 9958c2ecf20Sopenharmony_ci * Space is freed from flushing delalloc in one of two ways. 9968c2ecf20Sopenharmony_ci * 9978c2ecf20Sopenharmony_ci * 1) compression is on and we allocate less space than we reserved 9988c2ecf20Sopenharmony_ci * 2) we are overwriting existing space 9998c2ecf20Sopenharmony_ci * 10008c2ecf20Sopenharmony_ci * For #1 that extra space is reclaimed as soon as the delalloc pages are 10018c2ecf20Sopenharmony_ci * COWed, by way of btrfs_add_reserved_bytes() which adds the actual extent 10028c2ecf20Sopenharmony_ci * length to ->bytes_reserved, and subtracts the reserved space from 10038c2ecf20Sopenharmony_ci * ->bytes_may_use. 10048c2ecf20Sopenharmony_ci * 10058c2ecf20Sopenharmony_ci * For #2 this is trickier. Once the ordered extent runs we will drop the 10068c2ecf20Sopenharmony_ci * extent in the range we are overwriting, which creates a delayed ref for 10078c2ecf20Sopenharmony_ci * that freed extent. 
This however is not reclaimed until the transaction 10088c2ecf20Sopenharmony_ci * commits, thus the next stages. 10098c2ecf20Sopenharmony_ci * 10108c2ecf20Sopenharmony_ci * RUN_DELAYED_IPUTS 10118c2ecf20Sopenharmony_ci * If we are freeing inodes, we want to make sure all delayed iputs have 10128c2ecf20Sopenharmony_ci * completed, because they could have been on an inode with i_nlink == 0, and 10138c2ecf20Sopenharmony_ci * thus have been truncated and freed up space. But again this space is not 10148c2ecf20Sopenharmony_ci * immediately re-usable, it comes in the form of a delayed ref, which must be 10158c2ecf20Sopenharmony_ci * run and then the transaction must be committed. 10168c2ecf20Sopenharmony_ci * 10178c2ecf20Sopenharmony_ci * FLUSH_DELAYED_REFS 10188c2ecf20Sopenharmony_ci * The above two cases generate delayed refs that will affect 10198c2ecf20Sopenharmony_ci * ->total_bytes_pinned. However this counter can be inconsistent with 10208c2ecf20Sopenharmony_ci * reality if there are outstanding delayed refs. This is because we adjust 10218c2ecf20Sopenharmony_ci * the counter based solely on the current set of delayed refs and disregard 10228c2ecf20Sopenharmony_ci * any on-disk state which might include more refs. So for example, if we 10238c2ecf20Sopenharmony_ci * have an extent with 2 references, but we only drop 1, we'll see that there 10248c2ecf20Sopenharmony_ci * is a negative delayed ref count for the extent and assume that the space 10258c2ecf20Sopenharmony_ci * will be freed, and thus increase ->total_bytes_pinned. 10268c2ecf20Sopenharmony_ci * 10278c2ecf20Sopenharmony_ci * Running the delayed refs gives us the actual real view of what will be 10288c2ecf20Sopenharmony_ci * freed at the transaction commit time. This stage will not actually free 10298c2ecf20Sopenharmony_ci * space for us, it just makes sure that may_commit_transaction() has all of 10308c2ecf20Sopenharmony_ci * the information it needs to make the right decision. 
10318c2ecf20Sopenharmony_ci * 10328c2ecf20Sopenharmony_ci * COMMIT_TRANS 10338c2ecf20Sopenharmony_ci * This is where we reclaim all of the pinned space generated by the previous 10348c2ecf20Sopenharmony_ci * two stages. We will not commit the transaction if we don't think we're 10358c2ecf20Sopenharmony_ci * likely to satisfy our request, which means if our current free space + 10368c2ecf20Sopenharmony_ci * total_bytes_pinned < reservation we will not commit. This is why the 10378c2ecf20Sopenharmony_ci * previous states are actually important, to make sure we know for sure 10388c2ecf20Sopenharmony_ci * whether committing the transaction will allow us to make progress. 10398c2ecf20Sopenharmony_ci * 10408c2ecf20Sopenharmony_ci * ALLOC_CHUNK_FORCE 10418c2ecf20Sopenharmony_ci * For data we start with alloc chunk force, however we could have been full 10428c2ecf20Sopenharmony_ci * before, and then the transaction commit could have freed new block groups, 10438c2ecf20Sopenharmony_ci * so if we now have space to allocate do the force chunk allocation. 
10448c2ecf20Sopenharmony_ci */ 10458c2ecf20Sopenharmony_cistatic const enum btrfs_flush_state data_flush_states[] = { 10468c2ecf20Sopenharmony_ci FLUSH_DELALLOC_WAIT, 10478c2ecf20Sopenharmony_ci RUN_DELAYED_IPUTS, 10488c2ecf20Sopenharmony_ci FLUSH_DELAYED_REFS, 10498c2ecf20Sopenharmony_ci COMMIT_TRANS, 10508c2ecf20Sopenharmony_ci ALLOC_CHUNK_FORCE, 10518c2ecf20Sopenharmony_ci}; 10528c2ecf20Sopenharmony_ci 10538c2ecf20Sopenharmony_cistatic void btrfs_async_reclaim_data_space(struct work_struct *work) 10548c2ecf20Sopenharmony_ci{ 10558c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info; 10568c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info; 10578c2ecf20Sopenharmony_ci u64 last_tickets_id; 10588c2ecf20Sopenharmony_ci int flush_state = 0; 10598c2ecf20Sopenharmony_ci 10608c2ecf20Sopenharmony_ci fs_info = container_of(work, struct btrfs_fs_info, async_data_reclaim_work); 10618c2ecf20Sopenharmony_ci space_info = fs_info->data_sinfo; 10628c2ecf20Sopenharmony_ci 10638c2ecf20Sopenharmony_ci spin_lock(&space_info->lock); 10648c2ecf20Sopenharmony_ci if (list_empty(&space_info->tickets)) { 10658c2ecf20Sopenharmony_ci space_info->flush = 0; 10668c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 10678c2ecf20Sopenharmony_ci return; 10688c2ecf20Sopenharmony_ci } 10698c2ecf20Sopenharmony_ci last_tickets_id = space_info->tickets_id; 10708c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 10718c2ecf20Sopenharmony_ci 10728c2ecf20Sopenharmony_ci while (!space_info->full) { 10738c2ecf20Sopenharmony_ci flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE); 10748c2ecf20Sopenharmony_ci spin_lock(&space_info->lock); 10758c2ecf20Sopenharmony_ci if (list_empty(&space_info->tickets)) { 10768c2ecf20Sopenharmony_ci space_info->flush = 0; 10778c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 10788c2ecf20Sopenharmony_ci return; 10798c2ecf20Sopenharmony_ci } 10808c2ecf20Sopenharmony_ci last_tickets_id = space_info->tickets_id; 10818c2ecf20Sopenharmony_ci 
spin_unlock(&space_info->lock); 10828c2ecf20Sopenharmony_ci } 10838c2ecf20Sopenharmony_ci 10848c2ecf20Sopenharmony_ci while (flush_state < ARRAY_SIZE(data_flush_states)) { 10858c2ecf20Sopenharmony_ci flush_space(fs_info, space_info, U64_MAX, 10868c2ecf20Sopenharmony_ci data_flush_states[flush_state]); 10878c2ecf20Sopenharmony_ci spin_lock(&space_info->lock); 10888c2ecf20Sopenharmony_ci if (list_empty(&space_info->tickets)) { 10898c2ecf20Sopenharmony_ci space_info->flush = 0; 10908c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 10918c2ecf20Sopenharmony_ci return; 10928c2ecf20Sopenharmony_ci } 10938c2ecf20Sopenharmony_ci 10948c2ecf20Sopenharmony_ci if (last_tickets_id == space_info->tickets_id) { 10958c2ecf20Sopenharmony_ci flush_state++; 10968c2ecf20Sopenharmony_ci } else { 10978c2ecf20Sopenharmony_ci last_tickets_id = space_info->tickets_id; 10988c2ecf20Sopenharmony_ci flush_state = 0; 10998c2ecf20Sopenharmony_ci } 11008c2ecf20Sopenharmony_ci 11018c2ecf20Sopenharmony_ci if (flush_state >= ARRAY_SIZE(data_flush_states)) { 11028c2ecf20Sopenharmony_ci if (space_info->full) { 11038c2ecf20Sopenharmony_ci if (maybe_fail_all_tickets(fs_info, space_info)) 11048c2ecf20Sopenharmony_ci flush_state = 0; 11058c2ecf20Sopenharmony_ci else 11068c2ecf20Sopenharmony_ci space_info->flush = 0; 11078c2ecf20Sopenharmony_ci } else { 11088c2ecf20Sopenharmony_ci flush_state = 0; 11098c2ecf20Sopenharmony_ci } 11108c2ecf20Sopenharmony_ci } 11118c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 11128c2ecf20Sopenharmony_ci } 11138c2ecf20Sopenharmony_ci} 11148c2ecf20Sopenharmony_ci 11158c2ecf20Sopenharmony_civoid btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info) 11168c2ecf20Sopenharmony_ci{ 11178c2ecf20Sopenharmony_ci INIT_WORK(&fs_info->async_reclaim_work, btrfs_async_reclaim_metadata_space); 11188c2ecf20Sopenharmony_ci INIT_WORK(&fs_info->async_data_reclaim_work, btrfs_async_reclaim_data_space); 11198c2ecf20Sopenharmony_ci} 11208c2ecf20Sopenharmony_ci 
11218c2ecf20Sopenharmony_cistatic const enum btrfs_flush_state priority_flush_states[] = { 11228c2ecf20Sopenharmony_ci FLUSH_DELAYED_ITEMS_NR, 11238c2ecf20Sopenharmony_ci FLUSH_DELAYED_ITEMS, 11248c2ecf20Sopenharmony_ci ALLOC_CHUNK, 11258c2ecf20Sopenharmony_ci}; 11268c2ecf20Sopenharmony_ci 11278c2ecf20Sopenharmony_cistatic const enum btrfs_flush_state evict_flush_states[] = { 11288c2ecf20Sopenharmony_ci FLUSH_DELAYED_ITEMS_NR, 11298c2ecf20Sopenharmony_ci FLUSH_DELAYED_ITEMS, 11308c2ecf20Sopenharmony_ci FLUSH_DELAYED_REFS_NR, 11318c2ecf20Sopenharmony_ci FLUSH_DELAYED_REFS, 11328c2ecf20Sopenharmony_ci FLUSH_DELALLOC, 11338c2ecf20Sopenharmony_ci FLUSH_DELALLOC_WAIT, 11348c2ecf20Sopenharmony_ci ALLOC_CHUNK, 11358c2ecf20Sopenharmony_ci COMMIT_TRANS, 11368c2ecf20Sopenharmony_ci}; 11378c2ecf20Sopenharmony_ci 11388c2ecf20Sopenharmony_cistatic void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info, 11398c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info, 11408c2ecf20Sopenharmony_ci struct reserve_ticket *ticket, 11418c2ecf20Sopenharmony_ci const enum btrfs_flush_state *states, 11428c2ecf20Sopenharmony_ci int states_nr) 11438c2ecf20Sopenharmony_ci{ 11448c2ecf20Sopenharmony_ci u64 to_reclaim; 11458c2ecf20Sopenharmony_ci int flush_state; 11468c2ecf20Sopenharmony_ci 11478c2ecf20Sopenharmony_ci spin_lock(&space_info->lock); 11488c2ecf20Sopenharmony_ci to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info); 11498c2ecf20Sopenharmony_ci if (!to_reclaim) { 11508c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 11518c2ecf20Sopenharmony_ci return; 11528c2ecf20Sopenharmony_ci } 11538c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 11548c2ecf20Sopenharmony_ci 11558c2ecf20Sopenharmony_ci flush_state = 0; 11568c2ecf20Sopenharmony_ci do { 11578c2ecf20Sopenharmony_ci flush_space(fs_info, space_info, to_reclaim, states[flush_state]); 11588c2ecf20Sopenharmony_ci flush_state++; 11598c2ecf20Sopenharmony_ci spin_lock(&space_info->lock); 
11608c2ecf20Sopenharmony_ci if (ticket->bytes == 0) { 11618c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 11628c2ecf20Sopenharmony_ci return; 11638c2ecf20Sopenharmony_ci } 11648c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 11658c2ecf20Sopenharmony_ci } while (flush_state < states_nr); 11668c2ecf20Sopenharmony_ci} 11678c2ecf20Sopenharmony_ci 11688c2ecf20Sopenharmony_cistatic void priority_reclaim_data_space(struct btrfs_fs_info *fs_info, 11698c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info, 11708c2ecf20Sopenharmony_ci struct reserve_ticket *ticket) 11718c2ecf20Sopenharmony_ci{ 11728c2ecf20Sopenharmony_ci while (!space_info->full) { 11738c2ecf20Sopenharmony_ci flush_space(fs_info, space_info, U64_MAX, ALLOC_CHUNK_FORCE); 11748c2ecf20Sopenharmony_ci spin_lock(&space_info->lock); 11758c2ecf20Sopenharmony_ci if (ticket->bytes == 0) { 11768c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 11778c2ecf20Sopenharmony_ci return; 11788c2ecf20Sopenharmony_ci } 11798c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 11808c2ecf20Sopenharmony_ci } 11818c2ecf20Sopenharmony_ci} 11828c2ecf20Sopenharmony_ci 11838c2ecf20Sopenharmony_cistatic void wait_reserve_ticket(struct btrfs_fs_info *fs_info, 11848c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info, 11858c2ecf20Sopenharmony_ci struct reserve_ticket *ticket) 11868c2ecf20Sopenharmony_ci 11878c2ecf20Sopenharmony_ci{ 11888c2ecf20Sopenharmony_ci DEFINE_WAIT(wait); 11898c2ecf20Sopenharmony_ci int ret = 0; 11908c2ecf20Sopenharmony_ci 11918c2ecf20Sopenharmony_ci spin_lock(&space_info->lock); 11928c2ecf20Sopenharmony_ci while (ticket->bytes > 0 && ticket->error == 0) { 11938c2ecf20Sopenharmony_ci ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE); 11948c2ecf20Sopenharmony_ci if (ret) { 11958c2ecf20Sopenharmony_ci /* 11968c2ecf20Sopenharmony_ci * Delete us from the list. 
After we unlock the space 11978c2ecf20Sopenharmony_ci * info, we don't want the async reclaim job to reserve 11988c2ecf20Sopenharmony_ci * space for this ticket. If that would happen, then the 11998c2ecf20Sopenharmony_ci * ticket's task would not known that space was reserved 12008c2ecf20Sopenharmony_ci * despite getting an error, resulting in a space leak 12018c2ecf20Sopenharmony_ci * (bytes_may_use counter of our space_info). 12028c2ecf20Sopenharmony_ci */ 12038c2ecf20Sopenharmony_ci remove_ticket(space_info, ticket); 12048c2ecf20Sopenharmony_ci ticket->error = -EINTR; 12058c2ecf20Sopenharmony_ci break; 12068c2ecf20Sopenharmony_ci } 12078c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 12088c2ecf20Sopenharmony_ci 12098c2ecf20Sopenharmony_ci schedule(); 12108c2ecf20Sopenharmony_ci 12118c2ecf20Sopenharmony_ci finish_wait(&ticket->wait, &wait); 12128c2ecf20Sopenharmony_ci spin_lock(&space_info->lock); 12138c2ecf20Sopenharmony_ci } 12148c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 12158c2ecf20Sopenharmony_ci} 12168c2ecf20Sopenharmony_ci 12178c2ecf20Sopenharmony_ci/** 12188c2ecf20Sopenharmony_ci * handle_reserve_ticket - do the appropriate flushing and waiting for a ticket 12198c2ecf20Sopenharmony_ci * @fs_info - the fs 12208c2ecf20Sopenharmony_ci * @space_info - the space_info for the reservation 12218c2ecf20Sopenharmony_ci * @ticket - the ticket for the reservation 12228c2ecf20Sopenharmony_ci * @flush - how much we can flush 12238c2ecf20Sopenharmony_ci * 12248c2ecf20Sopenharmony_ci * This does the work of figuring out how to flush for the ticket, waiting for 12258c2ecf20Sopenharmony_ci * the reservation, and returning the appropriate error if there is one. 
12268c2ecf20Sopenharmony_ci */ 12278c2ecf20Sopenharmony_cistatic int handle_reserve_ticket(struct btrfs_fs_info *fs_info, 12288c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info, 12298c2ecf20Sopenharmony_ci struct reserve_ticket *ticket, 12308c2ecf20Sopenharmony_ci enum btrfs_reserve_flush_enum flush) 12318c2ecf20Sopenharmony_ci{ 12328c2ecf20Sopenharmony_ci int ret; 12338c2ecf20Sopenharmony_ci 12348c2ecf20Sopenharmony_ci switch (flush) { 12358c2ecf20Sopenharmony_ci case BTRFS_RESERVE_FLUSH_DATA: 12368c2ecf20Sopenharmony_ci case BTRFS_RESERVE_FLUSH_ALL: 12378c2ecf20Sopenharmony_ci case BTRFS_RESERVE_FLUSH_ALL_STEAL: 12388c2ecf20Sopenharmony_ci wait_reserve_ticket(fs_info, space_info, ticket); 12398c2ecf20Sopenharmony_ci break; 12408c2ecf20Sopenharmony_ci case BTRFS_RESERVE_FLUSH_LIMIT: 12418c2ecf20Sopenharmony_ci priority_reclaim_metadata_space(fs_info, space_info, ticket, 12428c2ecf20Sopenharmony_ci priority_flush_states, 12438c2ecf20Sopenharmony_ci ARRAY_SIZE(priority_flush_states)); 12448c2ecf20Sopenharmony_ci break; 12458c2ecf20Sopenharmony_ci case BTRFS_RESERVE_FLUSH_EVICT: 12468c2ecf20Sopenharmony_ci priority_reclaim_metadata_space(fs_info, space_info, ticket, 12478c2ecf20Sopenharmony_ci evict_flush_states, 12488c2ecf20Sopenharmony_ci ARRAY_SIZE(evict_flush_states)); 12498c2ecf20Sopenharmony_ci break; 12508c2ecf20Sopenharmony_ci case BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE: 12518c2ecf20Sopenharmony_ci priority_reclaim_data_space(fs_info, space_info, ticket); 12528c2ecf20Sopenharmony_ci break; 12538c2ecf20Sopenharmony_ci default: 12548c2ecf20Sopenharmony_ci ASSERT(0); 12558c2ecf20Sopenharmony_ci break; 12568c2ecf20Sopenharmony_ci } 12578c2ecf20Sopenharmony_ci 12588c2ecf20Sopenharmony_ci spin_lock(&space_info->lock); 12598c2ecf20Sopenharmony_ci ret = ticket->error; 12608c2ecf20Sopenharmony_ci if (ticket->bytes || ticket->error) { 12618c2ecf20Sopenharmony_ci /* 12628c2ecf20Sopenharmony_ci * We were a priority ticket, so we need to delete ourselves 
12638c2ecf20Sopenharmony_ci * from the list. Because we could have other priority tickets 12648c2ecf20Sopenharmony_ci * behind us that require less space, run 12658c2ecf20Sopenharmony_ci * btrfs_try_granting_tickets() to see if their reservations can 12668c2ecf20Sopenharmony_ci * now be made. 12678c2ecf20Sopenharmony_ci */ 12688c2ecf20Sopenharmony_ci if (!list_empty(&ticket->list)) { 12698c2ecf20Sopenharmony_ci remove_ticket(space_info, ticket); 12708c2ecf20Sopenharmony_ci btrfs_try_granting_tickets(fs_info, space_info); 12718c2ecf20Sopenharmony_ci } 12728c2ecf20Sopenharmony_ci 12738c2ecf20Sopenharmony_ci if (!ret) 12748c2ecf20Sopenharmony_ci ret = -ENOSPC; 12758c2ecf20Sopenharmony_ci } 12768c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 12778c2ecf20Sopenharmony_ci ASSERT(list_empty(&ticket->list)); 12788c2ecf20Sopenharmony_ci /* 12798c2ecf20Sopenharmony_ci * Check that we can't have an error set if the reservation succeeded, 12808c2ecf20Sopenharmony_ci * as that would confuse tasks and lead them to error out without 12818c2ecf20Sopenharmony_ci * releasing reserved space (if an error happens the expectation is that 12828c2ecf20Sopenharmony_ci * space wasn't reserved at all). 12838c2ecf20Sopenharmony_ci */ 12848c2ecf20Sopenharmony_ci ASSERT(!(ticket->bytes == 0 && ticket->error)); 12858c2ecf20Sopenharmony_ci return ret; 12868c2ecf20Sopenharmony_ci} 12878c2ecf20Sopenharmony_ci 12888c2ecf20Sopenharmony_ci/* 12898c2ecf20Sopenharmony_ci * This returns true if this flush state will go through the ordinary flushing 12908c2ecf20Sopenharmony_ci * code. 
12918c2ecf20Sopenharmony_ci */ 12928c2ecf20Sopenharmony_cistatic inline bool is_normal_flushing(enum btrfs_reserve_flush_enum flush) 12938c2ecf20Sopenharmony_ci{ 12948c2ecf20Sopenharmony_ci return (flush == BTRFS_RESERVE_FLUSH_ALL) || 12958c2ecf20Sopenharmony_ci (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL); 12968c2ecf20Sopenharmony_ci} 12978c2ecf20Sopenharmony_ci 12988c2ecf20Sopenharmony_ci/** 12998c2ecf20Sopenharmony_ci * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space 13008c2ecf20Sopenharmony_ci * @root - the root we're allocating for 13018c2ecf20Sopenharmony_ci * @space_info - the space info we want to allocate from 13028c2ecf20Sopenharmony_ci * @orig_bytes - the number of bytes we want 13038c2ecf20Sopenharmony_ci * @flush - whether or not we can flush to make our reservation 13048c2ecf20Sopenharmony_ci * 13058c2ecf20Sopenharmony_ci * This will reserve orig_bytes number of bytes from the space info associated 13068c2ecf20Sopenharmony_ci * with the block_rsv. If there is not enough space it will make an attempt to 13078c2ecf20Sopenharmony_ci * flush out space to make room. It will do this by flushing delalloc if 13088c2ecf20Sopenharmony_ci * possible or committing the transaction. If flush is 0 then no attempts to 13098c2ecf20Sopenharmony_ci * regain reservations will be made and this will fail if there is not enough 13108c2ecf20Sopenharmony_ci * space already. 
13118c2ecf20Sopenharmony_ci */ 13128c2ecf20Sopenharmony_cistatic int __reserve_bytes(struct btrfs_fs_info *fs_info, 13138c2ecf20Sopenharmony_ci struct btrfs_space_info *space_info, u64 orig_bytes, 13148c2ecf20Sopenharmony_ci enum btrfs_reserve_flush_enum flush) 13158c2ecf20Sopenharmony_ci{ 13168c2ecf20Sopenharmony_ci struct work_struct *async_work; 13178c2ecf20Sopenharmony_ci struct reserve_ticket ticket; 13188c2ecf20Sopenharmony_ci u64 used; 13198c2ecf20Sopenharmony_ci int ret = 0; 13208c2ecf20Sopenharmony_ci bool pending_tickets; 13218c2ecf20Sopenharmony_ci 13228c2ecf20Sopenharmony_ci ASSERT(orig_bytes); 13238c2ecf20Sopenharmony_ci ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_ALL); 13248c2ecf20Sopenharmony_ci 13258c2ecf20Sopenharmony_ci if (flush == BTRFS_RESERVE_FLUSH_DATA) 13268c2ecf20Sopenharmony_ci async_work = &fs_info->async_data_reclaim_work; 13278c2ecf20Sopenharmony_ci else 13288c2ecf20Sopenharmony_ci async_work = &fs_info->async_reclaim_work; 13298c2ecf20Sopenharmony_ci 13308c2ecf20Sopenharmony_ci spin_lock(&space_info->lock); 13318c2ecf20Sopenharmony_ci ret = -ENOSPC; 13328c2ecf20Sopenharmony_ci used = btrfs_space_info_used(space_info, true); 13338c2ecf20Sopenharmony_ci 13348c2ecf20Sopenharmony_ci /* 13358c2ecf20Sopenharmony_ci * We don't want NO_FLUSH allocations to jump everybody, they can 13368c2ecf20Sopenharmony_ci * generally handle ENOSPC in a different way, so treat them the same as 13378c2ecf20Sopenharmony_ci * normal flushers when it comes to skipping pending tickets. 
13388c2ecf20Sopenharmony_ci */ 13398c2ecf20Sopenharmony_ci if (is_normal_flushing(flush) || (flush == BTRFS_RESERVE_NO_FLUSH)) 13408c2ecf20Sopenharmony_ci pending_tickets = !list_empty(&space_info->tickets) || 13418c2ecf20Sopenharmony_ci !list_empty(&space_info->priority_tickets); 13428c2ecf20Sopenharmony_ci else 13438c2ecf20Sopenharmony_ci pending_tickets = !list_empty(&space_info->priority_tickets); 13448c2ecf20Sopenharmony_ci 13458c2ecf20Sopenharmony_ci /* 13468c2ecf20Sopenharmony_ci * Carry on if we have enough space (short-circuit) OR call 13478c2ecf20Sopenharmony_ci * can_overcommit() to ensure we can overcommit to continue. 13488c2ecf20Sopenharmony_ci */ 13498c2ecf20Sopenharmony_ci if (!pending_tickets && 13508c2ecf20Sopenharmony_ci ((used + orig_bytes <= space_info->total_bytes) || 13518c2ecf20Sopenharmony_ci btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) { 13528c2ecf20Sopenharmony_ci btrfs_space_info_update_bytes_may_use(fs_info, space_info, 13538c2ecf20Sopenharmony_ci orig_bytes); 13548c2ecf20Sopenharmony_ci ret = 0; 13558c2ecf20Sopenharmony_ci } 13568c2ecf20Sopenharmony_ci 13578c2ecf20Sopenharmony_ci /* 13588c2ecf20Sopenharmony_ci * If we couldn't make a reservation then setup our reservation ticket 13598c2ecf20Sopenharmony_ci * and kick the async worker if it's not already running. 13608c2ecf20Sopenharmony_ci * 13618c2ecf20Sopenharmony_ci * If we are a priority flusher then we just need to add our ticket to 13628c2ecf20Sopenharmony_ci * the list and we will do our own flushing further down. 
13638c2ecf20Sopenharmony_ci */ 13648c2ecf20Sopenharmony_ci if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { 13658c2ecf20Sopenharmony_ci ticket.bytes = orig_bytes; 13668c2ecf20Sopenharmony_ci ticket.error = 0; 13678c2ecf20Sopenharmony_ci space_info->reclaim_size += ticket.bytes; 13688c2ecf20Sopenharmony_ci init_waitqueue_head(&ticket.wait); 13698c2ecf20Sopenharmony_ci ticket.steal = (flush == BTRFS_RESERVE_FLUSH_ALL_STEAL); 13708c2ecf20Sopenharmony_ci if (flush == BTRFS_RESERVE_FLUSH_ALL || 13718c2ecf20Sopenharmony_ci flush == BTRFS_RESERVE_FLUSH_ALL_STEAL || 13728c2ecf20Sopenharmony_ci flush == BTRFS_RESERVE_FLUSH_DATA) { 13738c2ecf20Sopenharmony_ci list_add_tail(&ticket.list, &space_info->tickets); 13748c2ecf20Sopenharmony_ci if (!space_info->flush) { 13758c2ecf20Sopenharmony_ci space_info->flush = 1; 13768c2ecf20Sopenharmony_ci trace_btrfs_trigger_flush(fs_info, 13778c2ecf20Sopenharmony_ci space_info->flags, 13788c2ecf20Sopenharmony_ci orig_bytes, flush, 13798c2ecf20Sopenharmony_ci "enospc"); 13808c2ecf20Sopenharmony_ci queue_work(system_unbound_wq, async_work); 13818c2ecf20Sopenharmony_ci } 13828c2ecf20Sopenharmony_ci } else { 13838c2ecf20Sopenharmony_ci list_add_tail(&ticket.list, 13848c2ecf20Sopenharmony_ci &space_info->priority_tickets); 13858c2ecf20Sopenharmony_ci } 13868c2ecf20Sopenharmony_ci } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { 13878c2ecf20Sopenharmony_ci used += orig_bytes; 13888c2ecf20Sopenharmony_ci /* 13898c2ecf20Sopenharmony_ci * We will do the space reservation dance during log replay, 13908c2ecf20Sopenharmony_ci * which means we won't have fs_info->fs_root set, so don't do 13918c2ecf20Sopenharmony_ci * the async reclaim as we will panic. 
13928c2ecf20Sopenharmony_ci */ 13938c2ecf20Sopenharmony_ci if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) && 13948c2ecf20Sopenharmony_ci need_do_async_reclaim(fs_info, space_info, used) && 13958c2ecf20Sopenharmony_ci !work_busy(&fs_info->async_reclaim_work)) { 13968c2ecf20Sopenharmony_ci trace_btrfs_trigger_flush(fs_info, space_info->flags, 13978c2ecf20Sopenharmony_ci orig_bytes, flush, "preempt"); 13988c2ecf20Sopenharmony_ci queue_work(system_unbound_wq, 13998c2ecf20Sopenharmony_ci &fs_info->async_reclaim_work); 14008c2ecf20Sopenharmony_ci } 14018c2ecf20Sopenharmony_ci } 14028c2ecf20Sopenharmony_ci spin_unlock(&space_info->lock); 14038c2ecf20Sopenharmony_ci if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) 14048c2ecf20Sopenharmony_ci return ret; 14058c2ecf20Sopenharmony_ci 14068c2ecf20Sopenharmony_ci return handle_reserve_ticket(fs_info, space_info, &ticket, flush); 14078c2ecf20Sopenharmony_ci} 14088c2ecf20Sopenharmony_ci 14098c2ecf20Sopenharmony_ci/** 14108c2ecf20Sopenharmony_ci * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space 14118c2ecf20Sopenharmony_ci * @root - the root we're allocating for 14128c2ecf20Sopenharmony_ci * @block_rsv - the block_rsv we're allocating for 14138c2ecf20Sopenharmony_ci * @orig_bytes - the number of bytes we want 14148c2ecf20Sopenharmony_ci * @flush - whether or not we can flush to make our reservation 14158c2ecf20Sopenharmony_ci * 14168c2ecf20Sopenharmony_ci * This will reserve orig_bytes number of bytes from the space info associated 14178c2ecf20Sopenharmony_ci * with the block_rsv. If there is not enough space it will make an attempt to 14188c2ecf20Sopenharmony_ci * flush out space to make room. It will do this by flushing delalloc if 14198c2ecf20Sopenharmony_ci * possible or committing the transaction. If flush is 0 then no attempts to 14208c2ecf20Sopenharmony_ci * regain reservations will be made and this will fail if there is not enough 14218c2ecf20Sopenharmony_ci * space already. 
14228c2ecf20Sopenharmony_ci */ 14238c2ecf20Sopenharmony_ciint btrfs_reserve_metadata_bytes(struct btrfs_root *root, 14248c2ecf20Sopenharmony_ci struct btrfs_block_rsv *block_rsv, 14258c2ecf20Sopenharmony_ci u64 orig_bytes, 14268c2ecf20Sopenharmony_ci enum btrfs_reserve_flush_enum flush) 14278c2ecf20Sopenharmony_ci{ 14288c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 14298c2ecf20Sopenharmony_ci struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; 14308c2ecf20Sopenharmony_ci int ret; 14318c2ecf20Sopenharmony_ci 14328c2ecf20Sopenharmony_ci ret = __reserve_bytes(fs_info, block_rsv->space_info, orig_bytes, flush); 14338c2ecf20Sopenharmony_ci if (ret == -ENOSPC && 14348c2ecf20Sopenharmony_ci unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { 14358c2ecf20Sopenharmony_ci if (block_rsv != global_rsv && 14368c2ecf20Sopenharmony_ci !btrfs_block_rsv_use_bytes(global_rsv, orig_bytes)) 14378c2ecf20Sopenharmony_ci ret = 0; 14388c2ecf20Sopenharmony_ci } 14398c2ecf20Sopenharmony_ci if (ret == -ENOSPC) { 14408c2ecf20Sopenharmony_ci trace_btrfs_space_reservation(fs_info, "space_info:enospc", 14418c2ecf20Sopenharmony_ci block_rsv->space_info->flags, 14428c2ecf20Sopenharmony_ci orig_bytes, 1); 14438c2ecf20Sopenharmony_ci 14448c2ecf20Sopenharmony_ci if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) 14458c2ecf20Sopenharmony_ci btrfs_dump_space_info(fs_info, block_rsv->space_info, 14468c2ecf20Sopenharmony_ci orig_bytes, 0); 14478c2ecf20Sopenharmony_ci } 14488c2ecf20Sopenharmony_ci return ret; 14498c2ecf20Sopenharmony_ci} 14508c2ecf20Sopenharmony_ci 14518c2ecf20Sopenharmony_ci/** 14528c2ecf20Sopenharmony_ci * btrfs_reserve_data_bytes - try to reserve data bytes for an allocation 14538c2ecf20Sopenharmony_ci * @fs_info - the filesystem 14548c2ecf20Sopenharmony_ci * @bytes - the number of bytes we need 14558c2ecf20Sopenharmony_ci * @flush - how we are allowed to flush 14568c2ecf20Sopenharmony_ci * 14578c2ecf20Sopenharmony_ci * This will reserve bytes 
from the data space info. If there is not enough 14588c2ecf20Sopenharmony_ci * space then we will attempt to flush space as specified by flush. 14598c2ecf20Sopenharmony_ci */ 14608c2ecf20Sopenharmony_ciint btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes, 14618c2ecf20Sopenharmony_ci enum btrfs_reserve_flush_enum flush) 14628c2ecf20Sopenharmony_ci{ 14638c2ecf20Sopenharmony_ci struct btrfs_space_info *data_sinfo = fs_info->data_sinfo; 14648c2ecf20Sopenharmony_ci int ret; 14658c2ecf20Sopenharmony_ci 14668c2ecf20Sopenharmony_ci ASSERT(flush == BTRFS_RESERVE_FLUSH_DATA || 14678c2ecf20Sopenharmony_ci flush == BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE); 14688c2ecf20Sopenharmony_ci ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_DATA); 14698c2ecf20Sopenharmony_ci 14708c2ecf20Sopenharmony_ci ret = __reserve_bytes(fs_info, data_sinfo, bytes, flush); 14718c2ecf20Sopenharmony_ci if (ret == -ENOSPC) { 14728c2ecf20Sopenharmony_ci trace_btrfs_space_reservation(fs_info, "space_info:enospc", 14738c2ecf20Sopenharmony_ci data_sinfo->flags, bytes, 1); 14748c2ecf20Sopenharmony_ci if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) 14758c2ecf20Sopenharmony_ci btrfs_dump_space_info(fs_info, data_sinfo, bytes, 0); 14768c2ecf20Sopenharmony_ci } 14778c2ecf20Sopenharmony_ci return ret; 14788c2ecf20Sopenharmony_ci} 1479