18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci 38c2ecf20Sopenharmony_ci#include "ctree.h" 48c2ecf20Sopenharmony_ci#include "delalloc-space.h" 58c2ecf20Sopenharmony_ci#include "block-rsv.h" 68c2ecf20Sopenharmony_ci#include "btrfs_inode.h" 78c2ecf20Sopenharmony_ci#include "space-info.h" 88c2ecf20Sopenharmony_ci#include "transaction.h" 98c2ecf20Sopenharmony_ci#include "qgroup.h" 108c2ecf20Sopenharmony_ci#include "block-group.h" 118c2ecf20Sopenharmony_ci 128c2ecf20Sopenharmony_ci/* 138c2ecf20Sopenharmony_ci * HOW DOES THIS WORK 148c2ecf20Sopenharmony_ci * 158c2ecf20Sopenharmony_ci * There are two stages to data reservations, one for data and one for metadata 168c2ecf20Sopenharmony_ci * to handle the new extents and checksums generated by writing data. 178c2ecf20Sopenharmony_ci * 188c2ecf20Sopenharmony_ci * 198c2ecf20Sopenharmony_ci * DATA RESERVATION 208c2ecf20Sopenharmony_ci * The general flow of the data reservation is as follows 218c2ecf20Sopenharmony_ci * 228c2ecf20Sopenharmony_ci * -> Reserve 238c2ecf20Sopenharmony_ci * We call into btrfs_reserve_data_bytes() for the user request bytes that 248c2ecf20Sopenharmony_ci * they wish to write. We make this reservation and add it to 258c2ecf20Sopenharmony_ci * space_info->bytes_may_use. We set EXTENT_DELALLOC on the inode io_tree 268c2ecf20Sopenharmony_ci * for the range and carry on if this is buffered, or follow up trying to 278c2ecf20Sopenharmony_ci * make a real allocation if we are pre-allocating or doing O_DIRECT. 288c2ecf20Sopenharmony_ci * 298c2ecf20Sopenharmony_ci * -> Use 308c2ecf20Sopenharmony_ci * At writepages()/prealloc/O_DIRECT time we will call into 318c2ecf20Sopenharmony_ci * btrfs_reserve_extent() for some part or all of this range of bytes. We 328c2ecf20Sopenharmony_ci * will make the allocation and subtract space_info->bytes_may_use by the 338c2ecf20Sopenharmony_ci * original requested length and increase the space_info->bytes_reserved by 348c2ecf20Sopenharmony_ci * the allocated length. This distinction is important because compression 358c2ecf20Sopenharmony_ci * may allocate a smaller on disk extent than we previously reserved. 368c2ecf20Sopenharmony_ci * 378c2ecf20Sopenharmony_ci * -> Allocation 388c2ecf20Sopenharmony_ci * finish_ordered_io() will insert the new file extent item for this range, 398c2ecf20Sopenharmony_ci * and then add a delayed ref update for the extent tree. Once that delayed 408c2ecf20Sopenharmony_ci * ref is written the extent size is subtracted from 418c2ecf20Sopenharmony_ci * space_info->bytes_reserved and added to space_info->bytes_used. 428c2ecf20Sopenharmony_ci * 438c2ecf20Sopenharmony_ci * Error handling 448c2ecf20Sopenharmony_ci * 458c2ecf20Sopenharmony_ci * -> By the reservation maker 468c2ecf20Sopenharmony_ci * This is the simplest case, we haven't completed our operation and we know 478c2ecf20Sopenharmony_ci * how much we reserved, we can simply call 488c2ecf20Sopenharmony_ci * btrfs_free_reserved_data_space*() and it will be removed from 498c2ecf20Sopenharmony_ci * space_info->bytes_may_use. 508c2ecf20Sopenharmony_ci * 518c2ecf20Sopenharmony_ci * -> After the reservation has been made, but before cow_file_range() 528c2ecf20Sopenharmony_ci * This is specifically for the delalloc case. You must clear 538c2ecf20Sopenharmony_ci * EXTENT_DELALLOC with the EXTENT_CLEAR_DATA_RESV bit, and the range will 548c2ecf20Sopenharmony_ci * be subtracted from space_info->bytes_may_use. 558c2ecf20Sopenharmony_ci * 568c2ecf20Sopenharmony_ci * METADATA RESERVATION 578c2ecf20Sopenharmony_ci * The general metadata reservation lifetimes are discussed elsewhere, this 588c2ecf20Sopenharmony_ci * will just focus on how it is used for delalloc space. 598c2ecf20Sopenharmony_ci * 608c2ecf20Sopenharmony_ci * We keep track of two things on a per inode bases 618c2ecf20Sopenharmony_ci * 628c2ecf20Sopenharmony_ci * ->outstanding_extents 638c2ecf20Sopenharmony_ci * This is the number of file extent items we'll need to handle all of the 648c2ecf20Sopenharmony_ci * outstanding DELALLOC space we have in this inode. We limit the maximum 658c2ecf20Sopenharmony_ci * size of an extent, so a large contiguous dirty area may require more than 668c2ecf20Sopenharmony_ci * one outstanding_extent, which is why count_max_extents() is used to 678c2ecf20Sopenharmony_ci * determine how many outstanding_extents get added. 688c2ecf20Sopenharmony_ci * 698c2ecf20Sopenharmony_ci * ->csum_bytes 708c2ecf20Sopenharmony_ci * This is essentially how many dirty bytes we have for this inode, so we 718c2ecf20Sopenharmony_ci * can calculate the number of checksum items we would have to add in order 728c2ecf20Sopenharmony_ci * to checksum our outstanding data. 738c2ecf20Sopenharmony_ci * 748c2ecf20Sopenharmony_ci * We keep a per-inode block_rsv in order to make it easier to keep track of 758c2ecf20Sopenharmony_ci * our reservation. We use btrfs_calculate_inode_block_rsv_size() to 768c2ecf20Sopenharmony_ci * calculate the current theoretical maximum reservation we would need for the 778c2ecf20Sopenharmony_ci * metadata for this inode. We call this and then adjust our reservation as 788c2ecf20Sopenharmony_ci * necessary, either by attempting to reserve more space, or freeing up excess 798c2ecf20Sopenharmony_ci * space. 808c2ecf20Sopenharmony_ci * 818c2ecf20Sopenharmony_ci * OUTSTANDING_EXTENTS HANDLING 828c2ecf20Sopenharmony_ci * 838c2ecf20Sopenharmony_ci * ->outstanding_extents is used for keeping track of how many extents we will 848c2ecf20Sopenharmony_ci * need to use for this inode, and it will fluctuate depending on where you are 858c2ecf20Sopenharmony_ci * in the life cycle of the dirty data. Consider the following normal case for 868c2ecf20Sopenharmony_ci * a completely clean inode, with a num_bytes < our maximum allowed extent size 878c2ecf20Sopenharmony_ci * 888c2ecf20Sopenharmony_ci * -> reserve 898c2ecf20Sopenharmony_ci * ->outstanding_extents += 1 (current value is 1) 908c2ecf20Sopenharmony_ci * 918c2ecf20Sopenharmony_ci * -> set_delalloc 928c2ecf20Sopenharmony_ci * ->outstanding_extents += 1 (currrent value is 2) 938c2ecf20Sopenharmony_ci * 948c2ecf20Sopenharmony_ci * -> btrfs_delalloc_release_extents() 958c2ecf20Sopenharmony_ci * ->outstanding_extents -= 1 (current value is 1) 968c2ecf20Sopenharmony_ci * 978c2ecf20Sopenharmony_ci * We must call this once we are done, as we hold our reservation for the 988c2ecf20Sopenharmony_ci * duration of our operation, and then assume set_delalloc will update the 998c2ecf20Sopenharmony_ci * counter appropriately. 1008c2ecf20Sopenharmony_ci * 1018c2ecf20Sopenharmony_ci * -> add ordered extent 1028c2ecf20Sopenharmony_ci * ->outstanding_extents += 1 (current value is 2) 1038c2ecf20Sopenharmony_ci * 1048c2ecf20Sopenharmony_ci * -> btrfs_clear_delalloc_extent 1058c2ecf20Sopenharmony_ci * ->outstanding_extents -= 1 (current value is 1) 1068c2ecf20Sopenharmony_ci * 1078c2ecf20Sopenharmony_ci * -> finish_ordered_io/btrfs_remove_ordered_extent 1088c2ecf20Sopenharmony_ci * ->outstanding_extents -= 1 (current value is 0) 1098c2ecf20Sopenharmony_ci * 1108c2ecf20Sopenharmony_ci * Each stage is responsible for their own accounting of the extent, thus 1118c2ecf20Sopenharmony_ci * making error handling and cleanup easier. 1128c2ecf20Sopenharmony_ci */ 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ciint btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes) 1158c2ecf20Sopenharmony_ci{ 1168c2ecf20Sopenharmony_ci struct btrfs_root *root = inode->root; 1178c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 1188c2ecf20Sopenharmony_ci enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_DATA; 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci /* Make sure bytes are sectorsize aligned */ 1218c2ecf20Sopenharmony_ci bytes = ALIGN(bytes, fs_info->sectorsize); 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_ci if (btrfs_is_free_space_inode(inode)) 1248c2ecf20Sopenharmony_ci flush = BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE; 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_ci return btrfs_reserve_data_bytes(fs_info, bytes, flush); 1278c2ecf20Sopenharmony_ci} 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_ciint btrfs_check_data_free_space(struct btrfs_inode *inode, 1308c2ecf20Sopenharmony_ci struct extent_changeset **reserved, u64 start, u64 len) 1318c2ecf20Sopenharmony_ci{ 1328c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 1338c2ecf20Sopenharmony_ci int ret; 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_ci /* align the range */ 1368c2ecf20Sopenharmony_ci len = round_up(start + len, fs_info->sectorsize) - 1378c2ecf20Sopenharmony_ci round_down(start, fs_info->sectorsize); 1388c2ecf20Sopenharmony_ci start = round_down(start, fs_info->sectorsize); 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci ret = btrfs_alloc_data_chunk_ondemand(inode, len); 1418c2ecf20Sopenharmony_ci if (ret < 0) 1428c2ecf20Sopenharmony_ci return ret; 1438c2ecf20Sopenharmony_ci 1448c2ecf20Sopenharmony_ci /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */ 1458c2ecf20Sopenharmony_ci ret = btrfs_qgroup_reserve_data(inode, reserved, start, len); 1468c2ecf20Sopenharmony_ci if (ret < 0) 1478c2ecf20Sopenharmony_ci btrfs_free_reserved_data_space_noquota(fs_info, len); 1488c2ecf20Sopenharmony_ci else 1498c2ecf20Sopenharmony_ci ret = 0; 1508c2ecf20Sopenharmony_ci return ret; 1518c2ecf20Sopenharmony_ci} 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci/* 1548c2ecf20Sopenharmony_ci * Called if we need to clear a data reservation for this inode 1558c2ecf20Sopenharmony_ci * Normally in a error case. 1568c2ecf20Sopenharmony_ci * 1578c2ecf20Sopenharmony_ci * This one will *NOT* use accurate qgroup reserved space API, just for case 1588c2ecf20Sopenharmony_ci * which we can't sleep and is sure it won't affect qgroup reserved space. 1598c2ecf20Sopenharmony_ci * Like clear_bit_hook(). 1608c2ecf20Sopenharmony_ci */ 1618c2ecf20Sopenharmony_civoid btrfs_free_reserved_data_space_noquota(struct btrfs_fs_info *fs_info, 1628c2ecf20Sopenharmony_ci u64 len) 1638c2ecf20Sopenharmony_ci{ 1648c2ecf20Sopenharmony_ci struct btrfs_space_info *data_sinfo; 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ci ASSERT(IS_ALIGNED(len, fs_info->sectorsize)); 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci data_sinfo = fs_info->data_sinfo; 1698c2ecf20Sopenharmony_ci btrfs_space_info_free_bytes_may_use(fs_info, data_sinfo, len); 1708c2ecf20Sopenharmony_ci} 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci/* 1738c2ecf20Sopenharmony_ci * Called if we need to clear a data reservation for this inode 1748c2ecf20Sopenharmony_ci * Normally in a error case. 1758c2ecf20Sopenharmony_ci * 1768c2ecf20Sopenharmony_ci * This one will handle the per-inode data rsv map for accurate reserved 1778c2ecf20Sopenharmony_ci * space framework. 1788c2ecf20Sopenharmony_ci */ 1798c2ecf20Sopenharmony_civoid btrfs_free_reserved_data_space(struct btrfs_inode *inode, 1808c2ecf20Sopenharmony_ci struct extent_changeset *reserved, u64 start, u64 len) 1818c2ecf20Sopenharmony_ci{ 1828c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 1838c2ecf20Sopenharmony_ci 1848c2ecf20Sopenharmony_ci /* Make sure the range is aligned to sectorsize */ 1858c2ecf20Sopenharmony_ci len = round_up(start + len, fs_info->sectorsize) - 1868c2ecf20Sopenharmony_ci round_down(start, fs_info->sectorsize); 1878c2ecf20Sopenharmony_ci start = round_down(start, fs_info->sectorsize); 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ci btrfs_free_reserved_data_space_noquota(fs_info, len); 1908c2ecf20Sopenharmony_ci btrfs_qgroup_free_data(inode, reserved, start, len); 1918c2ecf20Sopenharmony_ci} 1928c2ecf20Sopenharmony_ci 1938c2ecf20Sopenharmony_ci/** 1948c2ecf20Sopenharmony_ci * btrfs_inode_rsv_release - release any excessive reservation. 1958c2ecf20Sopenharmony_ci * @inode - the inode we need to release from. 1968c2ecf20Sopenharmony_ci * @qgroup_free - free or convert qgroup meta. 1978c2ecf20Sopenharmony_ci * Unlike normal operation, qgroup meta reservation needs to know if we are 1988c2ecf20Sopenharmony_ci * freeing qgroup reservation or just converting it into per-trans. Normally 1998c2ecf20Sopenharmony_ci * @qgroup_free is true for error handling, and false for normal release. 2008c2ecf20Sopenharmony_ci * 2018c2ecf20Sopenharmony_ci * This is the same as btrfs_block_rsv_release, except that it handles the 2028c2ecf20Sopenharmony_ci * tracepoint for the reservation. 2038c2ecf20Sopenharmony_ci */ 2048c2ecf20Sopenharmony_cistatic void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free) 2058c2ecf20Sopenharmony_ci{ 2068c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 2078c2ecf20Sopenharmony_ci struct btrfs_block_rsv *block_rsv = &inode->block_rsv; 2088c2ecf20Sopenharmony_ci u64 released = 0; 2098c2ecf20Sopenharmony_ci u64 qgroup_to_release = 0; 2108c2ecf20Sopenharmony_ci 2118c2ecf20Sopenharmony_ci /* 2128c2ecf20Sopenharmony_ci * Since we statically set the block_rsv->size we just want to say we 2138c2ecf20Sopenharmony_ci * are releasing 0 bytes, and then we'll just get the reservation over 2148c2ecf20Sopenharmony_ci * the size free'd. 2158c2ecf20Sopenharmony_ci */ 2168c2ecf20Sopenharmony_ci released = btrfs_block_rsv_release(fs_info, block_rsv, 0, 2178c2ecf20Sopenharmony_ci &qgroup_to_release); 2188c2ecf20Sopenharmony_ci if (released > 0) 2198c2ecf20Sopenharmony_ci trace_btrfs_space_reservation(fs_info, "delalloc", 2208c2ecf20Sopenharmony_ci btrfs_ino(inode), released, 0); 2218c2ecf20Sopenharmony_ci if (qgroup_free) 2228c2ecf20Sopenharmony_ci btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release); 2238c2ecf20Sopenharmony_ci else 2248c2ecf20Sopenharmony_ci btrfs_qgroup_convert_reserved_meta(inode->root, 2258c2ecf20Sopenharmony_ci qgroup_to_release); 2268c2ecf20Sopenharmony_ci} 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_cistatic void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, 2298c2ecf20Sopenharmony_ci struct btrfs_inode *inode) 2308c2ecf20Sopenharmony_ci{ 2318c2ecf20Sopenharmony_ci struct btrfs_block_rsv *block_rsv = &inode->block_rsv; 2328c2ecf20Sopenharmony_ci u64 reserve_size = 0; 2338c2ecf20Sopenharmony_ci u64 qgroup_rsv_size = 0; 2348c2ecf20Sopenharmony_ci u64 csum_leaves; 2358c2ecf20Sopenharmony_ci unsigned outstanding_extents; 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_ci lockdep_assert_held(&inode->lock); 2388c2ecf20Sopenharmony_ci outstanding_extents = inode->outstanding_extents; 2398c2ecf20Sopenharmony_ci 2408c2ecf20Sopenharmony_ci /* 2418c2ecf20Sopenharmony_ci * Insert size for the number of outstanding extents, 1 normal size for 2428c2ecf20Sopenharmony_ci * updating the inode. 2438c2ecf20Sopenharmony_ci */ 2448c2ecf20Sopenharmony_ci if (outstanding_extents) { 2458c2ecf20Sopenharmony_ci reserve_size = btrfs_calc_insert_metadata_size(fs_info, 2468c2ecf20Sopenharmony_ci outstanding_extents); 2478c2ecf20Sopenharmony_ci reserve_size += btrfs_calc_metadata_size(fs_info, 1); 2488c2ecf20Sopenharmony_ci } 2498c2ecf20Sopenharmony_ci csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, 2508c2ecf20Sopenharmony_ci inode->csum_bytes); 2518c2ecf20Sopenharmony_ci reserve_size += btrfs_calc_insert_metadata_size(fs_info, 2528c2ecf20Sopenharmony_ci csum_leaves); 2538c2ecf20Sopenharmony_ci /* 2548c2ecf20Sopenharmony_ci * For qgroup rsv, the calculation is very simple: 2558c2ecf20Sopenharmony_ci * account one nodesize for each outstanding extent 2568c2ecf20Sopenharmony_ci * 2578c2ecf20Sopenharmony_ci * This is overestimating in most cases. 2588c2ecf20Sopenharmony_ci */ 2598c2ecf20Sopenharmony_ci qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize; 2608c2ecf20Sopenharmony_ci 2618c2ecf20Sopenharmony_ci spin_lock(&block_rsv->lock); 2628c2ecf20Sopenharmony_ci block_rsv->size = reserve_size; 2638c2ecf20Sopenharmony_ci block_rsv->qgroup_rsv_size = qgroup_rsv_size; 2648c2ecf20Sopenharmony_ci spin_unlock(&block_rsv->lock); 2658c2ecf20Sopenharmony_ci} 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_cistatic void calc_inode_reservations(struct btrfs_fs_info *fs_info, 2688c2ecf20Sopenharmony_ci u64 num_bytes, u64 *meta_reserve, 2698c2ecf20Sopenharmony_ci u64 *qgroup_reserve) 2708c2ecf20Sopenharmony_ci{ 2718c2ecf20Sopenharmony_ci u64 nr_extents = count_max_extents(num_bytes); 2728c2ecf20Sopenharmony_ci u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes); 2738c2ecf20Sopenharmony_ci u64 inode_update = btrfs_calc_metadata_size(fs_info, 1); 2748c2ecf20Sopenharmony_ci 2758c2ecf20Sopenharmony_ci *meta_reserve = btrfs_calc_insert_metadata_size(fs_info, 2768c2ecf20Sopenharmony_ci nr_extents + csum_leaves); 2778c2ecf20Sopenharmony_ci 2788c2ecf20Sopenharmony_ci /* 2798c2ecf20Sopenharmony_ci * finish_ordered_io has to update the inode, so add the space required 2808c2ecf20Sopenharmony_ci * for an inode update. 2818c2ecf20Sopenharmony_ci */ 2828c2ecf20Sopenharmony_ci *meta_reserve += inode_update; 2838c2ecf20Sopenharmony_ci *qgroup_reserve = nr_extents * fs_info->nodesize; 2848c2ecf20Sopenharmony_ci} 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ciint btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) 2878c2ecf20Sopenharmony_ci{ 2888c2ecf20Sopenharmony_ci struct btrfs_root *root = inode->root; 2898c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 2908c2ecf20Sopenharmony_ci struct btrfs_block_rsv *block_rsv = &inode->block_rsv; 2918c2ecf20Sopenharmony_ci u64 meta_reserve, qgroup_reserve; 2928c2ecf20Sopenharmony_ci unsigned nr_extents; 2938c2ecf20Sopenharmony_ci enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; 2948c2ecf20Sopenharmony_ci int ret = 0; 2958c2ecf20Sopenharmony_ci 2968c2ecf20Sopenharmony_ci /* 2978c2ecf20Sopenharmony_ci * If we are a free space inode we need to not flush since we will be in 2988c2ecf20Sopenharmony_ci * the middle of a transaction commit. We also don't need the delalloc 2998c2ecf20Sopenharmony_ci * mutex since we won't race with anybody. We need this mostly to make 3008c2ecf20Sopenharmony_ci * lockdep shut its filthy mouth. 3018c2ecf20Sopenharmony_ci * 3028c2ecf20Sopenharmony_ci * If we have a transaction open (can happen if we call truncate_block 3038c2ecf20Sopenharmony_ci * from truncate), then we need FLUSH_LIMIT so we don't deadlock. 3048c2ecf20Sopenharmony_ci */ 3058c2ecf20Sopenharmony_ci if (btrfs_is_free_space_inode(inode)) { 3068c2ecf20Sopenharmony_ci flush = BTRFS_RESERVE_NO_FLUSH; 3078c2ecf20Sopenharmony_ci } else { 3088c2ecf20Sopenharmony_ci if (current->journal_info) 3098c2ecf20Sopenharmony_ci flush = BTRFS_RESERVE_FLUSH_LIMIT; 3108c2ecf20Sopenharmony_ci } 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ci num_bytes = ALIGN(num_bytes, fs_info->sectorsize); 3138c2ecf20Sopenharmony_ci 3148c2ecf20Sopenharmony_ci /* 3158c2ecf20Sopenharmony_ci * We always want to do it this way, every other way is wrong and ends 3168c2ecf20Sopenharmony_ci * in tears. Pre-reserving the amount we are going to add will always 3178c2ecf20Sopenharmony_ci * be the right way, because otherwise if we have enough parallelism we 3188c2ecf20Sopenharmony_ci * could end up with thousands of inodes all holding little bits of 3198c2ecf20Sopenharmony_ci * reservations they were able to make previously and the only way to 3208c2ecf20Sopenharmony_ci * reclaim that space is to ENOSPC out the operations and clear 3218c2ecf20Sopenharmony_ci * everything out and try again, which is bad. This way we just 3228c2ecf20Sopenharmony_ci * over-reserve slightly, and clean up the mess when we are done. 3238c2ecf20Sopenharmony_ci */ 3248c2ecf20Sopenharmony_ci calc_inode_reservations(fs_info, num_bytes, &meta_reserve, 3258c2ecf20Sopenharmony_ci &qgroup_reserve); 3268c2ecf20Sopenharmony_ci ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true); 3278c2ecf20Sopenharmony_ci if (ret) 3288c2ecf20Sopenharmony_ci return ret; 3298c2ecf20Sopenharmony_ci ret = btrfs_reserve_metadata_bytes(root, block_rsv, meta_reserve, flush); 3308c2ecf20Sopenharmony_ci if (ret) { 3318c2ecf20Sopenharmony_ci btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve); 3328c2ecf20Sopenharmony_ci return ret; 3338c2ecf20Sopenharmony_ci } 3348c2ecf20Sopenharmony_ci 3358c2ecf20Sopenharmony_ci /* 3368c2ecf20Sopenharmony_ci * Now we need to update our outstanding extents and csum bytes _first_ 3378c2ecf20Sopenharmony_ci * and then add the reservation to the block_rsv. This keeps us from 3388c2ecf20Sopenharmony_ci * racing with an ordered completion or some such that would think it 3398c2ecf20Sopenharmony_ci * needs to free the reservation we just made. 3408c2ecf20Sopenharmony_ci */ 3418c2ecf20Sopenharmony_ci spin_lock(&inode->lock); 3428c2ecf20Sopenharmony_ci nr_extents = count_max_extents(num_bytes); 3438c2ecf20Sopenharmony_ci btrfs_mod_outstanding_extents(inode, nr_extents); 3448c2ecf20Sopenharmony_ci inode->csum_bytes += num_bytes; 3458c2ecf20Sopenharmony_ci btrfs_calculate_inode_block_rsv_size(fs_info, inode); 3468c2ecf20Sopenharmony_ci spin_unlock(&inode->lock); 3478c2ecf20Sopenharmony_ci 3488c2ecf20Sopenharmony_ci /* Now we can safely add our space to our block rsv */ 3498c2ecf20Sopenharmony_ci btrfs_block_rsv_add_bytes(block_rsv, meta_reserve, false); 3508c2ecf20Sopenharmony_ci trace_btrfs_space_reservation(root->fs_info, "delalloc", 3518c2ecf20Sopenharmony_ci btrfs_ino(inode), meta_reserve, 1); 3528c2ecf20Sopenharmony_ci 3538c2ecf20Sopenharmony_ci spin_lock(&block_rsv->lock); 3548c2ecf20Sopenharmony_ci block_rsv->qgroup_rsv_reserved += qgroup_reserve; 3558c2ecf20Sopenharmony_ci spin_unlock(&block_rsv->lock); 3568c2ecf20Sopenharmony_ci 3578c2ecf20Sopenharmony_ci return 0; 3588c2ecf20Sopenharmony_ci} 3598c2ecf20Sopenharmony_ci 3608c2ecf20Sopenharmony_ci/** 3618c2ecf20Sopenharmony_ci * btrfs_delalloc_release_metadata - release a metadata reservation for an inode 3628c2ecf20Sopenharmony_ci * @inode: the inode to release the reservation for. 3638c2ecf20Sopenharmony_ci * @num_bytes: the number of bytes we are releasing. 3648c2ecf20Sopenharmony_ci * @qgroup_free: free qgroup reservation or convert it to per-trans reservation 3658c2ecf20Sopenharmony_ci * 3668c2ecf20Sopenharmony_ci * This will release the metadata reservation for an inode. This can be called 3678c2ecf20Sopenharmony_ci * once we complete IO for a given set of bytes to release their metadata 3688c2ecf20Sopenharmony_ci * reservations, or on error for the same reason. 3698c2ecf20Sopenharmony_ci */ 3708c2ecf20Sopenharmony_civoid btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, 3718c2ecf20Sopenharmony_ci bool qgroup_free) 3728c2ecf20Sopenharmony_ci{ 3738c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 3748c2ecf20Sopenharmony_ci 3758c2ecf20Sopenharmony_ci num_bytes = ALIGN(num_bytes, fs_info->sectorsize); 3768c2ecf20Sopenharmony_ci spin_lock(&inode->lock); 3778c2ecf20Sopenharmony_ci inode->csum_bytes -= num_bytes; 3788c2ecf20Sopenharmony_ci btrfs_calculate_inode_block_rsv_size(fs_info, inode); 3798c2ecf20Sopenharmony_ci spin_unlock(&inode->lock); 3808c2ecf20Sopenharmony_ci 3818c2ecf20Sopenharmony_ci if (btrfs_is_testing(fs_info)) 3828c2ecf20Sopenharmony_ci return; 3838c2ecf20Sopenharmony_ci 3848c2ecf20Sopenharmony_ci btrfs_inode_rsv_release(inode, qgroup_free); 3858c2ecf20Sopenharmony_ci} 3868c2ecf20Sopenharmony_ci 3878c2ecf20Sopenharmony_ci/** 3888c2ecf20Sopenharmony_ci * btrfs_delalloc_release_extents - release our outstanding_extents 3898c2ecf20Sopenharmony_ci * @inode: the inode to balance the reservation for. 3908c2ecf20Sopenharmony_ci * @num_bytes: the number of bytes we originally reserved with 3918c2ecf20Sopenharmony_ci * 3928c2ecf20Sopenharmony_ci * When we reserve space we increase outstanding_extents for the extents we may 3938c2ecf20Sopenharmony_ci * add. Once we've set the range as delalloc or created our ordered extents we 3948c2ecf20Sopenharmony_ci * have outstanding_extents to track the real usage, so we use this to free our 3958c2ecf20Sopenharmony_ci * temporarily tracked outstanding_extents. This _must_ be used in conjunction 3968c2ecf20Sopenharmony_ci * with btrfs_delalloc_reserve_metadata. 3978c2ecf20Sopenharmony_ci */ 3988c2ecf20Sopenharmony_civoid btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes) 3998c2ecf20Sopenharmony_ci{ 4008c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 4018c2ecf20Sopenharmony_ci unsigned num_extents; 4028c2ecf20Sopenharmony_ci 4038c2ecf20Sopenharmony_ci spin_lock(&inode->lock); 4048c2ecf20Sopenharmony_ci num_extents = count_max_extents(num_bytes); 4058c2ecf20Sopenharmony_ci btrfs_mod_outstanding_extents(inode, -num_extents); 4068c2ecf20Sopenharmony_ci btrfs_calculate_inode_block_rsv_size(fs_info, inode); 4078c2ecf20Sopenharmony_ci spin_unlock(&inode->lock); 4088c2ecf20Sopenharmony_ci 4098c2ecf20Sopenharmony_ci if (btrfs_is_testing(fs_info)) 4108c2ecf20Sopenharmony_ci return; 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_ci btrfs_inode_rsv_release(inode, true); 4138c2ecf20Sopenharmony_ci} 4148c2ecf20Sopenharmony_ci 4158c2ecf20Sopenharmony_ci/** 4168c2ecf20Sopenharmony_ci * btrfs_delalloc_reserve_space - reserve data and metadata space for 4178c2ecf20Sopenharmony_ci * delalloc 4188c2ecf20Sopenharmony_ci * @inode: inode we're writing to 4198c2ecf20Sopenharmony_ci * @start: start range we are writing to 4208c2ecf20Sopenharmony_ci * @len: how long the range we are writing to 4218c2ecf20Sopenharmony_ci * @reserved: mandatory parameter, record actually reserved qgroup ranges of 4228c2ecf20Sopenharmony_ci * current reservation. 4238c2ecf20Sopenharmony_ci * 4248c2ecf20Sopenharmony_ci * This will do the following things 4258c2ecf20Sopenharmony_ci * 4268c2ecf20Sopenharmony_ci * - reserve space in data space info for num bytes 4278c2ecf20Sopenharmony_ci * and reserve precious corresponding qgroup space 4288c2ecf20Sopenharmony_ci * (Done in check_data_free_space) 4298c2ecf20Sopenharmony_ci * 4308c2ecf20Sopenharmony_ci * - reserve space for metadata space, based on the number of outstanding 4318c2ecf20Sopenharmony_ci * extents and how much csums will be needed 4328c2ecf20Sopenharmony_ci * also reserve metadata space in a per root over-reserve method. 4338c2ecf20Sopenharmony_ci * - add to the inodes->delalloc_bytes 4348c2ecf20Sopenharmony_ci * - add it to the fs_info's delalloc inodes list. 4358c2ecf20Sopenharmony_ci * (Above 3 all done in delalloc_reserve_metadata) 4368c2ecf20Sopenharmony_ci * 4378c2ecf20Sopenharmony_ci * Return 0 for success 4388c2ecf20Sopenharmony_ci * Return <0 for error(-ENOSPC or -EQUOT) 4398c2ecf20Sopenharmony_ci */ 4408c2ecf20Sopenharmony_ciint btrfs_delalloc_reserve_space(struct btrfs_inode *inode, 4418c2ecf20Sopenharmony_ci struct extent_changeset **reserved, u64 start, u64 len) 4428c2ecf20Sopenharmony_ci{ 4438c2ecf20Sopenharmony_ci int ret; 4448c2ecf20Sopenharmony_ci 4458c2ecf20Sopenharmony_ci ret = btrfs_check_data_free_space(inode, reserved, start, len); 4468c2ecf20Sopenharmony_ci if (ret < 0) 4478c2ecf20Sopenharmony_ci return ret; 4488c2ecf20Sopenharmony_ci ret = btrfs_delalloc_reserve_metadata(inode, len); 4498c2ecf20Sopenharmony_ci if (ret < 0) 4508c2ecf20Sopenharmony_ci btrfs_free_reserved_data_space(inode, *reserved, start, len); 4518c2ecf20Sopenharmony_ci return ret; 4528c2ecf20Sopenharmony_ci} 4538c2ecf20Sopenharmony_ci 4548c2ecf20Sopenharmony_ci/** 4558c2ecf20Sopenharmony_ci * btrfs_delalloc_release_space - release data and metadata space for delalloc 4568c2ecf20Sopenharmony_ci * @inode: inode we're releasing space for 4578c2ecf20Sopenharmony_ci * @start: start position of the space already reserved 4588c2ecf20Sopenharmony_ci * @len: the len of the space already reserved 4598c2ecf20Sopenharmony_ci * @release_bytes: the len of the space we consumed or didn't use 4608c2ecf20Sopenharmony_ci * 4618c2ecf20Sopenharmony_ci * This function will release the metadata space that was not used and will 4628c2ecf20Sopenharmony_ci * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes 4638c2ecf20Sopenharmony_ci * list if there are no delalloc bytes left. 4648c2ecf20Sopenharmony_ci * Also it will handle the qgroup reserved space. 4658c2ecf20Sopenharmony_ci */ 4668c2ecf20Sopenharmony_civoid btrfs_delalloc_release_space(struct btrfs_inode *inode, 4678c2ecf20Sopenharmony_ci struct extent_changeset *reserved, 4688c2ecf20Sopenharmony_ci u64 start, u64 len, bool qgroup_free) 4698c2ecf20Sopenharmony_ci{ 4708c2ecf20Sopenharmony_ci btrfs_delalloc_release_metadata(inode, len, qgroup_free); 4718c2ecf20Sopenharmony_ci btrfs_free_reserved_data_space(inode, reserved, start, len); 4728c2ecf20Sopenharmony_ci} 473