162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci 362306a36Sopenharmony_ci#include "messages.h" 462306a36Sopenharmony_ci#include "ctree.h" 562306a36Sopenharmony_ci#include "delalloc-space.h" 662306a36Sopenharmony_ci#include "block-rsv.h" 762306a36Sopenharmony_ci#include "btrfs_inode.h" 862306a36Sopenharmony_ci#include "space-info.h" 962306a36Sopenharmony_ci#include "transaction.h" 1062306a36Sopenharmony_ci#include "qgroup.h" 1162306a36Sopenharmony_ci#include "block-group.h" 1262306a36Sopenharmony_ci#include "fs.h" 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci/* 1562306a36Sopenharmony_ci * HOW DOES THIS WORK 1662306a36Sopenharmony_ci * 1762306a36Sopenharmony_ci * There are two stages to data reservations, one for data and one for metadata 1862306a36Sopenharmony_ci * to handle the new extents and checksums generated by writing data. 1962306a36Sopenharmony_ci * 2062306a36Sopenharmony_ci * 2162306a36Sopenharmony_ci * DATA RESERVATION 2262306a36Sopenharmony_ci * The general flow of the data reservation is as follows 2362306a36Sopenharmony_ci * 2462306a36Sopenharmony_ci * -> Reserve 2562306a36Sopenharmony_ci * We call into btrfs_reserve_data_bytes() for the user request bytes that 2662306a36Sopenharmony_ci * they wish to write. We make this reservation and add it to 2762306a36Sopenharmony_ci * space_info->bytes_may_use. We set EXTENT_DELALLOC on the inode io_tree 2862306a36Sopenharmony_ci * for the range and carry on if this is buffered, or follow up trying to 2962306a36Sopenharmony_ci * make a real allocation if we are pre-allocating or doing O_DIRECT. 3062306a36Sopenharmony_ci * 3162306a36Sopenharmony_ci * -> Use 3262306a36Sopenharmony_ci * At writepages()/prealloc/O_DIRECT time we will call into 3362306a36Sopenharmony_ci * btrfs_reserve_extent() for some part or all of this range of bytes. We 3462306a36Sopenharmony_ci * will make the allocation and subtract space_info->bytes_may_use by the 3562306a36Sopenharmony_ci * original requested length and increase the space_info->bytes_reserved by 3662306a36Sopenharmony_ci * the allocated length. This distinction is important because compression 3762306a36Sopenharmony_ci * may allocate a smaller on disk extent than we previously reserved. 3862306a36Sopenharmony_ci * 3962306a36Sopenharmony_ci * -> Allocation 4062306a36Sopenharmony_ci * finish_ordered_io() will insert the new file extent item for this range, 4162306a36Sopenharmony_ci * and then add a delayed ref update for the extent tree. Once that delayed 4262306a36Sopenharmony_ci * ref is written the extent size is subtracted from 4362306a36Sopenharmony_ci * space_info->bytes_reserved and added to space_info->bytes_used. 4462306a36Sopenharmony_ci * 4562306a36Sopenharmony_ci * Error handling 4662306a36Sopenharmony_ci * 4762306a36Sopenharmony_ci * -> By the reservation maker 4862306a36Sopenharmony_ci * This is the simplest case, we haven't completed our operation and we know 4962306a36Sopenharmony_ci * how much we reserved, we can simply call 5062306a36Sopenharmony_ci * btrfs_free_reserved_data_space*() and it will be removed from 5162306a36Sopenharmony_ci * space_info->bytes_may_use. 5262306a36Sopenharmony_ci * 5362306a36Sopenharmony_ci * -> After the reservation has been made, but before cow_file_range() 5462306a36Sopenharmony_ci * This is specifically for the delalloc case. You must clear 5562306a36Sopenharmony_ci * EXTENT_DELALLOC with the EXTENT_CLEAR_DATA_RESV bit, and the range will 5662306a36Sopenharmony_ci * be subtracted from space_info->bytes_may_use. 5762306a36Sopenharmony_ci * 5862306a36Sopenharmony_ci * METADATA RESERVATION 5962306a36Sopenharmony_ci * The general metadata reservation lifetimes are discussed elsewhere, this 6062306a36Sopenharmony_ci * will just focus on how it is used for delalloc space. 6162306a36Sopenharmony_ci * 6262306a36Sopenharmony_ci * We keep track of two things on a per inode bases 6362306a36Sopenharmony_ci * 6462306a36Sopenharmony_ci * ->outstanding_extents 6562306a36Sopenharmony_ci * This is the number of file extent items we'll need to handle all of the 6662306a36Sopenharmony_ci * outstanding DELALLOC space we have in this inode. We limit the maximum 6762306a36Sopenharmony_ci * size of an extent, so a large contiguous dirty area may require more than 6862306a36Sopenharmony_ci * one outstanding_extent, which is why count_max_extents() is used to 6962306a36Sopenharmony_ci * determine how many outstanding_extents get added. 7062306a36Sopenharmony_ci * 7162306a36Sopenharmony_ci * ->csum_bytes 7262306a36Sopenharmony_ci * This is essentially how many dirty bytes we have for this inode, so we 7362306a36Sopenharmony_ci * can calculate the number of checksum items we would have to add in order 7462306a36Sopenharmony_ci * to checksum our outstanding data. 7562306a36Sopenharmony_ci * 7662306a36Sopenharmony_ci * We keep a per-inode block_rsv in order to make it easier to keep track of 7762306a36Sopenharmony_ci * our reservation. We use btrfs_calculate_inode_block_rsv_size() to 7862306a36Sopenharmony_ci * calculate the current theoretical maximum reservation we would need for the 7962306a36Sopenharmony_ci * metadata for this inode. We call this and then adjust our reservation as 8062306a36Sopenharmony_ci * necessary, either by attempting to reserve more space, or freeing up excess 8162306a36Sopenharmony_ci * space. 8262306a36Sopenharmony_ci * 8362306a36Sopenharmony_ci * OUTSTANDING_EXTENTS HANDLING 8462306a36Sopenharmony_ci * 8562306a36Sopenharmony_ci * ->outstanding_extents is used for keeping track of how many extents we will 8662306a36Sopenharmony_ci * need to use for this inode, and it will fluctuate depending on where you are 8762306a36Sopenharmony_ci * in the life cycle of the dirty data. Consider the following normal case for 8862306a36Sopenharmony_ci * a completely clean inode, with a num_bytes < our maximum allowed extent size 8962306a36Sopenharmony_ci * 9062306a36Sopenharmony_ci * -> reserve 9162306a36Sopenharmony_ci * ->outstanding_extents += 1 (current value is 1) 9262306a36Sopenharmony_ci * 9362306a36Sopenharmony_ci * -> set_delalloc 9462306a36Sopenharmony_ci * ->outstanding_extents += 1 (current value is 2) 9562306a36Sopenharmony_ci * 9662306a36Sopenharmony_ci * -> btrfs_delalloc_release_extents() 9762306a36Sopenharmony_ci * ->outstanding_extents -= 1 (current value is 1) 9862306a36Sopenharmony_ci * 9962306a36Sopenharmony_ci * We must call this once we are done, as we hold our reservation for the 10062306a36Sopenharmony_ci * duration of our operation, and then assume set_delalloc will update the 10162306a36Sopenharmony_ci * counter appropriately. 10262306a36Sopenharmony_ci * 10362306a36Sopenharmony_ci * -> add ordered extent 10462306a36Sopenharmony_ci * ->outstanding_extents += 1 (current value is 2) 10562306a36Sopenharmony_ci * 10662306a36Sopenharmony_ci * -> btrfs_clear_delalloc_extent 10762306a36Sopenharmony_ci * ->outstanding_extents -= 1 (current value is 1) 10862306a36Sopenharmony_ci * 10962306a36Sopenharmony_ci * -> finish_ordered_io/btrfs_remove_ordered_extent 11062306a36Sopenharmony_ci * ->outstanding_extents -= 1 (current value is 0) 11162306a36Sopenharmony_ci * 11262306a36Sopenharmony_ci * Each stage is responsible for their own accounting of the extent, thus 11362306a36Sopenharmony_ci * making error handling and cleanup easier. 11462306a36Sopenharmony_ci */ 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ciint btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes) 11762306a36Sopenharmony_ci{ 11862306a36Sopenharmony_ci struct btrfs_root *root = inode->root; 11962306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 12062306a36Sopenharmony_ci enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_DATA; 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ci /* Make sure bytes are sectorsize aligned */ 12362306a36Sopenharmony_ci bytes = ALIGN(bytes, fs_info->sectorsize); 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci if (btrfs_is_free_space_inode(inode)) 12662306a36Sopenharmony_ci flush = BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE; 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci return btrfs_reserve_data_bytes(fs_info, bytes, flush); 12962306a36Sopenharmony_ci} 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ciint btrfs_check_data_free_space(struct btrfs_inode *inode, 13262306a36Sopenharmony_ci struct extent_changeset **reserved, u64 start, 13362306a36Sopenharmony_ci u64 len, bool noflush) 13462306a36Sopenharmony_ci{ 13562306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 13662306a36Sopenharmony_ci enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_DATA; 13762306a36Sopenharmony_ci int ret; 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci /* align the range */ 14062306a36Sopenharmony_ci len = round_up(start + len, fs_info->sectorsize) - 14162306a36Sopenharmony_ci round_down(start, fs_info->sectorsize); 14262306a36Sopenharmony_ci start = round_down(start, fs_info->sectorsize); 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci if (noflush) 14562306a36Sopenharmony_ci flush = BTRFS_RESERVE_NO_FLUSH; 14662306a36Sopenharmony_ci else if (btrfs_is_free_space_inode(inode)) 14762306a36Sopenharmony_ci flush = BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE; 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_ci ret = btrfs_reserve_data_bytes(fs_info, len, flush); 15062306a36Sopenharmony_ci if (ret < 0) 15162306a36Sopenharmony_ci return ret; 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ci /* Use new btrfs_qgroup_reserve_data to reserve precious data space. */ 15462306a36Sopenharmony_ci ret = btrfs_qgroup_reserve_data(inode, reserved, start, len); 15562306a36Sopenharmony_ci if (ret < 0) { 15662306a36Sopenharmony_ci btrfs_free_reserved_data_space_noquota(fs_info, len); 15762306a36Sopenharmony_ci extent_changeset_free(*reserved); 15862306a36Sopenharmony_ci *reserved = NULL; 15962306a36Sopenharmony_ci } else { 16062306a36Sopenharmony_ci ret = 0; 16162306a36Sopenharmony_ci } 16262306a36Sopenharmony_ci return ret; 16362306a36Sopenharmony_ci} 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_ci/* 16662306a36Sopenharmony_ci * Called if we need to clear a data reservation for this inode 16762306a36Sopenharmony_ci * Normally in a error case. 16862306a36Sopenharmony_ci * 16962306a36Sopenharmony_ci * This one will *NOT* use accurate qgroup reserved space API, just for case 17062306a36Sopenharmony_ci * which we can't sleep and is sure it won't affect qgroup reserved space. 17162306a36Sopenharmony_ci * Like clear_bit_hook(). 17262306a36Sopenharmony_ci */ 17362306a36Sopenharmony_civoid btrfs_free_reserved_data_space_noquota(struct btrfs_fs_info *fs_info, 17462306a36Sopenharmony_ci u64 len) 17562306a36Sopenharmony_ci{ 17662306a36Sopenharmony_ci struct btrfs_space_info *data_sinfo; 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci ASSERT(IS_ALIGNED(len, fs_info->sectorsize)); 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci data_sinfo = fs_info->data_sinfo; 18162306a36Sopenharmony_ci btrfs_space_info_free_bytes_may_use(fs_info, data_sinfo, len); 18262306a36Sopenharmony_ci} 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci/* 18562306a36Sopenharmony_ci * Called if we need to clear a data reservation for this inode 18662306a36Sopenharmony_ci * Normally in a error case. 18762306a36Sopenharmony_ci * 18862306a36Sopenharmony_ci * This one will handle the per-inode data rsv map for accurate reserved 18962306a36Sopenharmony_ci * space framework. 19062306a36Sopenharmony_ci */ 19162306a36Sopenharmony_civoid btrfs_free_reserved_data_space(struct btrfs_inode *inode, 19262306a36Sopenharmony_ci struct extent_changeset *reserved, u64 start, u64 len) 19362306a36Sopenharmony_ci{ 19462306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci /* Make sure the range is aligned to sectorsize */ 19762306a36Sopenharmony_ci len = round_up(start + len, fs_info->sectorsize) - 19862306a36Sopenharmony_ci round_down(start, fs_info->sectorsize); 19962306a36Sopenharmony_ci start = round_down(start, fs_info->sectorsize); 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_ci btrfs_free_reserved_data_space_noquota(fs_info, len); 20262306a36Sopenharmony_ci btrfs_qgroup_free_data(inode, reserved, start, len, NULL); 20362306a36Sopenharmony_ci} 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci/* 20662306a36Sopenharmony_ci * Release any excessive reservations for an inode. 20762306a36Sopenharmony_ci * 20862306a36Sopenharmony_ci * @inode: the inode we need to release from 20962306a36Sopenharmony_ci * @qgroup_free: free or convert qgroup meta. Unlike normal operation, qgroup 21062306a36Sopenharmony_ci * meta reservation needs to know if we are freeing qgroup 21162306a36Sopenharmony_ci * reservation or just converting it into per-trans. Normally 21262306a36Sopenharmony_ci * @qgroup_free is true for error handling, and false for normal 21362306a36Sopenharmony_ci * release. 21462306a36Sopenharmony_ci * 21562306a36Sopenharmony_ci * This is the same as btrfs_block_rsv_release, except that it handles the 21662306a36Sopenharmony_ci * tracepoint for the reservation. 21762306a36Sopenharmony_ci */ 21862306a36Sopenharmony_cistatic void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free) 21962306a36Sopenharmony_ci{ 22062306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 22162306a36Sopenharmony_ci struct btrfs_block_rsv *block_rsv = &inode->block_rsv; 22262306a36Sopenharmony_ci u64 released = 0; 22362306a36Sopenharmony_ci u64 qgroup_to_release = 0; 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci /* 22662306a36Sopenharmony_ci * Since we statically set the block_rsv->size we just want to say we 22762306a36Sopenharmony_ci * are releasing 0 bytes, and then we'll just get the reservation over 22862306a36Sopenharmony_ci * the size free'd. 22962306a36Sopenharmony_ci */ 23062306a36Sopenharmony_ci released = btrfs_block_rsv_release(fs_info, block_rsv, 0, 23162306a36Sopenharmony_ci &qgroup_to_release); 23262306a36Sopenharmony_ci if (released > 0) 23362306a36Sopenharmony_ci trace_btrfs_space_reservation(fs_info, "delalloc", 23462306a36Sopenharmony_ci btrfs_ino(inode), released, 0); 23562306a36Sopenharmony_ci if (qgroup_free) 23662306a36Sopenharmony_ci btrfs_qgroup_free_meta_prealloc(inode->root, qgroup_to_release); 23762306a36Sopenharmony_ci else 23862306a36Sopenharmony_ci btrfs_qgroup_convert_reserved_meta(inode->root, 23962306a36Sopenharmony_ci qgroup_to_release); 24062306a36Sopenharmony_ci} 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_cistatic void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, 24362306a36Sopenharmony_ci struct btrfs_inode *inode) 24462306a36Sopenharmony_ci{ 24562306a36Sopenharmony_ci struct btrfs_block_rsv *block_rsv = &inode->block_rsv; 24662306a36Sopenharmony_ci u64 reserve_size = 0; 24762306a36Sopenharmony_ci u64 qgroup_rsv_size = 0; 24862306a36Sopenharmony_ci unsigned outstanding_extents; 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_ci lockdep_assert_held(&inode->lock); 25162306a36Sopenharmony_ci outstanding_extents = inode->outstanding_extents; 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci /* 25462306a36Sopenharmony_ci * Insert size for the number of outstanding extents, 1 normal size for 25562306a36Sopenharmony_ci * updating the inode. 25662306a36Sopenharmony_ci */ 25762306a36Sopenharmony_ci if (outstanding_extents) { 25862306a36Sopenharmony_ci reserve_size = btrfs_calc_insert_metadata_size(fs_info, 25962306a36Sopenharmony_ci outstanding_extents); 26062306a36Sopenharmony_ci reserve_size += btrfs_calc_metadata_size(fs_info, 1); 26162306a36Sopenharmony_ci } 26262306a36Sopenharmony_ci if (!(inode->flags & BTRFS_INODE_NODATASUM)) { 26362306a36Sopenharmony_ci u64 csum_leaves; 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes); 26662306a36Sopenharmony_ci reserve_size += btrfs_calc_insert_metadata_size(fs_info, csum_leaves); 26762306a36Sopenharmony_ci } 26862306a36Sopenharmony_ci /* 26962306a36Sopenharmony_ci * For qgroup rsv, the calculation is very simple: 27062306a36Sopenharmony_ci * account one nodesize for each outstanding extent 27162306a36Sopenharmony_ci * 27262306a36Sopenharmony_ci * This is overestimating in most cases. 27362306a36Sopenharmony_ci */ 27462306a36Sopenharmony_ci qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize; 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci spin_lock(&block_rsv->lock); 27762306a36Sopenharmony_ci block_rsv->size = reserve_size; 27862306a36Sopenharmony_ci block_rsv->qgroup_rsv_size = qgroup_rsv_size; 27962306a36Sopenharmony_ci spin_unlock(&block_rsv->lock); 28062306a36Sopenharmony_ci} 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_cistatic void calc_inode_reservations(struct btrfs_inode *inode, 28362306a36Sopenharmony_ci u64 num_bytes, u64 disk_num_bytes, 28462306a36Sopenharmony_ci u64 *meta_reserve, u64 *qgroup_reserve) 28562306a36Sopenharmony_ci{ 28662306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 28762306a36Sopenharmony_ci u64 nr_extents = count_max_extents(fs_info, num_bytes); 28862306a36Sopenharmony_ci u64 csum_leaves; 28962306a36Sopenharmony_ci u64 inode_update = btrfs_calc_metadata_size(fs_info, 1); 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci if (inode->flags & BTRFS_INODE_NODATASUM) 29262306a36Sopenharmony_ci csum_leaves = 0; 29362306a36Sopenharmony_ci else 29462306a36Sopenharmony_ci csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes); 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci *meta_reserve = btrfs_calc_insert_metadata_size(fs_info, 29762306a36Sopenharmony_ci nr_extents + csum_leaves); 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_ci /* 30062306a36Sopenharmony_ci * finish_ordered_io has to update the inode, so add the space required 30162306a36Sopenharmony_ci * for an inode update. 30262306a36Sopenharmony_ci */ 30362306a36Sopenharmony_ci *meta_reserve += inode_update; 30462306a36Sopenharmony_ci *qgroup_reserve = nr_extents * fs_info->nodesize; 30562306a36Sopenharmony_ci} 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_ciint btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, 30862306a36Sopenharmony_ci u64 disk_num_bytes, bool noflush) 30962306a36Sopenharmony_ci{ 31062306a36Sopenharmony_ci struct btrfs_root *root = inode->root; 31162306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 31262306a36Sopenharmony_ci struct btrfs_block_rsv *block_rsv = &inode->block_rsv; 31362306a36Sopenharmony_ci u64 meta_reserve, qgroup_reserve; 31462306a36Sopenharmony_ci unsigned nr_extents; 31562306a36Sopenharmony_ci enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; 31662306a36Sopenharmony_ci int ret = 0; 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci /* 31962306a36Sopenharmony_ci * If we are a free space inode we need to not flush since we will be in 32062306a36Sopenharmony_ci * the middle of a transaction commit. We also don't need the delalloc 32162306a36Sopenharmony_ci * mutex since we won't race with anybody. We need this mostly to make 32262306a36Sopenharmony_ci * lockdep shut its filthy mouth. 32362306a36Sopenharmony_ci * 32462306a36Sopenharmony_ci * If we have a transaction open (can happen if we call truncate_block 32562306a36Sopenharmony_ci * from truncate), then we need FLUSH_LIMIT so we don't deadlock. 32662306a36Sopenharmony_ci */ 32762306a36Sopenharmony_ci if (noflush || btrfs_is_free_space_inode(inode)) { 32862306a36Sopenharmony_ci flush = BTRFS_RESERVE_NO_FLUSH; 32962306a36Sopenharmony_ci } else { 33062306a36Sopenharmony_ci if (current->journal_info) 33162306a36Sopenharmony_ci flush = BTRFS_RESERVE_FLUSH_LIMIT; 33262306a36Sopenharmony_ci } 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci num_bytes = ALIGN(num_bytes, fs_info->sectorsize); 33562306a36Sopenharmony_ci disk_num_bytes = ALIGN(disk_num_bytes, fs_info->sectorsize); 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci /* 33862306a36Sopenharmony_ci * We always want to do it this way, every other way is wrong and ends 33962306a36Sopenharmony_ci * in tears. Pre-reserving the amount we are going to add will always 34062306a36Sopenharmony_ci * be the right way, because otherwise if we have enough parallelism we 34162306a36Sopenharmony_ci * could end up with thousands of inodes all holding little bits of 34262306a36Sopenharmony_ci * reservations they were able to make previously and the only way to 34362306a36Sopenharmony_ci * reclaim that space is to ENOSPC out the operations and clear 34462306a36Sopenharmony_ci * everything out and try again, which is bad. This way we just 34562306a36Sopenharmony_ci * over-reserve slightly, and clean up the mess when we are done. 34662306a36Sopenharmony_ci */ 34762306a36Sopenharmony_ci calc_inode_reservations(inode, num_bytes, disk_num_bytes, 34862306a36Sopenharmony_ci &meta_reserve, &qgroup_reserve); 34962306a36Sopenharmony_ci ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true, 35062306a36Sopenharmony_ci noflush); 35162306a36Sopenharmony_ci if (ret) 35262306a36Sopenharmony_ci return ret; 35362306a36Sopenharmony_ci ret = btrfs_reserve_metadata_bytes(fs_info, block_rsv, meta_reserve, flush); 35462306a36Sopenharmony_ci if (ret) { 35562306a36Sopenharmony_ci btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve); 35662306a36Sopenharmony_ci return ret; 35762306a36Sopenharmony_ci } 35862306a36Sopenharmony_ci 35962306a36Sopenharmony_ci /* 36062306a36Sopenharmony_ci * Now we need to update our outstanding extents and csum bytes _first_ 36162306a36Sopenharmony_ci * and then add the reservation to the block_rsv. This keeps us from 36262306a36Sopenharmony_ci * racing with an ordered completion or some such that would think it 36362306a36Sopenharmony_ci * needs to free the reservation we just made. 36462306a36Sopenharmony_ci */ 36562306a36Sopenharmony_ci nr_extents = count_max_extents(fs_info, num_bytes); 36662306a36Sopenharmony_ci spin_lock(&inode->lock); 36762306a36Sopenharmony_ci btrfs_mod_outstanding_extents(inode, nr_extents); 36862306a36Sopenharmony_ci if (!(inode->flags & BTRFS_INODE_NODATASUM)) 36962306a36Sopenharmony_ci inode->csum_bytes += disk_num_bytes; 37062306a36Sopenharmony_ci btrfs_calculate_inode_block_rsv_size(fs_info, inode); 37162306a36Sopenharmony_ci spin_unlock(&inode->lock); 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci /* Now we can safely add our space to our block rsv */ 37462306a36Sopenharmony_ci btrfs_block_rsv_add_bytes(block_rsv, meta_reserve, false); 37562306a36Sopenharmony_ci trace_btrfs_space_reservation(root->fs_info, "delalloc", 37662306a36Sopenharmony_ci btrfs_ino(inode), meta_reserve, 1); 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci spin_lock(&block_rsv->lock); 37962306a36Sopenharmony_ci block_rsv->qgroup_rsv_reserved += qgroup_reserve; 38062306a36Sopenharmony_ci spin_unlock(&block_rsv->lock); 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci return 0; 38362306a36Sopenharmony_ci} 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_ci/* 38662306a36Sopenharmony_ci * Release a metadata reservation for an inode. 38762306a36Sopenharmony_ci * 38862306a36Sopenharmony_ci * @inode: the inode to release the reservation for. 38962306a36Sopenharmony_ci * @num_bytes: the number of bytes we are releasing. 39062306a36Sopenharmony_ci * @qgroup_free: free qgroup reservation or convert it to per-trans reservation 39162306a36Sopenharmony_ci * 39262306a36Sopenharmony_ci * This will release the metadata reservation for an inode. This can be called 39362306a36Sopenharmony_ci * once we complete IO for a given set of bytes to release their metadata 39462306a36Sopenharmony_ci * reservations, or on error for the same reason. 39562306a36Sopenharmony_ci */ 39662306a36Sopenharmony_civoid btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes, 39762306a36Sopenharmony_ci bool qgroup_free) 39862306a36Sopenharmony_ci{ 39962306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_ci num_bytes = ALIGN(num_bytes, fs_info->sectorsize); 40262306a36Sopenharmony_ci spin_lock(&inode->lock); 40362306a36Sopenharmony_ci if (!(inode->flags & BTRFS_INODE_NODATASUM)) 40462306a36Sopenharmony_ci inode->csum_bytes -= num_bytes; 40562306a36Sopenharmony_ci btrfs_calculate_inode_block_rsv_size(fs_info, inode); 40662306a36Sopenharmony_ci spin_unlock(&inode->lock); 40762306a36Sopenharmony_ci 40862306a36Sopenharmony_ci if (btrfs_is_testing(fs_info)) 40962306a36Sopenharmony_ci return; 41062306a36Sopenharmony_ci 41162306a36Sopenharmony_ci btrfs_inode_rsv_release(inode, qgroup_free); 41262306a36Sopenharmony_ci} 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_ci/* 41562306a36Sopenharmony_ci * Release our outstanding_extents for an inode. 41662306a36Sopenharmony_ci * 41762306a36Sopenharmony_ci * @inode: the inode to balance the reservation for. 41862306a36Sopenharmony_ci * @num_bytes: the number of bytes we originally reserved with 41962306a36Sopenharmony_ci * 42062306a36Sopenharmony_ci * When we reserve space we increase outstanding_extents for the extents we may 42162306a36Sopenharmony_ci * add. Once we've set the range as delalloc or created our ordered extents we 42262306a36Sopenharmony_ci * have outstanding_extents to track the real usage, so we use this to free our 42362306a36Sopenharmony_ci * temporarily tracked outstanding_extents. This _must_ be used in conjunction 42462306a36Sopenharmony_ci * with btrfs_delalloc_reserve_metadata. 42562306a36Sopenharmony_ci */ 42662306a36Sopenharmony_civoid btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes) 42762306a36Sopenharmony_ci{ 42862306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = inode->root->fs_info; 42962306a36Sopenharmony_ci unsigned num_extents; 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_ci spin_lock(&inode->lock); 43262306a36Sopenharmony_ci num_extents = count_max_extents(fs_info, num_bytes); 43362306a36Sopenharmony_ci btrfs_mod_outstanding_extents(inode, -num_extents); 43462306a36Sopenharmony_ci btrfs_calculate_inode_block_rsv_size(fs_info, inode); 43562306a36Sopenharmony_ci spin_unlock(&inode->lock); 43662306a36Sopenharmony_ci 43762306a36Sopenharmony_ci if (btrfs_is_testing(fs_info)) 43862306a36Sopenharmony_ci return; 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci btrfs_inode_rsv_release(inode, true); 44162306a36Sopenharmony_ci} 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci/* 44462306a36Sopenharmony_ci * Reserve data and metadata space for delalloc 44562306a36Sopenharmony_ci * 44662306a36Sopenharmony_ci * @inode: inode we're writing to 44762306a36Sopenharmony_ci * @start: start range we are writing to 44862306a36Sopenharmony_ci * @len: how long the range we are writing to 44962306a36Sopenharmony_ci * @reserved: mandatory parameter, record actually reserved qgroup ranges of 45062306a36Sopenharmony_ci * current reservation. 45162306a36Sopenharmony_ci * 45262306a36Sopenharmony_ci * This will do the following things 45362306a36Sopenharmony_ci * 45462306a36Sopenharmony_ci * - reserve space in data space info for num bytes and reserve precious 45562306a36Sopenharmony_ci * corresponding qgroup space 45662306a36Sopenharmony_ci * (Done in check_data_free_space) 45762306a36Sopenharmony_ci * 45862306a36Sopenharmony_ci * - reserve space for metadata space, based on the number of outstanding 45962306a36Sopenharmony_ci * extents and how much csums will be needed also reserve metadata space in a 46062306a36Sopenharmony_ci * per root over-reserve method. 46162306a36Sopenharmony_ci * - add to the inodes->delalloc_bytes 46262306a36Sopenharmony_ci * - add it to the fs_info's delalloc inodes list. 46362306a36Sopenharmony_ci * (Above 3 all done in delalloc_reserve_metadata) 46462306a36Sopenharmony_ci * 46562306a36Sopenharmony_ci * Return 0 for success 46662306a36Sopenharmony_ci * Return <0 for error(-ENOSPC or -EDQUOT) 46762306a36Sopenharmony_ci */ 46862306a36Sopenharmony_ciint btrfs_delalloc_reserve_space(struct btrfs_inode *inode, 46962306a36Sopenharmony_ci struct extent_changeset **reserved, u64 start, u64 len) 47062306a36Sopenharmony_ci{ 47162306a36Sopenharmony_ci int ret; 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci ret = btrfs_check_data_free_space(inode, reserved, start, len, false); 47462306a36Sopenharmony_ci if (ret < 0) 47562306a36Sopenharmony_ci return ret; 47662306a36Sopenharmony_ci ret = btrfs_delalloc_reserve_metadata(inode, len, len, false); 47762306a36Sopenharmony_ci if (ret < 0) { 47862306a36Sopenharmony_ci btrfs_free_reserved_data_space(inode, *reserved, start, len); 47962306a36Sopenharmony_ci extent_changeset_free(*reserved); 48062306a36Sopenharmony_ci *reserved = NULL; 48162306a36Sopenharmony_ci } 48262306a36Sopenharmony_ci return ret; 48362306a36Sopenharmony_ci} 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci/* 48662306a36Sopenharmony_ci * Release data and metadata space for delalloc 48762306a36Sopenharmony_ci * 48862306a36Sopenharmony_ci * @inode: inode we're releasing space for 48962306a36Sopenharmony_ci * @reserved: list of changed/reserved ranges 49062306a36Sopenharmony_ci * @start: start position of the space already reserved 49162306a36Sopenharmony_ci * @len: length of the space already reserved 49262306a36Sopenharmony_ci * @qgroup_free: should qgroup reserved-space also be freed 49362306a36Sopenharmony_ci * 49462306a36Sopenharmony_ci * Release the metadata space that was not used and will decrement 49562306a36Sopenharmony_ci * ->delalloc_bytes and remove it from the fs_info->delalloc_inodes list if 49662306a36Sopenharmony_ci * there are no delalloc bytes left. Also it will handle the qgroup reserved 49762306a36Sopenharmony_ci * space. 49862306a36Sopenharmony_ci */ 49962306a36Sopenharmony_civoid btrfs_delalloc_release_space(struct btrfs_inode *inode, 50062306a36Sopenharmony_ci struct extent_changeset *reserved, 50162306a36Sopenharmony_ci u64 start, u64 len, bool qgroup_free) 50262306a36Sopenharmony_ci{ 50362306a36Sopenharmony_ci btrfs_delalloc_release_metadata(inode, len, qgroup_free); 50462306a36Sopenharmony_ci btrfs_free_reserved_data_space(inode, reserved, start, len); 50562306a36Sopenharmony_ci} 506