18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (C) 2007 Oracle. All rights reserved. 48c2ecf20Sopenharmony_ci */ 58c2ecf20Sopenharmony_ci 68c2ecf20Sopenharmony_ci#include <linux/fs.h> 78c2ecf20Sopenharmony_ci#include <linux/slab.h> 88c2ecf20Sopenharmony_ci#include <linux/sched.h> 98c2ecf20Sopenharmony_ci#include <linux/writeback.h> 108c2ecf20Sopenharmony_ci#include <linux/pagemap.h> 118c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 128c2ecf20Sopenharmony_ci#include <linux/uuid.h> 138c2ecf20Sopenharmony_ci#include "misc.h" 148c2ecf20Sopenharmony_ci#include "ctree.h" 158c2ecf20Sopenharmony_ci#include "disk-io.h" 168c2ecf20Sopenharmony_ci#include "transaction.h" 178c2ecf20Sopenharmony_ci#include "locking.h" 188c2ecf20Sopenharmony_ci#include "tree-log.h" 198c2ecf20Sopenharmony_ci#include "inode-map.h" 208c2ecf20Sopenharmony_ci#include "volumes.h" 218c2ecf20Sopenharmony_ci#include "dev-replace.h" 228c2ecf20Sopenharmony_ci#include "qgroup.h" 238c2ecf20Sopenharmony_ci#include "block-group.h" 248c2ecf20Sopenharmony_ci#include "space-info.h" 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_ci#define BTRFS_ROOT_TRANS_TAG 0 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_ci/* 298c2ecf20Sopenharmony_ci * Transaction states and transitions 308c2ecf20Sopenharmony_ci * 318c2ecf20Sopenharmony_ci * No running transaction (fs tree blocks are not modified) 328c2ecf20Sopenharmony_ci * | 338c2ecf20Sopenharmony_ci * | To next stage: 348c2ecf20Sopenharmony_ci * | Call start_transaction() variants. Except btrfs_join_transaction_nostart(). 358c2ecf20Sopenharmony_ci * V 368c2ecf20Sopenharmony_ci * Transaction N [[TRANS_STATE_RUNNING]] 378c2ecf20Sopenharmony_ci * | 388c2ecf20Sopenharmony_ci * | New trans handles can be attached to transaction N by calling all 398c2ecf20Sopenharmony_ci * | start_transaction() variants. 
408c2ecf20Sopenharmony_ci * | 418c2ecf20Sopenharmony_ci * | To next stage: 428c2ecf20Sopenharmony_ci * | Call btrfs_commit_transaction() on any trans handle attached to 438c2ecf20Sopenharmony_ci * | transaction N 448c2ecf20Sopenharmony_ci * V 458c2ecf20Sopenharmony_ci * Transaction N [[TRANS_STATE_COMMIT_START]] 468c2ecf20Sopenharmony_ci * | 478c2ecf20Sopenharmony_ci * | Will wait for previous running transaction to completely finish if there 488c2ecf20Sopenharmony_ci * | is one 498c2ecf20Sopenharmony_ci * | 508c2ecf20Sopenharmony_ci * | Then one of the following happes: 518c2ecf20Sopenharmony_ci * | - Wait for all other trans handle holders to release. 528c2ecf20Sopenharmony_ci * | The btrfs_commit_transaction() caller will do the commit work. 538c2ecf20Sopenharmony_ci * | - Wait for current transaction to be committed by others. 548c2ecf20Sopenharmony_ci * | Other btrfs_commit_transaction() caller will do the commit work. 558c2ecf20Sopenharmony_ci * | 568c2ecf20Sopenharmony_ci * | At this stage, only btrfs_join_transaction*() variants can attach 578c2ecf20Sopenharmony_ci * | to this running transaction. 588c2ecf20Sopenharmony_ci * | All other variants will wait for current one to finish and attach to 598c2ecf20Sopenharmony_ci * | transaction N+1. 608c2ecf20Sopenharmony_ci * | 618c2ecf20Sopenharmony_ci * | To next stage: 628c2ecf20Sopenharmony_ci * | Caller is chosen to commit transaction N, and all other trans handle 638c2ecf20Sopenharmony_ci * | haven been released. 648c2ecf20Sopenharmony_ci * V 658c2ecf20Sopenharmony_ci * Transaction N [[TRANS_STATE_COMMIT_DOING]] 668c2ecf20Sopenharmony_ci * | 678c2ecf20Sopenharmony_ci * | The heavy lifting transaction work is started. 688c2ecf20Sopenharmony_ci * | From running delayed refs (modifying extent tree) to creating pending 698c2ecf20Sopenharmony_ci * | snapshots, running qgroups. 708c2ecf20Sopenharmony_ci * | In short, modify supporting trees to reflect modifications of subvolume 718c2ecf20Sopenharmony_ci * | trees. 
728c2ecf20Sopenharmony_ci * | 738c2ecf20Sopenharmony_ci * | At this stage, all start_transaction() calls will wait for this 748c2ecf20Sopenharmony_ci * | transaction to finish and attach to transaction N+1. 758c2ecf20Sopenharmony_ci * | 768c2ecf20Sopenharmony_ci * | To next stage: 778c2ecf20Sopenharmony_ci * | Until all supporting trees are updated. 788c2ecf20Sopenharmony_ci * V 798c2ecf20Sopenharmony_ci * Transaction N [[TRANS_STATE_UNBLOCKED]] 808c2ecf20Sopenharmony_ci * | Transaction N+1 818c2ecf20Sopenharmony_ci * | All needed trees are modified, thus we only [[TRANS_STATE_RUNNING]] 828c2ecf20Sopenharmony_ci * | need to write them back to disk and update | 838c2ecf20Sopenharmony_ci * | super blocks. | 848c2ecf20Sopenharmony_ci * | | 858c2ecf20Sopenharmony_ci * | At this stage, new transaction is allowed to | 868c2ecf20Sopenharmony_ci * | start. | 878c2ecf20Sopenharmony_ci * | All new start_transaction() calls will be | 888c2ecf20Sopenharmony_ci * | attached to transid N+1. | 898c2ecf20Sopenharmony_ci * | | 908c2ecf20Sopenharmony_ci * | To next stage: | 918c2ecf20Sopenharmony_ci * | Until all tree blocks are super blocks are | 928c2ecf20Sopenharmony_ci * | written to block devices | 938c2ecf20Sopenharmony_ci * V | 948c2ecf20Sopenharmony_ci * Transaction N [[TRANS_STATE_COMPLETED]] V 958c2ecf20Sopenharmony_ci * All tree blocks and super blocks are written. Transaction N+1 968c2ecf20Sopenharmony_ci * This transaction is finished and all its [[TRANS_STATE_COMMIT_START]] 978c2ecf20Sopenharmony_ci * data structures will be cleaned up. 
| Life goes on 988c2ecf20Sopenharmony_ci */ 998c2ecf20Sopenharmony_cistatic const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { 1008c2ecf20Sopenharmony_ci [TRANS_STATE_RUNNING] = 0U, 1018c2ecf20Sopenharmony_ci [TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH), 1028c2ecf20Sopenharmony_ci [TRANS_STATE_COMMIT_DOING] = (__TRANS_START | 1038c2ecf20Sopenharmony_ci __TRANS_ATTACH | 1048c2ecf20Sopenharmony_ci __TRANS_JOIN | 1058c2ecf20Sopenharmony_ci __TRANS_JOIN_NOSTART), 1068c2ecf20Sopenharmony_ci [TRANS_STATE_UNBLOCKED] = (__TRANS_START | 1078c2ecf20Sopenharmony_ci __TRANS_ATTACH | 1088c2ecf20Sopenharmony_ci __TRANS_JOIN | 1098c2ecf20Sopenharmony_ci __TRANS_JOIN_NOLOCK | 1108c2ecf20Sopenharmony_ci __TRANS_JOIN_NOSTART), 1118c2ecf20Sopenharmony_ci [TRANS_STATE_COMPLETED] = (__TRANS_START | 1128c2ecf20Sopenharmony_ci __TRANS_ATTACH | 1138c2ecf20Sopenharmony_ci __TRANS_JOIN | 1148c2ecf20Sopenharmony_ci __TRANS_JOIN_NOLOCK | 1158c2ecf20Sopenharmony_ci __TRANS_JOIN_NOSTART), 1168c2ecf20Sopenharmony_ci}; 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_civoid btrfs_put_transaction(struct btrfs_transaction *transaction) 1198c2ecf20Sopenharmony_ci{ 1208c2ecf20Sopenharmony_ci WARN_ON(refcount_read(&transaction->use_count) == 0); 1218c2ecf20Sopenharmony_ci if (refcount_dec_and_test(&transaction->use_count)) { 1228c2ecf20Sopenharmony_ci BUG_ON(!list_empty(&transaction->list)); 1238c2ecf20Sopenharmony_ci WARN_ON(!RB_EMPTY_ROOT( 1248c2ecf20Sopenharmony_ci &transaction->delayed_refs.href_root.rb_root)); 1258c2ecf20Sopenharmony_ci WARN_ON(!RB_EMPTY_ROOT( 1268c2ecf20Sopenharmony_ci &transaction->delayed_refs.dirty_extent_root)); 1278c2ecf20Sopenharmony_ci if (transaction->delayed_refs.pending_csums) 1288c2ecf20Sopenharmony_ci btrfs_err(transaction->fs_info, 1298c2ecf20Sopenharmony_ci "pending csums is %llu", 1308c2ecf20Sopenharmony_ci transaction->delayed_refs.pending_csums); 1318c2ecf20Sopenharmony_ci /* 1328c2ecf20Sopenharmony_ci * If any block groups are 
found in ->deleted_bgs then it's 1338c2ecf20Sopenharmony_ci * because the transaction was aborted and a commit did not 1348c2ecf20Sopenharmony_ci * happen (things failed before writing the new superblock 1358c2ecf20Sopenharmony_ci * and calling btrfs_finish_extent_commit()), so we can not 1368c2ecf20Sopenharmony_ci * discard the physical locations of the block groups. 1378c2ecf20Sopenharmony_ci */ 1388c2ecf20Sopenharmony_ci while (!list_empty(&transaction->deleted_bgs)) { 1398c2ecf20Sopenharmony_ci struct btrfs_block_group *cache; 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_ci cache = list_first_entry(&transaction->deleted_bgs, 1428c2ecf20Sopenharmony_ci struct btrfs_block_group, 1438c2ecf20Sopenharmony_ci bg_list); 1448c2ecf20Sopenharmony_ci list_del_init(&cache->bg_list); 1458c2ecf20Sopenharmony_ci btrfs_unfreeze_block_group(cache); 1468c2ecf20Sopenharmony_ci btrfs_put_block_group(cache); 1478c2ecf20Sopenharmony_ci } 1488c2ecf20Sopenharmony_ci WARN_ON(!list_empty(&transaction->dev_update_list)); 1498c2ecf20Sopenharmony_ci kfree(transaction); 1508c2ecf20Sopenharmony_ci } 1518c2ecf20Sopenharmony_ci} 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_cistatic noinline void switch_commit_roots(struct btrfs_trans_handle *trans) 1548c2ecf20Sopenharmony_ci{ 1558c2ecf20Sopenharmony_ci struct btrfs_transaction *cur_trans = trans->transaction; 1568c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = trans->fs_info; 1578c2ecf20Sopenharmony_ci struct btrfs_root *root, *tmp; 1588c2ecf20Sopenharmony_ci struct btrfs_caching_control *caching_ctl, *next; 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci down_write(&fs_info->commit_root_sem); 1618c2ecf20Sopenharmony_ci list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits, 1628c2ecf20Sopenharmony_ci dirty_list) { 1638c2ecf20Sopenharmony_ci list_del_init(&root->dirty_list); 1648c2ecf20Sopenharmony_ci free_extent_buffer(root->commit_root); 1658c2ecf20Sopenharmony_ci root->commit_root = btrfs_root_node(root); 
1668c2ecf20Sopenharmony_ci if (is_fstree(root->root_key.objectid)) 1678c2ecf20Sopenharmony_ci btrfs_unpin_free_ino(root); 1688c2ecf20Sopenharmony_ci extent_io_tree_release(&root->dirty_log_pages); 1698c2ecf20Sopenharmony_ci btrfs_qgroup_clean_swapped_blocks(root); 1708c2ecf20Sopenharmony_ci } 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci /* We can free old roots now. */ 1738c2ecf20Sopenharmony_ci spin_lock(&cur_trans->dropped_roots_lock); 1748c2ecf20Sopenharmony_ci while (!list_empty(&cur_trans->dropped_roots)) { 1758c2ecf20Sopenharmony_ci root = list_first_entry(&cur_trans->dropped_roots, 1768c2ecf20Sopenharmony_ci struct btrfs_root, root_list); 1778c2ecf20Sopenharmony_ci list_del_init(&root->root_list); 1788c2ecf20Sopenharmony_ci spin_unlock(&cur_trans->dropped_roots_lock); 1798c2ecf20Sopenharmony_ci btrfs_free_log(trans, root); 1808c2ecf20Sopenharmony_ci btrfs_drop_and_free_fs_root(fs_info, root); 1818c2ecf20Sopenharmony_ci spin_lock(&cur_trans->dropped_roots_lock); 1828c2ecf20Sopenharmony_ci } 1838c2ecf20Sopenharmony_ci spin_unlock(&cur_trans->dropped_roots_lock); 1848c2ecf20Sopenharmony_ci 1858c2ecf20Sopenharmony_ci /* 1868c2ecf20Sopenharmony_ci * We have to update the last_byte_to_unpin under the commit_root_sem, 1878c2ecf20Sopenharmony_ci * at the same time we swap out the commit roots. 1888c2ecf20Sopenharmony_ci * 1898c2ecf20Sopenharmony_ci * This is because we must have a real view of the last spot the caching 1908c2ecf20Sopenharmony_ci * kthreads were while caching. 
Consider the following views of the 1918c2ecf20Sopenharmony_ci * extent tree for a block group 1928c2ecf20Sopenharmony_ci * 1938c2ecf20Sopenharmony_ci * commit root 1948c2ecf20Sopenharmony_ci * +----+----+----+----+----+----+----+ 1958c2ecf20Sopenharmony_ci * |\\\\| |\\\\|\\\\| |\\\\|\\\\| 1968c2ecf20Sopenharmony_ci * +----+----+----+----+----+----+----+ 1978c2ecf20Sopenharmony_ci * 0 1 2 3 4 5 6 7 1988c2ecf20Sopenharmony_ci * 1998c2ecf20Sopenharmony_ci * new commit root 2008c2ecf20Sopenharmony_ci * +----+----+----+----+----+----+----+ 2018c2ecf20Sopenharmony_ci * | | | |\\\\| | |\\\\| 2028c2ecf20Sopenharmony_ci * +----+----+----+----+----+----+----+ 2038c2ecf20Sopenharmony_ci * 0 1 2 3 4 5 6 7 2048c2ecf20Sopenharmony_ci * 2058c2ecf20Sopenharmony_ci * If the cache_ctl->progress was at 3, then we are only allowed to 2068c2ecf20Sopenharmony_ci * unpin [0,1) and [2,3], because the caching thread has already 2078c2ecf20Sopenharmony_ci * processed those extents. We are not allowed to unpin [5,6), because 2088c2ecf20Sopenharmony_ci * the caching thread will re-start it's search from 3, and thus find 2098c2ecf20Sopenharmony_ci * the hole from [4,6) to add to the free space cache. 
2108c2ecf20Sopenharmony_ci */ 2118c2ecf20Sopenharmony_ci list_for_each_entry_safe(caching_ctl, next, 2128c2ecf20Sopenharmony_ci &fs_info->caching_block_groups, list) { 2138c2ecf20Sopenharmony_ci struct btrfs_block_group *cache = caching_ctl->block_group; 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci if (btrfs_block_group_done(cache)) { 2168c2ecf20Sopenharmony_ci cache->last_byte_to_unpin = (u64)-1; 2178c2ecf20Sopenharmony_ci list_del_init(&caching_ctl->list); 2188c2ecf20Sopenharmony_ci btrfs_put_caching_control(caching_ctl); 2198c2ecf20Sopenharmony_ci } else { 2208c2ecf20Sopenharmony_ci cache->last_byte_to_unpin = caching_ctl->progress; 2218c2ecf20Sopenharmony_ci } 2228c2ecf20Sopenharmony_ci } 2238c2ecf20Sopenharmony_ci up_write(&fs_info->commit_root_sem); 2248c2ecf20Sopenharmony_ci} 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_cistatic inline void extwriter_counter_inc(struct btrfs_transaction *trans, 2278c2ecf20Sopenharmony_ci unsigned int type) 2288c2ecf20Sopenharmony_ci{ 2298c2ecf20Sopenharmony_ci if (type & TRANS_EXTWRITERS) 2308c2ecf20Sopenharmony_ci atomic_inc(&trans->num_extwriters); 2318c2ecf20Sopenharmony_ci} 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_cistatic inline void extwriter_counter_dec(struct btrfs_transaction *trans, 2348c2ecf20Sopenharmony_ci unsigned int type) 2358c2ecf20Sopenharmony_ci{ 2368c2ecf20Sopenharmony_ci if (type & TRANS_EXTWRITERS) 2378c2ecf20Sopenharmony_ci atomic_dec(&trans->num_extwriters); 2388c2ecf20Sopenharmony_ci} 2398c2ecf20Sopenharmony_ci 2408c2ecf20Sopenharmony_cistatic inline void extwriter_counter_init(struct btrfs_transaction *trans, 2418c2ecf20Sopenharmony_ci unsigned int type) 2428c2ecf20Sopenharmony_ci{ 2438c2ecf20Sopenharmony_ci atomic_set(&trans->num_extwriters, ((type & TRANS_EXTWRITERS) ? 
1 : 0)); 2448c2ecf20Sopenharmony_ci} 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_cistatic inline int extwriter_counter_read(struct btrfs_transaction *trans) 2478c2ecf20Sopenharmony_ci{ 2488c2ecf20Sopenharmony_ci return atomic_read(&trans->num_extwriters); 2498c2ecf20Sopenharmony_ci} 2508c2ecf20Sopenharmony_ci 2518c2ecf20Sopenharmony_ci/* 2528c2ecf20Sopenharmony_ci * To be called after all the new block groups attached to the transaction 2538c2ecf20Sopenharmony_ci * handle have been created (btrfs_create_pending_block_groups()). 2548c2ecf20Sopenharmony_ci */ 2558c2ecf20Sopenharmony_civoid btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans) 2568c2ecf20Sopenharmony_ci{ 2578c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = trans->fs_info; 2588c2ecf20Sopenharmony_ci 2598c2ecf20Sopenharmony_ci if (!trans->chunk_bytes_reserved) 2608c2ecf20Sopenharmony_ci return; 2618c2ecf20Sopenharmony_ci 2628c2ecf20Sopenharmony_ci WARN_ON_ONCE(!list_empty(&trans->new_bgs)); 2638c2ecf20Sopenharmony_ci 2648c2ecf20Sopenharmony_ci btrfs_block_rsv_release(fs_info, &fs_info->chunk_block_rsv, 2658c2ecf20Sopenharmony_ci trans->chunk_bytes_reserved, NULL); 2668c2ecf20Sopenharmony_ci trans->chunk_bytes_reserved = 0; 2678c2ecf20Sopenharmony_ci} 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_ci/* 2708c2ecf20Sopenharmony_ci * either allocate a new transaction or hop into the existing one 2718c2ecf20Sopenharmony_ci */ 2728c2ecf20Sopenharmony_cistatic noinline int join_transaction(struct btrfs_fs_info *fs_info, 2738c2ecf20Sopenharmony_ci unsigned int type) 2748c2ecf20Sopenharmony_ci{ 2758c2ecf20Sopenharmony_ci struct btrfs_transaction *cur_trans; 2768c2ecf20Sopenharmony_ci 2778c2ecf20Sopenharmony_ci spin_lock(&fs_info->trans_lock); 2788c2ecf20Sopenharmony_ciloop: 2798c2ecf20Sopenharmony_ci /* The file system has been taken offline. No new transactions. 
*/ 2808c2ecf20Sopenharmony_ci if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { 2818c2ecf20Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 2828c2ecf20Sopenharmony_ci return -EROFS; 2838c2ecf20Sopenharmony_ci } 2848c2ecf20Sopenharmony_ci 2858c2ecf20Sopenharmony_ci cur_trans = fs_info->running_transaction; 2868c2ecf20Sopenharmony_ci if (cur_trans) { 2878c2ecf20Sopenharmony_ci if (TRANS_ABORTED(cur_trans)) { 2888c2ecf20Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 2898c2ecf20Sopenharmony_ci return cur_trans->aborted; 2908c2ecf20Sopenharmony_ci } 2918c2ecf20Sopenharmony_ci if (btrfs_blocked_trans_types[cur_trans->state] & type) { 2928c2ecf20Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 2938c2ecf20Sopenharmony_ci return -EBUSY; 2948c2ecf20Sopenharmony_ci } 2958c2ecf20Sopenharmony_ci refcount_inc(&cur_trans->use_count); 2968c2ecf20Sopenharmony_ci atomic_inc(&cur_trans->num_writers); 2978c2ecf20Sopenharmony_ci extwriter_counter_inc(cur_trans, type); 2988c2ecf20Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 2998c2ecf20Sopenharmony_ci return 0; 3008c2ecf20Sopenharmony_ci } 3018c2ecf20Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 3028c2ecf20Sopenharmony_ci 3038c2ecf20Sopenharmony_ci /* 3048c2ecf20Sopenharmony_ci * If we are ATTACH or TRANS_JOIN_NOSTART, we just want to catch the 3058c2ecf20Sopenharmony_ci * current transaction, and commit it. If there is no transaction, just 3068c2ecf20Sopenharmony_ci * return ENOENT. 
3078c2ecf20Sopenharmony_ci */ 3088c2ecf20Sopenharmony_ci if (type == TRANS_ATTACH || type == TRANS_JOIN_NOSTART) 3098c2ecf20Sopenharmony_ci return -ENOENT; 3108c2ecf20Sopenharmony_ci 3118c2ecf20Sopenharmony_ci /* 3128c2ecf20Sopenharmony_ci * JOIN_NOLOCK only happens during the transaction commit, so 3138c2ecf20Sopenharmony_ci * it is impossible that ->running_transaction is NULL 3148c2ecf20Sopenharmony_ci */ 3158c2ecf20Sopenharmony_ci BUG_ON(type == TRANS_JOIN_NOLOCK); 3168c2ecf20Sopenharmony_ci 3178c2ecf20Sopenharmony_ci cur_trans = kmalloc(sizeof(*cur_trans), GFP_NOFS); 3188c2ecf20Sopenharmony_ci if (!cur_trans) 3198c2ecf20Sopenharmony_ci return -ENOMEM; 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_ci spin_lock(&fs_info->trans_lock); 3228c2ecf20Sopenharmony_ci if (fs_info->running_transaction) { 3238c2ecf20Sopenharmony_ci /* 3248c2ecf20Sopenharmony_ci * someone started a transaction after we unlocked. Make sure 3258c2ecf20Sopenharmony_ci * to redo the checks above 3268c2ecf20Sopenharmony_ci */ 3278c2ecf20Sopenharmony_ci kfree(cur_trans); 3288c2ecf20Sopenharmony_ci goto loop; 3298c2ecf20Sopenharmony_ci } else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { 3308c2ecf20Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 3318c2ecf20Sopenharmony_ci kfree(cur_trans); 3328c2ecf20Sopenharmony_ci return -EROFS; 3338c2ecf20Sopenharmony_ci } 3348c2ecf20Sopenharmony_ci 3358c2ecf20Sopenharmony_ci cur_trans->fs_info = fs_info; 3368c2ecf20Sopenharmony_ci atomic_set(&cur_trans->pending_ordered, 0); 3378c2ecf20Sopenharmony_ci init_waitqueue_head(&cur_trans->pending_wait); 3388c2ecf20Sopenharmony_ci atomic_set(&cur_trans->num_writers, 1); 3398c2ecf20Sopenharmony_ci extwriter_counter_init(cur_trans, type); 3408c2ecf20Sopenharmony_ci init_waitqueue_head(&cur_trans->writer_wait); 3418c2ecf20Sopenharmony_ci init_waitqueue_head(&cur_trans->commit_wait); 3428c2ecf20Sopenharmony_ci cur_trans->state = TRANS_STATE_RUNNING; 3438c2ecf20Sopenharmony_ci /* 
3448c2ecf20Sopenharmony_ci * One for this trans handle, one so it will live on until we 3458c2ecf20Sopenharmony_ci * commit the transaction. 3468c2ecf20Sopenharmony_ci */ 3478c2ecf20Sopenharmony_ci refcount_set(&cur_trans->use_count, 2); 3488c2ecf20Sopenharmony_ci cur_trans->flags = 0; 3498c2ecf20Sopenharmony_ci cur_trans->start_time = ktime_get_seconds(); 3508c2ecf20Sopenharmony_ci 3518c2ecf20Sopenharmony_ci memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs)); 3528c2ecf20Sopenharmony_ci 3538c2ecf20Sopenharmony_ci cur_trans->delayed_refs.href_root = RB_ROOT_CACHED; 3548c2ecf20Sopenharmony_ci cur_trans->delayed_refs.dirty_extent_root = RB_ROOT; 3558c2ecf20Sopenharmony_ci atomic_set(&cur_trans->delayed_refs.num_entries, 0); 3568c2ecf20Sopenharmony_ci 3578c2ecf20Sopenharmony_ci /* 3588c2ecf20Sopenharmony_ci * although the tree mod log is per file system and not per transaction, 3598c2ecf20Sopenharmony_ci * the log must never go across transaction boundaries. 3608c2ecf20Sopenharmony_ci */ 3618c2ecf20Sopenharmony_ci smp_mb(); 3628c2ecf20Sopenharmony_ci if (!list_empty(&fs_info->tree_mod_seq_list)) 3638c2ecf20Sopenharmony_ci WARN(1, KERN_ERR "BTRFS: tree_mod_seq_list not empty when creating a fresh transaction\n"); 3648c2ecf20Sopenharmony_ci if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) 3658c2ecf20Sopenharmony_ci WARN(1, KERN_ERR "BTRFS: tree_mod_log rb tree not empty when creating a fresh transaction\n"); 3668c2ecf20Sopenharmony_ci atomic64_set(&fs_info->tree_mod_seq, 0); 3678c2ecf20Sopenharmony_ci 3688c2ecf20Sopenharmony_ci spin_lock_init(&cur_trans->delayed_refs.lock); 3698c2ecf20Sopenharmony_ci 3708c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&cur_trans->pending_snapshots); 3718c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&cur_trans->dev_update_list); 3728c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&cur_trans->switch_commits); 3738c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&cur_trans->dirty_bgs); 3748c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&cur_trans->io_bgs); 
3758c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&cur_trans->dropped_roots); 3768c2ecf20Sopenharmony_ci mutex_init(&cur_trans->cache_write_mutex); 3778c2ecf20Sopenharmony_ci spin_lock_init(&cur_trans->dirty_bgs_lock); 3788c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&cur_trans->deleted_bgs); 3798c2ecf20Sopenharmony_ci spin_lock_init(&cur_trans->dropped_roots_lock); 3808c2ecf20Sopenharmony_ci list_add_tail(&cur_trans->list, &fs_info->trans_list); 3818c2ecf20Sopenharmony_ci extent_io_tree_init(fs_info, &cur_trans->dirty_pages, 3828c2ecf20Sopenharmony_ci IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode); 3838c2ecf20Sopenharmony_ci extent_io_tree_init(fs_info, &cur_trans->pinned_extents, 3848c2ecf20Sopenharmony_ci IO_TREE_FS_PINNED_EXTENTS, NULL); 3858c2ecf20Sopenharmony_ci fs_info->generation++; 3868c2ecf20Sopenharmony_ci cur_trans->transid = fs_info->generation; 3878c2ecf20Sopenharmony_ci fs_info->running_transaction = cur_trans; 3888c2ecf20Sopenharmony_ci cur_trans->aborted = 0; 3898c2ecf20Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 3908c2ecf20Sopenharmony_ci 3918c2ecf20Sopenharmony_ci return 0; 3928c2ecf20Sopenharmony_ci} 3938c2ecf20Sopenharmony_ci 3948c2ecf20Sopenharmony_ci/* 3958c2ecf20Sopenharmony_ci * This does all the record keeping required to make sure that a shareable root 3968c2ecf20Sopenharmony_ci * is properly recorded in a given transaction. This is required to make sure 3978c2ecf20Sopenharmony_ci * the old root from before we joined the transaction is deleted when the 3988c2ecf20Sopenharmony_ci * transaction commits. 
3998c2ecf20Sopenharmony_ci */ 4008c2ecf20Sopenharmony_cistatic int record_root_in_trans(struct btrfs_trans_handle *trans, 4018c2ecf20Sopenharmony_ci struct btrfs_root *root, 4028c2ecf20Sopenharmony_ci int force) 4038c2ecf20Sopenharmony_ci{ 4048c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_ci if ((test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && 4078c2ecf20Sopenharmony_ci root->last_trans < trans->transid) || force) { 4088c2ecf20Sopenharmony_ci WARN_ON(root == fs_info->extent_root); 4098c2ecf20Sopenharmony_ci WARN_ON(!force && root->commit_root != root->node); 4108c2ecf20Sopenharmony_ci 4118c2ecf20Sopenharmony_ci /* 4128c2ecf20Sopenharmony_ci * see below for IN_TRANS_SETUP usage rules 4138c2ecf20Sopenharmony_ci * we have the reloc mutex held now, so there 4148c2ecf20Sopenharmony_ci * is only one writer in this function 4158c2ecf20Sopenharmony_ci */ 4168c2ecf20Sopenharmony_ci set_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state); 4178c2ecf20Sopenharmony_ci 4188c2ecf20Sopenharmony_ci /* make sure readers find IN_TRANS_SETUP before 4198c2ecf20Sopenharmony_ci * they find our root->last_trans update 4208c2ecf20Sopenharmony_ci */ 4218c2ecf20Sopenharmony_ci smp_wmb(); 4228c2ecf20Sopenharmony_ci 4238c2ecf20Sopenharmony_ci spin_lock(&fs_info->fs_roots_radix_lock); 4248c2ecf20Sopenharmony_ci if (root->last_trans == trans->transid && !force) { 4258c2ecf20Sopenharmony_ci spin_unlock(&fs_info->fs_roots_radix_lock); 4268c2ecf20Sopenharmony_ci return 0; 4278c2ecf20Sopenharmony_ci } 4288c2ecf20Sopenharmony_ci radix_tree_tag_set(&fs_info->fs_roots_radix, 4298c2ecf20Sopenharmony_ci (unsigned long)root->root_key.objectid, 4308c2ecf20Sopenharmony_ci BTRFS_ROOT_TRANS_TAG); 4318c2ecf20Sopenharmony_ci spin_unlock(&fs_info->fs_roots_radix_lock); 4328c2ecf20Sopenharmony_ci root->last_trans = trans->transid; 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_ci /* this is pretty tricky. 
We don't want to 4358c2ecf20Sopenharmony_ci * take the relocation lock in btrfs_record_root_in_trans 4368c2ecf20Sopenharmony_ci * unless we're really doing the first setup for this root in 4378c2ecf20Sopenharmony_ci * this transaction. 4388c2ecf20Sopenharmony_ci * 4398c2ecf20Sopenharmony_ci * Normally we'd use root->last_trans as a flag to decide 4408c2ecf20Sopenharmony_ci * if we want to take the expensive mutex. 4418c2ecf20Sopenharmony_ci * 4428c2ecf20Sopenharmony_ci * But, we have to set root->last_trans before we 4438c2ecf20Sopenharmony_ci * init the relocation root, otherwise, we trip over warnings 4448c2ecf20Sopenharmony_ci * in ctree.c. The solution used here is to flag ourselves 4458c2ecf20Sopenharmony_ci * with root IN_TRANS_SETUP. When this is 1, we're still 4468c2ecf20Sopenharmony_ci * fixing up the reloc trees and everyone must wait. 4478c2ecf20Sopenharmony_ci * 4488c2ecf20Sopenharmony_ci * When this is zero, they can trust root->last_trans and fly 4498c2ecf20Sopenharmony_ci * through btrfs_record_root_in_trans without having to take the 4508c2ecf20Sopenharmony_ci * lock. 
smp_wmb() makes sure that all the writes above are 4518c2ecf20Sopenharmony_ci * done before we pop in the zero below 4528c2ecf20Sopenharmony_ci */ 4538c2ecf20Sopenharmony_ci btrfs_init_reloc_root(trans, root); 4548c2ecf20Sopenharmony_ci smp_mb__before_atomic(); 4558c2ecf20Sopenharmony_ci clear_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state); 4568c2ecf20Sopenharmony_ci } 4578c2ecf20Sopenharmony_ci return 0; 4588c2ecf20Sopenharmony_ci} 4598c2ecf20Sopenharmony_ci 4608c2ecf20Sopenharmony_ci 4618c2ecf20Sopenharmony_civoid btrfs_add_dropped_root(struct btrfs_trans_handle *trans, 4628c2ecf20Sopenharmony_ci struct btrfs_root *root) 4638c2ecf20Sopenharmony_ci{ 4648c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 4658c2ecf20Sopenharmony_ci struct btrfs_transaction *cur_trans = trans->transaction; 4668c2ecf20Sopenharmony_ci 4678c2ecf20Sopenharmony_ci /* Add ourselves to the transaction dropped list */ 4688c2ecf20Sopenharmony_ci spin_lock(&cur_trans->dropped_roots_lock); 4698c2ecf20Sopenharmony_ci list_add_tail(&root->root_list, &cur_trans->dropped_roots); 4708c2ecf20Sopenharmony_ci spin_unlock(&cur_trans->dropped_roots_lock); 4718c2ecf20Sopenharmony_ci 4728c2ecf20Sopenharmony_ci /* Make sure we don't try to update the root at commit time */ 4738c2ecf20Sopenharmony_ci spin_lock(&fs_info->fs_roots_radix_lock); 4748c2ecf20Sopenharmony_ci radix_tree_tag_clear(&fs_info->fs_roots_radix, 4758c2ecf20Sopenharmony_ci (unsigned long)root->root_key.objectid, 4768c2ecf20Sopenharmony_ci BTRFS_ROOT_TRANS_TAG); 4778c2ecf20Sopenharmony_ci spin_unlock(&fs_info->fs_roots_radix_lock); 4788c2ecf20Sopenharmony_ci} 4798c2ecf20Sopenharmony_ci 4808c2ecf20Sopenharmony_ciint btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, 4818c2ecf20Sopenharmony_ci struct btrfs_root *root) 4828c2ecf20Sopenharmony_ci{ 4838c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 4848c2ecf20Sopenharmony_ci 4858c2ecf20Sopenharmony_ci if (!test_bit(BTRFS_ROOT_SHAREABLE, 
&root->state)) 4868c2ecf20Sopenharmony_ci return 0; 4878c2ecf20Sopenharmony_ci 4888c2ecf20Sopenharmony_ci /* 4898c2ecf20Sopenharmony_ci * see record_root_in_trans for comments about IN_TRANS_SETUP usage 4908c2ecf20Sopenharmony_ci * and barriers 4918c2ecf20Sopenharmony_ci */ 4928c2ecf20Sopenharmony_ci smp_rmb(); 4938c2ecf20Sopenharmony_ci if (root->last_trans == trans->transid && 4948c2ecf20Sopenharmony_ci !test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state)) 4958c2ecf20Sopenharmony_ci return 0; 4968c2ecf20Sopenharmony_ci 4978c2ecf20Sopenharmony_ci mutex_lock(&fs_info->reloc_mutex); 4988c2ecf20Sopenharmony_ci record_root_in_trans(trans, root, 0); 4998c2ecf20Sopenharmony_ci mutex_unlock(&fs_info->reloc_mutex); 5008c2ecf20Sopenharmony_ci 5018c2ecf20Sopenharmony_ci return 0; 5028c2ecf20Sopenharmony_ci} 5038c2ecf20Sopenharmony_ci 5048c2ecf20Sopenharmony_cistatic inline int is_transaction_blocked(struct btrfs_transaction *trans) 5058c2ecf20Sopenharmony_ci{ 5068c2ecf20Sopenharmony_ci return (trans->state >= TRANS_STATE_COMMIT_START && 5078c2ecf20Sopenharmony_ci trans->state < TRANS_STATE_UNBLOCKED && 5088c2ecf20Sopenharmony_ci !TRANS_ABORTED(trans)); 5098c2ecf20Sopenharmony_ci} 5108c2ecf20Sopenharmony_ci 5118c2ecf20Sopenharmony_ci/* wait for commit against the current transaction to become unblocked 5128c2ecf20Sopenharmony_ci * when this is done, it is safe to start a new transaction, but the current 5138c2ecf20Sopenharmony_ci * transaction might not be fully on disk. 
5148c2ecf20Sopenharmony_ci */ 5158c2ecf20Sopenharmony_cistatic void wait_current_trans(struct btrfs_fs_info *fs_info) 5168c2ecf20Sopenharmony_ci{ 5178c2ecf20Sopenharmony_ci struct btrfs_transaction *cur_trans; 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_ci spin_lock(&fs_info->trans_lock); 5208c2ecf20Sopenharmony_ci cur_trans = fs_info->running_transaction; 5218c2ecf20Sopenharmony_ci if (cur_trans && is_transaction_blocked(cur_trans)) { 5228c2ecf20Sopenharmony_ci refcount_inc(&cur_trans->use_count); 5238c2ecf20Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 5248c2ecf20Sopenharmony_ci 5258c2ecf20Sopenharmony_ci wait_event(fs_info->transaction_wait, 5268c2ecf20Sopenharmony_ci cur_trans->state >= TRANS_STATE_UNBLOCKED || 5278c2ecf20Sopenharmony_ci TRANS_ABORTED(cur_trans)); 5288c2ecf20Sopenharmony_ci btrfs_put_transaction(cur_trans); 5298c2ecf20Sopenharmony_ci } else { 5308c2ecf20Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 5318c2ecf20Sopenharmony_ci } 5328c2ecf20Sopenharmony_ci} 5338c2ecf20Sopenharmony_ci 5348c2ecf20Sopenharmony_cistatic int may_wait_transaction(struct btrfs_fs_info *fs_info, int type) 5358c2ecf20Sopenharmony_ci{ 5368c2ecf20Sopenharmony_ci if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) 5378c2ecf20Sopenharmony_ci return 0; 5388c2ecf20Sopenharmony_ci 5398c2ecf20Sopenharmony_ci if (type == TRANS_START) 5408c2ecf20Sopenharmony_ci return 1; 5418c2ecf20Sopenharmony_ci 5428c2ecf20Sopenharmony_ci return 0; 5438c2ecf20Sopenharmony_ci} 5448c2ecf20Sopenharmony_ci 5458c2ecf20Sopenharmony_cistatic inline bool need_reserve_reloc_root(struct btrfs_root *root) 5468c2ecf20Sopenharmony_ci{ 5478c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 5488c2ecf20Sopenharmony_ci 5498c2ecf20Sopenharmony_ci if (!fs_info->reloc_ctl || 5508c2ecf20Sopenharmony_ci !test_bit(BTRFS_ROOT_SHAREABLE, &root->state) || 5518c2ecf20Sopenharmony_ci root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || 5528c2ecf20Sopenharmony_ci root->reloc_root) 
5538c2ecf20Sopenharmony_ci return false; 5548c2ecf20Sopenharmony_ci 5558c2ecf20Sopenharmony_ci return true; 5568c2ecf20Sopenharmony_ci} 5578c2ecf20Sopenharmony_ci 5588c2ecf20Sopenharmony_cistatic struct btrfs_trans_handle * 5598c2ecf20Sopenharmony_cistart_transaction(struct btrfs_root *root, unsigned int num_items, 5608c2ecf20Sopenharmony_ci unsigned int type, enum btrfs_reserve_flush_enum flush, 5618c2ecf20Sopenharmony_ci bool enforce_qgroups) 5628c2ecf20Sopenharmony_ci{ 5638c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 5648c2ecf20Sopenharmony_ci struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv; 5658c2ecf20Sopenharmony_ci struct btrfs_trans_handle *h; 5668c2ecf20Sopenharmony_ci struct btrfs_transaction *cur_trans; 5678c2ecf20Sopenharmony_ci u64 num_bytes = 0; 5688c2ecf20Sopenharmony_ci u64 qgroup_reserved = 0; 5698c2ecf20Sopenharmony_ci bool reloc_reserved = false; 5708c2ecf20Sopenharmony_ci bool do_chunk_alloc = false; 5718c2ecf20Sopenharmony_ci int ret; 5728c2ecf20Sopenharmony_ci 5738c2ecf20Sopenharmony_ci /* Send isn't supposed to start transactions. 
*/ 5748c2ecf20Sopenharmony_ci ASSERT(current->journal_info != BTRFS_SEND_TRANS_STUB); 5758c2ecf20Sopenharmony_ci 5768c2ecf20Sopenharmony_ci if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) 5778c2ecf20Sopenharmony_ci return ERR_PTR(-EROFS); 5788c2ecf20Sopenharmony_ci 5798c2ecf20Sopenharmony_ci if (current->journal_info) { 5808c2ecf20Sopenharmony_ci WARN_ON(type & TRANS_EXTWRITERS); 5818c2ecf20Sopenharmony_ci h = current->journal_info; 5828c2ecf20Sopenharmony_ci refcount_inc(&h->use_count); 5838c2ecf20Sopenharmony_ci WARN_ON(refcount_read(&h->use_count) > 2); 5848c2ecf20Sopenharmony_ci h->orig_rsv = h->block_rsv; 5858c2ecf20Sopenharmony_ci h->block_rsv = NULL; 5868c2ecf20Sopenharmony_ci goto got_it; 5878c2ecf20Sopenharmony_ci } 5888c2ecf20Sopenharmony_ci 5898c2ecf20Sopenharmony_ci /* 5908c2ecf20Sopenharmony_ci * Do the reservation before we join the transaction so we can do all 5918c2ecf20Sopenharmony_ci * the appropriate flushing if need be. 5928c2ecf20Sopenharmony_ci */ 5938c2ecf20Sopenharmony_ci if (num_items && root != fs_info->chunk_root) { 5948c2ecf20Sopenharmony_ci struct btrfs_block_rsv *rsv = &fs_info->trans_block_rsv; 5958c2ecf20Sopenharmony_ci u64 delayed_refs_bytes = 0; 5968c2ecf20Sopenharmony_ci 5978c2ecf20Sopenharmony_ci qgroup_reserved = num_items * fs_info->nodesize; 5988c2ecf20Sopenharmony_ci ret = btrfs_qgroup_reserve_meta_pertrans(root, qgroup_reserved, 5998c2ecf20Sopenharmony_ci enforce_qgroups); 6008c2ecf20Sopenharmony_ci if (ret) 6018c2ecf20Sopenharmony_ci return ERR_PTR(ret); 6028c2ecf20Sopenharmony_ci 6038c2ecf20Sopenharmony_ci /* 6048c2ecf20Sopenharmony_ci * We want to reserve all the bytes we may need all at once, so 6058c2ecf20Sopenharmony_ci * we only do 1 enospc flushing cycle per transaction start. 
We 6068c2ecf20Sopenharmony_ci * accomplish this by simply assuming we'll do 2 x num_items 6078c2ecf20Sopenharmony_ci * worth of delayed refs updates in this trans handle, and 6088c2ecf20Sopenharmony_ci * refill that amount for whatever is missing in the reserve. 6098c2ecf20Sopenharmony_ci */ 6108c2ecf20Sopenharmony_ci num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items); 6118c2ecf20Sopenharmony_ci if (flush == BTRFS_RESERVE_FLUSH_ALL && 6128c2ecf20Sopenharmony_ci delayed_refs_rsv->full == 0) { 6138c2ecf20Sopenharmony_ci delayed_refs_bytes = num_bytes; 6148c2ecf20Sopenharmony_ci num_bytes <<= 1; 6158c2ecf20Sopenharmony_ci } 6168c2ecf20Sopenharmony_ci 6178c2ecf20Sopenharmony_ci /* 6188c2ecf20Sopenharmony_ci * Do the reservation for the relocation root creation 6198c2ecf20Sopenharmony_ci */ 6208c2ecf20Sopenharmony_ci if (need_reserve_reloc_root(root)) { 6218c2ecf20Sopenharmony_ci num_bytes += fs_info->nodesize; 6228c2ecf20Sopenharmony_ci reloc_reserved = true; 6238c2ecf20Sopenharmony_ci } 6248c2ecf20Sopenharmony_ci 6258c2ecf20Sopenharmony_ci ret = btrfs_block_rsv_add(root, rsv, num_bytes, flush); 6268c2ecf20Sopenharmony_ci if (ret) 6278c2ecf20Sopenharmony_ci goto reserve_fail; 6288c2ecf20Sopenharmony_ci if (delayed_refs_bytes) { 6298c2ecf20Sopenharmony_ci btrfs_migrate_to_delayed_refs_rsv(fs_info, rsv, 6308c2ecf20Sopenharmony_ci delayed_refs_bytes); 6318c2ecf20Sopenharmony_ci num_bytes -= delayed_refs_bytes; 6328c2ecf20Sopenharmony_ci } 6338c2ecf20Sopenharmony_ci 6348c2ecf20Sopenharmony_ci if (rsv->space_info->force_alloc) 6358c2ecf20Sopenharmony_ci do_chunk_alloc = true; 6368c2ecf20Sopenharmony_ci } else if (num_items == 0 && flush == BTRFS_RESERVE_FLUSH_ALL && 6378c2ecf20Sopenharmony_ci !delayed_refs_rsv->full) { 6388c2ecf20Sopenharmony_ci /* 6398c2ecf20Sopenharmony_ci * Some people call with btrfs_start_transaction(root, 0) 6408c2ecf20Sopenharmony_ci * because they can be throttled, but have some other mechanism 6418c2ecf20Sopenharmony_ci * for reserving 
space. We still want these guys to refill the 6428c2ecf20Sopenharmony_ci * delayed block_rsv so just add 1 items worth of reservation 6438c2ecf20Sopenharmony_ci * here. 6448c2ecf20Sopenharmony_ci */ 6458c2ecf20Sopenharmony_ci ret = btrfs_delayed_refs_rsv_refill(fs_info, flush); 6468c2ecf20Sopenharmony_ci if (ret) 6478c2ecf20Sopenharmony_ci goto reserve_fail; 6488c2ecf20Sopenharmony_ci } 6498c2ecf20Sopenharmony_ciagain: 6508c2ecf20Sopenharmony_ci h = kmem_cache_zalloc(btrfs_trans_handle_cachep, GFP_NOFS); 6518c2ecf20Sopenharmony_ci if (!h) { 6528c2ecf20Sopenharmony_ci ret = -ENOMEM; 6538c2ecf20Sopenharmony_ci goto alloc_fail; 6548c2ecf20Sopenharmony_ci } 6558c2ecf20Sopenharmony_ci 6568c2ecf20Sopenharmony_ci /* 6578c2ecf20Sopenharmony_ci * If we are JOIN_NOLOCK we're already committing a transaction and 6588c2ecf20Sopenharmony_ci * waiting on this guy, so we don't need to do the sb_start_intwrite 6598c2ecf20Sopenharmony_ci * because we're already holding a ref. We need this because we could 6608c2ecf20Sopenharmony_ci * have raced in and did an fsync() on a file which can kick a commit 6618c2ecf20Sopenharmony_ci * and then we deadlock with somebody doing a freeze. 6628c2ecf20Sopenharmony_ci * 6638c2ecf20Sopenharmony_ci * If we are ATTACH, it means we just want to catch the current 6648c2ecf20Sopenharmony_ci * transaction and commit it, so we needn't do sb_start_intwrite(). 
6658c2ecf20Sopenharmony_ci */ 6668c2ecf20Sopenharmony_ci if (type & __TRANS_FREEZABLE) 6678c2ecf20Sopenharmony_ci sb_start_intwrite(fs_info->sb); 6688c2ecf20Sopenharmony_ci 6698c2ecf20Sopenharmony_ci if (may_wait_transaction(fs_info, type)) 6708c2ecf20Sopenharmony_ci wait_current_trans(fs_info); 6718c2ecf20Sopenharmony_ci 6728c2ecf20Sopenharmony_ci do { 6738c2ecf20Sopenharmony_ci ret = join_transaction(fs_info, type); 6748c2ecf20Sopenharmony_ci if (ret == -EBUSY) { 6758c2ecf20Sopenharmony_ci wait_current_trans(fs_info); 6768c2ecf20Sopenharmony_ci if (unlikely(type == TRANS_ATTACH || 6778c2ecf20Sopenharmony_ci type == TRANS_JOIN_NOSTART)) 6788c2ecf20Sopenharmony_ci ret = -ENOENT; 6798c2ecf20Sopenharmony_ci } 6808c2ecf20Sopenharmony_ci } while (ret == -EBUSY); 6818c2ecf20Sopenharmony_ci 6828c2ecf20Sopenharmony_ci if (ret < 0) 6838c2ecf20Sopenharmony_ci goto join_fail; 6848c2ecf20Sopenharmony_ci 6858c2ecf20Sopenharmony_ci cur_trans = fs_info->running_transaction; 6868c2ecf20Sopenharmony_ci 6878c2ecf20Sopenharmony_ci h->transid = cur_trans->transid; 6888c2ecf20Sopenharmony_ci h->transaction = cur_trans; 6898c2ecf20Sopenharmony_ci h->root = root; 6908c2ecf20Sopenharmony_ci refcount_set(&h->use_count, 1); 6918c2ecf20Sopenharmony_ci h->fs_info = root->fs_info; 6928c2ecf20Sopenharmony_ci 6938c2ecf20Sopenharmony_ci h->type = type; 6948c2ecf20Sopenharmony_ci h->can_flush_pending_bgs = true; 6958c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&h->new_bgs); 6968c2ecf20Sopenharmony_ci 6978c2ecf20Sopenharmony_ci smp_mb(); 6988c2ecf20Sopenharmony_ci if (cur_trans->state >= TRANS_STATE_COMMIT_START && 6998c2ecf20Sopenharmony_ci may_wait_transaction(fs_info, type)) { 7008c2ecf20Sopenharmony_ci current->journal_info = h; 7018c2ecf20Sopenharmony_ci btrfs_commit_transaction(h); 7028c2ecf20Sopenharmony_ci goto again; 7038c2ecf20Sopenharmony_ci } 7048c2ecf20Sopenharmony_ci 7058c2ecf20Sopenharmony_ci if (num_bytes) { 7068c2ecf20Sopenharmony_ci trace_btrfs_space_reservation(fs_info, "transaction", 
7078c2ecf20Sopenharmony_ci h->transid, num_bytes, 1); 7088c2ecf20Sopenharmony_ci h->block_rsv = &fs_info->trans_block_rsv; 7098c2ecf20Sopenharmony_ci h->bytes_reserved = num_bytes; 7108c2ecf20Sopenharmony_ci h->reloc_reserved = reloc_reserved; 7118c2ecf20Sopenharmony_ci } 7128c2ecf20Sopenharmony_ci 7138c2ecf20Sopenharmony_cigot_it: 7148c2ecf20Sopenharmony_ci if (!current->journal_info) 7158c2ecf20Sopenharmony_ci current->journal_info = h; 7168c2ecf20Sopenharmony_ci 7178c2ecf20Sopenharmony_ci /* 7188c2ecf20Sopenharmony_ci * If the space_info is marked ALLOC_FORCE then we'll get upgraded to 7198c2ecf20Sopenharmony_ci * ALLOC_FORCE the first run through, and then we won't allocate for 7208c2ecf20Sopenharmony_ci * anybody else who races in later. We don't care about the return 7218c2ecf20Sopenharmony_ci * value here. 7228c2ecf20Sopenharmony_ci */ 7238c2ecf20Sopenharmony_ci if (do_chunk_alloc && num_bytes) { 7248c2ecf20Sopenharmony_ci u64 flags = h->block_rsv->space_info->flags; 7258c2ecf20Sopenharmony_ci 7268c2ecf20Sopenharmony_ci btrfs_chunk_alloc(h, btrfs_get_alloc_profile(fs_info, flags), 7278c2ecf20Sopenharmony_ci CHUNK_ALLOC_NO_FORCE); 7288c2ecf20Sopenharmony_ci } 7298c2ecf20Sopenharmony_ci 7308c2ecf20Sopenharmony_ci /* 7318c2ecf20Sopenharmony_ci * btrfs_record_root_in_trans() needs to alloc new extents, and may 7328c2ecf20Sopenharmony_ci * call btrfs_join_transaction() while we're also starting a 7338c2ecf20Sopenharmony_ci * transaction. 7348c2ecf20Sopenharmony_ci * 7358c2ecf20Sopenharmony_ci * Thus it need to be called after current->journal_info initialized, 7368c2ecf20Sopenharmony_ci * or we can deadlock. 
7378c2ecf20Sopenharmony_ci */ 7388c2ecf20Sopenharmony_ci btrfs_record_root_in_trans(h, root); 7398c2ecf20Sopenharmony_ci 7408c2ecf20Sopenharmony_ci return h; 7418c2ecf20Sopenharmony_ci 7428c2ecf20Sopenharmony_cijoin_fail: 7438c2ecf20Sopenharmony_ci if (type & __TRANS_FREEZABLE) 7448c2ecf20Sopenharmony_ci sb_end_intwrite(fs_info->sb); 7458c2ecf20Sopenharmony_ci kmem_cache_free(btrfs_trans_handle_cachep, h); 7468c2ecf20Sopenharmony_cialloc_fail: 7478c2ecf20Sopenharmony_ci if (num_bytes) 7488c2ecf20Sopenharmony_ci btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv, 7498c2ecf20Sopenharmony_ci num_bytes, NULL); 7508c2ecf20Sopenharmony_cireserve_fail: 7518c2ecf20Sopenharmony_ci btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved); 7528c2ecf20Sopenharmony_ci return ERR_PTR(ret); 7538c2ecf20Sopenharmony_ci} 7548c2ecf20Sopenharmony_ci 7558c2ecf20Sopenharmony_cistruct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 7568c2ecf20Sopenharmony_ci unsigned int num_items) 7578c2ecf20Sopenharmony_ci{ 7588c2ecf20Sopenharmony_ci return start_transaction(root, num_items, TRANS_START, 7598c2ecf20Sopenharmony_ci BTRFS_RESERVE_FLUSH_ALL, true); 7608c2ecf20Sopenharmony_ci} 7618c2ecf20Sopenharmony_ci 7628c2ecf20Sopenharmony_cistruct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( 7638c2ecf20Sopenharmony_ci struct btrfs_root *root, 7648c2ecf20Sopenharmony_ci unsigned int num_items) 7658c2ecf20Sopenharmony_ci{ 7668c2ecf20Sopenharmony_ci return start_transaction(root, num_items, TRANS_START, 7678c2ecf20Sopenharmony_ci BTRFS_RESERVE_FLUSH_ALL_STEAL, false); 7688c2ecf20Sopenharmony_ci} 7698c2ecf20Sopenharmony_ci 7708c2ecf20Sopenharmony_cistruct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) 7718c2ecf20Sopenharmony_ci{ 7728c2ecf20Sopenharmony_ci return start_transaction(root, 0, TRANS_JOIN, BTRFS_RESERVE_NO_FLUSH, 7738c2ecf20Sopenharmony_ci true); 7748c2ecf20Sopenharmony_ci} 7758c2ecf20Sopenharmony_ci 
7768c2ecf20Sopenharmony_cistruct btrfs_trans_handle *btrfs_join_transaction_spacecache(struct btrfs_root *root) 7778c2ecf20Sopenharmony_ci{ 7788c2ecf20Sopenharmony_ci return start_transaction(root, 0, TRANS_JOIN_NOLOCK, 7798c2ecf20Sopenharmony_ci BTRFS_RESERVE_NO_FLUSH, true); 7808c2ecf20Sopenharmony_ci} 7818c2ecf20Sopenharmony_ci 7828c2ecf20Sopenharmony_ci/* 7838c2ecf20Sopenharmony_ci * Similar to regular join but it never starts a transaction when none is 7848c2ecf20Sopenharmony_ci * running or after waiting for the current one to finish. 7858c2ecf20Sopenharmony_ci */ 7868c2ecf20Sopenharmony_cistruct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root) 7878c2ecf20Sopenharmony_ci{ 7888c2ecf20Sopenharmony_ci return start_transaction(root, 0, TRANS_JOIN_NOSTART, 7898c2ecf20Sopenharmony_ci BTRFS_RESERVE_NO_FLUSH, true); 7908c2ecf20Sopenharmony_ci} 7918c2ecf20Sopenharmony_ci 7928c2ecf20Sopenharmony_ci/* 7938c2ecf20Sopenharmony_ci * btrfs_attach_transaction() - catch the running transaction 7948c2ecf20Sopenharmony_ci * 7958c2ecf20Sopenharmony_ci * It is used when we want to commit the current the transaction, but 7968c2ecf20Sopenharmony_ci * don't want to start a new one. 7978c2ecf20Sopenharmony_ci * 7988c2ecf20Sopenharmony_ci * Note: If this function return -ENOENT, it just means there is no 7998c2ecf20Sopenharmony_ci * running transaction. But it is possible that the inactive transaction 8008c2ecf20Sopenharmony_ci * is still in the memory, not fully on disk. 
If you hope there is no 8018c2ecf20Sopenharmony_ci * inactive transaction in the fs when -ENOENT is returned, you should 8028c2ecf20Sopenharmony_ci * invoke 8038c2ecf20Sopenharmony_ci * btrfs_attach_transaction_barrier() 8048c2ecf20Sopenharmony_ci */ 8058c2ecf20Sopenharmony_cistruct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root) 8068c2ecf20Sopenharmony_ci{ 8078c2ecf20Sopenharmony_ci return start_transaction(root, 0, TRANS_ATTACH, 8088c2ecf20Sopenharmony_ci BTRFS_RESERVE_NO_FLUSH, true); 8098c2ecf20Sopenharmony_ci} 8108c2ecf20Sopenharmony_ci 8118c2ecf20Sopenharmony_ci/* 8128c2ecf20Sopenharmony_ci * btrfs_attach_transaction_barrier() - catch the running transaction 8138c2ecf20Sopenharmony_ci * 8148c2ecf20Sopenharmony_ci * It is similar to the above function, the difference is this one 8158c2ecf20Sopenharmony_ci * will wait for all the inactive transactions until they fully 8168c2ecf20Sopenharmony_ci * complete. 8178c2ecf20Sopenharmony_ci */ 8188c2ecf20Sopenharmony_cistruct btrfs_trans_handle * 8198c2ecf20Sopenharmony_cibtrfs_attach_transaction_barrier(struct btrfs_root *root) 8208c2ecf20Sopenharmony_ci{ 8218c2ecf20Sopenharmony_ci struct btrfs_trans_handle *trans; 8228c2ecf20Sopenharmony_ci 8238c2ecf20Sopenharmony_ci trans = start_transaction(root, 0, TRANS_ATTACH, 8248c2ecf20Sopenharmony_ci BTRFS_RESERVE_NO_FLUSH, true); 8258c2ecf20Sopenharmony_ci if (trans == ERR_PTR(-ENOENT)) { 8268c2ecf20Sopenharmony_ci int ret; 8278c2ecf20Sopenharmony_ci 8288c2ecf20Sopenharmony_ci ret = btrfs_wait_for_commit(root->fs_info, 0); 8298c2ecf20Sopenharmony_ci if (ret) 8308c2ecf20Sopenharmony_ci return ERR_PTR(ret); 8318c2ecf20Sopenharmony_ci } 8328c2ecf20Sopenharmony_ci 8338c2ecf20Sopenharmony_ci return trans; 8348c2ecf20Sopenharmony_ci} 8358c2ecf20Sopenharmony_ci 8368c2ecf20Sopenharmony_ci/* wait for a transaction commit to be fully complete */ 8378c2ecf20Sopenharmony_cistatic noinline void wait_for_commit(struct btrfs_transaction *commit) 
8388c2ecf20Sopenharmony_ci{ 8398c2ecf20Sopenharmony_ci wait_event(commit->commit_wait, commit->state == TRANS_STATE_COMPLETED); 8408c2ecf20Sopenharmony_ci} 8418c2ecf20Sopenharmony_ci 8428c2ecf20Sopenharmony_ciint btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid) 8438c2ecf20Sopenharmony_ci{ 8448c2ecf20Sopenharmony_ci struct btrfs_transaction *cur_trans = NULL, *t; 8458c2ecf20Sopenharmony_ci int ret = 0; 8468c2ecf20Sopenharmony_ci 8478c2ecf20Sopenharmony_ci if (transid) { 8488c2ecf20Sopenharmony_ci if (transid <= fs_info->last_trans_committed) 8498c2ecf20Sopenharmony_ci goto out; 8508c2ecf20Sopenharmony_ci 8518c2ecf20Sopenharmony_ci /* find specified transaction */ 8528c2ecf20Sopenharmony_ci spin_lock(&fs_info->trans_lock); 8538c2ecf20Sopenharmony_ci list_for_each_entry(t, &fs_info->trans_list, list) { 8548c2ecf20Sopenharmony_ci if (t->transid == transid) { 8558c2ecf20Sopenharmony_ci cur_trans = t; 8568c2ecf20Sopenharmony_ci refcount_inc(&cur_trans->use_count); 8578c2ecf20Sopenharmony_ci ret = 0; 8588c2ecf20Sopenharmony_ci break; 8598c2ecf20Sopenharmony_ci } 8608c2ecf20Sopenharmony_ci if (t->transid > transid) { 8618c2ecf20Sopenharmony_ci ret = 0; 8628c2ecf20Sopenharmony_ci break; 8638c2ecf20Sopenharmony_ci } 8648c2ecf20Sopenharmony_ci } 8658c2ecf20Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 8668c2ecf20Sopenharmony_ci 8678c2ecf20Sopenharmony_ci /* 8688c2ecf20Sopenharmony_ci * The specified transaction doesn't exist, or we 8698c2ecf20Sopenharmony_ci * raced with btrfs_commit_transaction 8708c2ecf20Sopenharmony_ci */ 8718c2ecf20Sopenharmony_ci if (!cur_trans) { 8728c2ecf20Sopenharmony_ci if (transid > fs_info->last_trans_committed) 8738c2ecf20Sopenharmony_ci ret = -EINVAL; 8748c2ecf20Sopenharmony_ci goto out; 8758c2ecf20Sopenharmony_ci } 8768c2ecf20Sopenharmony_ci } else { 8778c2ecf20Sopenharmony_ci /* find newest transaction that is committing | committed */ 8788c2ecf20Sopenharmony_ci spin_lock(&fs_info->trans_lock); 8798c2ecf20Sopenharmony_ci 
list_for_each_entry_reverse(t, &fs_info->trans_list, 8808c2ecf20Sopenharmony_ci list) { 8818c2ecf20Sopenharmony_ci if (t->state >= TRANS_STATE_COMMIT_START) { 8828c2ecf20Sopenharmony_ci if (t->state == TRANS_STATE_COMPLETED) 8838c2ecf20Sopenharmony_ci break; 8848c2ecf20Sopenharmony_ci cur_trans = t; 8858c2ecf20Sopenharmony_ci refcount_inc(&cur_trans->use_count); 8868c2ecf20Sopenharmony_ci break; 8878c2ecf20Sopenharmony_ci } 8888c2ecf20Sopenharmony_ci } 8898c2ecf20Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 8908c2ecf20Sopenharmony_ci if (!cur_trans) 8918c2ecf20Sopenharmony_ci goto out; /* nothing committing|committed */ 8928c2ecf20Sopenharmony_ci } 8938c2ecf20Sopenharmony_ci 8948c2ecf20Sopenharmony_ci wait_for_commit(cur_trans); 8958c2ecf20Sopenharmony_ci ret = cur_trans->aborted; 8968c2ecf20Sopenharmony_ci btrfs_put_transaction(cur_trans); 8978c2ecf20Sopenharmony_ciout: 8988c2ecf20Sopenharmony_ci return ret; 8998c2ecf20Sopenharmony_ci} 9008c2ecf20Sopenharmony_ci 9018c2ecf20Sopenharmony_civoid btrfs_throttle(struct btrfs_fs_info *fs_info) 9028c2ecf20Sopenharmony_ci{ 9038c2ecf20Sopenharmony_ci wait_current_trans(fs_info); 9048c2ecf20Sopenharmony_ci} 9058c2ecf20Sopenharmony_ci 9068c2ecf20Sopenharmony_cistatic int should_end_transaction(struct btrfs_trans_handle *trans) 9078c2ecf20Sopenharmony_ci{ 9088c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = trans->fs_info; 9098c2ecf20Sopenharmony_ci 9108c2ecf20Sopenharmony_ci if (btrfs_check_space_for_delayed_refs(fs_info)) 9118c2ecf20Sopenharmony_ci return 1; 9128c2ecf20Sopenharmony_ci 9138c2ecf20Sopenharmony_ci return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 5); 9148c2ecf20Sopenharmony_ci} 9158c2ecf20Sopenharmony_ci 9168c2ecf20Sopenharmony_ciint btrfs_should_end_transaction(struct btrfs_trans_handle *trans) 9178c2ecf20Sopenharmony_ci{ 9188c2ecf20Sopenharmony_ci struct btrfs_transaction *cur_trans = trans->transaction; 9198c2ecf20Sopenharmony_ci 9208c2ecf20Sopenharmony_ci smp_mb(); 
9218c2ecf20Sopenharmony_ci if (cur_trans->state >= TRANS_STATE_COMMIT_START || 9228c2ecf20Sopenharmony_ci cur_trans->delayed_refs.flushing) 9238c2ecf20Sopenharmony_ci return 1; 9248c2ecf20Sopenharmony_ci 9258c2ecf20Sopenharmony_ci return should_end_transaction(trans); 9268c2ecf20Sopenharmony_ci} 9278c2ecf20Sopenharmony_ci 9288c2ecf20Sopenharmony_cistatic void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans) 9298c2ecf20Sopenharmony_ci 9308c2ecf20Sopenharmony_ci{ 9318c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = trans->fs_info; 9328c2ecf20Sopenharmony_ci 9338c2ecf20Sopenharmony_ci if (!trans->block_rsv) { 9348c2ecf20Sopenharmony_ci ASSERT(!trans->bytes_reserved); 9358c2ecf20Sopenharmony_ci return; 9368c2ecf20Sopenharmony_ci } 9378c2ecf20Sopenharmony_ci 9388c2ecf20Sopenharmony_ci if (!trans->bytes_reserved) 9398c2ecf20Sopenharmony_ci return; 9408c2ecf20Sopenharmony_ci 9418c2ecf20Sopenharmony_ci ASSERT(trans->block_rsv == &fs_info->trans_block_rsv); 9428c2ecf20Sopenharmony_ci trace_btrfs_space_reservation(fs_info, "transaction", 9438c2ecf20Sopenharmony_ci trans->transid, trans->bytes_reserved, 0); 9448c2ecf20Sopenharmony_ci btrfs_block_rsv_release(fs_info, trans->block_rsv, 9458c2ecf20Sopenharmony_ci trans->bytes_reserved, NULL); 9468c2ecf20Sopenharmony_ci trans->bytes_reserved = 0; 9478c2ecf20Sopenharmony_ci} 9488c2ecf20Sopenharmony_ci 9498c2ecf20Sopenharmony_cistatic int __btrfs_end_transaction(struct btrfs_trans_handle *trans, 9508c2ecf20Sopenharmony_ci int throttle) 9518c2ecf20Sopenharmony_ci{ 9528c2ecf20Sopenharmony_ci struct btrfs_fs_info *info = trans->fs_info; 9538c2ecf20Sopenharmony_ci struct btrfs_transaction *cur_trans = trans->transaction; 9548c2ecf20Sopenharmony_ci int err = 0; 9558c2ecf20Sopenharmony_ci 9568c2ecf20Sopenharmony_ci if (refcount_read(&trans->use_count) > 1) { 9578c2ecf20Sopenharmony_ci refcount_dec(&trans->use_count); 9588c2ecf20Sopenharmony_ci trans->block_rsv = trans->orig_rsv; 9598c2ecf20Sopenharmony_ci return 0; 
9608c2ecf20Sopenharmony_ci } 9618c2ecf20Sopenharmony_ci 9628c2ecf20Sopenharmony_ci btrfs_trans_release_metadata(trans); 9638c2ecf20Sopenharmony_ci trans->block_rsv = NULL; 9648c2ecf20Sopenharmony_ci 9658c2ecf20Sopenharmony_ci btrfs_create_pending_block_groups(trans); 9668c2ecf20Sopenharmony_ci 9678c2ecf20Sopenharmony_ci btrfs_trans_release_chunk_metadata(trans); 9688c2ecf20Sopenharmony_ci 9698c2ecf20Sopenharmony_ci if (trans->type & __TRANS_FREEZABLE) 9708c2ecf20Sopenharmony_ci sb_end_intwrite(info->sb); 9718c2ecf20Sopenharmony_ci 9728c2ecf20Sopenharmony_ci WARN_ON(cur_trans != info->running_transaction); 9738c2ecf20Sopenharmony_ci WARN_ON(atomic_read(&cur_trans->num_writers) < 1); 9748c2ecf20Sopenharmony_ci atomic_dec(&cur_trans->num_writers); 9758c2ecf20Sopenharmony_ci extwriter_counter_dec(cur_trans, trans->type); 9768c2ecf20Sopenharmony_ci 9778c2ecf20Sopenharmony_ci cond_wake_up(&cur_trans->writer_wait); 9788c2ecf20Sopenharmony_ci btrfs_put_transaction(cur_trans); 9798c2ecf20Sopenharmony_ci 9808c2ecf20Sopenharmony_ci if (current->journal_info == trans) 9818c2ecf20Sopenharmony_ci current->journal_info = NULL; 9828c2ecf20Sopenharmony_ci 9838c2ecf20Sopenharmony_ci if (throttle) 9848c2ecf20Sopenharmony_ci btrfs_run_delayed_iputs(info); 9858c2ecf20Sopenharmony_ci 9868c2ecf20Sopenharmony_ci if (TRANS_ABORTED(trans) || 9878c2ecf20Sopenharmony_ci test_bit(BTRFS_FS_STATE_ERROR, &info->fs_state)) { 9888c2ecf20Sopenharmony_ci wake_up_process(info->transaction_kthread); 9898c2ecf20Sopenharmony_ci if (TRANS_ABORTED(trans)) 9908c2ecf20Sopenharmony_ci err = trans->aborted; 9918c2ecf20Sopenharmony_ci else 9928c2ecf20Sopenharmony_ci err = -EROFS; 9938c2ecf20Sopenharmony_ci } 9948c2ecf20Sopenharmony_ci 9958c2ecf20Sopenharmony_ci kmem_cache_free(btrfs_trans_handle_cachep, trans); 9968c2ecf20Sopenharmony_ci return err; 9978c2ecf20Sopenharmony_ci} 9988c2ecf20Sopenharmony_ci 9998c2ecf20Sopenharmony_ciint btrfs_end_transaction(struct btrfs_trans_handle *trans) 
10008c2ecf20Sopenharmony_ci{ 10018c2ecf20Sopenharmony_ci return __btrfs_end_transaction(trans, 0); 10028c2ecf20Sopenharmony_ci} 10038c2ecf20Sopenharmony_ci 10048c2ecf20Sopenharmony_ciint btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans) 10058c2ecf20Sopenharmony_ci{ 10068c2ecf20Sopenharmony_ci return __btrfs_end_transaction(trans, 1); 10078c2ecf20Sopenharmony_ci} 10088c2ecf20Sopenharmony_ci 10098c2ecf20Sopenharmony_ci/* 10108c2ecf20Sopenharmony_ci * when btree blocks are allocated, they have some corresponding bits set for 10118c2ecf20Sopenharmony_ci * them in one of two extent_io trees. This is used to make sure all of 10128c2ecf20Sopenharmony_ci * those extents are sent to disk but does not wait on them 10138c2ecf20Sopenharmony_ci */ 10148c2ecf20Sopenharmony_ciint btrfs_write_marked_extents(struct btrfs_fs_info *fs_info, 10158c2ecf20Sopenharmony_ci struct extent_io_tree *dirty_pages, int mark) 10168c2ecf20Sopenharmony_ci{ 10178c2ecf20Sopenharmony_ci int err = 0; 10188c2ecf20Sopenharmony_ci int werr = 0; 10198c2ecf20Sopenharmony_ci struct address_space *mapping = fs_info->btree_inode->i_mapping; 10208c2ecf20Sopenharmony_ci struct extent_state *cached_state = NULL; 10218c2ecf20Sopenharmony_ci u64 start = 0; 10228c2ecf20Sopenharmony_ci u64 end; 10238c2ecf20Sopenharmony_ci 10248c2ecf20Sopenharmony_ci atomic_inc(&BTRFS_I(fs_info->btree_inode)->sync_writers); 10258c2ecf20Sopenharmony_ci while (!find_first_extent_bit(dirty_pages, start, &start, &end, 10268c2ecf20Sopenharmony_ci mark, &cached_state)) { 10278c2ecf20Sopenharmony_ci bool wait_writeback = false; 10288c2ecf20Sopenharmony_ci 10298c2ecf20Sopenharmony_ci err = convert_extent_bit(dirty_pages, start, end, 10308c2ecf20Sopenharmony_ci EXTENT_NEED_WAIT, 10318c2ecf20Sopenharmony_ci mark, &cached_state); 10328c2ecf20Sopenharmony_ci /* 10338c2ecf20Sopenharmony_ci * convert_extent_bit can return -ENOMEM, which is most of the 10348c2ecf20Sopenharmony_ci * time a temporary error. 
So when it happens, ignore the error 10358c2ecf20Sopenharmony_ci * and wait for writeback of this range to finish - because we 10368c2ecf20Sopenharmony_ci * failed to set the bit EXTENT_NEED_WAIT for the range, a call 10378c2ecf20Sopenharmony_ci * to __btrfs_wait_marked_extents() would not know that 10388c2ecf20Sopenharmony_ci * writeback for this range started and therefore wouldn't 10398c2ecf20Sopenharmony_ci * wait for it to finish - we don't want to commit a 10408c2ecf20Sopenharmony_ci * superblock that points to btree nodes/leafs for which 10418c2ecf20Sopenharmony_ci * writeback hasn't finished yet (and without errors). 10428c2ecf20Sopenharmony_ci * We cleanup any entries left in the io tree when committing 10438c2ecf20Sopenharmony_ci * the transaction (through extent_io_tree_release()). 10448c2ecf20Sopenharmony_ci */ 10458c2ecf20Sopenharmony_ci if (err == -ENOMEM) { 10468c2ecf20Sopenharmony_ci err = 0; 10478c2ecf20Sopenharmony_ci wait_writeback = true; 10488c2ecf20Sopenharmony_ci } 10498c2ecf20Sopenharmony_ci if (!err) 10508c2ecf20Sopenharmony_ci err = filemap_fdatawrite_range(mapping, start, end); 10518c2ecf20Sopenharmony_ci if (err) 10528c2ecf20Sopenharmony_ci werr = err; 10538c2ecf20Sopenharmony_ci else if (wait_writeback) 10548c2ecf20Sopenharmony_ci werr = filemap_fdatawait_range(mapping, start, end); 10558c2ecf20Sopenharmony_ci free_extent_state(cached_state); 10568c2ecf20Sopenharmony_ci cached_state = NULL; 10578c2ecf20Sopenharmony_ci cond_resched(); 10588c2ecf20Sopenharmony_ci start = end + 1; 10598c2ecf20Sopenharmony_ci } 10608c2ecf20Sopenharmony_ci atomic_dec(&BTRFS_I(fs_info->btree_inode)->sync_writers); 10618c2ecf20Sopenharmony_ci return werr; 10628c2ecf20Sopenharmony_ci} 10638c2ecf20Sopenharmony_ci 10648c2ecf20Sopenharmony_ci/* 10658c2ecf20Sopenharmony_ci * when btree blocks are allocated, they have some corresponding bits set for 10668c2ecf20Sopenharmony_ci * them in one of two extent_io trees. 
This is used to make sure all of 10678c2ecf20Sopenharmony_ci * those extents are on disk for transaction or log commit. We wait 10688c2ecf20Sopenharmony_ci * on all the pages and clear them from the dirty pages state tree 10698c2ecf20Sopenharmony_ci */ 10708c2ecf20Sopenharmony_cistatic int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info, 10718c2ecf20Sopenharmony_ci struct extent_io_tree *dirty_pages) 10728c2ecf20Sopenharmony_ci{ 10738c2ecf20Sopenharmony_ci int err = 0; 10748c2ecf20Sopenharmony_ci int werr = 0; 10758c2ecf20Sopenharmony_ci struct address_space *mapping = fs_info->btree_inode->i_mapping; 10768c2ecf20Sopenharmony_ci struct extent_state *cached_state = NULL; 10778c2ecf20Sopenharmony_ci u64 start = 0; 10788c2ecf20Sopenharmony_ci u64 end; 10798c2ecf20Sopenharmony_ci 10808c2ecf20Sopenharmony_ci while (!find_first_extent_bit(dirty_pages, start, &start, &end, 10818c2ecf20Sopenharmony_ci EXTENT_NEED_WAIT, &cached_state)) { 10828c2ecf20Sopenharmony_ci /* 10838c2ecf20Sopenharmony_ci * Ignore -ENOMEM errors returned by clear_extent_bit(). 10848c2ecf20Sopenharmony_ci * When committing the transaction, we'll remove any entries 10858c2ecf20Sopenharmony_ci * left in the io tree. For a log commit, we don't remove them 10868c2ecf20Sopenharmony_ci * after committing the log because the tree can be accessed 10878c2ecf20Sopenharmony_ci * concurrently - we do it only at transaction commit time when 10888c2ecf20Sopenharmony_ci * it's safe to do it (through extent_io_tree_release()). 
10898c2ecf20Sopenharmony_ci */ 10908c2ecf20Sopenharmony_ci err = clear_extent_bit(dirty_pages, start, end, 10918c2ecf20Sopenharmony_ci EXTENT_NEED_WAIT, 0, 0, &cached_state); 10928c2ecf20Sopenharmony_ci if (err == -ENOMEM) 10938c2ecf20Sopenharmony_ci err = 0; 10948c2ecf20Sopenharmony_ci if (!err) 10958c2ecf20Sopenharmony_ci err = filemap_fdatawait_range(mapping, start, end); 10968c2ecf20Sopenharmony_ci if (err) 10978c2ecf20Sopenharmony_ci werr = err; 10988c2ecf20Sopenharmony_ci free_extent_state(cached_state); 10998c2ecf20Sopenharmony_ci cached_state = NULL; 11008c2ecf20Sopenharmony_ci cond_resched(); 11018c2ecf20Sopenharmony_ci start = end + 1; 11028c2ecf20Sopenharmony_ci } 11038c2ecf20Sopenharmony_ci if (err) 11048c2ecf20Sopenharmony_ci werr = err; 11058c2ecf20Sopenharmony_ci return werr; 11068c2ecf20Sopenharmony_ci} 11078c2ecf20Sopenharmony_ci 11088c2ecf20Sopenharmony_cistatic int btrfs_wait_extents(struct btrfs_fs_info *fs_info, 11098c2ecf20Sopenharmony_ci struct extent_io_tree *dirty_pages) 11108c2ecf20Sopenharmony_ci{ 11118c2ecf20Sopenharmony_ci bool errors = false; 11128c2ecf20Sopenharmony_ci int err; 11138c2ecf20Sopenharmony_ci 11148c2ecf20Sopenharmony_ci err = __btrfs_wait_marked_extents(fs_info, dirty_pages); 11158c2ecf20Sopenharmony_ci if (test_and_clear_bit(BTRFS_FS_BTREE_ERR, &fs_info->flags)) 11168c2ecf20Sopenharmony_ci errors = true; 11178c2ecf20Sopenharmony_ci 11188c2ecf20Sopenharmony_ci if (errors && !err) 11198c2ecf20Sopenharmony_ci err = -EIO; 11208c2ecf20Sopenharmony_ci return err; 11218c2ecf20Sopenharmony_ci} 11228c2ecf20Sopenharmony_ci 11238c2ecf20Sopenharmony_ciint btrfs_wait_tree_log_extents(struct btrfs_root *log_root, int mark) 11248c2ecf20Sopenharmony_ci{ 11258c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = log_root->fs_info; 11268c2ecf20Sopenharmony_ci struct extent_io_tree *dirty_pages = &log_root->dirty_log_pages; 11278c2ecf20Sopenharmony_ci bool errors = false; 11288c2ecf20Sopenharmony_ci int err; 11298c2ecf20Sopenharmony_ci 
11308c2ecf20Sopenharmony_ci ASSERT(log_root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); 11318c2ecf20Sopenharmony_ci 11328c2ecf20Sopenharmony_ci err = __btrfs_wait_marked_extents(fs_info, dirty_pages); 11338c2ecf20Sopenharmony_ci if ((mark & EXTENT_DIRTY) && 11348c2ecf20Sopenharmony_ci test_and_clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags)) 11358c2ecf20Sopenharmony_ci errors = true; 11368c2ecf20Sopenharmony_ci 11378c2ecf20Sopenharmony_ci if ((mark & EXTENT_NEW) && 11388c2ecf20Sopenharmony_ci test_and_clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags)) 11398c2ecf20Sopenharmony_ci errors = true; 11408c2ecf20Sopenharmony_ci 11418c2ecf20Sopenharmony_ci if (errors && !err) 11428c2ecf20Sopenharmony_ci err = -EIO; 11438c2ecf20Sopenharmony_ci return err; 11448c2ecf20Sopenharmony_ci} 11458c2ecf20Sopenharmony_ci 11468c2ecf20Sopenharmony_ci/* 11478c2ecf20Sopenharmony_ci * When btree blocks are allocated the corresponding extents are marked dirty. 11488c2ecf20Sopenharmony_ci * This function ensures such extents are persisted on disk for transaction or 11498c2ecf20Sopenharmony_ci * log commit. 
11508c2ecf20Sopenharmony_ci * 11518c2ecf20Sopenharmony_ci * @trans: transaction whose dirty pages we'd like to write 11528c2ecf20Sopenharmony_ci */ 11538c2ecf20Sopenharmony_cistatic int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans) 11548c2ecf20Sopenharmony_ci{ 11558c2ecf20Sopenharmony_ci int ret; 11568c2ecf20Sopenharmony_ci int ret2; 11578c2ecf20Sopenharmony_ci struct extent_io_tree *dirty_pages = &trans->transaction->dirty_pages; 11588c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = trans->fs_info; 11598c2ecf20Sopenharmony_ci struct blk_plug plug; 11608c2ecf20Sopenharmony_ci 11618c2ecf20Sopenharmony_ci blk_start_plug(&plug); 11628c2ecf20Sopenharmony_ci ret = btrfs_write_marked_extents(fs_info, dirty_pages, EXTENT_DIRTY); 11638c2ecf20Sopenharmony_ci blk_finish_plug(&plug); 11648c2ecf20Sopenharmony_ci ret2 = btrfs_wait_extents(fs_info, dirty_pages); 11658c2ecf20Sopenharmony_ci 11668c2ecf20Sopenharmony_ci extent_io_tree_release(&trans->transaction->dirty_pages); 11678c2ecf20Sopenharmony_ci 11688c2ecf20Sopenharmony_ci if (ret) 11698c2ecf20Sopenharmony_ci return ret; 11708c2ecf20Sopenharmony_ci else if (ret2) 11718c2ecf20Sopenharmony_ci return ret2; 11728c2ecf20Sopenharmony_ci else 11738c2ecf20Sopenharmony_ci return 0; 11748c2ecf20Sopenharmony_ci} 11758c2ecf20Sopenharmony_ci 11768c2ecf20Sopenharmony_ci/* 11778c2ecf20Sopenharmony_ci * this is used to update the root pointer in the tree of tree roots. 11788c2ecf20Sopenharmony_ci * 11798c2ecf20Sopenharmony_ci * But, in the case of the extent allocation tree, updating the root 11808c2ecf20Sopenharmony_ci * pointer may allocate blocks which may change the root of the extent 11818c2ecf20Sopenharmony_ci * allocation tree. 11828c2ecf20Sopenharmony_ci * 11838c2ecf20Sopenharmony_ci * So, this loops and repeats and makes sure the cowonly root didn't 11848c2ecf20Sopenharmony_ci * change while the root pointer was being updated in the metadata. 
11858c2ecf20Sopenharmony_ci */ 11868c2ecf20Sopenharmony_cistatic int update_cowonly_root(struct btrfs_trans_handle *trans, 11878c2ecf20Sopenharmony_ci struct btrfs_root *root) 11888c2ecf20Sopenharmony_ci{ 11898c2ecf20Sopenharmony_ci int ret; 11908c2ecf20Sopenharmony_ci u64 old_root_bytenr; 11918c2ecf20Sopenharmony_ci u64 old_root_used; 11928c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 11938c2ecf20Sopenharmony_ci struct btrfs_root *tree_root = fs_info->tree_root; 11948c2ecf20Sopenharmony_ci 11958c2ecf20Sopenharmony_ci old_root_used = btrfs_root_used(&root->root_item); 11968c2ecf20Sopenharmony_ci 11978c2ecf20Sopenharmony_ci while (1) { 11988c2ecf20Sopenharmony_ci old_root_bytenr = btrfs_root_bytenr(&root->root_item); 11998c2ecf20Sopenharmony_ci if (old_root_bytenr == root->node->start && 12008c2ecf20Sopenharmony_ci old_root_used == btrfs_root_used(&root->root_item)) 12018c2ecf20Sopenharmony_ci break; 12028c2ecf20Sopenharmony_ci 12038c2ecf20Sopenharmony_ci btrfs_set_root_node(&root->root_item, root->node); 12048c2ecf20Sopenharmony_ci ret = btrfs_update_root(trans, tree_root, 12058c2ecf20Sopenharmony_ci &root->root_key, 12068c2ecf20Sopenharmony_ci &root->root_item); 12078c2ecf20Sopenharmony_ci if (ret) 12088c2ecf20Sopenharmony_ci return ret; 12098c2ecf20Sopenharmony_ci 12108c2ecf20Sopenharmony_ci old_root_used = btrfs_root_used(&root->root_item); 12118c2ecf20Sopenharmony_ci } 12128c2ecf20Sopenharmony_ci 12138c2ecf20Sopenharmony_ci return 0; 12148c2ecf20Sopenharmony_ci} 12158c2ecf20Sopenharmony_ci 12168c2ecf20Sopenharmony_ci/* 12178c2ecf20Sopenharmony_ci * update all the cowonly tree roots on disk 12188c2ecf20Sopenharmony_ci * 12198c2ecf20Sopenharmony_ci * The error handling in this function may not be obvious. Any of the 12208c2ecf20Sopenharmony_ci * failures will cause the file system to go offline. We still need 12218c2ecf20Sopenharmony_ci * to clean up the delayed refs. 
12228c2ecf20Sopenharmony_ci */ 12238c2ecf20Sopenharmony_cistatic noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans) 12248c2ecf20Sopenharmony_ci{ 12258c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = trans->fs_info; 12268c2ecf20Sopenharmony_ci struct list_head *dirty_bgs = &trans->transaction->dirty_bgs; 12278c2ecf20Sopenharmony_ci struct list_head *io_bgs = &trans->transaction->io_bgs; 12288c2ecf20Sopenharmony_ci struct list_head *next; 12298c2ecf20Sopenharmony_ci struct extent_buffer *eb; 12308c2ecf20Sopenharmony_ci int ret; 12318c2ecf20Sopenharmony_ci 12328c2ecf20Sopenharmony_ci eb = btrfs_lock_root_node(fs_info->tree_root); 12338c2ecf20Sopenharmony_ci ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 12348c2ecf20Sopenharmony_ci 0, &eb, BTRFS_NESTING_COW); 12358c2ecf20Sopenharmony_ci btrfs_tree_unlock(eb); 12368c2ecf20Sopenharmony_ci free_extent_buffer(eb); 12378c2ecf20Sopenharmony_ci 12388c2ecf20Sopenharmony_ci if (ret) 12398c2ecf20Sopenharmony_ci return ret; 12408c2ecf20Sopenharmony_ci 12418c2ecf20Sopenharmony_ci ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); 12428c2ecf20Sopenharmony_ci if (ret) 12438c2ecf20Sopenharmony_ci return ret; 12448c2ecf20Sopenharmony_ci 12458c2ecf20Sopenharmony_ci ret = btrfs_run_dev_stats(trans); 12468c2ecf20Sopenharmony_ci if (ret) 12478c2ecf20Sopenharmony_ci return ret; 12488c2ecf20Sopenharmony_ci ret = btrfs_run_dev_replace(trans); 12498c2ecf20Sopenharmony_ci if (ret) 12508c2ecf20Sopenharmony_ci return ret; 12518c2ecf20Sopenharmony_ci ret = btrfs_run_qgroups(trans); 12528c2ecf20Sopenharmony_ci if (ret) 12538c2ecf20Sopenharmony_ci return ret; 12548c2ecf20Sopenharmony_ci 12558c2ecf20Sopenharmony_ci ret = btrfs_setup_space_cache(trans); 12568c2ecf20Sopenharmony_ci if (ret) 12578c2ecf20Sopenharmony_ci return ret; 12588c2ecf20Sopenharmony_ci 12598c2ecf20Sopenharmony_ci /* run_qgroups might have added some more refs */ 12608c2ecf20Sopenharmony_ci ret = btrfs_run_delayed_refs(trans, (unsigned 
long)-1); 12618c2ecf20Sopenharmony_ci if (ret) 12628c2ecf20Sopenharmony_ci return ret; 12638c2ecf20Sopenharmony_ciagain: 12648c2ecf20Sopenharmony_ci while (!list_empty(&fs_info->dirty_cowonly_roots)) { 12658c2ecf20Sopenharmony_ci struct btrfs_root *root; 12668c2ecf20Sopenharmony_ci next = fs_info->dirty_cowonly_roots.next; 12678c2ecf20Sopenharmony_ci list_del_init(next); 12688c2ecf20Sopenharmony_ci root = list_entry(next, struct btrfs_root, dirty_list); 12698c2ecf20Sopenharmony_ci clear_bit(BTRFS_ROOT_DIRTY, &root->state); 12708c2ecf20Sopenharmony_ci 12718c2ecf20Sopenharmony_ci if (root != fs_info->extent_root) 12728c2ecf20Sopenharmony_ci list_add_tail(&root->dirty_list, 12738c2ecf20Sopenharmony_ci &trans->transaction->switch_commits); 12748c2ecf20Sopenharmony_ci ret = update_cowonly_root(trans, root); 12758c2ecf20Sopenharmony_ci if (ret) 12768c2ecf20Sopenharmony_ci return ret; 12778c2ecf20Sopenharmony_ci ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); 12788c2ecf20Sopenharmony_ci if (ret) 12798c2ecf20Sopenharmony_ci return ret; 12808c2ecf20Sopenharmony_ci } 12818c2ecf20Sopenharmony_ci 12828c2ecf20Sopenharmony_ci while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) { 12838c2ecf20Sopenharmony_ci ret = btrfs_write_dirty_block_groups(trans); 12848c2ecf20Sopenharmony_ci if (ret) 12858c2ecf20Sopenharmony_ci return ret; 12868c2ecf20Sopenharmony_ci ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); 12878c2ecf20Sopenharmony_ci if (ret) 12888c2ecf20Sopenharmony_ci return ret; 12898c2ecf20Sopenharmony_ci } 12908c2ecf20Sopenharmony_ci 12918c2ecf20Sopenharmony_ci if (!list_empty(&fs_info->dirty_cowonly_roots)) 12928c2ecf20Sopenharmony_ci goto again; 12938c2ecf20Sopenharmony_ci 12948c2ecf20Sopenharmony_ci list_add_tail(&fs_info->extent_root->dirty_list, 12958c2ecf20Sopenharmony_ci &trans->transaction->switch_commits); 12968c2ecf20Sopenharmony_ci 12978c2ecf20Sopenharmony_ci /* Update dev-replace pointer once everything is committed */ 12988c2ecf20Sopenharmony_ci 
fs_info->dev_replace.committed_cursor_left = 12998c2ecf20Sopenharmony_ci fs_info->dev_replace.cursor_left_last_write_of_item; 13008c2ecf20Sopenharmony_ci 13018c2ecf20Sopenharmony_ci return 0; 13028c2ecf20Sopenharmony_ci} 13038c2ecf20Sopenharmony_ci 13048c2ecf20Sopenharmony_ci/* 13058c2ecf20Sopenharmony_ci * dead roots are old snapshots that need to be deleted. This allocates 13068c2ecf20Sopenharmony_ci * a dirty root struct and adds it into the list of dead roots that need to 13078c2ecf20Sopenharmony_ci * be deleted 13088c2ecf20Sopenharmony_ci */ 13098c2ecf20Sopenharmony_civoid btrfs_add_dead_root(struct btrfs_root *root) 13108c2ecf20Sopenharmony_ci{ 13118c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 13128c2ecf20Sopenharmony_ci 13138c2ecf20Sopenharmony_ci spin_lock(&fs_info->trans_lock); 13148c2ecf20Sopenharmony_ci if (list_empty(&root->root_list)) { 13158c2ecf20Sopenharmony_ci btrfs_grab_root(root); 13168c2ecf20Sopenharmony_ci list_add_tail(&root->root_list, &fs_info->dead_roots); 13178c2ecf20Sopenharmony_ci } 13188c2ecf20Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 13198c2ecf20Sopenharmony_ci} 13208c2ecf20Sopenharmony_ci 13218c2ecf20Sopenharmony_ci/* 13228c2ecf20Sopenharmony_ci * update all the cowonly tree roots on disk 13238c2ecf20Sopenharmony_ci */ 13248c2ecf20Sopenharmony_cistatic noinline int commit_fs_roots(struct btrfs_trans_handle *trans) 13258c2ecf20Sopenharmony_ci{ 13268c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = trans->fs_info; 13278c2ecf20Sopenharmony_ci struct btrfs_root *gang[8]; 13288c2ecf20Sopenharmony_ci int i; 13298c2ecf20Sopenharmony_ci int ret; 13308c2ecf20Sopenharmony_ci 13318c2ecf20Sopenharmony_ci spin_lock(&fs_info->fs_roots_radix_lock); 13328c2ecf20Sopenharmony_ci while (1) { 13338c2ecf20Sopenharmony_ci ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix, 13348c2ecf20Sopenharmony_ci (void **)gang, 0, 13358c2ecf20Sopenharmony_ci ARRAY_SIZE(gang), 13368c2ecf20Sopenharmony_ci 
BTRFS_ROOT_TRANS_TAG); 13378c2ecf20Sopenharmony_ci if (ret == 0) 13388c2ecf20Sopenharmony_ci break; 13398c2ecf20Sopenharmony_ci for (i = 0; i < ret; i++) { 13408c2ecf20Sopenharmony_ci struct btrfs_root *root = gang[i]; 13418c2ecf20Sopenharmony_ci int ret2; 13428c2ecf20Sopenharmony_ci 13438c2ecf20Sopenharmony_ci radix_tree_tag_clear(&fs_info->fs_roots_radix, 13448c2ecf20Sopenharmony_ci (unsigned long)root->root_key.objectid, 13458c2ecf20Sopenharmony_ci BTRFS_ROOT_TRANS_TAG); 13468c2ecf20Sopenharmony_ci spin_unlock(&fs_info->fs_roots_radix_lock); 13478c2ecf20Sopenharmony_ci 13488c2ecf20Sopenharmony_ci btrfs_free_log(trans, root); 13498c2ecf20Sopenharmony_ci btrfs_update_reloc_root(trans, root); 13508c2ecf20Sopenharmony_ci 13518c2ecf20Sopenharmony_ci btrfs_save_ino_cache(root, trans); 13528c2ecf20Sopenharmony_ci 13538c2ecf20Sopenharmony_ci /* see comments in should_cow_block() */ 13548c2ecf20Sopenharmony_ci clear_bit(BTRFS_ROOT_FORCE_COW, &root->state); 13558c2ecf20Sopenharmony_ci smp_mb__after_atomic(); 13568c2ecf20Sopenharmony_ci 13578c2ecf20Sopenharmony_ci if (root->commit_root != root->node) { 13588c2ecf20Sopenharmony_ci list_add_tail(&root->dirty_list, 13598c2ecf20Sopenharmony_ci &trans->transaction->switch_commits); 13608c2ecf20Sopenharmony_ci btrfs_set_root_node(&root->root_item, 13618c2ecf20Sopenharmony_ci root->node); 13628c2ecf20Sopenharmony_ci } 13638c2ecf20Sopenharmony_ci 13648c2ecf20Sopenharmony_ci ret2 = btrfs_update_root(trans, fs_info->tree_root, 13658c2ecf20Sopenharmony_ci &root->root_key, 13668c2ecf20Sopenharmony_ci &root->root_item); 13678c2ecf20Sopenharmony_ci if (ret2) 13688c2ecf20Sopenharmony_ci return ret2; 13698c2ecf20Sopenharmony_ci spin_lock(&fs_info->fs_roots_radix_lock); 13708c2ecf20Sopenharmony_ci btrfs_qgroup_free_meta_all_pertrans(root); 13718c2ecf20Sopenharmony_ci } 13728c2ecf20Sopenharmony_ci } 13738c2ecf20Sopenharmony_ci spin_unlock(&fs_info->fs_roots_radix_lock); 13748c2ecf20Sopenharmony_ci return 0; 13758c2ecf20Sopenharmony_ci} 
13768c2ecf20Sopenharmony_ci 13778c2ecf20Sopenharmony_ci/* 13788c2ecf20Sopenharmony_ci * defrag a given btree. 13798c2ecf20Sopenharmony_ci * Every leaf in the btree is read and defragged. 13808c2ecf20Sopenharmony_ci */ 13818c2ecf20Sopenharmony_ciint btrfs_defrag_root(struct btrfs_root *root) 13828c2ecf20Sopenharmony_ci{ 13838c2ecf20Sopenharmony_ci struct btrfs_fs_info *info = root->fs_info; 13848c2ecf20Sopenharmony_ci struct btrfs_trans_handle *trans; 13858c2ecf20Sopenharmony_ci int ret; 13868c2ecf20Sopenharmony_ci 13878c2ecf20Sopenharmony_ci if (test_and_set_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state)) 13888c2ecf20Sopenharmony_ci return 0; 13898c2ecf20Sopenharmony_ci 13908c2ecf20Sopenharmony_ci while (1) { 13918c2ecf20Sopenharmony_ci trans = btrfs_start_transaction(root, 0); 13928c2ecf20Sopenharmony_ci if (IS_ERR(trans)) { 13938c2ecf20Sopenharmony_ci ret = PTR_ERR(trans); 13948c2ecf20Sopenharmony_ci break; 13958c2ecf20Sopenharmony_ci } 13968c2ecf20Sopenharmony_ci 13978c2ecf20Sopenharmony_ci ret = btrfs_defrag_leaves(trans, root); 13988c2ecf20Sopenharmony_ci 13998c2ecf20Sopenharmony_ci btrfs_end_transaction(trans); 14008c2ecf20Sopenharmony_ci btrfs_btree_balance_dirty(info); 14018c2ecf20Sopenharmony_ci cond_resched(); 14028c2ecf20Sopenharmony_ci 14038c2ecf20Sopenharmony_ci if (btrfs_fs_closing(info) || ret != -EAGAIN) 14048c2ecf20Sopenharmony_ci break; 14058c2ecf20Sopenharmony_ci 14068c2ecf20Sopenharmony_ci if (btrfs_defrag_cancelled(info)) { 14078c2ecf20Sopenharmony_ci btrfs_debug(info, "defrag_root cancelled"); 14088c2ecf20Sopenharmony_ci ret = -EAGAIN; 14098c2ecf20Sopenharmony_ci break; 14108c2ecf20Sopenharmony_ci } 14118c2ecf20Sopenharmony_ci } 14128c2ecf20Sopenharmony_ci clear_bit(BTRFS_ROOT_DEFRAG_RUNNING, &root->state); 14138c2ecf20Sopenharmony_ci return ret; 14148c2ecf20Sopenharmony_ci} 14158c2ecf20Sopenharmony_ci 14168c2ecf20Sopenharmony_ci/* 14178c2ecf20Sopenharmony_ci * Do all special snapshot related qgroup dirty hack. 
14188c2ecf20Sopenharmony_ci * 14198c2ecf20Sopenharmony_ci * Will do all needed qgroup inherit and dirty hack like switch commit 14208c2ecf20Sopenharmony_ci * roots inside one transaction and write all btree into disk, to make 14218c2ecf20Sopenharmony_ci * qgroup works. 14228c2ecf20Sopenharmony_ci */ 14238c2ecf20Sopenharmony_cistatic int qgroup_account_snapshot(struct btrfs_trans_handle *trans, 14248c2ecf20Sopenharmony_ci struct btrfs_root *src, 14258c2ecf20Sopenharmony_ci struct btrfs_root *parent, 14268c2ecf20Sopenharmony_ci struct btrfs_qgroup_inherit *inherit, 14278c2ecf20Sopenharmony_ci u64 dst_objectid) 14288c2ecf20Sopenharmony_ci{ 14298c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = src->fs_info; 14308c2ecf20Sopenharmony_ci int ret; 14318c2ecf20Sopenharmony_ci 14328c2ecf20Sopenharmony_ci /* 14338c2ecf20Sopenharmony_ci * Save some performance in the case that qgroups are not 14348c2ecf20Sopenharmony_ci * enabled. If this check races with the ioctl, rescan will 14358c2ecf20Sopenharmony_ci * kick in anyway. 14368c2ecf20Sopenharmony_ci */ 14378c2ecf20Sopenharmony_ci if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) 14388c2ecf20Sopenharmony_ci return 0; 14398c2ecf20Sopenharmony_ci 14408c2ecf20Sopenharmony_ci /* 14418c2ecf20Sopenharmony_ci * Ensure dirty @src will be committed. Or, after coming 14428c2ecf20Sopenharmony_ci * commit_fs_roots() and switch_commit_roots(), any dirty but not 14438c2ecf20Sopenharmony_ci * recorded root will never be updated again, causing an outdated root 14448c2ecf20Sopenharmony_ci * item. 
14458c2ecf20Sopenharmony_ci */ 14468c2ecf20Sopenharmony_ci record_root_in_trans(trans, src, 1); 14478c2ecf20Sopenharmony_ci 14488c2ecf20Sopenharmony_ci /* 14498c2ecf20Sopenharmony_ci * We are going to commit transaction, see btrfs_commit_transaction() 14508c2ecf20Sopenharmony_ci * comment for reason locking tree_log_mutex 14518c2ecf20Sopenharmony_ci */ 14528c2ecf20Sopenharmony_ci mutex_lock(&fs_info->tree_log_mutex); 14538c2ecf20Sopenharmony_ci 14548c2ecf20Sopenharmony_ci ret = commit_fs_roots(trans); 14558c2ecf20Sopenharmony_ci if (ret) 14568c2ecf20Sopenharmony_ci goto out; 14578c2ecf20Sopenharmony_ci ret = btrfs_qgroup_account_extents(trans); 14588c2ecf20Sopenharmony_ci if (ret < 0) 14598c2ecf20Sopenharmony_ci goto out; 14608c2ecf20Sopenharmony_ci 14618c2ecf20Sopenharmony_ci /* Now qgroup are all updated, we can inherit it to new qgroups */ 14628c2ecf20Sopenharmony_ci ret = btrfs_qgroup_inherit(trans, src->root_key.objectid, dst_objectid, 14638c2ecf20Sopenharmony_ci inherit); 14648c2ecf20Sopenharmony_ci if (ret < 0) 14658c2ecf20Sopenharmony_ci goto out; 14668c2ecf20Sopenharmony_ci 14678c2ecf20Sopenharmony_ci /* 14688c2ecf20Sopenharmony_ci * Now we do a simplified commit transaction, which will: 14698c2ecf20Sopenharmony_ci * 1) commit all subvolume and extent tree 14708c2ecf20Sopenharmony_ci * To ensure all subvolume and extent tree have a valid 14718c2ecf20Sopenharmony_ci * commit_root to accounting later insert_dir_item() 14728c2ecf20Sopenharmony_ci * 2) write all btree blocks onto disk 14738c2ecf20Sopenharmony_ci * This is to make sure later btree modification will be cowed 14748c2ecf20Sopenharmony_ci * Or commit_root can be populated and cause wrong qgroup numbers 14758c2ecf20Sopenharmony_ci * In this simplified commit, we don't really care about other trees 14768c2ecf20Sopenharmony_ci * like chunk and root tree, as they won't affect qgroup. 14778c2ecf20Sopenharmony_ci * And we don't write super to avoid half committed status. 
14788c2ecf20Sopenharmony_ci */ 14798c2ecf20Sopenharmony_ci ret = commit_cowonly_roots(trans); 14808c2ecf20Sopenharmony_ci if (ret) 14818c2ecf20Sopenharmony_ci goto out; 14828c2ecf20Sopenharmony_ci switch_commit_roots(trans); 14838c2ecf20Sopenharmony_ci ret = btrfs_write_and_wait_transaction(trans); 14848c2ecf20Sopenharmony_ci if (ret) 14858c2ecf20Sopenharmony_ci btrfs_handle_fs_error(fs_info, ret, 14868c2ecf20Sopenharmony_ci "Error while writing out transaction for qgroup"); 14878c2ecf20Sopenharmony_ci 14888c2ecf20Sopenharmony_ciout: 14898c2ecf20Sopenharmony_ci mutex_unlock(&fs_info->tree_log_mutex); 14908c2ecf20Sopenharmony_ci 14918c2ecf20Sopenharmony_ci /* 14928c2ecf20Sopenharmony_ci * Force parent root to be updated, as we recorded it before so its 14938c2ecf20Sopenharmony_ci * last_trans == cur_transid. 14948c2ecf20Sopenharmony_ci * Or it won't be committed again onto disk after later 14958c2ecf20Sopenharmony_ci * insert_dir_item() 14968c2ecf20Sopenharmony_ci */ 14978c2ecf20Sopenharmony_ci if (!ret) 14988c2ecf20Sopenharmony_ci record_root_in_trans(trans, parent, 1); 14998c2ecf20Sopenharmony_ci return ret; 15008c2ecf20Sopenharmony_ci} 15018c2ecf20Sopenharmony_ci 15028c2ecf20Sopenharmony_ci/* 15038c2ecf20Sopenharmony_ci * new snapshots need to be created at a very specific time in the 15048c2ecf20Sopenharmony_ci * transaction commit. This does the actual creation. 15058c2ecf20Sopenharmony_ci * 15068c2ecf20Sopenharmony_ci * Note: 15078c2ecf20Sopenharmony_ci * If the error which may affect the commitment of the current transaction 15088c2ecf20Sopenharmony_ci * happens, we should return the error number. If the error which just affect 15098c2ecf20Sopenharmony_ci * the creation of the pending snapshots, just return 0. 
15108c2ecf20Sopenharmony_ci */ 15118c2ecf20Sopenharmony_cistatic noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, 15128c2ecf20Sopenharmony_ci struct btrfs_pending_snapshot *pending) 15138c2ecf20Sopenharmony_ci{ 15148c2ecf20Sopenharmony_ci 15158c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = trans->fs_info; 15168c2ecf20Sopenharmony_ci struct btrfs_key key; 15178c2ecf20Sopenharmony_ci struct btrfs_root_item *new_root_item; 15188c2ecf20Sopenharmony_ci struct btrfs_root *tree_root = fs_info->tree_root; 15198c2ecf20Sopenharmony_ci struct btrfs_root *root = pending->root; 15208c2ecf20Sopenharmony_ci struct btrfs_root *parent_root; 15218c2ecf20Sopenharmony_ci struct btrfs_block_rsv *rsv; 15228c2ecf20Sopenharmony_ci struct inode *parent_inode; 15238c2ecf20Sopenharmony_ci struct btrfs_path *path; 15248c2ecf20Sopenharmony_ci struct btrfs_dir_item *dir_item; 15258c2ecf20Sopenharmony_ci struct dentry *dentry; 15268c2ecf20Sopenharmony_ci struct extent_buffer *tmp; 15278c2ecf20Sopenharmony_ci struct extent_buffer *old; 15288c2ecf20Sopenharmony_ci struct timespec64 cur_time; 15298c2ecf20Sopenharmony_ci int ret = 0; 15308c2ecf20Sopenharmony_ci u64 to_reserve = 0; 15318c2ecf20Sopenharmony_ci u64 index = 0; 15328c2ecf20Sopenharmony_ci u64 objectid; 15338c2ecf20Sopenharmony_ci u64 root_flags; 15348c2ecf20Sopenharmony_ci 15358c2ecf20Sopenharmony_ci ASSERT(pending->path); 15368c2ecf20Sopenharmony_ci path = pending->path; 15378c2ecf20Sopenharmony_ci 15388c2ecf20Sopenharmony_ci ASSERT(pending->root_item); 15398c2ecf20Sopenharmony_ci new_root_item = pending->root_item; 15408c2ecf20Sopenharmony_ci 15418c2ecf20Sopenharmony_ci pending->error = btrfs_find_free_objectid(tree_root, &objectid); 15428c2ecf20Sopenharmony_ci if (pending->error) 15438c2ecf20Sopenharmony_ci goto no_free_objectid; 15448c2ecf20Sopenharmony_ci 15458c2ecf20Sopenharmony_ci /* 15468c2ecf20Sopenharmony_ci * Make qgroup to skip current new snapshot's qgroupid, as it is 15478c2ecf20Sopenharmony_ci 
* accounted by later btrfs_qgroup_inherit(). 15488c2ecf20Sopenharmony_ci */ 15498c2ecf20Sopenharmony_ci btrfs_set_skip_qgroup(trans, objectid); 15508c2ecf20Sopenharmony_ci 15518c2ecf20Sopenharmony_ci btrfs_reloc_pre_snapshot(pending, &to_reserve); 15528c2ecf20Sopenharmony_ci 15538c2ecf20Sopenharmony_ci if (to_reserve > 0) { 15548c2ecf20Sopenharmony_ci pending->error = btrfs_block_rsv_add(root, 15558c2ecf20Sopenharmony_ci &pending->block_rsv, 15568c2ecf20Sopenharmony_ci to_reserve, 15578c2ecf20Sopenharmony_ci BTRFS_RESERVE_NO_FLUSH); 15588c2ecf20Sopenharmony_ci if (pending->error) 15598c2ecf20Sopenharmony_ci goto clear_skip_qgroup; 15608c2ecf20Sopenharmony_ci } 15618c2ecf20Sopenharmony_ci 15628c2ecf20Sopenharmony_ci key.objectid = objectid; 15638c2ecf20Sopenharmony_ci key.offset = (u64)-1; 15648c2ecf20Sopenharmony_ci key.type = BTRFS_ROOT_ITEM_KEY; 15658c2ecf20Sopenharmony_ci 15668c2ecf20Sopenharmony_ci rsv = trans->block_rsv; 15678c2ecf20Sopenharmony_ci trans->block_rsv = &pending->block_rsv; 15688c2ecf20Sopenharmony_ci trans->bytes_reserved = trans->block_rsv->reserved; 15698c2ecf20Sopenharmony_ci trace_btrfs_space_reservation(fs_info, "transaction", 15708c2ecf20Sopenharmony_ci trans->transid, 15718c2ecf20Sopenharmony_ci trans->bytes_reserved, 1); 15728c2ecf20Sopenharmony_ci dentry = pending->dentry; 15738c2ecf20Sopenharmony_ci parent_inode = pending->dir; 15748c2ecf20Sopenharmony_ci parent_root = BTRFS_I(parent_inode)->root; 15758c2ecf20Sopenharmony_ci record_root_in_trans(trans, parent_root, 0); 15768c2ecf20Sopenharmony_ci 15778c2ecf20Sopenharmony_ci cur_time = current_time(parent_inode); 15788c2ecf20Sopenharmony_ci 15798c2ecf20Sopenharmony_ci /* 15808c2ecf20Sopenharmony_ci * insert the directory item 15818c2ecf20Sopenharmony_ci */ 15828c2ecf20Sopenharmony_ci ret = btrfs_set_inode_index(BTRFS_I(parent_inode), &index); 15838c2ecf20Sopenharmony_ci BUG_ON(ret); /* -ENOMEM */ 15848c2ecf20Sopenharmony_ci 15858c2ecf20Sopenharmony_ci /* check if there is a file/dir 
which has the same name. */ 15868c2ecf20Sopenharmony_ci dir_item = btrfs_lookup_dir_item(NULL, parent_root, path, 15878c2ecf20Sopenharmony_ci btrfs_ino(BTRFS_I(parent_inode)), 15888c2ecf20Sopenharmony_ci dentry->d_name.name, 15898c2ecf20Sopenharmony_ci dentry->d_name.len, 0); 15908c2ecf20Sopenharmony_ci if (dir_item != NULL && !IS_ERR(dir_item)) { 15918c2ecf20Sopenharmony_ci pending->error = -EEXIST; 15928c2ecf20Sopenharmony_ci goto dir_item_existed; 15938c2ecf20Sopenharmony_ci } else if (IS_ERR(dir_item)) { 15948c2ecf20Sopenharmony_ci ret = PTR_ERR(dir_item); 15958c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 15968c2ecf20Sopenharmony_ci goto fail; 15978c2ecf20Sopenharmony_ci } 15988c2ecf20Sopenharmony_ci btrfs_release_path(path); 15998c2ecf20Sopenharmony_ci 16008c2ecf20Sopenharmony_ci /* 16018c2ecf20Sopenharmony_ci * pull in the delayed directory update 16028c2ecf20Sopenharmony_ci * and the delayed inode item 16038c2ecf20Sopenharmony_ci * otherwise we corrupt the FS during 16048c2ecf20Sopenharmony_ci * snapshot 16058c2ecf20Sopenharmony_ci */ 16068c2ecf20Sopenharmony_ci ret = btrfs_run_delayed_items(trans); 16078c2ecf20Sopenharmony_ci if (ret) { /* Transaction aborted */ 16088c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 16098c2ecf20Sopenharmony_ci goto fail; 16108c2ecf20Sopenharmony_ci } 16118c2ecf20Sopenharmony_ci 16128c2ecf20Sopenharmony_ci record_root_in_trans(trans, root, 0); 16138c2ecf20Sopenharmony_ci btrfs_set_root_last_snapshot(&root->root_item, trans->transid); 16148c2ecf20Sopenharmony_ci memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); 16158c2ecf20Sopenharmony_ci btrfs_check_and_init_root_item(new_root_item); 16168c2ecf20Sopenharmony_ci 16178c2ecf20Sopenharmony_ci root_flags = btrfs_root_flags(new_root_item); 16188c2ecf20Sopenharmony_ci if (pending->readonly) 16198c2ecf20Sopenharmony_ci root_flags |= BTRFS_ROOT_SUBVOL_RDONLY; 16208c2ecf20Sopenharmony_ci else 16218c2ecf20Sopenharmony_ci root_flags &= 
~BTRFS_ROOT_SUBVOL_RDONLY; 16228c2ecf20Sopenharmony_ci btrfs_set_root_flags(new_root_item, root_flags); 16238c2ecf20Sopenharmony_ci 16248c2ecf20Sopenharmony_ci btrfs_set_root_generation_v2(new_root_item, 16258c2ecf20Sopenharmony_ci trans->transid); 16268c2ecf20Sopenharmony_ci generate_random_guid(new_root_item->uuid); 16278c2ecf20Sopenharmony_ci memcpy(new_root_item->parent_uuid, root->root_item.uuid, 16288c2ecf20Sopenharmony_ci BTRFS_UUID_SIZE); 16298c2ecf20Sopenharmony_ci if (!(root_flags & BTRFS_ROOT_SUBVOL_RDONLY)) { 16308c2ecf20Sopenharmony_ci memset(new_root_item->received_uuid, 0, 16318c2ecf20Sopenharmony_ci sizeof(new_root_item->received_uuid)); 16328c2ecf20Sopenharmony_ci memset(&new_root_item->stime, 0, sizeof(new_root_item->stime)); 16338c2ecf20Sopenharmony_ci memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime)); 16348c2ecf20Sopenharmony_ci btrfs_set_root_stransid(new_root_item, 0); 16358c2ecf20Sopenharmony_ci btrfs_set_root_rtransid(new_root_item, 0); 16368c2ecf20Sopenharmony_ci } 16378c2ecf20Sopenharmony_ci btrfs_set_stack_timespec_sec(&new_root_item->otime, cur_time.tv_sec); 16388c2ecf20Sopenharmony_ci btrfs_set_stack_timespec_nsec(&new_root_item->otime, cur_time.tv_nsec); 16398c2ecf20Sopenharmony_ci btrfs_set_root_otransid(new_root_item, trans->transid); 16408c2ecf20Sopenharmony_ci 16418c2ecf20Sopenharmony_ci old = btrfs_lock_root_node(root); 16428c2ecf20Sopenharmony_ci ret = btrfs_cow_block(trans, root, old, NULL, 0, &old, 16438c2ecf20Sopenharmony_ci BTRFS_NESTING_COW); 16448c2ecf20Sopenharmony_ci if (ret) { 16458c2ecf20Sopenharmony_ci btrfs_tree_unlock(old); 16468c2ecf20Sopenharmony_ci free_extent_buffer(old); 16478c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 16488c2ecf20Sopenharmony_ci goto fail; 16498c2ecf20Sopenharmony_ci } 16508c2ecf20Sopenharmony_ci 16518c2ecf20Sopenharmony_ci btrfs_set_lock_blocking_write(old); 16528c2ecf20Sopenharmony_ci 16538c2ecf20Sopenharmony_ci ret = btrfs_copy_root(trans, root, old, &tmp, 
objectid); 16548c2ecf20Sopenharmony_ci /* clean up in any case */ 16558c2ecf20Sopenharmony_ci btrfs_tree_unlock(old); 16568c2ecf20Sopenharmony_ci free_extent_buffer(old); 16578c2ecf20Sopenharmony_ci if (ret) { 16588c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 16598c2ecf20Sopenharmony_ci goto fail; 16608c2ecf20Sopenharmony_ci } 16618c2ecf20Sopenharmony_ci /* see comments in should_cow_block() */ 16628c2ecf20Sopenharmony_ci set_bit(BTRFS_ROOT_FORCE_COW, &root->state); 16638c2ecf20Sopenharmony_ci smp_wmb(); 16648c2ecf20Sopenharmony_ci 16658c2ecf20Sopenharmony_ci btrfs_set_root_node(new_root_item, tmp); 16668c2ecf20Sopenharmony_ci /* record when the snapshot was created in key.offset */ 16678c2ecf20Sopenharmony_ci key.offset = trans->transid; 16688c2ecf20Sopenharmony_ci ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); 16698c2ecf20Sopenharmony_ci btrfs_tree_unlock(tmp); 16708c2ecf20Sopenharmony_ci free_extent_buffer(tmp); 16718c2ecf20Sopenharmony_ci if (ret) { 16728c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 16738c2ecf20Sopenharmony_ci goto fail; 16748c2ecf20Sopenharmony_ci } 16758c2ecf20Sopenharmony_ci 16768c2ecf20Sopenharmony_ci /* 16778c2ecf20Sopenharmony_ci * insert root back/forward references 16788c2ecf20Sopenharmony_ci */ 16798c2ecf20Sopenharmony_ci ret = btrfs_add_root_ref(trans, objectid, 16808c2ecf20Sopenharmony_ci parent_root->root_key.objectid, 16818c2ecf20Sopenharmony_ci btrfs_ino(BTRFS_I(parent_inode)), index, 16828c2ecf20Sopenharmony_ci dentry->d_name.name, dentry->d_name.len); 16838c2ecf20Sopenharmony_ci if (ret) { 16848c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 16858c2ecf20Sopenharmony_ci goto fail; 16868c2ecf20Sopenharmony_ci } 16878c2ecf20Sopenharmony_ci 16888c2ecf20Sopenharmony_ci key.offset = (u64)-1; 16898c2ecf20Sopenharmony_ci pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev); 16908c2ecf20Sopenharmony_ci if (IS_ERR(pending->snap)) { 16918c2ecf20Sopenharmony_ci 
ret = PTR_ERR(pending->snap); 16928c2ecf20Sopenharmony_ci pending->snap = NULL; 16938c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 16948c2ecf20Sopenharmony_ci goto fail; 16958c2ecf20Sopenharmony_ci } 16968c2ecf20Sopenharmony_ci 16978c2ecf20Sopenharmony_ci ret = btrfs_reloc_post_snapshot(trans, pending); 16988c2ecf20Sopenharmony_ci if (ret) { 16998c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 17008c2ecf20Sopenharmony_ci goto fail; 17018c2ecf20Sopenharmony_ci } 17028c2ecf20Sopenharmony_ci 17038c2ecf20Sopenharmony_ci ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); 17048c2ecf20Sopenharmony_ci if (ret) { 17058c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 17068c2ecf20Sopenharmony_ci goto fail; 17078c2ecf20Sopenharmony_ci } 17088c2ecf20Sopenharmony_ci 17098c2ecf20Sopenharmony_ci /* 17108c2ecf20Sopenharmony_ci * Do special qgroup accounting for snapshot, as we do some qgroup 17118c2ecf20Sopenharmony_ci * snapshot hack to do fast snapshot. 17128c2ecf20Sopenharmony_ci * To co-operate with that hack, we do hack again. 17138c2ecf20Sopenharmony_ci * Or snapshot will be greatly slowed down by a subtree qgroup rescan 17148c2ecf20Sopenharmony_ci */ 17158c2ecf20Sopenharmony_ci ret = qgroup_account_snapshot(trans, root, parent_root, 17168c2ecf20Sopenharmony_ci pending->inherit, objectid); 17178c2ecf20Sopenharmony_ci if (ret < 0) 17188c2ecf20Sopenharmony_ci goto fail; 17198c2ecf20Sopenharmony_ci 17208c2ecf20Sopenharmony_ci ret = btrfs_insert_dir_item(trans, dentry->d_name.name, 17218c2ecf20Sopenharmony_ci dentry->d_name.len, BTRFS_I(parent_inode), 17228c2ecf20Sopenharmony_ci &key, BTRFS_FT_DIR, index); 17238c2ecf20Sopenharmony_ci /* We have check then name at the beginning, so it is impossible. 
*/ 17248c2ecf20Sopenharmony_ci BUG_ON(ret == -EEXIST || ret == -EOVERFLOW); 17258c2ecf20Sopenharmony_ci if (ret) { 17268c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 17278c2ecf20Sopenharmony_ci goto fail; 17288c2ecf20Sopenharmony_ci } 17298c2ecf20Sopenharmony_ci 17308c2ecf20Sopenharmony_ci btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size + 17318c2ecf20Sopenharmony_ci dentry->d_name.len * 2); 17328c2ecf20Sopenharmony_ci parent_inode->i_mtime = parent_inode->i_ctime = 17338c2ecf20Sopenharmony_ci current_time(parent_inode); 17348c2ecf20Sopenharmony_ci ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode); 17358c2ecf20Sopenharmony_ci if (ret) { 17368c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 17378c2ecf20Sopenharmony_ci goto fail; 17388c2ecf20Sopenharmony_ci } 17398c2ecf20Sopenharmony_ci ret = btrfs_uuid_tree_add(trans, new_root_item->uuid, 17408c2ecf20Sopenharmony_ci BTRFS_UUID_KEY_SUBVOL, 17418c2ecf20Sopenharmony_ci objectid); 17428c2ecf20Sopenharmony_ci if (ret) { 17438c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 17448c2ecf20Sopenharmony_ci goto fail; 17458c2ecf20Sopenharmony_ci } 17468c2ecf20Sopenharmony_ci if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) { 17478c2ecf20Sopenharmony_ci ret = btrfs_uuid_tree_add(trans, new_root_item->received_uuid, 17488c2ecf20Sopenharmony_ci BTRFS_UUID_KEY_RECEIVED_SUBVOL, 17498c2ecf20Sopenharmony_ci objectid); 17508c2ecf20Sopenharmony_ci if (ret && ret != -EEXIST) { 17518c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 17528c2ecf20Sopenharmony_ci goto fail; 17538c2ecf20Sopenharmony_ci } 17548c2ecf20Sopenharmony_ci } 17558c2ecf20Sopenharmony_ci 17568c2ecf20Sopenharmony_ci ret = btrfs_run_delayed_refs(trans, (unsigned long)-1); 17578c2ecf20Sopenharmony_ci if (ret) { 17588c2ecf20Sopenharmony_ci btrfs_abort_transaction(trans, ret); 17598c2ecf20Sopenharmony_ci goto fail; 17608c2ecf20Sopenharmony_ci } 17618c2ecf20Sopenharmony_ci 
fail:
	pending->error = ret;
dir_item_existed:
	trans->block_rsv = rsv;
	trans->bytes_reserved = 0;
clear_skip_qgroup:
	btrfs_clear_skip_qgroup(trans);
no_free_objectid:
	kfree(new_root_item);
	pending->root_item = NULL;
	btrfs_free_path(path);
	pending->path = NULL;

	return ret;
}

/*
 * create all the snapshots we've scheduled for creation
 *
 * Each entry is unlinked from the transaction's pending_snapshots list before
 * create_pending_snapshot() is called on it.  Processing stops at the first
 * failure and that error is returned; entries not yet visited stay on the
 * list.  Returns 0 when every snapshot was created (or the list was empty).
 */
static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans)
{
	struct btrfs_pending_snapshot *pending, *next;
	struct list_head *head = &trans->transaction->pending_snapshots;
	int ret = 0;

	list_for_each_entry_safe(pending, next, head, list) {
		list_del(&pending->list);
		ret = create_pending_snapshot(trans, pending);
		if (ret)
			break;
	}
	return ret;
}

/*
 * Copy the location (bytenr), generation and level of the chunk root and of
 * the tree root from their in-memory root items into fs_info->super_copy.
 *
 * cache_generation is only refreshed when the SPACE_CACHE mount option is
 * set, and uuid_tree_generation only when BTRFS_FS_UPDATE_UUID_TREE_GEN is
 * set in fs_info->flags; both take the tree root's generation.
 */
static void update_super_roots(struct btrfs_fs_info *fs_info)
{
	struct btrfs_root_item *root_item;
	struct btrfs_super_block *super;

	super = fs_info->super_copy;

	root_item = &fs_info->chunk_root->root_item;
	super->chunk_root = root_item->bytenr;
	super->chunk_root_generation = root_item->generation;
	super->chunk_root_level = root_item->level;

	root_item = &fs_info->tree_root->root_item;
	super->root = root_item->bytenr;
	super->generation = root_item->generation;
	super->root_level = root_item->level;
	if (btrfs_test_opt(fs_info, SPACE_CACHE))
		super->cache_generation = root_item->generation;
	if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
		super->uuid_tree_generation = root_item->generation;
}

/*
 * Return non-zero if there is a running transaction whose state has reached
 * at least TRANS_STATE_COMMIT_START, and 0 otherwise (including when no
 * transaction is running).  The state is sampled under info->trans_lock.
 */
int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
{
	struct btrfs_transaction *trans;
	int ret = 0;

	spin_lock(&info->trans_lock);
	trans = info->running_transaction;
	if (trans)
		ret = (trans->state >= TRANS_STATE_COMMIT_START);
	spin_unlock(&info->trans_lock);
	return ret;
}

/*
 * Return the result of is_transaction_blocked() for the running transaction,
 * or 0 when no transaction is running.  Sampled under info->trans_lock.
 */
int btrfs_transaction_blocked(struct btrfs_fs_info *info)
{
	struct btrfs_transaction *trans;
	int ret = 0;

	spin_lock(&info->trans_lock);
	trans = info->running_transaction;
	if (trans)
		ret = is_transaction_blocked(trans);
	spin_unlock(&info->trans_lock);
	return ret;
}

/*
 * wait for the current transaction commit to start and block subsequent
 * transaction joins
 *
 * Sleeps on fs_info->transaction_blocked_wait until @trans has reached
 * TRANS_STATE_COMMIT_START (or later) or has been aborted.
 */
static void wait_current_trans_commit_start(struct btrfs_fs_info *fs_info,
					    struct btrfs_transaction *trans)
{
	wait_event(fs_info->transaction_blocked_wait,
		   trans->state >= TRANS_STATE_COMMIT_START ||
		   TRANS_ABORTED(trans));
}

/*
 * wait for the current transaction to start and then become unblocked.
 * caller holds ref.
 *
 * Sleeps on fs_info->transaction_wait until @trans has reached
 * TRANS_STATE_UNBLOCKED (or later) or has been aborted.
 */
static void wait_current_trans_commit_start_and_unblock(
					struct btrfs_fs_info *fs_info,
					struct btrfs_transaction *trans)
{
	wait_event(fs_info->transaction_wait,
		   trans->state >= TRANS_STATE_UNBLOCKED ||
		   TRANS_ABORTED(trans));
}

/*
 * commit transactions asynchronously. once btrfs_commit_transaction_async
 * returns, any subsequent transaction will not be allowed to join.
 */
struct btrfs_async_commit {
	/* handle that the worker commits */
	struct btrfs_trans_handle *newtrans;
	/* work item whose handler is do_async_commit() */
	struct work_struct work;
};

/*
 * Work handler for an asynchronous commit: commits ac->newtrans and frees the
 * btrfs_async_commit that carried it.  The return value of
 * btrfs_commit_transaction() is not inspected here.
 */
static void do_async_commit(struct work_struct *work)
{
	struct btrfs_async_commit *ac =
		container_of(work, struct btrfs_async_commit, work);

	/*
	 * We've got freeze protection passed with the transaction.
	 * Tell lockdep about it.
	 */
	if (ac->newtrans->type & __TRANS_FREEZABLE)
		__sb_writers_acquired(ac->newtrans->fs_info->sb, SB_FREEZE_FS);

	current->journal_info = ac->newtrans;

	btrfs_commit_transaction(ac->newtrans);
	kfree(ac);
}

/*
 * Hand the commit of @trans's transaction to a worker and wait only until the
 * commit has started (or, if @wait_for_unblock is non-zero, until the
 * transaction has become unblocked).
 *
 * A new handle is joined on trans->root for the worker, the caller's handle
 * @trans is ended, and a temporary reference on the transaction is held while
 * waiting.
 *
 * Returns 0 once the wait completes, -ENOMEM if the work descriptor cannot be
 * allocated, or the error from btrfs_join_transaction().  On both error
 * returns @trans has not been ended.
 */
int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
				   int wait_for_unblock)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_async_commit *ac;
	struct btrfs_transaction *cur_trans;

	ac = kmalloc(sizeof(*ac), GFP_NOFS);
	if (!ac)
		return -ENOMEM;

	INIT_WORK(&ac->work, do_async_commit);
	ac->newtrans = btrfs_join_transaction(trans->root);
	if (IS_ERR(ac->newtrans)) {
		int err = PTR_ERR(ac->newtrans);
		kfree(ac);
		return err;
	}

	/* take transaction reference */
	cur_trans = trans->transaction;
	refcount_inc(&cur_trans->use_count);

	btrfs_end_transaction(trans);

	/*
	 * Tell lockdep we've released the freeze rwsem, since the
	 * async commit thread will be the one to unlock it.
	 */
	if (ac->newtrans->type & __TRANS_FREEZABLE)
		__sb_writers_release(fs_info->sb, SB_FREEZE_FS);

	schedule_work(&ac->work);

	/* wait for transaction to start and unblock */
	if (wait_for_unblock)
		wait_current_trans_commit_start_and_unblock(fs_info, cur_trans);
	else
		wait_current_trans_commit_start(fs_info, cur_trans);

	/* only the pointer value of @trans is compared here, it is not dereferenced */
	if (current->journal_info == trans)
		current->journal_info = NULL;

	btrfs_put_transaction(cur_trans);
	return 0;
}


/*
 * Abort the transaction @trans is attached to with error @err and tear it
 * down: wait for other writers to leave, unlink the transaction from the
 * transaction list, run btrfs_cleanup_one_transaction() on it, clear
 * fs_info->running_transaction if it still points at it, cancel scrub and
 * free the handle @trans.
 */
static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_transaction *cur_trans = trans->transaction;

	WARN_ON(refcount_read(&trans->use_count) > 1);

	btrfs_abort_transaction(trans, err);

	spin_lock(&fs_info->trans_lock);

	/*
	 * If the transaction is removed from the list, it means this
	 * transaction has been committed successfully, so it is impossible
	 * to call the cleanup function.
	 */
	BUG_ON(list_empty(&cur_trans->list));

	if (cur_trans == fs_info->running_transaction) {
		cur_trans->state = TRANS_STATE_COMMIT_DOING;
		/* trans_lock is dropped while sleeping and re-taken afterwards */
		spin_unlock(&fs_info->trans_lock);
		wait_event(cur_trans->writer_wait,
			   atomic_read(&cur_trans->num_writers) == 1);

		spin_lock(&fs_info->trans_lock);
	}

	/*
	 * Now that we know no one else is still using the transaction we can
	 * remove the transaction from the list of transactions. This avoids
	 * the transaction kthread from cleaning up the transaction while some
	 * other task is still using it, which could result in a use-after-free
	 * on things like log trees, as it forces the transaction kthread to
	 * wait for this transaction to be cleaned up by us.
	 */
	list_del_init(&cur_trans->list);

	spin_unlock(&fs_info->trans_lock);

	btrfs_cleanup_one_transaction(trans->transaction, fs_info);

	spin_lock(&fs_info->trans_lock);
	if (cur_trans == fs_info->running_transaction)
		fs_info->running_transaction = NULL;
	spin_unlock(&fs_info->trans_lock);

	if (trans->type & __TRANS_FREEZABLE)
		sb_end_intwrite(fs_info->sb);
	/*
	 * NOTE(review): two references are dropped here; presumably one is the
	 * handle's reference and the other the one that kept the transaction
	 * on the list -- confirm against where use_count is taken.
	 */
	btrfs_put_transaction(cur_trans);
	btrfs_put_transaction(cur_trans);

	trace_btrfs_transaction_commit(trans->root);

	if (current->journal_info == trans)
		current->journal_info = NULL;
	btrfs_scrub_cancel(fs_info);

	kmem_cache_free(btrfs_trans_handle_cachep, trans);
}

/*
 * Release reserved delayed ref space of all pending block groups of the
 * transaction and remove them from the list
 *
 * One unit is released via btrfs_delayed_refs_rsv_release() for every block
 * group found on trans->new_bgs.
 */
static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_block_group *block_group, *tmp;

	list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
		btrfs_delayed_refs_rsv_release(fs_info, 1);
		list_del_init(&block_group->bg_list);
	}
}

/*
 * Start flushing delalloc ahead of the commit.
 *
 * With the FLUSHONCOMMIT mount option, writeback is started for the whole
 * super block.  Otherwise delalloc is started only for the roots that have a
 * pending snapshot in this transaction.
 *
 * Returns 0, or the first error returned by btrfs_start_delalloc_snapshot().
 */
static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;

	/*
	 * We use writeback_inodes_sb here because if we used
	 * btrfs_start_delalloc_roots we would deadlock with fs freeze.
	 * We are currently holding the fs freeze lock, if we do an async flush
	 * we'll do btrfs_join_transaction() and deadlock because we need to
	 * wait for the fs freeze lock.  Using the direct flushing we benefit
	 * from already being in a transaction and our join_transaction doesn't
	 * have to re-take the fs freeze lock.
	 */
	if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
		writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC);
	} else {
		struct btrfs_pending_snapshot *pending;
		struct list_head *head = &trans->transaction->pending_snapshots;

		/*
		 * Flush delalloc for any root that is going to be snapshotted.
		 * This is done to avoid a corrupted version of files, in the
		 * snapshots, that had both buffered and direct IO writes (even
		 * if they were done sequentially) due to an unordered update of
		 * the inode's size on disk.
		 */
		list_for_each_entry(pending, head, list) {
			int ret;

			ret = btrfs_start_delalloc_snapshot(pending->root);
			if (ret)
				return ret;
		}
	}
	return 0;
}

/*
 * Wait for the ordered extents belonging to the flush started by
 * btrfs_start_delalloc_flush(): for all roots with FLUSHONCOMMIT, otherwise
 * only for the roots that have a pending snapshot in this transaction.
 */
static inline void btrfs_wait_delalloc_flush(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;

	if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) {
		btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
	} else {
		struct btrfs_pending_snapshot *pending;
		struct list_head *head = &trans->transaction->pending_snapshots;

		/*
		 * Wait for any delalloc that we started previously for the roots
		 * that are going to be snapshotted. This is to avoid a corrupted
		 * version of files in the snapshots that had both buffered and
		 * direct IO writes (even if they were done sequentially).
		 */
		list_for_each_entry(pending, head, list)
			btrfs_wait_ordered_extents(pending->root,
						   U64_MAX, 0, U64_MAX);
	}
}

/*
 * Commit the transaction that @trans is attached to.
 *
 * On the early failure paths (transaction already aborted, errors from
 * running delayed refs or from starting dirty block group writeout) the
 * handle is released with btrfs_end_transaction() and the error is returned.
 * If another task has already moved the transaction to
 * TRANS_STATE_COMMIT_START or beyond, this caller ends its handle, waits for
 * that commit and returns its outcome.  Otherwise this caller performs the
 * commit itself, moving the transaction through COMMIT_START, COMMIT_DOING
 * and UNBLOCKED.
 */
int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_transaction *cur_trans = trans->transaction;
	struct btrfs_transaction *prev_trans = NULL;
	int ret;

	ASSERT(refcount_read(&trans->use_count) == 1);

	/*
	 * Some places just start a transaction to commit it.  We need to make
	 * sure that if this commit fails that the abort code actually marks the
	 * transaction as failed, so set trans->dirty to make the abort code do
	 * the right thing.
	 */
	trans->dirty = true;

	/* Stop the commit early if ->aborted is set */
	if (TRANS_ABORTED(cur_trans)) {
		ret = cur_trans->aborted;
		btrfs_end_transaction(trans);
		return ret;
	}

	btrfs_trans_release_metadata(trans);
	trans->block_rsv = NULL;

	/* make a pass through all the delayed refs we have so far
	 * any running procs may add more while we are here
	 */
	ret = btrfs_run_delayed_refs(trans, 0);
	if (ret) {
		btrfs_end_transaction(trans);
		return ret;
	}

	cur_trans = trans->transaction;

	/*
	 * set the flushing flag so procs in this transaction have to
	 * start sending their work down.
	 */
	cur_trans->delayed_refs.flushing = 1;
	smp_wmb();

	btrfs_create_pending_block_groups(trans);

	ret = btrfs_run_delayed_refs(trans, 0);
	if (ret) {
		btrfs_end_transaction(trans);
		return ret;
	}

	if (!test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &cur_trans->flags)) {
		int run_it = 0;

		/* this mutex is also taken before trying to set
		 * block groups readonly.  We need to make sure
		 * that nobody has set a block group readonly
		 * after extents from that block group have been
		 * allocated for cache files.  btrfs_set_block_group_ro
		 * will wait for the transaction to commit if it
		 * finds BTRFS_TRANS_DIRTY_BG_RUN set.
		 *
		 * The BTRFS_TRANS_DIRTY_BG_RUN flag is also used to make sure
		 * only one process starts all the block group IO.  It wouldn't
		 * hurt to have more than one go through, but there's no
		 * real advantage to it either.
		 */
		mutex_lock(&fs_info->ro_block_group_mutex);
		if (!test_and_set_bit(BTRFS_TRANS_DIRTY_BG_RUN,
				      &cur_trans->flags))
			run_it = 1;
		mutex_unlock(&fs_info->ro_block_group_mutex);

		if (run_it) {
			ret = btrfs_start_dirty_block_groups(trans);
			if (ret) {
				btrfs_end_transaction(trans);
				return ret;
			}
		}
	}

	spin_lock(&fs_info->trans_lock);
	if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
		/*
		 * Someone else is already committing: hold a reference so the
		 * transaction stays valid after our handle is ended, then wait
		 * for that commit to finish.
		 */
		spin_unlock(&fs_info->trans_lock);
		refcount_inc(&cur_trans->use_count);
		ret = btrfs_end_transaction(trans);

		wait_for_commit(cur_trans);

		if (TRANS_ABORTED(cur_trans))
			ret = cur_trans->aborted;

		btrfs_put_transaction(cur_trans);

		return ret;
	}

	cur_trans->state = TRANS_STATE_COMMIT_START;
	wake_up(&fs_info->transaction_blocked_wait);

	if (cur_trans->list.prev != &fs_info->trans_list) {
		prev_trans = list_entry(cur_trans->list.prev,
					struct btrfs_transaction, list);
		if (prev_trans->state != TRANS_STATE_COMPLETED) {
			refcount_inc(&prev_trans->use_count);
			spin_unlock(&fs_info->trans_lock);

			wait_for_commit(prev_trans);
			ret = READ_ONCE(prev_trans->aborted);

			btrfs_put_transaction(prev_trans);
			if (ret)
				goto cleanup_transaction;
		} else {
			spin_unlock(&fs_info->trans_lock);
		}
	} else {
		spin_unlock(&fs_info->trans_lock);
		/*
		 * The previous transaction was aborted and was already removed
		 * from the list of transactions at fs_info->trans_list. So we
		 * abort to prevent writing a new superblock that reflects a
		 * corrupt state (pointing to trees with unwritten nodes/leafs).
		 */
		if (test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state)) {
			ret = -EROFS;
			goto cleanup_transaction;
		}
	}

	extwriter_counter_dec(cur_trans, trans->type);

	ret = btrfs_start_delalloc_flush(trans);
	if (ret)
		goto cleanup_transaction;

	ret = btrfs_run_delayed_items(trans);
	if (ret)
		goto cleanup_transaction;

	wait_event(cur_trans->writer_wait,
		   extwriter_counter_read(cur_trans) == 0);

	/* some pending items might have been added after the previous flush. */
	ret = btrfs_run_delayed_items(trans);
	if (ret)
		goto cleanup_transaction;

	btrfs_wait_delalloc_flush(trans);

	/*
	 * Wait for all ordered extents started by a fast fsync that joined this
	 * transaction. Otherwise if this transaction commits before the ordered
	 * extents complete we lose logged data after a power failure.
	 */
	wait_event(cur_trans->pending_wait,
		   atomic_read(&cur_trans->pending_ordered) == 0);

	btrfs_scrub_pause(fs_info);
	/*
	 * Ok now we need to make sure to block out any other joins while we
	 * commit the transaction.  We could have started a join before setting
	 * COMMIT_DOING so make sure to wait for num_writers to == 1 again.
	 */
	spin_lock(&fs_info->trans_lock);
	cur_trans->state = TRANS_STATE_COMMIT_DOING;
	spin_unlock(&fs_info->trans_lock);
	wait_event(cur_trans->writer_wait,
		   atomic_read(&cur_trans->num_writers) == 1);

	if (TRANS_ABORTED(cur_trans)) {
		ret = cur_trans->aborted;
		goto scrub_continue;
	}
	/*
	 * the reloc mutex makes sure that we stop
	 * the balancing code from coming in and moving
	 * extents around in the middle of the commit
	 */
	mutex_lock(&fs_info->reloc_mutex);

	/*
	 * We needn't worry about the delayed items because we will
	 * deal with them in create_pending_snapshot(), which is the
	 * core function of the snapshot creation.
	 */
	ret = create_pending_snapshots(trans);
	if (ret)
		goto unlock_reloc;

	/*
	 * We insert the dir indexes of the snapshots and update the inode
	 * of the snapshots' parents after the snapshot creation, so there
	 * are some delayed items which are not dealt with. Now deal with
	 * them.
	 *
	 * We needn't worry that this operation will corrupt the snapshots,
	 * because all the trees which are snapshotted will be forced to COW
	 * the nodes and leaves.
	 */
	ret = btrfs_run_delayed_items(trans);
	if (ret)
		goto unlock_reloc;

	ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
	if (ret)
		goto unlock_reloc;

	/*
	 * make sure none of the code above managed to slip in a
	 * delayed item
	 */
	btrfs_assert_delayed_root_empty(fs_info);

	WARN_ON(cur_trans != trans->transaction);

	/* btrfs_commit_tree_roots is responsible for getting the
	 * various roots consistent with each other.  Every pointer
	 * in the tree of tree roots has to point to the most up to date
	 * root for every subvolume and other tree.  So, we have to keep
	 * the tree logging code from jumping in and changing any
	 * of the trees.
	 *
	 * At this point in the commit, there can't be any tree-log
	 * writers, but a little lower down we drop the trans mutex
	 * and let new people in.  By holding the tree_log_mutex
	 * from now until after the super is written, we avoid races
	 * with the tree-log code.
	 */
	mutex_lock(&fs_info->tree_log_mutex);

	ret = commit_fs_roots(trans);
	if (ret)
		goto unlock_tree_log;

	/*
	 * Since the transaction is done, we can apply the pending changes
	 * before the next transaction.
	 */
	btrfs_apply_pending_changes(fs_info);

	/* commit_fs_roots gets rid of all the tree log roots, it is now
	 * safe to free the root of tree log roots
	 */
	btrfs_free_log_root_tree(trans, fs_info);

	/*
	 * commit_fs_roots() can call btrfs_save_ino_cache(), which generates
	 * new delayed refs. Must handle them or qgroup can be wrong.
	 */
	ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
	if (ret)
		goto unlock_tree_log;

	/*
	 * Since fs roots are all committed, we can get a quite accurate
	 * new_roots. So let's do quota accounting.
	 */
	ret = btrfs_qgroup_account_extents(trans);
	if (ret < 0)
		goto unlock_tree_log;

	ret = commit_cowonly_roots(trans);
	if (ret)
		goto unlock_tree_log;

	/*
	 * The tasks which save the space cache and inode cache may also
	 * update ->aborted, check it.
	 */
	if (TRANS_ABORTED(cur_trans)) {
		ret = cur_trans->aborted;
		goto unlock_tree_log;
	}

	cur_trans = fs_info->running_transaction;

	btrfs_set_root_node(&fs_info->tree_root->root_item,
			    fs_info->tree_root->node);
	list_add_tail(&fs_info->tree_root->dirty_list,
		      &cur_trans->switch_commits);

	btrfs_set_root_node(&fs_info->chunk_root->root_item,
			    fs_info->chunk_root->node);
	list_add_tail(&fs_info->chunk_root->dirty_list,
		      &cur_trans->switch_commits);

	switch_commit_roots(trans);

	ASSERT(list_empty(&cur_trans->dirty_bgs));
	ASSERT(list_empty(&cur_trans->io_bgs));
	update_super_roots(fs_info);

	/* the log root is cleared in the super copy before it is snapshotted */
	btrfs_set_super_log_root(fs_info->super_copy, 0);
	btrfs_set_super_log_root_level(fs_info->super_copy, 0);
	memcpy(fs_info->super_for_commit, fs_info->super_copy,
	       sizeof(*fs_info->super_copy));

	btrfs_commit_device_sizes(cur_trans);

	clear_bit(BTRFS_FS_LOG1_ERR, &fs_info->flags);
	clear_bit(BTRFS_FS_LOG2_ERR, &fs_info->flags);

	btrfs_trans_release_chunk_metadata(trans);

	spin_lock(&fs_info->trans_lock);
	cur_trans->state = TRANS_STATE_UNBLOCKED;
	fs_info->running_transaction = NULL;
	spin_unlock(&fs_info->trans_lock);
	mutex_unlock(&fs_info->reloc_mutex);

	wake_up(&fs_info->transaction_wait);

	ret = btrfs_write_and_wait_transaction(trans);
	if (ret) {
		btrfs_handle_fs_error(fs_info, ret,
				      "Error while writing out transaction");
		/*
		 * reloc_mutex has been unlocked, tree_log_mutex is still held
		 * but we can't jump to unlock_tree_log causing double unlock
		 */
		mutex_unlock(&fs_info->tree_log_mutex);
		goto scrub_continue;
	}

	ret = write_all_supers(fs_info, 0);
	/*
	 * the super is written, we can safely allow the tree-loggers
	 * to go about their business
	 */
	mutex_unlock(&fs_info->tree_log_mutex);
	if (ret)
		goto scrub_continue;

	btrfs_finish_extent_commit(trans);

	if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &cur_trans->flags))
		btrfs_clear_space_info_full(fs_info);

	fs_info->last_trans_committed = cur_trans->transid;
	/*
	 * We needn't acquire the lock here because there is no
other task 24238c2ecf20Sopenharmony_ci * which can change it. 24248c2ecf20Sopenharmony_ci */ 24258c2ecf20Sopenharmony_ci cur_trans->state = TRANS_STATE_COMPLETED; 24268c2ecf20Sopenharmony_ci wake_up(&cur_trans->commit_wait); 24278c2ecf20Sopenharmony_ci 24288c2ecf20Sopenharmony_ci spin_lock(&fs_info->trans_lock); 24298c2ecf20Sopenharmony_ci list_del_init(&cur_trans->list); 24308c2ecf20Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 24318c2ecf20Sopenharmony_ci 24328c2ecf20Sopenharmony_ci btrfs_put_transaction(cur_trans); 24338c2ecf20Sopenharmony_ci btrfs_put_transaction(cur_trans); 24348c2ecf20Sopenharmony_ci 24358c2ecf20Sopenharmony_ci if (trans->type & __TRANS_FREEZABLE) 24368c2ecf20Sopenharmony_ci sb_end_intwrite(fs_info->sb); 24378c2ecf20Sopenharmony_ci 24388c2ecf20Sopenharmony_ci trace_btrfs_transaction_commit(trans->root); 24398c2ecf20Sopenharmony_ci 24408c2ecf20Sopenharmony_ci btrfs_scrub_continue(fs_info); 24418c2ecf20Sopenharmony_ci 24428c2ecf20Sopenharmony_ci if (current->journal_info == trans) 24438c2ecf20Sopenharmony_ci current->journal_info = NULL; 24448c2ecf20Sopenharmony_ci 24458c2ecf20Sopenharmony_ci kmem_cache_free(btrfs_trans_handle_cachep, trans); 24468c2ecf20Sopenharmony_ci 24478c2ecf20Sopenharmony_ci return ret; 24488c2ecf20Sopenharmony_ci 24498c2ecf20Sopenharmony_ciunlock_tree_log: 24508c2ecf20Sopenharmony_ci mutex_unlock(&fs_info->tree_log_mutex); 24518c2ecf20Sopenharmony_ciunlock_reloc: 24528c2ecf20Sopenharmony_ci mutex_unlock(&fs_info->reloc_mutex); 24538c2ecf20Sopenharmony_ciscrub_continue: 24548c2ecf20Sopenharmony_ci btrfs_scrub_continue(fs_info); 24558c2ecf20Sopenharmony_cicleanup_transaction: 24568c2ecf20Sopenharmony_ci btrfs_trans_release_metadata(trans); 24578c2ecf20Sopenharmony_ci btrfs_cleanup_pending_block_groups(trans); 24588c2ecf20Sopenharmony_ci btrfs_trans_release_chunk_metadata(trans); 24598c2ecf20Sopenharmony_ci trans->block_rsv = NULL; 24608c2ecf20Sopenharmony_ci btrfs_warn(fs_info, "Skipping commit of aborted 
transaction."); 24618c2ecf20Sopenharmony_ci if (current->journal_info == trans) 24628c2ecf20Sopenharmony_ci current->journal_info = NULL; 24638c2ecf20Sopenharmony_ci cleanup_transaction(trans, ret); 24648c2ecf20Sopenharmony_ci 24658c2ecf20Sopenharmony_ci return ret; 24668c2ecf20Sopenharmony_ci} 24678c2ecf20Sopenharmony_ci 24688c2ecf20Sopenharmony_ci/* 24698c2ecf20Sopenharmony_ci * return < 0 if error 24708c2ecf20Sopenharmony_ci * 0 if there are no more dead_roots at the time of call 24718c2ecf20Sopenharmony_ci * 1 there are more to be processed, call me again 24728c2ecf20Sopenharmony_ci * 24738c2ecf20Sopenharmony_ci * The return value indicates there are certainly more snapshots to delete, but 24748c2ecf20Sopenharmony_ci * if there comes a new one during processing, it may return 0. We don't mind, 24758c2ecf20Sopenharmony_ci * because btrfs_commit_super will poke cleaner thread and it will process it a 24768c2ecf20Sopenharmony_ci * few seconds later. 24778c2ecf20Sopenharmony_ci */ 24788c2ecf20Sopenharmony_ciint btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) 24798c2ecf20Sopenharmony_ci{ 24808c2ecf20Sopenharmony_ci int ret; 24818c2ecf20Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 24828c2ecf20Sopenharmony_ci 24838c2ecf20Sopenharmony_ci spin_lock(&fs_info->trans_lock); 24848c2ecf20Sopenharmony_ci if (list_empty(&fs_info->dead_roots)) { 24858c2ecf20Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 24868c2ecf20Sopenharmony_ci return 0; 24878c2ecf20Sopenharmony_ci } 24888c2ecf20Sopenharmony_ci root = list_first_entry(&fs_info->dead_roots, 24898c2ecf20Sopenharmony_ci struct btrfs_root, root_list); 24908c2ecf20Sopenharmony_ci list_del_init(&root->root_list); 24918c2ecf20Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 24928c2ecf20Sopenharmony_ci 24938c2ecf20Sopenharmony_ci btrfs_debug(fs_info, "cleaner removing %llu", root->root_key.objectid); 24948c2ecf20Sopenharmony_ci 24958c2ecf20Sopenharmony_ci btrfs_kill_all_delayed_nodes(root); 
24968c2ecf20Sopenharmony_ci if (root->ino_cache_inode) { 24978c2ecf20Sopenharmony_ci iput(root->ino_cache_inode); 24988c2ecf20Sopenharmony_ci root->ino_cache_inode = NULL; 24998c2ecf20Sopenharmony_ci } 25008c2ecf20Sopenharmony_ci 25018c2ecf20Sopenharmony_ci if (btrfs_header_backref_rev(root->node) < 25028c2ecf20Sopenharmony_ci BTRFS_MIXED_BACKREF_REV) 25038c2ecf20Sopenharmony_ci ret = btrfs_drop_snapshot(root, 0, 0); 25048c2ecf20Sopenharmony_ci else 25058c2ecf20Sopenharmony_ci ret = btrfs_drop_snapshot(root, 1, 0); 25068c2ecf20Sopenharmony_ci 25078c2ecf20Sopenharmony_ci btrfs_put_root(root); 25088c2ecf20Sopenharmony_ci return (ret < 0) ? 0 : 1; 25098c2ecf20Sopenharmony_ci} 25108c2ecf20Sopenharmony_ci 25118c2ecf20Sopenharmony_civoid btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info) 25128c2ecf20Sopenharmony_ci{ 25138c2ecf20Sopenharmony_ci unsigned long prev; 25148c2ecf20Sopenharmony_ci unsigned long bit; 25158c2ecf20Sopenharmony_ci 25168c2ecf20Sopenharmony_ci prev = xchg(&fs_info->pending_changes, 0); 25178c2ecf20Sopenharmony_ci if (!prev) 25188c2ecf20Sopenharmony_ci return; 25198c2ecf20Sopenharmony_ci 25208c2ecf20Sopenharmony_ci bit = 1 << BTRFS_PENDING_SET_INODE_MAP_CACHE; 25218c2ecf20Sopenharmony_ci if (prev & bit) 25228c2ecf20Sopenharmony_ci btrfs_set_opt(fs_info->mount_opt, INODE_MAP_CACHE); 25238c2ecf20Sopenharmony_ci prev &= ~bit; 25248c2ecf20Sopenharmony_ci 25258c2ecf20Sopenharmony_ci bit = 1 << BTRFS_PENDING_CLEAR_INODE_MAP_CACHE; 25268c2ecf20Sopenharmony_ci if (prev & bit) 25278c2ecf20Sopenharmony_ci btrfs_clear_opt(fs_info->mount_opt, INODE_MAP_CACHE); 25288c2ecf20Sopenharmony_ci prev &= ~bit; 25298c2ecf20Sopenharmony_ci 25308c2ecf20Sopenharmony_ci bit = 1 << BTRFS_PENDING_COMMIT; 25318c2ecf20Sopenharmony_ci if (prev & bit) 25328c2ecf20Sopenharmony_ci btrfs_debug(fs_info, "pending commit done"); 25338c2ecf20Sopenharmony_ci prev &= ~bit; 25348c2ecf20Sopenharmony_ci 25358c2ecf20Sopenharmony_ci if (prev) 25368c2ecf20Sopenharmony_ci 
btrfs_warn(fs_info, 25378c2ecf20Sopenharmony_ci "unknown pending changes left 0x%lx, ignoring", prev); 25388c2ecf20Sopenharmony_ci} 2539