18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0+ 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * linux/fs/jbd2/transaction.c 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Copyright 1998 Red Hat corp --- All Rights Reserved 88c2ecf20Sopenharmony_ci * 98c2ecf20Sopenharmony_ci * Generic filesystem transaction handling code; part of the ext2fs 108c2ecf20Sopenharmony_ci * journaling system. 118c2ecf20Sopenharmony_ci * 128c2ecf20Sopenharmony_ci * This file manages transactions (compound commits managed by the 138c2ecf20Sopenharmony_ci * journaling code) and handles (individual atomic operations by the 148c2ecf20Sopenharmony_ci * filesystem). 158c2ecf20Sopenharmony_ci */ 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_ci#include <linux/time.h> 188c2ecf20Sopenharmony_ci#include <linux/fs.h> 198c2ecf20Sopenharmony_ci#include <linux/jbd2.h> 208c2ecf20Sopenharmony_ci#include <linux/errno.h> 218c2ecf20Sopenharmony_ci#include <linux/slab.h> 228c2ecf20Sopenharmony_ci#include <linux/timer.h> 238c2ecf20Sopenharmony_ci#include <linux/mm.h> 248c2ecf20Sopenharmony_ci#include <linux/highmem.h> 258c2ecf20Sopenharmony_ci#include <linux/hrtimer.h> 268c2ecf20Sopenharmony_ci#include <linux/backing-dev.h> 278c2ecf20Sopenharmony_ci#include <linux/bug.h> 288c2ecf20Sopenharmony_ci#include <linux/module.h> 298c2ecf20Sopenharmony_ci#include <linux/sched/mm.h> 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_ci#include <trace/events/jbd2.h> 328c2ecf20Sopenharmony_ci 338c2ecf20Sopenharmony_cistatic void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); 348c2ecf20Sopenharmony_cistatic void __jbd2_journal_unfile_buffer(struct journal_head *jh); 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_cistatic struct kmem_cache *transaction_cache; 378c2ecf20Sopenharmony_ciint __init jbd2_journal_init_transaction_cache(void) 388c2ecf20Sopenharmony_ci{ 
398c2ecf20Sopenharmony_ci J_ASSERT(!transaction_cache); 408c2ecf20Sopenharmony_ci transaction_cache = kmem_cache_create("jbd2_transaction_s", 418c2ecf20Sopenharmony_ci sizeof(transaction_t), 428c2ecf20Sopenharmony_ci 0, 438c2ecf20Sopenharmony_ci SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, 448c2ecf20Sopenharmony_ci NULL); 458c2ecf20Sopenharmony_ci if (!transaction_cache) { 468c2ecf20Sopenharmony_ci pr_emerg("JBD2: failed to create transaction cache\n"); 478c2ecf20Sopenharmony_ci return -ENOMEM; 488c2ecf20Sopenharmony_ci } 498c2ecf20Sopenharmony_ci return 0; 508c2ecf20Sopenharmony_ci} 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_civoid jbd2_journal_destroy_transaction_cache(void) 538c2ecf20Sopenharmony_ci{ 548c2ecf20Sopenharmony_ci kmem_cache_destroy(transaction_cache); 558c2ecf20Sopenharmony_ci transaction_cache = NULL; 568c2ecf20Sopenharmony_ci} 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_civoid jbd2_journal_free_transaction(transaction_t *transaction) 598c2ecf20Sopenharmony_ci{ 608c2ecf20Sopenharmony_ci if (unlikely(ZERO_OR_NULL_PTR(transaction))) 618c2ecf20Sopenharmony_ci return; 628c2ecf20Sopenharmony_ci kmem_cache_free(transaction_cache, transaction); 638c2ecf20Sopenharmony_ci} 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_ci/* 668c2ecf20Sopenharmony_ci * Base amount of descriptor blocks we reserve for each transaction. 
678c2ecf20Sopenharmony_ci */ 688c2ecf20Sopenharmony_cistatic int jbd2_descriptor_blocks_per_trans(journal_t *journal) 698c2ecf20Sopenharmony_ci{ 708c2ecf20Sopenharmony_ci int tag_space = journal->j_blocksize - sizeof(journal_header_t); 718c2ecf20Sopenharmony_ci int tags_per_block; 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_ci /* Subtract UUID */ 748c2ecf20Sopenharmony_ci tag_space -= 16; 758c2ecf20Sopenharmony_ci if (jbd2_journal_has_csum_v2or3(journal)) 768c2ecf20Sopenharmony_ci tag_space -= sizeof(struct jbd2_journal_block_tail); 778c2ecf20Sopenharmony_ci /* Commit code leaves a slack space of 16 bytes at the end of block */ 788c2ecf20Sopenharmony_ci tags_per_block = (tag_space - 16) / journal_tag_bytes(journal); 798c2ecf20Sopenharmony_ci /* 808c2ecf20Sopenharmony_ci * Revoke descriptors are accounted separately so we need to reserve 818c2ecf20Sopenharmony_ci * space for commit block and normal transaction descriptor blocks. 828c2ecf20Sopenharmony_ci */ 838c2ecf20Sopenharmony_ci return 1 + DIV_ROUND_UP(journal->j_max_transaction_buffers, 848c2ecf20Sopenharmony_ci tags_per_block); 858c2ecf20Sopenharmony_ci} 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_ci/* 888c2ecf20Sopenharmony_ci * jbd2_get_transaction: obtain a new transaction_t object. 898c2ecf20Sopenharmony_ci * 908c2ecf20Sopenharmony_ci * Simply initialise a new transaction. Initialize it in 918c2ecf20Sopenharmony_ci * RUNNING state and add it to the current journal (which should not 928c2ecf20Sopenharmony_ci * have an existing running transaction: we only make a new transaction 938c2ecf20Sopenharmony_ci * once we have started to commit the old one). 948c2ecf20Sopenharmony_ci * 958c2ecf20Sopenharmony_ci * Preconditions: 968c2ecf20Sopenharmony_ci * The journal MUST be locked. We don't perform atomic mallocs on the 978c2ecf20Sopenharmony_ci * new transaction and we can't block without protecting against other 988c2ecf20Sopenharmony_ci * processes trying to touch the journal while it is in transition. 
998c2ecf20Sopenharmony_ci * 1008c2ecf20Sopenharmony_ci */ 1018c2ecf20Sopenharmony_ci 1028c2ecf20Sopenharmony_cistatic void jbd2_get_transaction(journal_t *journal, 1038c2ecf20Sopenharmony_ci transaction_t *transaction) 1048c2ecf20Sopenharmony_ci{ 1058c2ecf20Sopenharmony_ci transaction->t_journal = journal; 1068c2ecf20Sopenharmony_ci transaction->t_state = T_RUNNING; 1078c2ecf20Sopenharmony_ci transaction->t_start_time = ktime_get(); 1088c2ecf20Sopenharmony_ci transaction->t_tid = journal->j_transaction_sequence++; 1098c2ecf20Sopenharmony_ci transaction->t_expires = jiffies + journal->j_commit_interval; 1108c2ecf20Sopenharmony_ci spin_lock_init(&transaction->t_handle_lock); 1118c2ecf20Sopenharmony_ci atomic_set(&transaction->t_updates, 0); 1128c2ecf20Sopenharmony_ci atomic_set(&transaction->t_outstanding_credits, 1138c2ecf20Sopenharmony_ci jbd2_descriptor_blocks_per_trans(journal) + 1148c2ecf20Sopenharmony_ci atomic_read(&journal->j_reserved_credits)); 1158c2ecf20Sopenharmony_ci atomic_set(&transaction->t_outstanding_revokes, 0); 1168c2ecf20Sopenharmony_ci atomic_set(&transaction->t_handle_count, 0); 1178c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&transaction->t_inode_list); 1188c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&transaction->t_private_list); 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci /* Set up the commit timer for the new transaction. 
*/ 1218c2ecf20Sopenharmony_ci journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires); 1228c2ecf20Sopenharmony_ci add_timer(&journal->j_commit_timer); 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_ci J_ASSERT(journal->j_running_transaction == NULL); 1258c2ecf20Sopenharmony_ci journal->j_running_transaction = transaction; 1268c2ecf20Sopenharmony_ci transaction->t_max_wait = 0; 1278c2ecf20Sopenharmony_ci transaction->t_start = jiffies; 1288c2ecf20Sopenharmony_ci transaction->t_requested = 0; 1298c2ecf20Sopenharmony_ci} 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_ci/* 1328c2ecf20Sopenharmony_ci * Handle management. 1338c2ecf20Sopenharmony_ci * 1348c2ecf20Sopenharmony_ci * A handle_t is an object which represents a single atomic update to a 1358c2ecf20Sopenharmony_ci * filesystem, and which tracks all of the modifications which form part 1368c2ecf20Sopenharmony_ci * of that one update. 1378c2ecf20Sopenharmony_ci */ 1388c2ecf20Sopenharmony_ci 1398c2ecf20Sopenharmony_ci/* 1408c2ecf20Sopenharmony_ci * Update transaction's maximum wait time, if debugging is enabled. 1418c2ecf20Sopenharmony_ci * 1428c2ecf20Sopenharmony_ci * In order for t_max_wait to be reliable, it must be protected by a 1438c2ecf20Sopenharmony_ci * lock. But doing so will mean that start_this_handle() can not be 1448c2ecf20Sopenharmony_ci * run in parallel on SMP systems, which limits our scalability. So 1458c2ecf20Sopenharmony_ci * unless debugging is enabled, we no longer update t_max_wait, which 1468c2ecf20Sopenharmony_ci * means that maximum wait time reported by the jbd2_run_stats 1478c2ecf20Sopenharmony_ci * tracepoint will always be zero. 
1488c2ecf20Sopenharmony_ci */ 1498c2ecf20Sopenharmony_cistatic inline void update_t_max_wait(transaction_t *transaction, 1508c2ecf20Sopenharmony_ci unsigned long ts) 1518c2ecf20Sopenharmony_ci{ 1528c2ecf20Sopenharmony_ci#ifdef CONFIG_JBD2_DEBUG 1538c2ecf20Sopenharmony_ci if (jbd2_journal_enable_debug && 1548c2ecf20Sopenharmony_ci time_after(transaction->t_start, ts)) { 1558c2ecf20Sopenharmony_ci ts = jbd2_time_diff(ts, transaction->t_start); 1568c2ecf20Sopenharmony_ci spin_lock(&transaction->t_handle_lock); 1578c2ecf20Sopenharmony_ci if (ts > transaction->t_max_wait) 1588c2ecf20Sopenharmony_ci transaction->t_max_wait = ts; 1598c2ecf20Sopenharmony_ci spin_unlock(&transaction->t_handle_lock); 1608c2ecf20Sopenharmony_ci } 1618c2ecf20Sopenharmony_ci#endif 1628c2ecf20Sopenharmony_ci} 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_ci/* 1658c2ecf20Sopenharmony_ci * Wait until running transaction passes to T_FLUSH state and new transaction 1668c2ecf20Sopenharmony_ci * can thus be started. Also starts the commit if needed. The function expects 1678c2ecf20Sopenharmony_ci * running transaction to exist and releases j_state_lock. 
1688c2ecf20Sopenharmony_ci */ 1698c2ecf20Sopenharmony_cistatic void wait_transaction_locked(journal_t *journal) 1708c2ecf20Sopenharmony_ci __releases(journal->j_state_lock) 1718c2ecf20Sopenharmony_ci{ 1728c2ecf20Sopenharmony_ci DEFINE_WAIT(wait); 1738c2ecf20Sopenharmony_ci int need_to_start; 1748c2ecf20Sopenharmony_ci tid_t tid = journal->j_running_transaction->t_tid; 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_ci prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait, 1778c2ecf20Sopenharmony_ci TASK_UNINTERRUPTIBLE); 1788c2ecf20Sopenharmony_ci need_to_start = !tid_geq(journal->j_commit_request, tid); 1798c2ecf20Sopenharmony_ci read_unlock(&journal->j_state_lock); 1808c2ecf20Sopenharmony_ci if (need_to_start) 1818c2ecf20Sopenharmony_ci jbd2_log_start_commit(journal, tid); 1828c2ecf20Sopenharmony_ci jbd2_might_wait_for_commit(journal); 1838c2ecf20Sopenharmony_ci schedule(); 1848c2ecf20Sopenharmony_ci finish_wait(&journal->j_wait_transaction_locked, &wait); 1858c2ecf20Sopenharmony_ci} 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci/* 1888c2ecf20Sopenharmony_ci * Wait until running transaction transitions from T_SWITCH to T_FLUSH 1898c2ecf20Sopenharmony_ci * state and new transaction can thus be started. The function releases 1908c2ecf20Sopenharmony_ci * j_state_lock. 
1918c2ecf20Sopenharmony_ci */ 1928c2ecf20Sopenharmony_cistatic void wait_transaction_switching(journal_t *journal) 1938c2ecf20Sopenharmony_ci __releases(journal->j_state_lock) 1948c2ecf20Sopenharmony_ci{ 1958c2ecf20Sopenharmony_ci DEFINE_WAIT(wait); 1968c2ecf20Sopenharmony_ci 1978c2ecf20Sopenharmony_ci if (WARN_ON(!journal->j_running_transaction || 1988c2ecf20Sopenharmony_ci journal->j_running_transaction->t_state != T_SWITCH)) { 1998c2ecf20Sopenharmony_ci read_unlock(&journal->j_state_lock); 2008c2ecf20Sopenharmony_ci return; 2018c2ecf20Sopenharmony_ci } 2028c2ecf20Sopenharmony_ci prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait, 2038c2ecf20Sopenharmony_ci TASK_UNINTERRUPTIBLE); 2048c2ecf20Sopenharmony_ci read_unlock(&journal->j_state_lock); 2058c2ecf20Sopenharmony_ci /* 2068c2ecf20Sopenharmony_ci * We don't call jbd2_might_wait_for_commit() here as there's no 2078c2ecf20Sopenharmony_ci * waiting for outstanding handles happening anymore in T_SWITCH state 2088c2ecf20Sopenharmony_ci * and handling of reserved handles actually relies on that for 2098c2ecf20Sopenharmony_ci * correctness. 2108c2ecf20Sopenharmony_ci */ 2118c2ecf20Sopenharmony_ci schedule(); 2128c2ecf20Sopenharmony_ci finish_wait(&journal->j_wait_transaction_locked, &wait); 2138c2ecf20Sopenharmony_ci} 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_cistatic void sub_reserved_credits(journal_t *journal, int blocks) 2168c2ecf20Sopenharmony_ci{ 2178c2ecf20Sopenharmony_ci atomic_sub(blocks, &journal->j_reserved_credits); 2188c2ecf20Sopenharmony_ci wake_up(&journal->j_wait_reserved); 2198c2ecf20Sopenharmony_ci} 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ci/* 2228c2ecf20Sopenharmony_ci * Wait until we can add credits for handle to the running transaction. Called 2238c2ecf20Sopenharmony_ci * with j_state_lock held for reading. Returns 0 if handle joined the running 2248c2ecf20Sopenharmony_ci * transaction. 
Returns 1 if we had to wait, j_state_lock is dropped, and 2258c2ecf20Sopenharmony_ci * caller must retry. 2268c2ecf20Sopenharmony_ci */ 2278c2ecf20Sopenharmony_cistatic int add_transaction_credits(journal_t *journal, int blocks, 2288c2ecf20Sopenharmony_ci int rsv_blocks) 2298c2ecf20Sopenharmony_ci{ 2308c2ecf20Sopenharmony_ci transaction_t *t = journal->j_running_transaction; 2318c2ecf20Sopenharmony_ci int needed; 2328c2ecf20Sopenharmony_ci int total = blocks + rsv_blocks; 2338c2ecf20Sopenharmony_ci 2348c2ecf20Sopenharmony_ci /* 2358c2ecf20Sopenharmony_ci * If the current transaction is locked down for commit, wait 2368c2ecf20Sopenharmony_ci * for the lock to be released. 2378c2ecf20Sopenharmony_ci */ 2388c2ecf20Sopenharmony_ci if (t->t_state != T_RUNNING) { 2398c2ecf20Sopenharmony_ci WARN_ON_ONCE(t->t_state >= T_FLUSH); 2408c2ecf20Sopenharmony_ci wait_transaction_locked(journal); 2418c2ecf20Sopenharmony_ci return 1; 2428c2ecf20Sopenharmony_ci } 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci /* 2458c2ecf20Sopenharmony_ci * If there is not enough space left in the log to write all 2468c2ecf20Sopenharmony_ci * potential buffers requested by this operation, we need to 2478c2ecf20Sopenharmony_ci * stall pending a log checkpoint to free some more log space. 2488c2ecf20Sopenharmony_ci */ 2498c2ecf20Sopenharmony_ci needed = atomic_add_return(total, &t->t_outstanding_credits); 2508c2ecf20Sopenharmony_ci if (needed > journal->j_max_transaction_buffers) { 2518c2ecf20Sopenharmony_ci /* 2528c2ecf20Sopenharmony_ci * If the current transaction is already too large, 2538c2ecf20Sopenharmony_ci * then start to commit it: we can then go back and 2548c2ecf20Sopenharmony_ci * attach this handle to a new transaction. 
2558c2ecf20Sopenharmony_ci */ 2568c2ecf20Sopenharmony_ci atomic_sub(total, &t->t_outstanding_credits); 2578c2ecf20Sopenharmony_ci 2588c2ecf20Sopenharmony_ci /* 2598c2ecf20Sopenharmony_ci * Is the number of reserved credits in the current transaction too 2608c2ecf20Sopenharmony_ci * big to fit this handle? Wait until reserved credits are freed. 2618c2ecf20Sopenharmony_ci */ 2628c2ecf20Sopenharmony_ci if (atomic_read(&journal->j_reserved_credits) + total > 2638c2ecf20Sopenharmony_ci journal->j_max_transaction_buffers) { 2648c2ecf20Sopenharmony_ci read_unlock(&journal->j_state_lock); 2658c2ecf20Sopenharmony_ci jbd2_might_wait_for_commit(journal); 2668c2ecf20Sopenharmony_ci wait_event(journal->j_wait_reserved, 2678c2ecf20Sopenharmony_ci atomic_read(&journal->j_reserved_credits) + total <= 2688c2ecf20Sopenharmony_ci journal->j_max_transaction_buffers); 2698c2ecf20Sopenharmony_ci return 1; 2708c2ecf20Sopenharmony_ci } 2718c2ecf20Sopenharmony_ci 2728c2ecf20Sopenharmony_ci wait_transaction_locked(journal); 2738c2ecf20Sopenharmony_ci return 1; 2748c2ecf20Sopenharmony_ci } 2758c2ecf20Sopenharmony_ci 2768c2ecf20Sopenharmony_ci /* 2778c2ecf20Sopenharmony_ci * The commit code assumes that it can get enough log space 2788c2ecf20Sopenharmony_ci * without forcing a checkpoint. This is *critical* for 2798c2ecf20Sopenharmony_ci * correctness: a checkpoint of a buffer which is also 2808c2ecf20Sopenharmony_ci * associated with a committing transaction creates a deadlock, 2818c2ecf20Sopenharmony_ci * so commit simply cannot force through checkpoints. 2828c2ecf20Sopenharmony_ci * 2838c2ecf20Sopenharmony_ci * We must therefore ensure the necessary space in the journal 2848c2ecf20Sopenharmony_ci * *before* starting to dirty potentially checkpointed buffers 2858c2ecf20Sopenharmony_ci * in the new transaction. 
2868c2ecf20Sopenharmony_ci */ 2878c2ecf20Sopenharmony_ci if (jbd2_log_space_left(journal) < journal->j_max_transaction_buffers) { 2888c2ecf20Sopenharmony_ci atomic_sub(total, &t->t_outstanding_credits); 2898c2ecf20Sopenharmony_ci read_unlock(&journal->j_state_lock); 2908c2ecf20Sopenharmony_ci jbd2_might_wait_for_commit(journal); 2918c2ecf20Sopenharmony_ci write_lock(&journal->j_state_lock); 2928c2ecf20Sopenharmony_ci if (jbd2_log_space_left(journal) < 2938c2ecf20Sopenharmony_ci journal->j_max_transaction_buffers) 2948c2ecf20Sopenharmony_ci __jbd2_log_wait_for_space(journal); 2958c2ecf20Sopenharmony_ci write_unlock(&journal->j_state_lock); 2968c2ecf20Sopenharmony_ci return 1; 2978c2ecf20Sopenharmony_ci } 2988c2ecf20Sopenharmony_ci 2998c2ecf20Sopenharmony_ci /* No reservation? We are done... */ 3008c2ecf20Sopenharmony_ci if (!rsv_blocks) 3018c2ecf20Sopenharmony_ci return 0; 3028c2ecf20Sopenharmony_ci 3038c2ecf20Sopenharmony_ci needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits); 3048c2ecf20Sopenharmony_ci /* We allow at most half of a transaction to be reserved */ 3058c2ecf20Sopenharmony_ci if (needed > journal->j_max_transaction_buffers / 2) { 3068c2ecf20Sopenharmony_ci sub_reserved_credits(journal, rsv_blocks); 3078c2ecf20Sopenharmony_ci atomic_sub(total, &t->t_outstanding_credits); 3088c2ecf20Sopenharmony_ci read_unlock(&journal->j_state_lock); 3098c2ecf20Sopenharmony_ci jbd2_might_wait_for_commit(journal); 3108c2ecf20Sopenharmony_ci wait_event(journal->j_wait_reserved, 3118c2ecf20Sopenharmony_ci atomic_read(&journal->j_reserved_credits) + rsv_blocks 3128c2ecf20Sopenharmony_ci <= journal->j_max_transaction_buffers / 2); 3138c2ecf20Sopenharmony_ci return 1; 3148c2ecf20Sopenharmony_ci } 3158c2ecf20Sopenharmony_ci return 0; 3168c2ecf20Sopenharmony_ci} 3178c2ecf20Sopenharmony_ci 3188c2ecf20Sopenharmony_ci/* 3198c2ecf20Sopenharmony_ci * start_this_handle: Given a handle, deal with any locking or stalling 3208c2ecf20Sopenharmony_ci * needed to make sure 
that there is enough journal space for the handle 3218c2ecf20Sopenharmony_ci * to begin. Attach the handle to a transaction and set up the 3228c2ecf20Sopenharmony_ci * transaction's buffer credits. 3238c2ecf20Sopenharmony_ci */ 3248c2ecf20Sopenharmony_ci 3258c2ecf20Sopenharmony_cistatic int start_this_handle(journal_t *journal, handle_t *handle, 3268c2ecf20Sopenharmony_ci gfp_t gfp_mask) 3278c2ecf20Sopenharmony_ci{ 3288c2ecf20Sopenharmony_ci transaction_t *transaction, *new_transaction = NULL; 3298c2ecf20Sopenharmony_ci int blocks = handle->h_total_credits; 3308c2ecf20Sopenharmony_ci int rsv_blocks = 0; 3318c2ecf20Sopenharmony_ci unsigned long ts = jiffies; 3328c2ecf20Sopenharmony_ci 3338c2ecf20Sopenharmony_ci if (handle->h_rsv_handle) 3348c2ecf20Sopenharmony_ci rsv_blocks = handle->h_rsv_handle->h_total_credits; 3358c2ecf20Sopenharmony_ci 3368c2ecf20Sopenharmony_ci /* 3378c2ecf20Sopenharmony_ci * Limit the number of reserved credits to 1/2 of maximum transaction 3388c2ecf20Sopenharmony_ci * size and limit the number of total credits to not exceed maximum 3398c2ecf20Sopenharmony_ci * transaction size per operation. 
3408c2ecf20Sopenharmony_ci */ 3418c2ecf20Sopenharmony_ci if ((rsv_blocks > journal->j_max_transaction_buffers / 2) || 3428c2ecf20Sopenharmony_ci (rsv_blocks + blocks > journal->j_max_transaction_buffers)) { 3438c2ecf20Sopenharmony_ci printk(KERN_ERR "JBD2: %s wants too many credits " 3448c2ecf20Sopenharmony_ci "credits:%d rsv_credits:%d max:%d\n", 3458c2ecf20Sopenharmony_ci current->comm, blocks, rsv_blocks, 3468c2ecf20Sopenharmony_ci journal->j_max_transaction_buffers); 3478c2ecf20Sopenharmony_ci WARN_ON(1); 3488c2ecf20Sopenharmony_ci return -ENOSPC; 3498c2ecf20Sopenharmony_ci } 3508c2ecf20Sopenharmony_ci 3518c2ecf20Sopenharmony_cialloc_transaction: 3528c2ecf20Sopenharmony_ci /* 3538c2ecf20Sopenharmony_ci * This check is racy but it is just an optimization of allocating new 3548c2ecf20Sopenharmony_ci * transaction early if there are high chances we'll need it. If we 3558c2ecf20Sopenharmony_ci * guess wrong, we'll retry or free unused transaction. 3568c2ecf20Sopenharmony_ci */ 3578c2ecf20Sopenharmony_ci if (!data_race(journal->j_running_transaction)) { 3588c2ecf20Sopenharmony_ci /* 3598c2ecf20Sopenharmony_ci * If __GFP_FS is not present, then we may be being called from 3608c2ecf20Sopenharmony_ci * inside the fs writeback layer, so we MUST NOT fail. 
3618c2ecf20Sopenharmony_ci */ 3628c2ecf20Sopenharmony_ci if ((gfp_mask & __GFP_FS) == 0) 3638c2ecf20Sopenharmony_ci gfp_mask |= __GFP_NOFAIL; 3648c2ecf20Sopenharmony_ci new_transaction = kmem_cache_zalloc(transaction_cache, 3658c2ecf20Sopenharmony_ci gfp_mask); 3668c2ecf20Sopenharmony_ci if (!new_transaction) 3678c2ecf20Sopenharmony_ci return -ENOMEM; 3688c2ecf20Sopenharmony_ci } 3698c2ecf20Sopenharmony_ci 3708c2ecf20Sopenharmony_ci jbd_debug(3, "New handle %p going live.\n", handle); 3718c2ecf20Sopenharmony_ci 3728c2ecf20Sopenharmony_ci /* 3738c2ecf20Sopenharmony_ci * We need to hold j_state_lock until t_updates has been incremented, 3748c2ecf20Sopenharmony_ci * for proper journal barrier handling 3758c2ecf20Sopenharmony_ci */ 3768c2ecf20Sopenharmony_cirepeat: 3778c2ecf20Sopenharmony_ci read_lock(&journal->j_state_lock); 3788c2ecf20Sopenharmony_ci BUG_ON(journal->j_flags & JBD2_UNMOUNT); 3798c2ecf20Sopenharmony_ci if (is_journal_aborted(journal) || 3808c2ecf20Sopenharmony_ci (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { 3818c2ecf20Sopenharmony_ci read_unlock(&journal->j_state_lock); 3828c2ecf20Sopenharmony_ci jbd2_journal_free_transaction(new_transaction); 3838c2ecf20Sopenharmony_ci return -EROFS; 3848c2ecf20Sopenharmony_ci } 3858c2ecf20Sopenharmony_ci 3868c2ecf20Sopenharmony_ci /* 3878c2ecf20Sopenharmony_ci * Wait on the journal's transaction barrier if necessary. Specifically 3888c2ecf20Sopenharmony_ci * we allow reserved handles to proceed because otherwise commit could 3898c2ecf20Sopenharmony_ci * deadlock on page writeback not being able to complete. 
3908c2ecf20Sopenharmony_ci */ 3918c2ecf20Sopenharmony_ci if (!handle->h_reserved && journal->j_barrier_count) { 3928c2ecf20Sopenharmony_ci read_unlock(&journal->j_state_lock); 3938c2ecf20Sopenharmony_ci wait_event(journal->j_wait_transaction_locked, 3948c2ecf20Sopenharmony_ci journal->j_barrier_count == 0); 3958c2ecf20Sopenharmony_ci goto repeat; 3968c2ecf20Sopenharmony_ci } 3978c2ecf20Sopenharmony_ci 3988c2ecf20Sopenharmony_ci if (!journal->j_running_transaction) { 3998c2ecf20Sopenharmony_ci read_unlock(&journal->j_state_lock); 4008c2ecf20Sopenharmony_ci if (!new_transaction) 4018c2ecf20Sopenharmony_ci goto alloc_transaction; 4028c2ecf20Sopenharmony_ci write_lock(&journal->j_state_lock); 4038c2ecf20Sopenharmony_ci if (!journal->j_running_transaction && 4048c2ecf20Sopenharmony_ci (handle->h_reserved || !journal->j_barrier_count)) { 4058c2ecf20Sopenharmony_ci jbd2_get_transaction(journal, new_transaction); 4068c2ecf20Sopenharmony_ci new_transaction = NULL; 4078c2ecf20Sopenharmony_ci } 4088c2ecf20Sopenharmony_ci write_unlock(&journal->j_state_lock); 4098c2ecf20Sopenharmony_ci goto repeat; 4108c2ecf20Sopenharmony_ci } 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_ci transaction = journal->j_running_transaction; 4138c2ecf20Sopenharmony_ci 4148c2ecf20Sopenharmony_ci if (!handle->h_reserved) { 4158c2ecf20Sopenharmony_ci /* We may have dropped j_state_lock - restart in that case */ 4168c2ecf20Sopenharmony_ci if (add_transaction_credits(journal, blocks, rsv_blocks)) 4178c2ecf20Sopenharmony_ci goto repeat; 4188c2ecf20Sopenharmony_ci } else { 4198c2ecf20Sopenharmony_ci /* 4208c2ecf20Sopenharmony_ci * We have handle reserved so we are allowed to join T_LOCKED 4218c2ecf20Sopenharmony_ci * transaction and we don't have to check for transaction size 4228c2ecf20Sopenharmony_ci * and journal space. 
But we still have to wait while running 4238c2ecf20Sopenharmony_ci * transaction is being switched to a committing one as it 4248c2ecf20Sopenharmony_ci * won't wait for any handles anymore. 4258c2ecf20Sopenharmony_ci */ 4268c2ecf20Sopenharmony_ci if (transaction->t_state == T_SWITCH) { 4278c2ecf20Sopenharmony_ci wait_transaction_switching(journal); 4288c2ecf20Sopenharmony_ci goto repeat; 4298c2ecf20Sopenharmony_ci } 4308c2ecf20Sopenharmony_ci sub_reserved_credits(journal, blocks); 4318c2ecf20Sopenharmony_ci handle->h_reserved = 0; 4328c2ecf20Sopenharmony_ci } 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_ci /* OK, account for the buffers that this operation expects to 4358c2ecf20Sopenharmony_ci * use and add the handle to the running transaction. 4368c2ecf20Sopenharmony_ci */ 4378c2ecf20Sopenharmony_ci update_t_max_wait(transaction, ts); 4388c2ecf20Sopenharmony_ci handle->h_transaction = transaction; 4398c2ecf20Sopenharmony_ci handle->h_requested_credits = blocks; 4408c2ecf20Sopenharmony_ci handle->h_revoke_credits_requested = handle->h_revoke_credits; 4418c2ecf20Sopenharmony_ci handle->h_start_jiffies = jiffies; 4428c2ecf20Sopenharmony_ci atomic_inc(&transaction->t_updates); 4438c2ecf20Sopenharmony_ci atomic_inc(&transaction->t_handle_count); 4448c2ecf20Sopenharmony_ci jbd_debug(4, "Handle %p given %d credits (total %d, free %lu)\n", 4458c2ecf20Sopenharmony_ci handle, blocks, 4468c2ecf20Sopenharmony_ci atomic_read(&transaction->t_outstanding_credits), 4478c2ecf20Sopenharmony_ci jbd2_log_space_left(journal)); 4488c2ecf20Sopenharmony_ci read_unlock(&journal->j_state_lock); 4498c2ecf20Sopenharmony_ci current->journal_info = handle; 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_ci rwsem_acquire_read(&journal->j_trans_commit_map, 0, 0, _THIS_IP_); 4528c2ecf20Sopenharmony_ci jbd2_journal_free_transaction(new_transaction); 4538c2ecf20Sopenharmony_ci /* 4548c2ecf20Sopenharmony_ci * Ensure that no allocations done while the transaction is open are 
4558c2ecf20Sopenharmony_ci * going to recurse back to the fs layer. 4568c2ecf20Sopenharmony_ci */ 4578c2ecf20Sopenharmony_ci handle->saved_alloc_context = memalloc_nofs_save(); 4588c2ecf20Sopenharmony_ci return 0; 4598c2ecf20Sopenharmony_ci} 4608c2ecf20Sopenharmony_ci 4618c2ecf20Sopenharmony_ci/* Allocate a new handle. This should probably be in a slab... */ 4628c2ecf20Sopenharmony_cistatic handle_t *new_handle(int nblocks) 4638c2ecf20Sopenharmony_ci{ 4648c2ecf20Sopenharmony_ci handle_t *handle = jbd2_alloc_handle(GFP_NOFS); 4658c2ecf20Sopenharmony_ci if (!handle) 4668c2ecf20Sopenharmony_ci return NULL; 4678c2ecf20Sopenharmony_ci handle->h_total_credits = nblocks; 4688c2ecf20Sopenharmony_ci handle->h_ref = 1; 4698c2ecf20Sopenharmony_ci 4708c2ecf20Sopenharmony_ci return handle; 4718c2ecf20Sopenharmony_ci} 4728c2ecf20Sopenharmony_ci 4738c2ecf20Sopenharmony_cihandle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks, 4748c2ecf20Sopenharmony_ci int revoke_records, gfp_t gfp_mask, 4758c2ecf20Sopenharmony_ci unsigned int type, unsigned int line_no) 4768c2ecf20Sopenharmony_ci{ 4778c2ecf20Sopenharmony_ci handle_t *handle = journal_current_handle(); 4788c2ecf20Sopenharmony_ci int err; 4798c2ecf20Sopenharmony_ci 4808c2ecf20Sopenharmony_ci if (!journal) 4818c2ecf20Sopenharmony_ci return ERR_PTR(-EROFS); 4828c2ecf20Sopenharmony_ci 4838c2ecf20Sopenharmony_ci if (handle) { 4848c2ecf20Sopenharmony_ci J_ASSERT(handle->h_transaction->t_journal == journal); 4858c2ecf20Sopenharmony_ci handle->h_ref++; 4868c2ecf20Sopenharmony_ci return handle; 4878c2ecf20Sopenharmony_ci } 4888c2ecf20Sopenharmony_ci 4898c2ecf20Sopenharmony_ci nblocks += DIV_ROUND_UP(revoke_records, 4908c2ecf20Sopenharmony_ci journal->j_revoke_records_per_block); 4918c2ecf20Sopenharmony_ci handle = new_handle(nblocks); 4928c2ecf20Sopenharmony_ci if (!handle) 4938c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 4948c2ecf20Sopenharmony_ci if (rsv_blocks) { 4958c2ecf20Sopenharmony_ci handle_t 
*rsv_handle; 4968c2ecf20Sopenharmony_ci 4978c2ecf20Sopenharmony_ci rsv_handle = new_handle(rsv_blocks); 4988c2ecf20Sopenharmony_ci if (!rsv_handle) { 4998c2ecf20Sopenharmony_ci jbd2_free_handle(handle); 5008c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 5018c2ecf20Sopenharmony_ci } 5028c2ecf20Sopenharmony_ci rsv_handle->h_reserved = 1; 5038c2ecf20Sopenharmony_ci rsv_handle->h_journal = journal; 5048c2ecf20Sopenharmony_ci handle->h_rsv_handle = rsv_handle; 5058c2ecf20Sopenharmony_ci } 5068c2ecf20Sopenharmony_ci handle->h_revoke_credits = revoke_records; 5078c2ecf20Sopenharmony_ci 5088c2ecf20Sopenharmony_ci err = start_this_handle(journal, handle, gfp_mask); 5098c2ecf20Sopenharmony_ci if (err < 0) { 5108c2ecf20Sopenharmony_ci if (handle->h_rsv_handle) 5118c2ecf20Sopenharmony_ci jbd2_free_handle(handle->h_rsv_handle); 5128c2ecf20Sopenharmony_ci jbd2_free_handle(handle); 5138c2ecf20Sopenharmony_ci return ERR_PTR(err); 5148c2ecf20Sopenharmony_ci } 5158c2ecf20Sopenharmony_ci handle->h_type = type; 5168c2ecf20Sopenharmony_ci handle->h_line_no = line_no; 5178c2ecf20Sopenharmony_ci trace_jbd2_handle_start(journal->j_fs_dev->bd_dev, 5188c2ecf20Sopenharmony_ci handle->h_transaction->t_tid, type, 5198c2ecf20Sopenharmony_ci line_no, nblocks); 5208c2ecf20Sopenharmony_ci 5218c2ecf20Sopenharmony_ci return handle; 5228c2ecf20Sopenharmony_ci} 5238c2ecf20Sopenharmony_ciEXPORT_SYMBOL(jbd2__journal_start); 5248c2ecf20Sopenharmony_ci 5258c2ecf20Sopenharmony_ci 5268c2ecf20Sopenharmony_ci/** 5278c2ecf20Sopenharmony_ci * jbd2_journal_start() - Obtain a new handle. 5288c2ecf20Sopenharmony_ci * @journal: Journal to start transaction on. 5298c2ecf20Sopenharmony_ci * @nblocks: number of block buffer we might modify 5308c2ecf20Sopenharmony_ci * 5318c2ecf20Sopenharmony_ci * We make sure that the transaction can guarantee at least nblocks of 5328c2ecf20Sopenharmony_ci * modified buffers in the log. We block until the log can guarantee 5338c2ecf20Sopenharmony_ci * that much space. 
Additionally, if rsv_blocks > 0, we also create another 5348c2ecf20Sopenharmony_ci * handle with rsv_blocks reserved blocks in the journal. This handle is 5358c2ecf20Sopenharmony_ci * stored in h_rsv_handle. It is not attached to any particular transaction 5368c2ecf20Sopenharmony_ci * and thus doesn't block transaction commit. If the caller uses this reserved 5378c2ecf20Sopenharmony_ci * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop() 5388c2ecf20Sopenharmony_ci * on the parent handle will dispose the reserved one. Reserved handle has to 5398c2ecf20Sopenharmony_ci * be converted to a normal handle using jbd2_journal_start_reserved() before 5408c2ecf20Sopenharmony_ci * it can be used. 5418c2ecf20Sopenharmony_ci * 5428c2ecf20Sopenharmony_ci * Return a pointer to a newly allocated handle, or an ERR_PTR() value 5438c2ecf20Sopenharmony_ci * on failure. 5448c2ecf20Sopenharmony_ci */ 5458c2ecf20Sopenharmony_cihandle_t *jbd2_journal_start(journal_t *journal, int nblocks) 5468c2ecf20Sopenharmony_ci{ 5478c2ecf20Sopenharmony_ci return jbd2__journal_start(journal, nblocks, 0, 0, GFP_NOFS, 0, 0); 5488c2ecf20Sopenharmony_ci} 5498c2ecf20Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_start); 5508c2ecf20Sopenharmony_ci 5518c2ecf20Sopenharmony_cistatic void __jbd2_journal_unreserve_handle(handle_t *handle, transaction_t *t) 5528c2ecf20Sopenharmony_ci{ 5538c2ecf20Sopenharmony_ci journal_t *journal = handle->h_journal; 5548c2ecf20Sopenharmony_ci 5558c2ecf20Sopenharmony_ci WARN_ON(!handle->h_reserved); 5568c2ecf20Sopenharmony_ci sub_reserved_credits(journal, handle->h_total_credits); 5578c2ecf20Sopenharmony_ci if (t) 5588c2ecf20Sopenharmony_ci atomic_sub(handle->h_total_credits, &t->t_outstanding_credits); 5598c2ecf20Sopenharmony_ci} 5608c2ecf20Sopenharmony_ci 5618c2ecf20Sopenharmony_civoid jbd2_journal_free_reserved(handle_t *handle) 5628c2ecf20Sopenharmony_ci{ 5638c2ecf20Sopenharmony_ci journal_t *journal = handle->h_journal; 5648c2ecf20Sopenharmony_ci 
5658c2ecf20Sopenharmony_ci /* Get j_state_lock to pin running transaction if it exists */ 5668c2ecf20Sopenharmony_ci read_lock(&journal->j_state_lock); 5678c2ecf20Sopenharmony_ci __jbd2_journal_unreserve_handle(handle, journal->j_running_transaction); 5688c2ecf20Sopenharmony_ci read_unlock(&journal->j_state_lock); 5698c2ecf20Sopenharmony_ci jbd2_free_handle(handle); 5708c2ecf20Sopenharmony_ci} 5718c2ecf20Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_free_reserved); 5728c2ecf20Sopenharmony_ci 5738c2ecf20Sopenharmony_ci/** 5748c2ecf20Sopenharmony_ci * jbd2_journal_start_reserved() - start reserved handle 5758c2ecf20Sopenharmony_ci * @handle: handle to start 5768c2ecf20Sopenharmony_ci * @type: for handle statistics 5778c2ecf20Sopenharmony_ci * @line_no: for handle statistics 5788c2ecf20Sopenharmony_ci * 5798c2ecf20Sopenharmony_ci * Start handle that has been previously reserved with jbd2_journal_reserve(). 5808c2ecf20Sopenharmony_ci * This attaches @handle to the running transaction (or creates one if there's 5818c2ecf20Sopenharmony_ci * not transaction running). Unlike jbd2_journal_start() this function cannot 5828c2ecf20Sopenharmony_ci * block on journal commit, checkpointing, or similar stuff. It can block on 5838c2ecf20Sopenharmony_ci * memory allocation or frozen journal though. 5848c2ecf20Sopenharmony_ci * 5858c2ecf20Sopenharmony_ci * Return 0 on success, non-zero on error - handle is freed in that case. 5868c2ecf20Sopenharmony_ci */ 5878c2ecf20Sopenharmony_ciint jbd2_journal_start_reserved(handle_t *handle, unsigned int type, 5888c2ecf20Sopenharmony_ci unsigned int line_no) 5898c2ecf20Sopenharmony_ci{ 5908c2ecf20Sopenharmony_ci journal_t *journal = handle->h_journal; 5918c2ecf20Sopenharmony_ci int ret = -EIO; 5928c2ecf20Sopenharmony_ci 5938c2ecf20Sopenharmony_ci if (WARN_ON(!handle->h_reserved)) { 5948c2ecf20Sopenharmony_ci /* Someone passed in normal handle? Just stop it. 
*/ 5958c2ecf20Sopenharmony_ci jbd2_journal_stop(handle); 5968c2ecf20Sopenharmony_ci return ret; 5978c2ecf20Sopenharmony_ci } 5988c2ecf20Sopenharmony_ci /* 5998c2ecf20Sopenharmony_ci * Usefulness of mixing of reserved and unreserved handles is 6008c2ecf20Sopenharmony_ci * questionable. So far nobody seems to need it so just error out. 6018c2ecf20Sopenharmony_ci */ 6028c2ecf20Sopenharmony_ci if (WARN_ON(current->journal_info)) { 6038c2ecf20Sopenharmony_ci jbd2_journal_free_reserved(handle); 6048c2ecf20Sopenharmony_ci return ret; 6058c2ecf20Sopenharmony_ci } 6068c2ecf20Sopenharmony_ci 6078c2ecf20Sopenharmony_ci handle->h_journal = NULL; 6088c2ecf20Sopenharmony_ci /* 6098c2ecf20Sopenharmony_ci * GFP_NOFS is here because callers are likely from writeback or 6108c2ecf20Sopenharmony_ci * similarly constrained call sites 6118c2ecf20Sopenharmony_ci */ 6128c2ecf20Sopenharmony_ci ret = start_this_handle(journal, handle, GFP_NOFS); 6138c2ecf20Sopenharmony_ci if (ret < 0) { 6148c2ecf20Sopenharmony_ci handle->h_journal = journal; 6158c2ecf20Sopenharmony_ci jbd2_journal_free_reserved(handle); 6168c2ecf20Sopenharmony_ci return ret; 6178c2ecf20Sopenharmony_ci } 6188c2ecf20Sopenharmony_ci handle->h_type = type; 6198c2ecf20Sopenharmony_ci handle->h_line_no = line_no; 6208c2ecf20Sopenharmony_ci trace_jbd2_handle_start(journal->j_fs_dev->bd_dev, 6218c2ecf20Sopenharmony_ci handle->h_transaction->t_tid, type, 6228c2ecf20Sopenharmony_ci line_no, handle->h_total_credits); 6238c2ecf20Sopenharmony_ci return 0; 6248c2ecf20Sopenharmony_ci} 6258c2ecf20Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_start_reserved); 6268c2ecf20Sopenharmony_ci 6278c2ecf20Sopenharmony_ci/** 6288c2ecf20Sopenharmony_ci * jbd2_journal_extend() - extend buffer credits. 6298c2ecf20Sopenharmony_ci * @handle: handle to 'extend' 6308c2ecf20Sopenharmony_ci * @nblocks: nr blocks to try to extend by. 6318c2ecf20Sopenharmony_ci * @revoke_records: number of revoke records to try to extend by. 
6328c2ecf20Sopenharmony_ci * 6338c2ecf20Sopenharmony_ci * Some transactions, such as large extends and truncates, can be done 6348c2ecf20Sopenharmony_ci * atomically all at once or in several stages. The operation requests 6358c2ecf20Sopenharmony_ci * a credit for a number of buffer modifications in advance, but can 6368c2ecf20Sopenharmony_ci * extend its credit if it needs more. 6378c2ecf20Sopenharmony_ci * 6388c2ecf20Sopenharmony_ci * jbd2_journal_extend tries to give the running handle more buffer credits. 6398c2ecf20Sopenharmony_ci * It does not guarantee that allocation - this is a best-effort only. 6408c2ecf20Sopenharmony_ci * The calling process MUST be able to deal cleanly with a failure to 6418c2ecf20Sopenharmony_ci * extend here. 6428c2ecf20Sopenharmony_ci * 6438c2ecf20Sopenharmony_ci * Return 0 on success, non-zero on failure. 6448c2ecf20Sopenharmony_ci * 6458c2ecf20Sopenharmony_ci * return code < 0 implies an error 6468c2ecf20Sopenharmony_ci * return code > 0 implies normal transaction-full status. 6478c2ecf20Sopenharmony_ci */ 6488c2ecf20Sopenharmony_ciint jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records) 6498c2ecf20Sopenharmony_ci{ 6508c2ecf20Sopenharmony_ci transaction_t *transaction = handle->h_transaction; 6518c2ecf20Sopenharmony_ci journal_t *journal; 6528c2ecf20Sopenharmony_ci int result; 6538c2ecf20Sopenharmony_ci int wanted; 6548c2ecf20Sopenharmony_ci 6558c2ecf20Sopenharmony_ci if (is_handle_aborted(handle)) 6568c2ecf20Sopenharmony_ci return -EROFS; 6578c2ecf20Sopenharmony_ci journal = transaction->t_journal; 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_ci result = 1; 6608c2ecf20Sopenharmony_ci 6618c2ecf20Sopenharmony_ci read_lock(&journal->j_state_lock); 6628c2ecf20Sopenharmony_ci 6638c2ecf20Sopenharmony_ci /* Don't extend a locked-down transaction! 
*/ 6648c2ecf20Sopenharmony_ci if (transaction->t_state != T_RUNNING) { 6658c2ecf20Sopenharmony_ci jbd_debug(3, "denied handle %p %d blocks: " 6668c2ecf20Sopenharmony_ci "transaction not running\n", handle, nblocks); 6678c2ecf20Sopenharmony_ci goto error_out; 6688c2ecf20Sopenharmony_ci } 6698c2ecf20Sopenharmony_ci 6708c2ecf20Sopenharmony_ci nblocks += DIV_ROUND_UP( 6718c2ecf20Sopenharmony_ci handle->h_revoke_credits_requested + revoke_records, 6728c2ecf20Sopenharmony_ci journal->j_revoke_records_per_block) - 6738c2ecf20Sopenharmony_ci DIV_ROUND_UP( 6748c2ecf20Sopenharmony_ci handle->h_revoke_credits_requested, 6758c2ecf20Sopenharmony_ci journal->j_revoke_records_per_block); 6768c2ecf20Sopenharmony_ci spin_lock(&transaction->t_handle_lock); 6778c2ecf20Sopenharmony_ci wanted = atomic_add_return(nblocks, 6788c2ecf20Sopenharmony_ci &transaction->t_outstanding_credits); 6798c2ecf20Sopenharmony_ci 6808c2ecf20Sopenharmony_ci if (wanted > journal->j_max_transaction_buffers) { 6818c2ecf20Sopenharmony_ci jbd_debug(3, "denied handle %p %d blocks: " 6828c2ecf20Sopenharmony_ci "transaction too large\n", handle, nblocks); 6838c2ecf20Sopenharmony_ci atomic_sub(nblocks, &transaction->t_outstanding_credits); 6848c2ecf20Sopenharmony_ci goto unlock; 6858c2ecf20Sopenharmony_ci } 6868c2ecf20Sopenharmony_ci 6878c2ecf20Sopenharmony_ci trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev, 6888c2ecf20Sopenharmony_ci transaction->t_tid, 6898c2ecf20Sopenharmony_ci handle->h_type, handle->h_line_no, 6908c2ecf20Sopenharmony_ci handle->h_total_credits, 6918c2ecf20Sopenharmony_ci nblocks); 6928c2ecf20Sopenharmony_ci 6938c2ecf20Sopenharmony_ci handle->h_total_credits += nblocks; 6948c2ecf20Sopenharmony_ci handle->h_requested_credits += nblocks; 6958c2ecf20Sopenharmony_ci handle->h_revoke_credits += revoke_records; 6968c2ecf20Sopenharmony_ci handle->h_revoke_credits_requested += revoke_records; 6978c2ecf20Sopenharmony_ci result = 0; 6988c2ecf20Sopenharmony_ci 6998c2ecf20Sopenharmony_ci jbd_debug(3, 
"extended handle %p by %d\n", handle, nblocks); 7008c2ecf20Sopenharmony_ciunlock: 7018c2ecf20Sopenharmony_ci spin_unlock(&transaction->t_handle_lock); 7028c2ecf20Sopenharmony_cierror_out: 7038c2ecf20Sopenharmony_ci read_unlock(&journal->j_state_lock); 7048c2ecf20Sopenharmony_ci return result; 7058c2ecf20Sopenharmony_ci} 7068c2ecf20Sopenharmony_ci 7078c2ecf20Sopenharmony_cistatic void stop_this_handle(handle_t *handle) 7088c2ecf20Sopenharmony_ci{ 7098c2ecf20Sopenharmony_ci transaction_t *transaction = handle->h_transaction; 7108c2ecf20Sopenharmony_ci journal_t *journal = transaction->t_journal; 7118c2ecf20Sopenharmony_ci int revokes; 7128c2ecf20Sopenharmony_ci 7138c2ecf20Sopenharmony_ci J_ASSERT(journal_current_handle() == handle); 7148c2ecf20Sopenharmony_ci J_ASSERT(atomic_read(&transaction->t_updates) > 0); 7158c2ecf20Sopenharmony_ci current->journal_info = NULL; 7168c2ecf20Sopenharmony_ci /* 7178c2ecf20Sopenharmony_ci * Subtract necessary revoke descriptor blocks from handle credits. We 7188c2ecf20Sopenharmony_ci * take care to account only for revoke descriptor blocks the 7198c2ecf20Sopenharmony_ci * transaction will really need as large sequences of transactions with 7208c2ecf20Sopenharmony_ci * small numbers of revokes are relatively common. 
7218c2ecf20Sopenharmony_ci */ 7228c2ecf20Sopenharmony_ci revokes = handle->h_revoke_credits_requested - handle->h_revoke_credits; 7238c2ecf20Sopenharmony_ci if (revokes) { 7248c2ecf20Sopenharmony_ci int t_revokes, revoke_descriptors; 7258c2ecf20Sopenharmony_ci int rr_per_blk = journal->j_revoke_records_per_block; 7268c2ecf20Sopenharmony_ci 7278c2ecf20Sopenharmony_ci WARN_ON_ONCE(DIV_ROUND_UP(revokes, rr_per_blk) 7288c2ecf20Sopenharmony_ci > handle->h_total_credits); 7298c2ecf20Sopenharmony_ci t_revokes = atomic_add_return(revokes, 7308c2ecf20Sopenharmony_ci &transaction->t_outstanding_revokes); 7318c2ecf20Sopenharmony_ci revoke_descriptors = 7328c2ecf20Sopenharmony_ci DIV_ROUND_UP(t_revokes, rr_per_blk) - 7338c2ecf20Sopenharmony_ci DIV_ROUND_UP(t_revokes - revokes, rr_per_blk); 7348c2ecf20Sopenharmony_ci handle->h_total_credits -= revoke_descriptors; 7358c2ecf20Sopenharmony_ci } 7368c2ecf20Sopenharmony_ci atomic_sub(handle->h_total_credits, 7378c2ecf20Sopenharmony_ci &transaction->t_outstanding_credits); 7388c2ecf20Sopenharmony_ci if (handle->h_rsv_handle) 7398c2ecf20Sopenharmony_ci __jbd2_journal_unreserve_handle(handle->h_rsv_handle, 7408c2ecf20Sopenharmony_ci transaction); 7418c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&transaction->t_updates)) 7428c2ecf20Sopenharmony_ci wake_up(&journal->j_wait_updates); 7438c2ecf20Sopenharmony_ci 7448c2ecf20Sopenharmony_ci rwsem_release(&journal->j_trans_commit_map, _THIS_IP_); 7458c2ecf20Sopenharmony_ci /* 7468c2ecf20Sopenharmony_ci * Scope of the GFP_NOFS context is over here and so we can restore the 7478c2ecf20Sopenharmony_ci * original alloc context. 7488c2ecf20Sopenharmony_ci */ 7498c2ecf20Sopenharmony_ci memalloc_nofs_restore(handle->saved_alloc_context); 7508c2ecf20Sopenharmony_ci} 7518c2ecf20Sopenharmony_ci 7528c2ecf20Sopenharmony_ci/** 7538c2ecf20Sopenharmony_ci * jbd2__journal_restart() - restart a handle . 
7548c2ecf20Sopenharmony_ci * @handle: handle to restart 7558c2ecf20Sopenharmony_ci * @nblocks: nr credits requested 7568c2ecf20Sopenharmony_ci * @revoke_records: number of revoke record credits requested 7578c2ecf20Sopenharmony_ci * @gfp_mask: memory allocation flags (for start_this_handle) 7588c2ecf20Sopenharmony_ci * 7598c2ecf20Sopenharmony_ci * Restart a handle for a multi-transaction filesystem 7608c2ecf20Sopenharmony_ci * operation. 7618c2ecf20Sopenharmony_ci * 7628c2ecf20Sopenharmony_ci * If the jbd2_journal_extend() call above fails to grant new buffer credits 7638c2ecf20Sopenharmony_ci * to a running handle, a call to jbd2_journal_restart will commit the 7648c2ecf20Sopenharmony_ci * handle's transaction so far and reattach the handle to a new 7658c2ecf20Sopenharmony_ci * transaction capable of guaranteeing the requested number of 7668c2ecf20Sopenharmony_ci * credits. We preserve reserved handle if there's any attached to the 7678c2ecf20Sopenharmony_ci * passed in handle. 7688c2ecf20Sopenharmony_ci */ 7698c2ecf20Sopenharmony_ciint jbd2__journal_restart(handle_t *handle, int nblocks, int revoke_records, 7708c2ecf20Sopenharmony_ci gfp_t gfp_mask) 7718c2ecf20Sopenharmony_ci{ 7728c2ecf20Sopenharmony_ci transaction_t *transaction = handle->h_transaction; 7738c2ecf20Sopenharmony_ci journal_t *journal; 7748c2ecf20Sopenharmony_ci tid_t tid; 7758c2ecf20Sopenharmony_ci int need_to_start; 7768c2ecf20Sopenharmony_ci int ret; 7778c2ecf20Sopenharmony_ci 7788c2ecf20Sopenharmony_ci /* If we've had an abort of any type, don't even think about 7798c2ecf20Sopenharmony_ci * actually doing the restart! 
*/ 7808c2ecf20Sopenharmony_ci if (is_handle_aborted(handle)) 7818c2ecf20Sopenharmony_ci return 0; 7828c2ecf20Sopenharmony_ci journal = transaction->t_journal; 7838c2ecf20Sopenharmony_ci tid = transaction->t_tid; 7848c2ecf20Sopenharmony_ci 7858c2ecf20Sopenharmony_ci /* 7868c2ecf20Sopenharmony_ci * First unlink the handle from its current transaction, and start the 7878c2ecf20Sopenharmony_ci * commit on that. 7888c2ecf20Sopenharmony_ci */ 7898c2ecf20Sopenharmony_ci jbd_debug(2, "restarting handle %p\n", handle); 7908c2ecf20Sopenharmony_ci stop_this_handle(handle); 7918c2ecf20Sopenharmony_ci handle->h_transaction = NULL; 7928c2ecf20Sopenharmony_ci 7938c2ecf20Sopenharmony_ci /* 7948c2ecf20Sopenharmony_ci * TODO: If we use READ_ONCE / WRITE_ONCE for j_commit_request we can 7958c2ecf20Sopenharmony_ci * get rid of pointless j_state_lock traffic like this. 7968c2ecf20Sopenharmony_ci */ 7978c2ecf20Sopenharmony_ci read_lock(&journal->j_state_lock); 7988c2ecf20Sopenharmony_ci need_to_start = !tid_geq(journal->j_commit_request, tid); 7998c2ecf20Sopenharmony_ci read_unlock(&journal->j_state_lock); 8008c2ecf20Sopenharmony_ci if (need_to_start) 8018c2ecf20Sopenharmony_ci jbd2_log_start_commit(journal, tid); 8028c2ecf20Sopenharmony_ci handle->h_total_credits = nblocks + 8038c2ecf20Sopenharmony_ci DIV_ROUND_UP(revoke_records, 8048c2ecf20Sopenharmony_ci journal->j_revoke_records_per_block); 8058c2ecf20Sopenharmony_ci handle->h_revoke_credits = revoke_records; 8068c2ecf20Sopenharmony_ci ret = start_this_handle(journal, handle, gfp_mask); 8078c2ecf20Sopenharmony_ci trace_jbd2_handle_restart(journal->j_fs_dev->bd_dev, 8088c2ecf20Sopenharmony_ci ret ? 
0 : handle->h_transaction->t_tid, 8098c2ecf20Sopenharmony_ci handle->h_type, handle->h_line_no, 8108c2ecf20Sopenharmony_ci handle->h_total_credits); 8118c2ecf20Sopenharmony_ci return ret; 8128c2ecf20Sopenharmony_ci} 8138c2ecf20Sopenharmony_ciEXPORT_SYMBOL(jbd2__journal_restart); 8148c2ecf20Sopenharmony_ci 8158c2ecf20Sopenharmony_ci 8168c2ecf20Sopenharmony_ciint jbd2_journal_restart(handle_t *handle, int nblocks) 8178c2ecf20Sopenharmony_ci{ 8188c2ecf20Sopenharmony_ci return jbd2__journal_restart(handle, nblocks, 0, GFP_NOFS); 8198c2ecf20Sopenharmony_ci} 8208c2ecf20Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_restart); 8218c2ecf20Sopenharmony_ci 8228c2ecf20Sopenharmony_ci/** 8238c2ecf20Sopenharmony_ci * jbd2_journal_lock_updates () - establish a transaction barrier. 8248c2ecf20Sopenharmony_ci * @journal: Journal to establish a barrier on. 8258c2ecf20Sopenharmony_ci * 8268c2ecf20Sopenharmony_ci * This locks out any further updates from being started, and blocks 8278c2ecf20Sopenharmony_ci * until all existing updates have completed, returning only once the 8288c2ecf20Sopenharmony_ci * journal is in a quiescent state with no updates running. 8298c2ecf20Sopenharmony_ci * 8308c2ecf20Sopenharmony_ci * The journal lock should not be held on entry. 
8318c2ecf20Sopenharmony_ci */ 8328c2ecf20Sopenharmony_civoid jbd2_journal_lock_updates(journal_t *journal) 8338c2ecf20Sopenharmony_ci{ 8348c2ecf20Sopenharmony_ci DEFINE_WAIT(wait); 8358c2ecf20Sopenharmony_ci 8368c2ecf20Sopenharmony_ci jbd2_might_wait_for_commit(journal); 8378c2ecf20Sopenharmony_ci 8388c2ecf20Sopenharmony_ci write_lock(&journal->j_state_lock); 8398c2ecf20Sopenharmony_ci ++journal->j_barrier_count; 8408c2ecf20Sopenharmony_ci 8418c2ecf20Sopenharmony_ci /* Wait until there are no reserved handles */ 8428c2ecf20Sopenharmony_ci if (atomic_read(&journal->j_reserved_credits)) { 8438c2ecf20Sopenharmony_ci write_unlock(&journal->j_state_lock); 8448c2ecf20Sopenharmony_ci wait_event(journal->j_wait_reserved, 8458c2ecf20Sopenharmony_ci atomic_read(&journal->j_reserved_credits) == 0); 8468c2ecf20Sopenharmony_ci write_lock(&journal->j_state_lock); 8478c2ecf20Sopenharmony_ci } 8488c2ecf20Sopenharmony_ci 8498c2ecf20Sopenharmony_ci /* Wait until there are no running updates */ 8508c2ecf20Sopenharmony_ci while (1) { 8518c2ecf20Sopenharmony_ci transaction_t *transaction = journal->j_running_transaction; 8528c2ecf20Sopenharmony_ci 8538c2ecf20Sopenharmony_ci if (!transaction) 8548c2ecf20Sopenharmony_ci break; 8558c2ecf20Sopenharmony_ci 8568c2ecf20Sopenharmony_ci spin_lock(&transaction->t_handle_lock); 8578c2ecf20Sopenharmony_ci prepare_to_wait(&journal->j_wait_updates, &wait, 8588c2ecf20Sopenharmony_ci TASK_UNINTERRUPTIBLE); 8598c2ecf20Sopenharmony_ci if (!atomic_read(&transaction->t_updates)) { 8608c2ecf20Sopenharmony_ci spin_unlock(&transaction->t_handle_lock); 8618c2ecf20Sopenharmony_ci finish_wait(&journal->j_wait_updates, &wait); 8628c2ecf20Sopenharmony_ci break; 8638c2ecf20Sopenharmony_ci } 8648c2ecf20Sopenharmony_ci spin_unlock(&transaction->t_handle_lock); 8658c2ecf20Sopenharmony_ci write_unlock(&journal->j_state_lock); 8668c2ecf20Sopenharmony_ci schedule(); 8678c2ecf20Sopenharmony_ci finish_wait(&journal->j_wait_updates, &wait); 8688c2ecf20Sopenharmony_ci 
write_lock(&journal->j_state_lock); 8698c2ecf20Sopenharmony_ci } 8708c2ecf20Sopenharmony_ci write_unlock(&journal->j_state_lock); 8718c2ecf20Sopenharmony_ci 8728c2ecf20Sopenharmony_ci /* 8738c2ecf20Sopenharmony_ci * We have now established a barrier against other normal updates, but 8748c2ecf20Sopenharmony_ci * we also need to barrier against other jbd2_journal_lock_updates() calls 8758c2ecf20Sopenharmony_ci * to make sure that we serialise special journal-locked operations 8768c2ecf20Sopenharmony_ci * too. 8778c2ecf20Sopenharmony_ci */ 8788c2ecf20Sopenharmony_ci mutex_lock(&journal->j_barrier); 8798c2ecf20Sopenharmony_ci} 8808c2ecf20Sopenharmony_ci 8818c2ecf20Sopenharmony_ci/** 8828c2ecf20Sopenharmony_ci * jbd2_journal_unlock_updates () - release barrier 8838c2ecf20Sopenharmony_ci * @journal: Journal to release the barrier on. 8848c2ecf20Sopenharmony_ci * 8858c2ecf20Sopenharmony_ci * Release a transaction barrier obtained with jbd2_journal_lock_updates(). 8868c2ecf20Sopenharmony_ci * 8878c2ecf20Sopenharmony_ci * Should be called without the journal lock held. 8888c2ecf20Sopenharmony_ci */ 8898c2ecf20Sopenharmony_civoid jbd2_journal_unlock_updates (journal_t *journal) 8908c2ecf20Sopenharmony_ci{ 8918c2ecf20Sopenharmony_ci J_ASSERT(journal->j_barrier_count != 0); 8928c2ecf20Sopenharmony_ci 8938c2ecf20Sopenharmony_ci mutex_unlock(&journal->j_barrier); 8948c2ecf20Sopenharmony_ci write_lock(&journal->j_state_lock); 8958c2ecf20Sopenharmony_ci --journal->j_barrier_count; 8968c2ecf20Sopenharmony_ci write_unlock(&journal->j_state_lock); 8978c2ecf20Sopenharmony_ci wake_up_all(&journal->j_wait_transaction_locked); 8988c2ecf20Sopenharmony_ci} 8998c2ecf20Sopenharmony_ci 9008c2ecf20Sopenharmony_cistatic void warn_dirty_buffer(struct buffer_head *bh) 9018c2ecf20Sopenharmony_ci{ 9028c2ecf20Sopenharmony_ci printk(KERN_WARNING 9038c2ecf20Sopenharmony_ci "JBD2: Spotted dirty metadata buffer (dev = %pg, blocknr = %llu). 
" 9048c2ecf20Sopenharmony_ci "There's a risk of filesystem corruption in case of system " 9058c2ecf20Sopenharmony_ci "crash.\n", 9068c2ecf20Sopenharmony_ci bh->b_bdev, (unsigned long long)bh->b_blocknr); 9078c2ecf20Sopenharmony_ci} 9088c2ecf20Sopenharmony_ci 9098c2ecf20Sopenharmony_ci/* Call t_frozen trigger and copy buffer data into jh->b_frozen_data. */ 9108c2ecf20Sopenharmony_cistatic void jbd2_freeze_jh_data(struct journal_head *jh) 9118c2ecf20Sopenharmony_ci{ 9128c2ecf20Sopenharmony_ci struct page *page; 9138c2ecf20Sopenharmony_ci int offset; 9148c2ecf20Sopenharmony_ci char *source; 9158c2ecf20Sopenharmony_ci struct buffer_head *bh = jh2bh(jh); 9168c2ecf20Sopenharmony_ci 9178c2ecf20Sopenharmony_ci J_EXPECT_JH(jh, buffer_uptodate(bh), "Possible IO failure.\n"); 9188c2ecf20Sopenharmony_ci page = bh->b_page; 9198c2ecf20Sopenharmony_ci offset = offset_in_page(bh->b_data); 9208c2ecf20Sopenharmony_ci source = kmap_atomic(page); 9218c2ecf20Sopenharmony_ci /* Fire data frozen trigger just before we copy the data */ 9228c2ecf20Sopenharmony_ci jbd2_buffer_frozen_trigger(jh, source + offset, jh->b_triggers); 9238c2ecf20Sopenharmony_ci memcpy(jh->b_frozen_data, source + offset, bh->b_size); 9248c2ecf20Sopenharmony_ci kunmap_atomic(source); 9258c2ecf20Sopenharmony_ci 9268c2ecf20Sopenharmony_ci /* 9278c2ecf20Sopenharmony_ci * Now that the frozen data is saved off, we need to store any matching 9288c2ecf20Sopenharmony_ci * triggers. 9298c2ecf20Sopenharmony_ci */ 9308c2ecf20Sopenharmony_ci jh->b_frozen_triggers = jh->b_triggers; 9318c2ecf20Sopenharmony_ci} 9328c2ecf20Sopenharmony_ci 9338c2ecf20Sopenharmony_ci/* 9348c2ecf20Sopenharmony_ci * If the buffer is already part of the current transaction, then there 9358c2ecf20Sopenharmony_ci * is nothing we need to do. 
If it is already part of a prior 9368c2ecf20Sopenharmony_ci * transaction which we are still committing to disk, then we need to 9378c2ecf20Sopenharmony_ci * make sure that we do not overwrite the old copy: we do copy-out to 9388c2ecf20Sopenharmony_ci * preserve the copy going to disk. We also account the buffer against 9398c2ecf20Sopenharmony_ci * the handle's metadata buffer credits (unless the buffer is already 9408c2ecf20Sopenharmony_ci * part of the transaction, that is). 9418c2ecf20Sopenharmony_ci * 9428c2ecf20Sopenharmony_ci */ 9438c2ecf20Sopenharmony_cistatic int 9448c2ecf20Sopenharmony_cido_get_write_access(handle_t *handle, struct journal_head *jh, 9458c2ecf20Sopenharmony_ci int force_copy) 9468c2ecf20Sopenharmony_ci{ 9478c2ecf20Sopenharmony_ci struct buffer_head *bh; 9488c2ecf20Sopenharmony_ci transaction_t *transaction = handle->h_transaction; 9498c2ecf20Sopenharmony_ci journal_t *journal; 9508c2ecf20Sopenharmony_ci int error; 9518c2ecf20Sopenharmony_ci char *frozen_buffer = NULL; 9528c2ecf20Sopenharmony_ci unsigned long start_lock, time_lock; 9538c2ecf20Sopenharmony_ci 9548c2ecf20Sopenharmony_ci journal = transaction->t_journal; 9558c2ecf20Sopenharmony_ci 9568c2ecf20Sopenharmony_ci jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy); 9578c2ecf20Sopenharmony_ci 9588c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "entry"); 9598c2ecf20Sopenharmony_cirepeat: 9608c2ecf20Sopenharmony_ci bh = jh2bh(jh); 9618c2ecf20Sopenharmony_ci 9628c2ecf20Sopenharmony_ci /* @@@ Need to check for errors here at some point. 
*/ 9638c2ecf20Sopenharmony_ci 9648c2ecf20Sopenharmony_ci start_lock = jiffies; 9658c2ecf20Sopenharmony_ci lock_buffer(bh); 9668c2ecf20Sopenharmony_ci spin_lock(&jh->b_state_lock); 9678c2ecf20Sopenharmony_ci 9688c2ecf20Sopenharmony_ci /* If it takes too long to lock the buffer, trace it */ 9698c2ecf20Sopenharmony_ci time_lock = jbd2_time_diff(start_lock, jiffies); 9708c2ecf20Sopenharmony_ci if (time_lock > HZ/10) 9718c2ecf20Sopenharmony_ci trace_jbd2_lock_buffer_stall(bh->b_bdev->bd_dev, 9728c2ecf20Sopenharmony_ci jiffies_to_msecs(time_lock)); 9738c2ecf20Sopenharmony_ci 9748c2ecf20Sopenharmony_ci /* We now hold the buffer lock so it is safe to query the buffer 9758c2ecf20Sopenharmony_ci * state. Is the buffer dirty? 9768c2ecf20Sopenharmony_ci * 9778c2ecf20Sopenharmony_ci * If so, there are two possibilities. The buffer may be 9788c2ecf20Sopenharmony_ci * non-journaled, and undergoing a quite legitimate writeback. 9798c2ecf20Sopenharmony_ci * Otherwise, it is journaled, and we don't expect dirty buffers 9808c2ecf20Sopenharmony_ci * in that state (the buffers should be marked JBD_Dirty 9818c2ecf20Sopenharmony_ci * instead.) So either the IO is being done under our own 9828c2ecf20Sopenharmony_ci * control and this is a bug, or it's a third party IO such as 9838c2ecf20Sopenharmony_ci * dump(8) (which may leave the buffer scheduled for read --- 9848c2ecf20Sopenharmony_ci * ie. locked but not dirty) or tune2fs (which may actually have 9858c2ecf20Sopenharmony_ci * the buffer dirtied, ugh.) */ 9868c2ecf20Sopenharmony_ci 9878c2ecf20Sopenharmony_ci if (buffer_dirty(bh) && jh->b_transaction) { 9888c2ecf20Sopenharmony_ci warn_dirty_buffer(bh); 9898c2ecf20Sopenharmony_ci /* 9908c2ecf20Sopenharmony_ci * We need to clean the dirty flag and we must do it under the 9918c2ecf20Sopenharmony_ci * buffer lock to be sure we don't race with running write-out. 
9928c2ecf20Sopenharmony_ci */ 9938c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "Journalling dirty buffer"); 9948c2ecf20Sopenharmony_ci clear_buffer_dirty(bh); 9958c2ecf20Sopenharmony_ci /* 9968c2ecf20Sopenharmony_ci * The buffer is going to be added to BJ_Reserved list now and 9978c2ecf20Sopenharmony_ci * nothing guarantees jbd2_journal_dirty_metadata() will be 9988c2ecf20Sopenharmony_ci * ever called for it. So we need to set jbddirty bit here to 9998c2ecf20Sopenharmony_ci * make sure the buffer is dirtied and written out when the 10008c2ecf20Sopenharmony_ci * journaling machinery is done with it. 10018c2ecf20Sopenharmony_ci */ 10028c2ecf20Sopenharmony_ci set_buffer_jbddirty(bh); 10038c2ecf20Sopenharmony_ci } 10048c2ecf20Sopenharmony_ci 10058c2ecf20Sopenharmony_ci error = -EROFS; 10068c2ecf20Sopenharmony_ci if (is_handle_aborted(handle)) { 10078c2ecf20Sopenharmony_ci spin_unlock(&jh->b_state_lock); 10088c2ecf20Sopenharmony_ci unlock_buffer(bh); 10098c2ecf20Sopenharmony_ci goto out; 10108c2ecf20Sopenharmony_ci } 10118c2ecf20Sopenharmony_ci error = 0; 10128c2ecf20Sopenharmony_ci 10138c2ecf20Sopenharmony_ci /* 10148c2ecf20Sopenharmony_ci * The buffer is already part of this transaction if b_transaction or 10158c2ecf20Sopenharmony_ci * b_next_transaction points to it 10168c2ecf20Sopenharmony_ci */ 10178c2ecf20Sopenharmony_ci if (jh->b_transaction == transaction || 10188c2ecf20Sopenharmony_ci jh->b_next_transaction == transaction) { 10198c2ecf20Sopenharmony_ci unlock_buffer(bh); 10208c2ecf20Sopenharmony_ci goto done; 10218c2ecf20Sopenharmony_ci } 10228c2ecf20Sopenharmony_ci 10238c2ecf20Sopenharmony_ci /* 10248c2ecf20Sopenharmony_ci * this is the first time this transaction is touching this buffer, 10258c2ecf20Sopenharmony_ci * reset the modified flag 10268c2ecf20Sopenharmony_ci */ 10278c2ecf20Sopenharmony_ci jh->b_modified = 0; 10288c2ecf20Sopenharmony_ci 10298c2ecf20Sopenharmony_ci /* 10308c2ecf20Sopenharmony_ci * If the buffer is not journaled right now, we need to make 
sure it 10318c2ecf20Sopenharmony_ci * doesn't get written to disk before the caller actually commits the 10328c2ecf20Sopenharmony_ci * new data 10338c2ecf20Sopenharmony_ci */ 10348c2ecf20Sopenharmony_ci if (!jh->b_transaction) { 10358c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "no transaction"); 10368c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, !jh->b_next_transaction); 10378c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "file as BJ_Reserved"); 10388c2ecf20Sopenharmony_ci /* 10398c2ecf20Sopenharmony_ci * Make sure all stores to jh (b_modified, b_frozen_data) are 10408c2ecf20Sopenharmony_ci * visible before attaching it to the running transaction. 10418c2ecf20Sopenharmony_ci * Paired with barrier in jbd2_write_access_granted() 10428c2ecf20Sopenharmony_ci */ 10438c2ecf20Sopenharmony_ci smp_wmb(); 10448c2ecf20Sopenharmony_ci spin_lock(&journal->j_list_lock); 10458c2ecf20Sopenharmony_ci if (test_clear_buffer_dirty(bh)) { 10468c2ecf20Sopenharmony_ci /* 10478c2ecf20Sopenharmony_ci * Execute buffer dirty clearing and jh->b_transaction 10488c2ecf20Sopenharmony_ci * assignment under journal->j_list_lock locked to 10498c2ecf20Sopenharmony_ci * prevent bh being removed from checkpoint list if 10508c2ecf20Sopenharmony_ci * the buffer is in an intermediate state (not dirty 10518c2ecf20Sopenharmony_ci * and jh->b_transaction is NULL). 
10528c2ecf20Sopenharmony_ci */ 10538c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "Journalling dirty buffer"); 10548c2ecf20Sopenharmony_ci set_buffer_jbddirty(bh); 10558c2ecf20Sopenharmony_ci } 10568c2ecf20Sopenharmony_ci __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); 10578c2ecf20Sopenharmony_ci spin_unlock(&journal->j_list_lock); 10588c2ecf20Sopenharmony_ci unlock_buffer(bh); 10598c2ecf20Sopenharmony_ci goto done; 10608c2ecf20Sopenharmony_ci } 10618c2ecf20Sopenharmony_ci unlock_buffer(bh); 10628c2ecf20Sopenharmony_ci 10638c2ecf20Sopenharmony_ci /* 10648c2ecf20Sopenharmony_ci * If there is already a copy-out version of this buffer, then we don't 10658c2ecf20Sopenharmony_ci * need to make another one 10668c2ecf20Sopenharmony_ci */ 10678c2ecf20Sopenharmony_ci if (jh->b_frozen_data) { 10688c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "has frozen data"); 10698c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, jh->b_next_transaction == NULL); 10708c2ecf20Sopenharmony_ci goto attach_next; 10718c2ecf20Sopenharmony_ci } 10728c2ecf20Sopenharmony_ci 10738c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "owned by older transaction"); 10748c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, jh->b_next_transaction == NULL); 10758c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction); 10768c2ecf20Sopenharmony_ci 10778c2ecf20Sopenharmony_ci /* 10788c2ecf20Sopenharmony_ci * There is one case we have to be very careful about. If the 10798c2ecf20Sopenharmony_ci * committing transaction is currently writing this buffer out to disk 10808c2ecf20Sopenharmony_ci * and has NOT made a copy-out, then we cannot modify the buffer 10818c2ecf20Sopenharmony_ci * contents at all right now. The essence of copy-out is that it is 10828c2ecf20Sopenharmony_ci * the extra copy, not the primary copy, which gets journaled. If the 10838c2ecf20Sopenharmony_ci * primary copy is already going to disk then we cannot do copy-out 10848c2ecf20Sopenharmony_ci * here. 
10858c2ecf20Sopenharmony_ci */ 10868c2ecf20Sopenharmony_ci if (buffer_shadow(bh)) { 10878c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "on shadow: sleep"); 10888c2ecf20Sopenharmony_ci spin_unlock(&jh->b_state_lock); 10898c2ecf20Sopenharmony_ci wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE); 10908c2ecf20Sopenharmony_ci goto repeat; 10918c2ecf20Sopenharmony_ci } 10928c2ecf20Sopenharmony_ci 10938c2ecf20Sopenharmony_ci /* 10948c2ecf20Sopenharmony_ci * Only do the copy if the currently-owning transaction still needs it. 10958c2ecf20Sopenharmony_ci * If buffer isn't on BJ_Metadata list, the committing transaction is 10968c2ecf20Sopenharmony_ci * past that stage (here we use the fact that BH_Shadow is set under 10978c2ecf20Sopenharmony_ci * bh_state lock together with refiling to BJ_Shadow list and at this 10988c2ecf20Sopenharmony_ci * point we know the buffer doesn't have BH_Shadow set). 10998c2ecf20Sopenharmony_ci * 11008c2ecf20Sopenharmony_ci * Subtle point, though: if this is a get_undo_access, then we will be 11018c2ecf20Sopenharmony_ci * relying on the frozen_data to contain the new value of the 11028c2ecf20Sopenharmony_ci * committed_data record after the transaction, so we HAVE to force the 11038c2ecf20Sopenharmony_ci * frozen_data copy in that case. 
11048c2ecf20Sopenharmony_ci */ 11058c2ecf20Sopenharmony_ci if (jh->b_jlist == BJ_Metadata || force_copy) { 11068c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "generate frozen data"); 11078c2ecf20Sopenharmony_ci if (!frozen_buffer) { 11088c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "allocate memory for buffer"); 11098c2ecf20Sopenharmony_ci spin_unlock(&jh->b_state_lock); 11108c2ecf20Sopenharmony_ci frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size, 11118c2ecf20Sopenharmony_ci GFP_NOFS | __GFP_NOFAIL); 11128c2ecf20Sopenharmony_ci goto repeat; 11138c2ecf20Sopenharmony_ci } 11148c2ecf20Sopenharmony_ci jh->b_frozen_data = frozen_buffer; 11158c2ecf20Sopenharmony_ci frozen_buffer = NULL; 11168c2ecf20Sopenharmony_ci jbd2_freeze_jh_data(jh); 11178c2ecf20Sopenharmony_ci } 11188c2ecf20Sopenharmony_ciattach_next: 11198c2ecf20Sopenharmony_ci /* 11208c2ecf20Sopenharmony_ci * Make sure all stores to jh (b_modified, b_frozen_data) are visible 11218c2ecf20Sopenharmony_ci * before attaching it to the running transaction. 
Paired with barrier
	 * in jbd2_write_access_granted()
	 */
	smp_wmb();
	jh->b_next_transaction = transaction;

done:
	spin_unlock(&jh->b_state_lock);

	/*
	 * If we are about to journal a buffer, then any revoke pending on it is
	 * no longer valid
	 */
	jbd2_journal_cancel_revoke(handle, jh);

out:
	if (unlikely(frozen_buffer))	/* It's usually NULL */
		jbd2_free(frozen_buffer, bh->b_size);

	JBUFFER_TRACE(jh, "exit");
	return error;
}

/*
 * Fast check whether buffer is already attached to the required transaction.
 *
 * Runs under rcu_read_lock() only; no spinlock is taken.  Returns true only
 * when every one of the following holds, and false as soon as one fails:
 *   - @bh is not dirty;
 *   - @bh has the jbd flag set and a non-NULL b_private (its journal_head);
 *   - when @undo is true, the journal_head already has b_committed_data;
 *   - the journal_head's b_transaction or b_next_transaction is
 *     handle->h_transaction;
 *   - after the full barrier, the journal_head still points back at @bh.
 *
 * The callers in this file treat true as "access already granted" and return
 * 0 without calling do_get_write_access().
 */
static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh,
							bool undo)
{
	struct journal_head *jh;
	bool ret = false;

	/* Dirty buffers require special handling... */
	if (buffer_dirty(bh))
		return false;

	/*
	 * RCU protects us from dereferencing freed pages. So the checks we do
	 * are guaranteed not to oops. However the jh slab object can get freed
	 * & reallocated while we work with it. So we have to be careful. When
	 * we see jh attached to the running transaction, we know it must stay
	 * so until the transaction is committed. Thus jh won't be freed and
	 * will be attached to the same bh while we run.  However it can
	 * happen jh gets freed, reallocated, and attached to the transaction
	 * just after we get pointer to it from bh. So we have to be careful
	 * and recheck jh still belongs to our bh before we return success.
	 */
	rcu_read_lock();
	if (!buffer_jbd(bh))
		goto out;
	/* This should be bh2jh() but that doesn't work with inline functions */
	jh = READ_ONCE(bh->b_private);
	if (!jh)
		goto out;
	/* For undo access buffer must have data copied */
	if (undo && !jh->b_committed_data)
		goto out;
	/*
	 * Either pointer matching is enough: the buffer may be owned by the
	 * handle's transaction directly or queued for it via
	 * b_next_transaction.
	 */
	if (READ_ONCE(jh->b_transaction) != handle->h_transaction &&
	    READ_ONCE(jh->b_next_transaction) != handle->h_transaction)
		goto out;
	/*
	 * There are two reasons for the barrier here:
	 * 1) Make sure to fetch b_bh after we did previous checks so that we
	 * detect when jh went through free, realloc, attach to transaction
	 * while we were checking. Paired with implicit barrier in that path.
	 * 2) So that access to bh done after jbd2_write_access_granted()
	 * doesn't get reordered and see inconsistent state of concurrent
	 * do_get_write_access().
	 */
	smp_mb();
	/* Recheck ownership: a recycled jh would point at a different bh. */
	if (unlikely(jh->b_bh != bh))
		goto out;
	ret = true;
out:
	rcu_read_unlock();
	return ret;
}

/**
 * jbd2_journal_get_write_access() - notify intent to modify a buffer
 *				     for metadata (not data) update.
 * @handle: transaction to add buffer modifications to
 * @bh: bh to be used for metadata writes
 *
 * Returns: error code or 0 on success.
 *
 * In full data journalling mode the buffer may be of type BJ_AsyncData,
 * because we're ``write()ing`` a buffer which is also part of a shared mapping.
12078c2ecf20Sopenharmony_ci */ 12088c2ecf20Sopenharmony_ci 12098c2ecf20Sopenharmony_ciint jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) 12108c2ecf20Sopenharmony_ci{ 12118c2ecf20Sopenharmony_ci struct journal_head *jh; 12128c2ecf20Sopenharmony_ci int rc; 12138c2ecf20Sopenharmony_ci 12148c2ecf20Sopenharmony_ci if (is_handle_aborted(handle)) 12158c2ecf20Sopenharmony_ci return -EROFS; 12168c2ecf20Sopenharmony_ci 12178c2ecf20Sopenharmony_ci if (jbd2_write_access_granted(handle, bh, false)) 12188c2ecf20Sopenharmony_ci return 0; 12198c2ecf20Sopenharmony_ci 12208c2ecf20Sopenharmony_ci jh = jbd2_journal_add_journal_head(bh); 12218c2ecf20Sopenharmony_ci /* We do not want to get caught playing with fields which the 12228c2ecf20Sopenharmony_ci * log thread also manipulates. Make sure that the buffer 12238c2ecf20Sopenharmony_ci * completes any outstanding IO before proceeding. */ 12248c2ecf20Sopenharmony_ci rc = do_get_write_access(handle, jh, 0); 12258c2ecf20Sopenharmony_ci jbd2_journal_put_journal_head(jh); 12268c2ecf20Sopenharmony_ci return rc; 12278c2ecf20Sopenharmony_ci} 12288c2ecf20Sopenharmony_ci 12298c2ecf20Sopenharmony_ci 12308c2ecf20Sopenharmony_ci/* 12318c2ecf20Sopenharmony_ci * When the user wants to journal a newly created buffer_head 12328c2ecf20Sopenharmony_ci * (ie. getblk() returned a new buffer and we are going to populate it 12338c2ecf20Sopenharmony_ci * manually rather than reading off disk), then we need to keep the 12348c2ecf20Sopenharmony_ci * buffer_head locked until it has been completely filled with new 12358c2ecf20Sopenharmony_ci * data. In this case, we should be able to make the assertion that 12368c2ecf20Sopenharmony_ci * the bh is not already part of an existing transaction. 12378c2ecf20Sopenharmony_ci * 12388c2ecf20Sopenharmony_ci * The buffer should already be locked by the caller by this point. 
12398c2ecf20Sopenharmony_ci * There is no lock ranking violation: it was a newly created, 12408c2ecf20Sopenharmony_ci * unlocked buffer beforehand. */ 12418c2ecf20Sopenharmony_ci 12428c2ecf20Sopenharmony_ci/** 12438c2ecf20Sopenharmony_ci * jbd2_journal_get_create_access () - notify intent to use newly created bh 12448c2ecf20Sopenharmony_ci * @handle: transaction to new buffer to 12458c2ecf20Sopenharmony_ci * @bh: new buffer. 12468c2ecf20Sopenharmony_ci * 12478c2ecf20Sopenharmony_ci * Call this if you create a new bh. 12488c2ecf20Sopenharmony_ci */ 12498c2ecf20Sopenharmony_ciint jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) 12508c2ecf20Sopenharmony_ci{ 12518c2ecf20Sopenharmony_ci transaction_t *transaction = handle->h_transaction; 12528c2ecf20Sopenharmony_ci journal_t *journal; 12538c2ecf20Sopenharmony_ci struct journal_head *jh = jbd2_journal_add_journal_head(bh); 12548c2ecf20Sopenharmony_ci int err; 12558c2ecf20Sopenharmony_ci 12568c2ecf20Sopenharmony_ci jbd_debug(5, "journal_head %p\n", jh); 12578c2ecf20Sopenharmony_ci err = -EROFS; 12588c2ecf20Sopenharmony_ci if (is_handle_aborted(handle)) 12598c2ecf20Sopenharmony_ci goto out; 12608c2ecf20Sopenharmony_ci journal = transaction->t_journal; 12618c2ecf20Sopenharmony_ci err = 0; 12628c2ecf20Sopenharmony_ci 12638c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "entry"); 12648c2ecf20Sopenharmony_ci /* 12658c2ecf20Sopenharmony_ci * The buffer may already belong to this transaction due to pre-zeroing 12668c2ecf20Sopenharmony_ci * in the filesystem's new_block code. It may also be on the previous, 12678c2ecf20Sopenharmony_ci * committing transaction's lists, but it HAS to be in Forget state in 12688c2ecf20Sopenharmony_ci * that case: the transaction must have deleted the buffer for it to be 12698c2ecf20Sopenharmony_ci * reused here. 
12708c2ecf20Sopenharmony_ci */ 12718c2ecf20Sopenharmony_ci spin_lock(&jh->b_state_lock); 12728c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, (jh->b_transaction == transaction || 12738c2ecf20Sopenharmony_ci jh->b_transaction == NULL || 12748c2ecf20Sopenharmony_ci (jh->b_transaction == journal->j_committing_transaction && 12758c2ecf20Sopenharmony_ci jh->b_jlist == BJ_Forget))); 12768c2ecf20Sopenharmony_ci 12778c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, jh->b_next_transaction == NULL); 12788c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); 12798c2ecf20Sopenharmony_ci 12808c2ecf20Sopenharmony_ci if (jh->b_transaction == NULL) { 12818c2ecf20Sopenharmony_ci /* 12828c2ecf20Sopenharmony_ci * Previous jbd2_journal_forget() could have left the buffer 12838c2ecf20Sopenharmony_ci * with jbddirty bit set because it was being committed. When 12848c2ecf20Sopenharmony_ci * the commit finished, we've filed the buffer for 12858c2ecf20Sopenharmony_ci * checkpointing and marked it dirty. Now we are reallocating 12868c2ecf20Sopenharmony_ci * the buffer so the transaction freeing it must have 12878c2ecf20Sopenharmony_ci * committed and so it's safe to clear the dirty bit. 
12888c2ecf20Sopenharmony_ci */ 12898c2ecf20Sopenharmony_ci clear_buffer_dirty(jh2bh(jh)); 12908c2ecf20Sopenharmony_ci /* first access by this transaction */ 12918c2ecf20Sopenharmony_ci jh->b_modified = 0; 12928c2ecf20Sopenharmony_ci 12938c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "file as BJ_Reserved"); 12948c2ecf20Sopenharmony_ci spin_lock(&journal->j_list_lock); 12958c2ecf20Sopenharmony_ci __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); 12968c2ecf20Sopenharmony_ci spin_unlock(&journal->j_list_lock); 12978c2ecf20Sopenharmony_ci } else if (jh->b_transaction == journal->j_committing_transaction) { 12988c2ecf20Sopenharmony_ci /* first access by this transaction */ 12998c2ecf20Sopenharmony_ci jh->b_modified = 0; 13008c2ecf20Sopenharmony_ci 13018c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "set next transaction"); 13028c2ecf20Sopenharmony_ci spin_lock(&journal->j_list_lock); 13038c2ecf20Sopenharmony_ci jh->b_next_transaction = transaction; 13048c2ecf20Sopenharmony_ci spin_unlock(&journal->j_list_lock); 13058c2ecf20Sopenharmony_ci } 13068c2ecf20Sopenharmony_ci spin_unlock(&jh->b_state_lock); 13078c2ecf20Sopenharmony_ci 13088c2ecf20Sopenharmony_ci /* 13098c2ecf20Sopenharmony_ci * akpm: I added this. ext3_alloc_branch can pick up new indirect 13108c2ecf20Sopenharmony_ci * blocks which contain freed but then revoked metadata. We need 13118c2ecf20Sopenharmony_ci * to cancel the revoke in case we end up freeing it yet again 13128c2ecf20Sopenharmony_ci * and the reallocating as data - this would cause a second revoke, 13138c2ecf20Sopenharmony_ci * which hits an assertion error. 
13148c2ecf20Sopenharmony_ci */ 13158c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "cancelling revoke"); 13168c2ecf20Sopenharmony_ci jbd2_journal_cancel_revoke(handle, jh); 13178c2ecf20Sopenharmony_ciout: 13188c2ecf20Sopenharmony_ci jbd2_journal_put_journal_head(jh); 13198c2ecf20Sopenharmony_ci return err; 13208c2ecf20Sopenharmony_ci} 13218c2ecf20Sopenharmony_ci 13228c2ecf20Sopenharmony_ci/** 13238c2ecf20Sopenharmony_ci * jbd2_journal_get_undo_access() - Notify intent to modify metadata with 13248c2ecf20Sopenharmony_ci * non-rewindable consequences 13258c2ecf20Sopenharmony_ci * @handle: transaction 13268c2ecf20Sopenharmony_ci * @bh: buffer to undo 13278c2ecf20Sopenharmony_ci * 13288c2ecf20Sopenharmony_ci * Sometimes there is a need to distinguish between metadata which has 13298c2ecf20Sopenharmony_ci * been committed to disk and that which has not. The ext3fs code uses 13308c2ecf20Sopenharmony_ci * this for freeing and allocating space, we have to make sure that we 13318c2ecf20Sopenharmony_ci * do not reuse freed space until the deallocation has been committed, 13328c2ecf20Sopenharmony_ci * since if we overwrote that space we would make the delete 13338c2ecf20Sopenharmony_ci * un-rewindable in case of a crash. 13348c2ecf20Sopenharmony_ci * 13358c2ecf20Sopenharmony_ci * To deal with that, jbd2_journal_get_undo_access requests write access to a 13368c2ecf20Sopenharmony_ci * buffer for parts of non-rewindable operations such as delete 13378c2ecf20Sopenharmony_ci * operations on the bitmaps. The journaling code must keep a copy of 13388c2ecf20Sopenharmony_ci * the buffer's contents prior to the undo_access call until such time 13398c2ecf20Sopenharmony_ci * as we know that the buffer has definitely been committed to disk. 
13408c2ecf20Sopenharmony_ci * 13418c2ecf20Sopenharmony_ci * We never need to know which transaction the committed data is part 13428c2ecf20Sopenharmony_ci * of, buffers touched here are guaranteed to be dirtied later and so 13438c2ecf20Sopenharmony_ci * will be committed to a new transaction in due course, at which point 13448c2ecf20Sopenharmony_ci * we can discard the old committed data pointer. 13458c2ecf20Sopenharmony_ci * 13468c2ecf20Sopenharmony_ci * Returns error number or 0 on success. 13478c2ecf20Sopenharmony_ci */ 13488c2ecf20Sopenharmony_ciint jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) 13498c2ecf20Sopenharmony_ci{ 13508c2ecf20Sopenharmony_ci int err; 13518c2ecf20Sopenharmony_ci struct journal_head *jh; 13528c2ecf20Sopenharmony_ci char *committed_data = NULL; 13538c2ecf20Sopenharmony_ci 13548c2ecf20Sopenharmony_ci if (is_handle_aborted(handle)) 13558c2ecf20Sopenharmony_ci return -EROFS; 13568c2ecf20Sopenharmony_ci 13578c2ecf20Sopenharmony_ci if (jbd2_write_access_granted(handle, bh, true)) 13588c2ecf20Sopenharmony_ci return 0; 13598c2ecf20Sopenharmony_ci 13608c2ecf20Sopenharmony_ci jh = jbd2_journal_add_journal_head(bh); 13618c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "entry"); 13628c2ecf20Sopenharmony_ci 13638c2ecf20Sopenharmony_ci /* 13648c2ecf20Sopenharmony_ci * Do this first --- it can drop the journal lock, so we want to 13658c2ecf20Sopenharmony_ci * make sure that obtaining the committed_data is done 13668c2ecf20Sopenharmony_ci * atomically wrt. completion of any outstanding commits. 
13678c2ecf20Sopenharmony_ci */ 13688c2ecf20Sopenharmony_ci err = do_get_write_access(handle, jh, 1); 13698c2ecf20Sopenharmony_ci if (err) 13708c2ecf20Sopenharmony_ci goto out; 13718c2ecf20Sopenharmony_ci 13728c2ecf20Sopenharmony_cirepeat: 13738c2ecf20Sopenharmony_ci if (!jh->b_committed_data) 13748c2ecf20Sopenharmony_ci committed_data = jbd2_alloc(jh2bh(jh)->b_size, 13758c2ecf20Sopenharmony_ci GFP_NOFS|__GFP_NOFAIL); 13768c2ecf20Sopenharmony_ci 13778c2ecf20Sopenharmony_ci spin_lock(&jh->b_state_lock); 13788c2ecf20Sopenharmony_ci if (!jh->b_committed_data) { 13798c2ecf20Sopenharmony_ci /* Copy out the current buffer contents into the 13808c2ecf20Sopenharmony_ci * preserved, committed copy. */ 13818c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "generate b_committed data"); 13828c2ecf20Sopenharmony_ci if (!committed_data) { 13838c2ecf20Sopenharmony_ci spin_unlock(&jh->b_state_lock); 13848c2ecf20Sopenharmony_ci goto repeat; 13858c2ecf20Sopenharmony_ci } 13868c2ecf20Sopenharmony_ci 13878c2ecf20Sopenharmony_ci jh->b_committed_data = committed_data; 13888c2ecf20Sopenharmony_ci committed_data = NULL; 13898c2ecf20Sopenharmony_ci memcpy(jh->b_committed_data, bh->b_data, bh->b_size); 13908c2ecf20Sopenharmony_ci } 13918c2ecf20Sopenharmony_ci spin_unlock(&jh->b_state_lock); 13928c2ecf20Sopenharmony_ciout: 13938c2ecf20Sopenharmony_ci jbd2_journal_put_journal_head(jh); 13948c2ecf20Sopenharmony_ci if (unlikely(committed_data)) 13958c2ecf20Sopenharmony_ci jbd2_free(committed_data, bh->b_size); 13968c2ecf20Sopenharmony_ci return err; 13978c2ecf20Sopenharmony_ci} 13988c2ecf20Sopenharmony_ci 13998c2ecf20Sopenharmony_ci/** 14008c2ecf20Sopenharmony_ci * jbd2_journal_set_triggers() - Add triggers for commit writeout 14018c2ecf20Sopenharmony_ci * @bh: buffer to trigger on 14028c2ecf20Sopenharmony_ci * @type: struct jbd2_buffer_trigger_type containing the trigger(s). 14038c2ecf20Sopenharmony_ci * 14048c2ecf20Sopenharmony_ci * Set any triggers on this journal_head. 
This is always safe, because 14058c2ecf20Sopenharmony_ci * triggers for a committing buffer will be saved off, and triggers for 14068c2ecf20Sopenharmony_ci * a running transaction will match the buffer in that transaction. 14078c2ecf20Sopenharmony_ci * 14088c2ecf20Sopenharmony_ci * Call with NULL to clear the triggers. 14098c2ecf20Sopenharmony_ci */ 14108c2ecf20Sopenharmony_civoid jbd2_journal_set_triggers(struct buffer_head *bh, 14118c2ecf20Sopenharmony_ci struct jbd2_buffer_trigger_type *type) 14128c2ecf20Sopenharmony_ci{ 14138c2ecf20Sopenharmony_ci struct journal_head *jh = jbd2_journal_grab_journal_head(bh); 14148c2ecf20Sopenharmony_ci 14158c2ecf20Sopenharmony_ci if (WARN_ON(!jh)) 14168c2ecf20Sopenharmony_ci return; 14178c2ecf20Sopenharmony_ci jh->b_triggers = type; 14188c2ecf20Sopenharmony_ci jbd2_journal_put_journal_head(jh); 14198c2ecf20Sopenharmony_ci} 14208c2ecf20Sopenharmony_ci 14218c2ecf20Sopenharmony_civoid jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data, 14228c2ecf20Sopenharmony_ci struct jbd2_buffer_trigger_type *triggers) 14238c2ecf20Sopenharmony_ci{ 14248c2ecf20Sopenharmony_ci struct buffer_head *bh = jh2bh(jh); 14258c2ecf20Sopenharmony_ci 14268c2ecf20Sopenharmony_ci if (!triggers || !triggers->t_frozen) 14278c2ecf20Sopenharmony_ci return; 14288c2ecf20Sopenharmony_ci 14298c2ecf20Sopenharmony_ci triggers->t_frozen(triggers, bh, mapped_data, bh->b_size); 14308c2ecf20Sopenharmony_ci} 14318c2ecf20Sopenharmony_ci 14328c2ecf20Sopenharmony_civoid jbd2_buffer_abort_trigger(struct journal_head *jh, 14338c2ecf20Sopenharmony_ci struct jbd2_buffer_trigger_type *triggers) 14348c2ecf20Sopenharmony_ci{ 14358c2ecf20Sopenharmony_ci if (!triggers || !triggers->t_abort) 14368c2ecf20Sopenharmony_ci return; 14378c2ecf20Sopenharmony_ci 14388c2ecf20Sopenharmony_ci triggers->t_abort(triggers, jh2bh(jh)); 14398c2ecf20Sopenharmony_ci} 14408c2ecf20Sopenharmony_ci 14418c2ecf20Sopenharmony_ci/** 14428c2ecf20Sopenharmony_ci * jbd2_journal_dirty_metadata() - 
mark a buffer as containing dirty metadata 14438c2ecf20Sopenharmony_ci * @handle: transaction to add buffer to. 14448c2ecf20Sopenharmony_ci * @bh: buffer to mark 14458c2ecf20Sopenharmony_ci * 14468c2ecf20Sopenharmony_ci * mark dirty metadata which needs to be journaled as part of the current 14478c2ecf20Sopenharmony_ci * transaction. 14488c2ecf20Sopenharmony_ci * 14498c2ecf20Sopenharmony_ci * The buffer must have previously had jbd2_journal_get_write_access() 14508c2ecf20Sopenharmony_ci * called so that it has a valid journal_head attached to the buffer 14518c2ecf20Sopenharmony_ci * head. 14528c2ecf20Sopenharmony_ci * 14538c2ecf20Sopenharmony_ci * The buffer is placed on the transaction's metadata list and is marked 14548c2ecf20Sopenharmony_ci * as belonging to the transaction. 14558c2ecf20Sopenharmony_ci * 14568c2ecf20Sopenharmony_ci * Returns error number or 0 on success. 14578c2ecf20Sopenharmony_ci * 14588c2ecf20Sopenharmony_ci * Special care needs to be taken if the buffer already belongs to the 14598c2ecf20Sopenharmony_ci * current committing transaction (in which case we should have frozen 14608c2ecf20Sopenharmony_ci * data present for that commit). In that case, we don't relink the 14618c2ecf20Sopenharmony_ci * buffer: that only gets done when the old transaction finally 14628c2ecf20Sopenharmony_ci * completes its commit. 
*/
int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal;
	struct journal_head *jh;
	int ret = 0;

	/*
	 * Return values produced below: -EUCLEAN when @bh has no journal_head
	 * attached, -EROFS when @handle is aborted, -ENOSPC when @handle has
	 * no buffer credits left for a first modification, -EINVAL when the
	 * journal_head is attached to an unexpected transaction, 0 otherwise.
	 */
	if (!buffer_jbd(bh))
		return -EUCLEAN;

	/*
	 * We don't grab jh reference here since the buffer must be part
	 * of the running transaction.
	 */
	jh = bh2jh(bh);
	jbd_debug(5, "journal_head %p\n", jh);
	JBUFFER_TRACE(jh, "entry");

	/*
	 * This and the following assertions are unreliable since we may see jh
	 * in inconsistent state unless we grab bh_state lock. But this is
	 * crucial to catch bugs so let's do a reliable check until the
	 * lockless handling is fully proven.
	 */
	if (data_race(jh->b_transaction != transaction &&
	    jh->b_next_transaction != transaction)) {
		/* The unlocked peek looked wrong: re-check under the lock. */
		spin_lock(&jh->b_state_lock);
		J_ASSERT_JH(jh, jh->b_transaction == transaction ||
				jh->b_next_transaction == transaction);
		spin_unlock(&jh->b_state_lock);
	}
	/*
	 * Unlocked early exit: the buffer is already marked modified, so only
	 * the consistency diagnostics below run and 0 is returned.
	 */
	if (jh->b_modified == 1) {
		/* If it's in our transaction it must be in BJ_Metadata list. */
		if (data_race(jh->b_transaction == transaction &&
		    jh->b_jlist != BJ_Metadata)) {
			spin_lock(&jh->b_state_lock);
			/* Log the details before the assertion below fires. */
			if (jh->b_transaction == transaction &&
			    jh->b_jlist != BJ_Metadata)
				pr_err("JBD2: assertion failure: h_type=%u "
				       "h_line_no=%u block_no=%llu jlist=%u\n",
				       handle->h_type, handle->h_line_no,
				       (unsigned long long) bh->b_blocknr,
				       jh->b_jlist);
			J_ASSERT_JH(jh, jh->b_transaction != transaction ||
					jh->b_jlist == BJ_Metadata);
			spin_unlock(&jh->b_state_lock);
		}
		goto out;
	}

	journal = transaction->t_journal;
	/* Everything from here to out_unlock_bh runs under b_state_lock. */
	spin_lock(&jh->b_state_lock);

	if (is_handle_aborted(handle)) {
		/*
		 * Check journal aborting with @jh->b_state_lock locked,
		 * since 'jh->b_transaction' could be replaced with
		 * 'jh->b_next_transaction' during old transaction
		 * committing if journal aborted, which may fail
		 * assertion on 'jh->b_frozen_data == NULL'.
		 */
		ret = -EROFS;
		goto out_unlock_bh;
	}

	if (jh->b_modified == 0) {
		/*
		 * This buffer's got modified and becoming part
		 * of the transaction. This needs to be done
		 * once a transaction -bzzz
		 */
		/* A first modification costs one buffer credit of the handle. */
		if (WARN_ON_ONCE(jbd2_handle_buffer_credits(handle) <= 0)) {
			ret = -ENOSPC;
			goto out_unlock_bh;
		}
		jh->b_modified = 1;
		handle->h_total_credits--;
	}

	/*
	 * fastpath, to avoid expensive locking.  If this buffer is already
	 * on the running transaction's metadata list there is nothing to do.
	 * Nobody can take it off again because there is a handle open.
	 * I _think_ we're OK here with SMP barriers - a mistaken decision will
	 * result in this test being false, so we go in and take the locks.
	 *
	 * b_state_lock is already held at this point; the lock this path
	 * skips is journal->j_list_lock, taken further down for refiling.
	 */
	if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
		JBUFFER_TRACE(jh, "fastpath");
		if (unlikely(jh->b_transaction !=
			     journal->j_running_transaction)) {
			printk(KERN_ERR "JBD2: %s: "
			       "jh->b_transaction (%llu, %p, %u) != "
			       "journal->j_running_transaction (%p, %u)\n",
			       journal->j_devname,
			       (unsigned long long) bh->b_blocknr,
			       jh->b_transaction,
			       jh->b_transaction ? jh->b_transaction->t_tid : 0,
			       journal->j_running_transaction,
			       journal->j_running_transaction ?
			       journal->j_running_transaction->t_tid : 0);
			ret = -EINVAL;
		}
		goto out_unlock_bh;
	}

	set_buffer_jbddirty(bh);

	/*
	 * Metadata already on the current transaction list doesn't
	 * need to be filed.  Metadata on another transaction's list must
	 * be committing, and will be refiled once the commit completes:
	 * leave it alone for now.
	 */
	if (jh->b_transaction != transaction) {
		JBUFFER_TRACE(jh, "already on other transaction");
		/*
		 * The only legal owner other than us is the committing
		 * transaction, with the buffer queued for us via
		 * b_next_transaction; anything else is reported as -EINVAL.
		 */
		if (unlikely(((jh->b_transaction !=
			       journal->j_committing_transaction)) ||
			     (jh->b_next_transaction != transaction))) {
			printk(KERN_ERR "jbd2_journal_dirty_metadata: %s: "
			       "bad jh for block %llu: "
			       "transaction (%p, %u), "
			       "jh->b_transaction (%p, %u), "
			       "jh->b_next_transaction (%p, %u), jlist %u\n",
			       journal->j_devname,
			       (unsigned long long) bh->b_blocknr,
			       transaction, transaction->t_tid,
			       jh->b_transaction,
			       jh->b_transaction ?
			       jh->b_transaction->t_tid : 0,
			       jh->b_next_transaction,
			       jh->b_next_transaction ?
			       jh->b_next_transaction->t_tid : 0,
			       jh->b_jlist);
			WARN_ON(1);
			ret = -EINVAL;
		}
		/* And this case is illegal: we can't reuse another
		 * transaction's data buffer, ever. */
		goto out_unlock_bh;
	}

	/* That test should have eliminated the following case: */
	J_ASSERT_JH(jh, jh->b_frozen_data == NULL);

	JBUFFER_TRACE(jh, "file as BJ_Metadata");
	/* j_list_lock nests inside b_state_lock here. */
	spin_lock(&journal->j_list_lock);
	__jbd2_journal_file_buffer(jh, transaction, BJ_Metadata);
	spin_unlock(&journal->j_list_lock);
out_unlock_bh:
	spin_unlock(&jh->b_state_lock);
out:
	JBUFFER_TRACE(jh, "exit");
	return ret;
}

/**
 * jbd2_journal_forget() - bforget() for potentially-journaled buffers.
 * @handle: transaction handle
 * @bh: bh to 'forget'
 *
 * We can only do the bforget if there are no commits pending against the
 * buffer. If the buffer is dirty in the current running transaction we
 * can safely unlink it.
 *
 * bh may not be a journalled buffer at all - it may be a non-JBD
 * buffer which came off the hashtable. Check for this.
16308c2ecf20Sopenharmony_ci * 16318c2ecf20Sopenharmony_ci * Decrements bh->b_count by one. 16328c2ecf20Sopenharmony_ci * 16338c2ecf20Sopenharmony_ci * Allow this call even if the handle has aborted --- it may be part of 16348c2ecf20Sopenharmony_ci * the caller's cleanup after an abort. 16358c2ecf20Sopenharmony_ci */ 16368c2ecf20Sopenharmony_ciint jbd2_journal_forget(handle_t *handle, struct buffer_head *bh) 16378c2ecf20Sopenharmony_ci{ 16388c2ecf20Sopenharmony_ci transaction_t *transaction = handle->h_transaction; 16398c2ecf20Sopenharmony_ci journal_t *journal; 16408c2ecf20Sopenharmony_ci struct journal_head *jh; 16418c2ecf20Sopenharmony_ci int drop_reserve = 0; 16428c2ecf20Sopenharmony_ci int err = 0; 16438c2ecf20Sopenharmony_ci int was_modified = 0; 16448c2ecf20Sopenharmony_ci 16458c2ecf20Sopenharmony_ci if (is_handle_aborted(handle)) 16468c2ecf20Sopenharmony_ci return -EROFS; 16478c2ecf20Sopenharmony_ci journal = transaction->t_journal; 16488c2ecf20Sopenharmony_ci 16498c2ecf20Sopenharmony_ci BUFFER_TRACE(bh, "entry"); 16508c2ecf20Sopenharmony_ci 16518c2ecf20Sopenharmony_ci jh = jbd2_journal_grab_journal_head(bh); 16528c2ecf20Sopenharmony_ci if (!jh) { 16538c2ecf20Sopenharmony_ci __bforget(bh); 16548c2ecf20Sopenharmony_ci return 0; 16558c2ecf20Sopenharmony_ci } 16568c2ecf20Sopenharmony_ci 16578c2ecf20Sopenharmony_ci spin_lock(&jh->b_state_lock); 16588c2ecf20Sopenharmony_ci 16598c2ecf20Sopenharmony_ci /* Critical error: attempting to delete a bitmap buffer, maybe? 16608c2ecf20Sopenharmony_ci * Don't do any jbd operations, and return an error. 
*/ 16618c2ecf20Sopenharmony_ci if (!J_EXPECT_JH(jh, !jh->b_committed_data, 16628c2ecf20Sopenharmony_ci "inconsistent data on disk")) { 16638c2ecf20Sopenharmony_ci err = -EIO; 16648c2ecf20Sopenharmony_ci goto drop; 16658c2ecf20Sopenharmony_ci } 16668c2ecf20Sopenharmony_ci 16678c2ecf20Sopenharmony_ci /* keep track of whether or not this transaction modified us */ 16688c2ecf20Sopenharmony_ci was_modified = jh->b_modified; 16698c2ecf20Sopenharmony_ci 16708c2ecf20Sopenharmony_ci /* 16718c2ecf20Sopenharmony_ci * The buffer's going from the transaction, we must drop 16728c2ecf20Sopenharmony_ci * all references -bzzz 16738c2ecf20Sopenharmony_ci */ 16748c2ecf20Sopenharmony_ci jh->b_modified = 0; 16758c2ecf20Sopenharmony_ci 16768c2ecf20Sopenharmony_ci if (jh->b_transaction == transaction) { 16778c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, !jh->b_frozen_data); 16788c2ecf20Sopenharmony_ci 16798c2ecf20Sopenharmony_ci /* If we are forgetting a buffer which is already part 16808c2ecf20Sopenharmony_ci * of this transaction, then we can just drop it from 16818c2ecf20Sopenharmony_ci * the transaction immediately. */ 16828c2ecf20Sopenharmony_ci clear_buffer_dirty(bh); 16838c2ecf20Sopenharmony_ci clear_buffer_jbddirty(bh); 16848c2ecf20Sopenharmony_ci 16858c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); 16868c2ecf20Sopenharmony_ci 16878c2ecf20Sopenharmony_ci /* 16888c2ecf20Sopenharmony_ci * we only want to drop a reference if this transaction 16898c2ecf20Sopenharmony_ci * modified the buffer 16908c2ecf20Sopenharmony_ci */ 16918c2ecf20Sopenharmony_ci if (was_modified) 16928c2ecf20Sopenharmony_ci drop_reserve = 1; 16938c2ecf20Sopenharmony_ci 16948c2ecf20Sopenharmony_ci /* 16958c2ecf20Sopenharmony_ci * We are no longer going to journal this buffer. 
16968c2ecf20Sopenharmony_ci * However, the commit of this transaction is still 16978c2ecf20Sopenharmony_ci * important to the buffer: the delete that we are now 16988c2ecf20Sopenharmony_ci * processing might obsolete an old log entry, so by 16998c2ecf20Sopenharmony_ci * committing, we can satisfy the buffer's checkpoint. 17008c2ecf20Sopenharmony_ci * 17018c2ecf20Sopenharmony_ci * So, if we have a checkpoint on the buffer, we should 17028c2ecf20Sopenharmony_ci * now refile the buffer on our BJ_Forget list so that 17038c2ecf20Sopenharmony_ci * we know to remove the checkpoint after we commit. 17048c2ecf20Sopenharmony_ci */ 17058c2ecf20Sopenharmony_ci 17068c2ecf20Sopenharmony_ci spin_lock(&journal->j_list_lock); 17078c2ecf20Sopenharmony_ci if (jh->b_cp_transaction) { 17088c2ecf20Sopenharmony_ci __jbd2_journal_temp_unlink_buffer(jh); 17098c2ecf20Sopenharmony_ci __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); 17108c2ecf20Sopenharmony_ci } else { 17118c2ecf20Sopenharmony_ci __jbd2_journal_unfile_buffer(jh); 17128c2ecf20Sopenharmony_ci jbd2_journal_put_journal_head(jh); 17138c2ecf20Sopenharmony_ci } 17148c2ecf20Sopenharmony_ci spin_unlock(&journal->j_list_lock); 17158c2ecf20Sopenharmony_ci } else if (jh->b_transaction) { 17168c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, (jh->b_transaction == 17178c2ecf20Sopenharmony_ci journal->j_committing_transaction)); 17188c2ecf20Sopenharmony_ci /* However, if the buffer is still owned by a prior 17198c2ecf20Sopenharmony_ci * (committing) transaction, we can't drop it yet... */ 17208c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "belongs to older transaction"); 17218c2ecf20Sopenharmony_ci /* ... 
but we CAN drop it from the new transaction through 17228c2ecf20Sopenharmony_ci * marking the buffer as freed and set j_next_transaction to 17238c2ecf20Sopenharmony_ci * the new transaction, so that not only the commit code 17248c2ecf20Sopenharmony_ci * knows it should clear dirty bits when it is done with the 17258c2ecf20Sopenharmony_ci * buffer, but also the buffer can be checkpointed only 17268c2ecf20Sopenharmony_ci * after the new transaction commits. */ 17278c2ecf20Sopenharmony_ci 17288c2ecf20Sopenharmony_ci set_buffer_freed(bh); 17298c2ecf20Sopenharmony_ci 17308c2ecf20Sopenharmony_ci if (!jh->b_next_transaction) { 17318c2ecf20Sopenharmony_ci spin_lock(&journal->j_list_lock); 17328c2ecf20Sopenharmony_ci jh->b_next_transaction = transaction; 17338c2ecf20Sopenharmony_ci spin_unlock(&journal->j_list_lock); 17348c2ecf20Sopenharmony_ci } else { 17358c2ecf20Sopenharmony_ci J_ASSERT(jh->b_next_transaction == transaction); 17368c2ecf20Sopenharmony_ci 17378c2ecf20Sopenharmony_ci /* 17388c2ecf20Sopenharmony_ci * only drop a reference if this transaction modified 17398c2ecf20Sopenharmony_ci * the buffer 17408c2ecf20Sopenharmony_ci */ 17418c2ecf20Sopenharmony_ci if (was_modified) 17428c2ecf20Sopenharmony_ci drop_reserve = 1; 17438c2ecf20Sopenharmony_ci } 17448c2ecf20Sopenharmony_ci } else { 17458c2ecf20Sopenharmony_ci /* 17468c2ecf20Sopenharmony_ci * Finally, if the buffer is not belongs to any 17478c2ecf20Sopenharmony_ci * transaction, we can just drop it now if it has no 17488c2ecf20Sopenharmony_ci * checkpoint. 
17498c2ecf20Sopenharmony_ci */ 17508c2ecf20Sopenharmony_ci spin_lock(&journal->j_list_lock); 17518c2ecf20Sopenharmony_ci if (!jh->b_cp_transaction) { 17528c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "belongs to none transaction"); 17538c2ecf20Sopenharmony_ci spin_unlock(&journal->j_list_lock); 17548c2ecf20Sopenharmony_ci goto drop; 17558c2ecf20Sopenharmony_ci } 17568c2ecf20Sopenharmony_ci 17578c2ecf20Sopenharmony_ci /* 17588c2ecf20Sopenharmony_ci * Otherwise, if the buffer has been written to disk, 17598c2ecf20Sopenharmony_ci * it is safe to remove the checkpoint and drop it. 17608c2ecf20Sopenharmony_ci */ 17618c2ecf20Sopenharmony_ci if (!buffer_dirty(bh)) { 17628c2ecf20Sopenharmony_ci __jbd2_journal_remove_checkpoint(jh); 17638c2ecf20Sopenharmony_ci spin_unlock(&journal->j_list_lock); 17648c2ecf20Sopenharmony_ci goto drop; 17658c2ecf20Sopenharmony_ci } 17668c2ecf20Sopenharmony_ci 17678c2ecf20Sopenharmony_ci /* 17688c2ecf20Sopenharmony_ci * The buffer is still not written to disk, we should 17698c2ecf20Sopenharmony_ci * attach this buffer to current transaction so that the 17708c2ecf20Sopenharmony_ci * buffer can be checkpointed only after the current 17718c2ecf20Sopenharmony_ci * transaction commits. 
17728c2ecf20Sopenharmony_ci */ 17738c2ecf20Sopenharmony_ci clear_buffer_dirty(bh); 17748c2ecf20Sopenharmony_ci __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); 17758c2ecf20Sopenharmony_ci spin_unlock(&journal->j_list_lock); 17768c2ecf20Sopenharmony_ci } 17778c2ecf20Sopenharmony_cidrop: 17788c2ecf20Sopenharmony_ci __brelse(bh); 17798c2ecf20Sopenharmony_ci spin_unlock(&jh->b_state_lock); 17808c2ecf20Sopenharmony_ci jbd2_journal_put_journal_head(jh); 17818c2ecf20Sopenharmony_ci if (drop_reserve) { 17828c2ecf20Sopenharmony_ci /* no need to reserve log space for this block -bzzz */ 17838c2ecf20Sopenharmony_ci handle->h_total_credits++; 17848c2ecf20Sopenharmony_ci } 17858c2ecf20Sopenharmony_ci return err; 17868c2ecf20Sopenharmony_ci} 17878c2ecf20Sopenharmony_ci 17888c2ecf20Sopenharmony_ci/** 17898c2ecf20Sopenharmony_ci * jbd2_journal_stop() - complete a transaction 17908c2ecf20Sopenharmony_ci * @handle: transaction to complete. 17918c2ecf20Sopenharmony_ci * 17928c2ecf20Sopenharmony_ci * All done for a particular handle. 17938c2ecf20Sopenharmony_ci * 17948c2ecf20Sopenharmony_ci * There is not much action needed here. We just return any remaining 17958c2ecf20Sopenharmony_ci * buffer credits to the transaction and remove the handle. The only 17968c2ecf20Sopenharmony_ci * complication is that we need to start a commit operation if the 17978c2ecf20Sopenharmony_ci * filesystem is marked for synchronous update. 17988c2ecf20Sopenharmony_ci * 17998c2ecf20Sopenharmony_ci * jbd2_journal_stop itself will not usually return an error, but it may 18008c2ecf20Sopenharmony_ci * do so in unusual circumstances. In particular, expect it to 18018c2ecf20Sopenharmony_ci * return -EIO if a jbd2_journal_abort has been executed since the 18028c2ecf20Sopenharmony_ci * transaction began. 
18038c2ecf20Sopenharmony_ci */ 18048c2ecf20Sopenharmony_ciint jbd2_journal_stop(handle_t *handle) 18058c2ecf20Sopenharmony_ci{ 18068c2ecf20Sopenharmony_ci transaction_t *transaction = handle->h_transaction; 18078c2ecf20Sopenharmony_ci journal_t *journal; 18088c2ecf20Sopenharmony_ci int err = 0, wait_for_commit = 0; 18098c2ecf20Sopenharmony_ci tid_t tid; 18108c2ecf20Sopenharmony_ci pid_t pid; 18118c2ecf20Sopenharmony_ci 18128c2ecf20Sopenharmony_ci if (--handle->h_ref > 0) { 18138c2ecf20Sopenharmony_ci jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, 18148c2ecf20Sopenharmony_ci handle->h_ref); 18158c2ecf20Sopenharmony_ci if (is_handle_aborted(handle)) 18168c2ecf20Sopenharmony_ci return -EIO; 18178c2ecf20Sopenharmony_ci return 0; 18188c2ecf20Sopenharmony_ci } 18198c2ecf20Sopenharmony_ci if (!transaction) { 18208c2ecf20Sopenharmony_ci /* 18218c2ecf20Sopenharmony_ci * Handle is already detached from the transaction so there is 18228c2ecf20Sopenharmony_ci * nothing to do other than free the handle. 
18238c2ecf20Sopenharmony_ci */ 18248c2ecf20Sopenharmony_ci memalloc_nofs_restore(handle->saved_alloc_context); 18258c2ecf20Sopenharmony_ci goto free_and_exit; 18268c2ecf20Sopenharmony_ci } 18278c2ecf20Sopenharmony_ci journal = transaction->t_journal; 18288c2ecf20Sopenharmony_ci tid = transaction->t_tid; 18298c2ecf20Sopenharmony_ci 18308c2ecf20Sopenharmony_ci if (is_handle_aborted(handle)) 18318c2ecf20Sopenharmony_ci err = -EIO; 18328c2ecf20Sopenharmony_ci 18338c2ecf20Sopenharmony_ci jbd_debug(4, "Handle %p going down\n", handle); 18348c2ecf20Sopenharmony_ci trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev, 18358c2ecf20Sopenharmony_ci tid, handle->h_type, handle->h_line_no, 18368c2ecf20Sopenharmony_ci jiffies - handle->h_start_jiffies, 18378c2ecf20Sopenharmony_ci handle->h_sync, handle->h_requested_credits, 18388c2ecf20Sopenharmony_ci (handle->h_requested_credits - 18398c2ecf20Sopenharmony_ci handle->h_total_credits)); 18408c2ecf20Sopenharmony_ci 18418c2ecf20Sopenharmony_ci /* 18428c2ecf20Sopenharmony_ci * Implement synchronous transaction batching. If the handle 18438c2ecf20Sopenharmony_ci * was synchronous, don't force a commit immediately. Let's 18448c2ecf20Sopenharmony_ci * yield and let another thread piggyback onto this 18458c2ecf20Sopenharmony_ci * transaction. Keep doing that while new threads continue to 18468c2ecf20Sopenharmony_ci * arrive. It doesn't cost much - we're about to run a commit 18478c2ecf20Sopenharmony_ci * and sleep on IO anyway. Speeds up many-threaded, many-dir 18488c2ecf20Sopenharmony_ci * operations by 30x or more... 18498c2ecf20Sopenharmony_ci * 18508c2ecf20Sopenharmony_ci * We try and optimize the sleep time against what the 18518c2ecf20Sopenharmony_ci * underlying disk can do, instead of having a static sleep 18528c2ecf20Sopenharmony_ci * time. 
This is useful for the case where our storage is so 18538c2ecf20Sopenharmony_ci * fast that it is more optimal to go ahead and force a flush 18548c2ecf20Sopenharmony_ci * and wait for the transaction to be committed than it is to 18558c2ecf20Sopenharmony_ci * wait for an arbitrary amount of time for new writers to 18568c2ecf20Sopenharmony_ci * join the transaction. We achieve this by measuring how 18578c2ecf20Sopenharmony_ci * long it takes to commit a transaction, and compare it with 18588c2ecf20Sopenharmony_ci * how long this transaction has been running, and if run time 18598c2ecf20Sopenharmony_ci * < commit time then we sleep for the delta and commit. This 18608c2ecf20Sopenharmony_ci * greatly helps super fast disks that would see slowdowns as 18618c2ecf20Sopenharmony_ci * more threads started doing fsyncs. 18628c2ecf20Sopenharmony_ci * 18638c2ecf20Sopenharmony_ci * But don't do this if this process was the most recent one 18648c2ecf20Sopenharmony_ci * to perform a synchronous write. We do this to detect the 18658c2ecf20Sopenharmony_ci * case where a single process is doing a stream of sync 18668c2ecf20Sopenharmony_ci * writes. No point in waiting for joiners in that case. 18678c2ecf20Sopenharmony_ci * 18688c2ecf20Sopenharmony_ci * Setting max_batch_time to 0 disables this completely. 
18698c2ecf20Sopenharmony_ci */ 18708c2ecf20Sopenharmony_ci pid = current->pid; 18718c2ecf20Sopenharmony_ci if (handle->h_sync && journal->j_last_sync_writer != pid && 18728c2ecf20Sopenharmony_ci journal->j_max_batch_time) { 18738c2ecf20Sopenharmony_ci u64 commit_time, trans_time; 18748c2ecf20Sopenharmony_ci 18758c2ecf20Sopenharmony_ci journal->j_last_sync_writer = pid; 18768c2ecf20Sopenharmony_ci 18778c2ecf20Sopenharmony_ci read_lock(&journal->j_state_lock); 18788c2ecf20Sopenharmony_ci commit_time = journal->j_average_commit_time; 18798c2ecf20Sopenharmony_ci read_unlock(&journal->j_state_lock); 18808c2ecf20Sopenharmony_ci 18818c2ecf20Sopenharmony_ci trans_time = ktime_to_ns(ktime_sub(ktime_get(), 18828c2ecf20Sopenharmony_ci transaction->t_start_time)); 18838c2ecf20Sopenharmony_ci 18848c2ecf20Sopenharmony_ci commit_time = max_t(u64, commit_time, 18858c2ecf20Sopenharmony_ci 1000*journal->j_min_batch_time); 18868c2ecf20Sopenharmony_ci commit_time = min_t(u64, commit_time, 18878c2ecf20Sopenharmony_ci 1000*journal->j_max_batch_time); 18888c2ecf20Sopenharmony_ci 18898c2ecf20Sopenharmony_ci if (trans_time < commit_time) { 18908c2ecf20Sopenharmony_ci ktime_t expires = ktime_add_ns(ktime_get(), 18918c2ecf20Sopenharmony_ci commit_time); 18928c2ecf20Sopenharmony_ci set_current_state(TASK_UNINTERRUPTIBLE); 18938c2ecf20Sopenharmony_ci schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); 18948c2ecf20Sopenharmony_ci } 18958c2ecf20Sopenharmony_ci } 18968c2ecf20Sopenharmony_ci 18978c2ecf20Sopenharmony_ci if (handle->h_sync) 18988c2ecf20Sopenharmony_ci transaction->t_synchronous_commit = 1; 18998c2ecf20Sopenharmony_ci 19008c2ecf20Sopenharmony_ci /* 19018c2ecf20Sopenharmony_ci * If the handle is marked SYNC, we need to set another commit 19028c2ecf20Sopenharmony_ci * going! We also want to force a commit if the transaction is too 19038c2ecf20Sopenharmony_ci * old now. 
19048c2ecf20Sopenharmony_ci */ 19058c2ecf20Sopenharmony_ci if (handle->h_sync || 19068c2ecf20Sopenharmony_ci time_after_eq(jiffies, transaction->t_expires)) { 19078c2ecf20Sopenharmony_ci /* Do this even for aborted journals: an abort still 19088c2ecf20Sopenharmony_ci * completes the commit thread, it just doesn't write 19098c2ecf20Sopenharmony_ci * anything to disk. */ 19108c2ecf20Sopenharmony_ci 19118c2ecf20Sopenharmony_ci jbd_debug(2, "transaction too old, requesting commit for " 19128c2ecf20Sopenharmony_ci "handle %p\n", handle); 19138c2ecf20Sopenharmony_ci /* This is non-blocking */ 19148c2ecf20Sopenharmony_ci jbd2_log_start_commit(journal, tid); 19158c2ecf20Sopenharmony_ci 19168c2ecf20Sopenharmony_ci /* 19178c2ecf20Sopenharmony_ci * Special case: JBD2_SYNC synchronous updates require us 19188c2ecf20Sopenharmony_ci * to wait for the commit to complete. 19198c2ecf20Sopenharmony_ci */ 19208c2ecf20Sopenharmony_ci if (handle->h_sync && !(current->flags & PF_MEMALLOC)) 19218c2ecf20Sopenharmony_ci wait_for_commit = 1; 19228c2ecf20Sopenharmony_ci } 19238c2ecf20Sopenharmony_ci 19248c2ecf20Sopenharmony_ci /* 19258c2ecf20Sopenharmony_ci * Once stop_this_handle() drops t_updates, the transaction could start 19268c2ecf20Sopenharmony_ci * committing on us and eventually disappear. So we must not 19278c2ecf20Sopenharmony_ci * dereference transaction pointer again after calling 19288c2ecf20Sopenharmony_ci * stop_this_handle(). 
19298c2ecf20Sopenharmony_ci */ 19308c2ecf20Sopenharmony_ci stop_this_handle(handle); 19318c2ecf20Sopenharmony_ci 19328c2ecf20Sopenharmony_ci if (wait_for_commit) 19338c2ecf20Sopenharmony_ci err = jbd2_log_wait_commit(journal, tid); 19348c2ecf20Sopenharmony_ci 19358c2ecf20Sopenharmony_cifree_and_exit: 19368c2ecf20Sopenharmony_ci if (handle->h_rsv_handle) 19378c2ecf20Sopenharmony_ci jbd2_free_handle(handle->h_rsv_handle); 19388c2ecf20Sopenharmony_ci jbd2_free_handle(handle); 19398c2ecf20Sopenharmony_ci return err; 19408c2ecf20Sopenharmony_ci} 19418c2ecf20Sopenharmony_ci 19428c2ecf20Sopenharmony_ci/* 19438c2ecf20Sopenharmony_ci * 19448c2ecf20Sopenharmony_ci * List management code snippets: various functions for manipulating the 19458c2ecf20Sopenharmony_ci * transaction buffer lists. 19468c2ecf20Sopenharmony_ci * 19478c2ecf20Sopenharmony_ci */ 19488c2ecf20Sopenharmony_ci 19498c2ecf20Sopenharmony_ci/* 19508c2ecf20Sopenharmony_ci * Append a buffer to a transaction list, given the transaction's list head 19518c2ecf20Sopenharmony_ci * pointer. 19528c2ecf20Sopenharmony_ci * 19538c2ecf20Sopenharmony_ci * j_list_lock is held. 19548c2ecf20Sopenharmony_ci * 19558c2ecf20Sopenharmony_ci * jh->b_state_lock is held. 
19568c2ecf20Sopenharmony_ci */ 19578c2ecf20Sopenharmony_ci 19588c2ecf20Sopenharmony_cistatic inline void 19598c2ecf20Sopenharmony_ci__blist_add_buffer(struct journal_head **list, struct journal_head *jh) 19608c2ecf20Sopenharmony_ci{ 19618c2ecf20Sopenharmony_ci if (!*list) { 19628c2ecf20Sopenharmony_ci jh->b_tnext = jh->b_tprev = jh; 19638c2ecf20Sopenharmony_ci *list = jh; 19648c2ecf20Sopenharmony_ci } else { 19658c2ecf20Sopenharmony_ci /* Insert at the tail of the list to preserve order */ 19668c2ecf20Sopenharmony_ci struct journal_head *first = *list, *last = first->b_tprev; 19678c2ecf20Sopenharmony_ci jh->b_tprev = last; 19688c2ecf20Sopenharmony_ci jh->b_tnext = first; 19698c2ecf20Sopenharmony_ci last->b_tnext = first->b_tprev = jh; 19708c2ecf20Sopenharmony_ci } 19718c2ecf20Sopenharmony_ci} 19728c2ecf20Sopenharmony_ci 19738c2ecf20Sopenharmony_ci/* 19748c2ecf20Sopenharmony_ci * Remove a buffer from a transaction list, given the transaction's list 19758c2ecf20Sopenharmony_ci * head pointer. 19768c2ecf20Sopenharmony_ci * 19778c2ecf20Sopenharmony_ci * Called with j_list_lock held, and the journal may not be locked. 19788c2ecf20Sopenharmony_ci * 19798c2ecf20Sopenharmony_ci * jh->b_state_lock is held. 19808c2ecf20Sopenharmony_ci */ 19818c2ecf20Sopenharmony_ci 19828c2ecf20Sopenharmony_cistatic inline void 19838c2ecf20Sopenharmony_ci__blist_del_buffer(struct journal_head **list, struct journal_head *jh) 19848c2ecf20Sopenharmony_ci{ 19858c2ecf20Sopenharmony_ci if (*list == jh) { 19868c2ecf20Sopenharmony_ci *list = jh->b_tnext; 19878c2ecf20Sopenharmony_ci if (*list == jh) 19888c2ecf20Sopenharmony_ci *list = NULL; 19898c2ecf20Sopenharmony_ci } 19908c2ecf20Sopenharmony_ci jh->b_tprev->b_tnext = jh->b_tnext; 19918c2ecf20Sopenharmony_ci jh->b_tnext->b_tprev = jh->b_tprev; 19928c2ecf20Sopenharmony_ci} 19938c2ecf20Sopenharmony_ci 19948c2ecf20Sopenharmony_ci/* 19958c2ecf20Sopenharmony_ci * Remove a buffer from the appropriate transaction list. 
19968c2ecf20Sopenharmony_ci * 19978c2ecf20Sopenharmony_ci * Note that this function can *change* the value of 19988c2ecf20Sopenharmony_ci * bh->b_transaction->t_buffers, t_forget, t_shadow_list, t_log_list or 19998c2ecf20Sopenharmony_ci * t_reserved_list. If the caller is holding onto a copy of one of these 20008c2ecf20Sopenharmony_ci * pointers, it could go bad. Generally the caller needs to re-read the 20018c2ecf20Sopenharmony_ci * pointer from the transaction_t. 20028c2ecf20Sopenharmony_ci * 20038c2ecf20Sopenharmony_ci * Called under j_list_lock. 20048c2ecf20Sopenharmony_ci */ 20058c2ecf20Sopenharmony_cistatic void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) 20068c2ecf20Sopenharmony_ci{ 20078c2ecf20Sopenharmony_ci struct journal_head **list = NULL; 20088c2ecf20Sopenharmony_ci transaction_t *transaction; 20098c2ecf20Sopenharmony_ci struct buffer_head *bh = jh2bh(jh); 20108c2ecf20Sopenharmony_ci 20118c2ecf20Sopenharmony_ci lockdep_assert_held(&jh->b_state_lock); 20128c2ecf20Sopenharmony_ci transaction = jh->b_transaction; 20138c2ecf20Sopenharmony_ci if (transaction) 20148c2ecf20Sopenharmony_ci assert_spin_locked(&transaction->t_journal->j_list_lock); 20158c2ecf20Sopenharmony_ci 20168c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); 20178c2ecf20Sopenharmony_ci if (jh->b_jlist != BJ_None) 20188c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, transaction != NULL); 20198c2ecf20Sopenharmony_ci 20208c2ecf20Sopenharmony_ci switch (jh->b_jlist) { 20218c2ecf20Sopenharmony_ci case BJ_None: 20228c2ecf20Sopenharmony_ci return; 20238c2ecf20Sopenharmony_ci case BJ_Metadata: 20248c2ecf20Sopenharmony_ci transaction->t_nr_buffers--; 20258c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0); 20268c2ecf20Sopenharmony_ci list = &transaction->t_buffers; 20278c2ecf20Sopenharmony_ci break; 20288c2ecf20Sopenharmony_ci case BJ_Forget: 20298c2ecf20Sopenharmony_ci list = &transaction->t_forget; 20308c2ecf20Sopenharmony_ci break; 
20318c2ecf20Sopenharmony_ci case BJ_Shadow: 20328c2ecf20Sopenharmony_ci list = &transaction->t_shadow_list; 20338c2ecf20Sopenharmony_ci break; 20348c2ecf20Sopenharmony_ci case BJ_Reserved: 20358c2ecf20Sopenharmony_ci list = &transaction->t_reserved_list; 20368c2ecf20Sopenharmony_ci break; 20378c2ecf20Sopenharmony_ci } 20388c2ecf20Sopenharmony_ci 20398c2ecf20Sopenharmony_ci __blist_del_buffer(list, jh); 20408c2ecf20Sopenharmony_ci jh->b_jlist = BJ_None; 20418c2ecf20Sopenharmony_ci if (transaction && is_journal_aborted(transaction->t_journal)) 20428c2ecf20Sopenharmony_ci clear_buffer_jbddirty(bh); 20438c2ecf20Sopenharmony_ci else if (test_clear_buffer_jbddirty(bh)) 20448c2ecf20Sopenharmony_ci mark_buffer_dirty(bh); /* Expose it to the VM */ 20458c2ecf20Sopenharmony_ci} 20468c2ecf20Sopenharmony_ci 20478c2ecf20Sopenharmony_ci/* 20488c2ecf20Sopenharmony_ci * Remove buffer from all transactions. The caller is responsible for dropping 20498c2ecf20Sopenharmony_ci * the jh reference that belonged to the transaction. 
20508c2ecf20Sopenharmony_ci * 20518c2ecf20Sopenharmony_ci * Called with bh_state lock and j_list_lock 20528c2ecf20Sopenharmony_ci */ 20538c2ecf20Sopenharmony_cistatic void __jbd2_journal_unfile_buffer(struct journal_head *jh) 20548c2ecf20Sopenharmony_ci{ 20558c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, jh->b_transaction != NULL); 20568c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, jh->b_next_transaction == NULL); 20578c2ecf20Sopenharmony_ci 20588c2ecf20Sopenharmony_ci __jbd2_journal_temp_unlink_buffer(jh); 20598c2ecf20Sopenharmony_ci jh->b_transaction = NULL; 20608c2ecf20Sopenharmony_ci} 20618c2ecf20Sopenharmony_ci 20628c2ecf20Sopenharmony_civoid jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) 20638c2ecf20Sopenharmony_ci{ 20648c2ecf20Sopenharmony_ci struct buffer_head *bh = jh2bh(jh); 20658c2ecf20Sopenharmony_ci 20668c2ecf20Sopenharmony_ci /* Get reference so that buffer cannot be freed before we unlock it */ 20678c2ecf20Sopenharmony_ci get_bh(bh); 20688c2ecf20Sopenharmony_ci spin_lock(&jh->b_state_lock); 20698c2ecf20Sopenharmony_ci spin_lock(&journal->j_list_lock); 20708c2ecf20Sopenharmony_ci __jbd2_journal_unfile_buffer(jh); 20718c2ecf20Sopenharmony_ci spin_unlock(&journal->j_list_lock); 20728c2ecf20Sopenharmony_ci spin_unlock(&jh->b_state_lock); 20738c2ecf20Sopenharmony_ci jbd2_journal_put_journal_head(jh); 20748c2ecf20Sopenharmony_ci __brelse(bh); 20758c2ecf20Sopenharmony_ci} 20768c2ecf20Sopenharmony_ci 20778c2ecf20Sopenharmony_ci/* 20788c2ecf20Sopenharmony_ci * Called from jbd2_journal_try_to_free_buffers(). 
20798c2ecf20Sopenharmony_ci * 20808c2ecf20Sopenharmony_ci * Called under jh->b_state_lock 20818c2ecf20Sopenharmony_ci */ 20828c2ecf20Sopenharmony_cistatic void 20838c2ecf20Sopenharmony_ci__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) 20848c2ecf20Sopenharmony_ci{ 20858c2ecf20Sopenharmony_ci struct journal_head *jh; 20868c2ecf20Sopenharmony_ci 20878c2ecf20Sopenharmony_ci jh = bh2jh(bh); 20888c2ecf20Sopenharmony_ci 20898c2ecf20Sopenharmony_ci if (buffer_locked(bh) || buffer_dirty(bh)) 20908c2ecf20Sopenharmony_ci goto out; 20918c2ecf20Sopenharmony_ci 20928c2ecf20Sopenharmony_ci if (jh->b_next_transaction != NULL || jh->b_transaction != NULL) 20938c2ecf20Sopenharmony_ci goto out; 20948c2ecf20Sopenharmony_ci 20958c2ecf20Sopenharmony_ci spin_lock(&journal->j_list_lock); 20968c2ecf20Sopenharmony_ci if (jh->b_cp_transaction != NULL) { 20978c2ecf20Sopenharmony_ci /* written-back checkpointed metadata buffer */ 20988c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "remove from checkpoint list"); 20998c2ecf20Sopenharmony_ci __jbd2_journal_remove_checkpoint(jh); 21008c2ecf20Sopenharmony_ci } 21018c2ecf20Sopenharmony_ci spin_unlock(&journal->j_list_lock); 21028c2ecf20Sopenharmony_ciout: 21038c2ecf20Sopenharmony_ci return; 21048c2ecf20Sopenharmony_ci} 21058c2ecf20Sopenharmony_ci 21068c2ecf20Sopenharmony_ci/** 21078c2ecf20Sopenharmony_ci * jbd2_journal_try_to_free_buffers() - try to free page buffers. 21088c2ecf20Sopenharmony_ci * @journal: journal for operation 21098c2ecf20Sopenharmony_ci * @page: to try and free 21108c2ecf20Sopenharmony_ci * 21118c2ecf20Sopenharmony_ci * For all the buffers on this page, 21128c2ecf20Sopenharmony_ci * if they are fully written out ordered data, move them onto BUF_CLEAN 21138c2ecf20Sopenharmony_ci * so try_to_free_buffers() can reap them. 21148c2ecf20Sopenharmony_ci * 21158c2ecf20Sopenharmony_ci * This function returns non-zero if we wish try_to_free_buffers() 21168c2ecf20Sopenharmony_ci * to be called. 
We do this if the page is releasable by try_to_free_buffers(). 21178c2ecf20Sopenharmony_ci * We also do it if the page has locked or dirty buffers and the caller wants 21188c2ecf20Sopenharmony_ci * us to perform sync or async writeout. 21198c2ecf20Sopenharmony_ci * 21208c2ecf20Sopenharmony_ci * This complicates JBD locking somewhat. We aren't protected by the 21218c2ecf20Sopenharmony_ci * BKL here. We wish to remove the buffer from its committing or 21228c2ecf20Sopenharmony_ci * running transaction's ->t_datalist via __jbd2_journal_unfile_buffer. 21238c2ecf20Sopenharmony_ci * 21248c2ecf20Sopenharmony_ci * This may *change* the value of transaction_t->t_datalist, so anyone 21258c2ecf20Sopenharmony_ci * who looks at t_datalist needs to lock against this function. 21268c2ecf20Sopenharmony_ci * 21278c2ecf20Sopenharmony_ci * Even worse, someone may be doing a jbd2_journal_dirty_data on this 21288c2ecf20Sopenharmony_ci * buffer. So we need to lock against that. jbd2_journal_dirty_data() 21298c2ecf20Sopenharmony_ci * will come out of the lock with the buffer dirty, which makes it 21308c2ecf20Sopenharmony_ci * ineligible for release here. 21318c2ecf20Sopenharmony_ci * 21328c2ecf20Sopenharmony_ci * Who else is affected by this? hmm... Really the only contender 21338c2ecf20Sopenharmony_ci * is do_get_write_access() - it could be looking at the buffer while 21348c2ecf20Sopenharmony_ci * journal_try_to_free_buffer() is changing its state. But that 21358c2ecf20Sopenharmony_ci * cannot happen because we never reallocate freed data as metadata 21368c2ecf20Sopenharmony_ci * while the data is part of a transaction. Yes? 
21378c2ecf20Sopenharmony_ci * 21388c2ecf20Sopenharmony_ci * Return 0 on failure, 1 on success 21398c2ecf20Sopenharmony_ci */ 21408c2ecf20Sopenharmony_ciint jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page) 21418c2ecf20Sopenharmony_ci{ 21428c2ecf20Sopenharmony_ci struct buffer_head *head; 21438c2ecf20Sopenharmony_ci struct buffer_head *bh; 21448c2ecf20Sopenharmony_ci int ret = 0; 21458c2ecf20Sopenharmony_ci 21468c2ecf20Sopenharmony_ci J_ASSERT(PageLocked(page)); 21478c2ecf20Sopenharmony_ci 21488c2ecf20Sopenharmony_ci head = page_buffers(page); 21498c2ecf20Sopenharmony_ci bh = head; 21508c2ecf20Sopenharmony_ci do { 21518c2ecf20Sopenharmony_ci struct journal_head *jh; 21528c2ecf20Sopenharmony_ci 21538c2ecf20Sopenharmony_ci /* 21548c2ecf20Sopenharmony_ci * We take our own ref against the journal_head here to avoid 21558c2ecf20Sopenharmony_ci * having to add tons of locking around each instance of 21568c2ecf20Sopenharmony_ci * jbd2_journal_put_journal_head(). 21578c2ecf20Sopenharmony_ci */ 21588c2ecf20Sopenharmony_ci jh = jbd2_journal_grab_journal_head(bh); 21598c2ecf20Sopenharmony_ci if (!jh) 21608c2ecf20Sopenharmony_ci continue; 21618c2ecf20Sopenharmony_ci 21628c2ecf20Sopenharmony_ci spin_lock(&jh->b_state_lock); 21638c2ecf20Sopenharmony_ci __journal_try_to_free_buffer(journal, bh); 21648c2ecf20Sopenharmony_ci spin_unlock(&jh->b_state_lock); 21658c2ecf20Sopenharmony_ci jbd2_journal_put_journal_head(jh); 21668c2ecf20Sopenharmony_ci if (buffer_jbd(bh)) 21678c2ecf20Sopenharmony_ci goto busy; 21688c2ecf20Sopenharmony_ci } while ((bh = bh->b_this_page) != head); 21698c2ecf20Sopenharmony_ci 21708c2ecf20Sopenharmony_ci ret = try_to_free_buffers(page); 21718c2ecf20Sopenharmony_cibusy: 21728c2ecf20Sopenharmony_ci return ret; 21738c2ecf20Sopenharmony_ci} 21748c2ecf20Sopenharmony_ci 21758c2ecf20Sopenharmony_ci/* 21768c2ecf20Sopenharmony_ci * This buffer is no longer needed. 
If it is on an older transaction's 21778c2ecf20Sopenharmony_ci * checkpoint list we need to record it on this transaction's forget list 21788c2ecf20Sopenharmony_ci * to pin this buffer (and hence its checkpointing transaction) down until 21798c2ecf20Sopenharmony_ci * this transaction commits. If the buffer isn't on a checkpoint list, we 21808c2ecf20Sopenharmony_ci * release it. 21818c2ecf20Sopenharmony_ci * Returns non-zero if JBD no longer has an interest in the buffer. 21828c2ecf20Sopenharmony_ci * 21838c2ecf20Sopenharmony_ci * Called under j_list_lock. 21848c2ecf20Sopenharmony_ci * 21858c2ecf20Sopenharmony_ci * Called under jh->b_state_lock. 21868c2ecf20Sopenharmony_ci */ 21878c2ecf20Sopenharmony_cistatic int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) 21888c2ecf20Sopenharmony_ci{ 21898c2ecf20Sopenharmony_ci int may_free = 1; 21908c2ecf20Sopenharmony_ci struct buffer_head *bh = jh2bh(jh); 21918c2ecf20Sopenharmony_ci 21928c2ecf20Sopenharmony_ci if (jh->b_cp_transaction) { 21938c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "on running+cp transaction"); 21948c2ecf20Sopenharmony_ci __jbd2_journal_temp_unlink_buffer(jh); 21958c2ecf20Sopenharmony_ci /* 21968c2ecf20Sopenharmony_ci * We don't want to write the buffer anymore, clear the 21978c2ecf20Sopenharmony_ci * bit so that we don't confuse checks in 21988c2ecf20Sopenharmony_ci * __journal_file_buffer 21998c2ecf20Sopenharmony_ci */ 22008c2ecf20Sopenharmony_ci clear_buffer_dirty(bh); 22018c2ecf20Sopenharmony_ci __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); 22028c2ecf20Sopenharmony_ci may_free = 0; 22038c2ecf20Sopenharmony_ci } else { 22048c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "on running transaction"); 22058c2ecf20Sopenharmony_ci __jbd2_journal_unfile_buffer(jh); 22068c2ecf20Sopenharmony_ci jbd2_journal_put_journal_head(jh); 22078c2ecf20Sopenharmony_ci } 22088c2ecf20Sopenharmony_ci return may_free; 22098c2ecf20Sopenharmony_ci} 22108c2ecf20Sopenharmony_ci 22118c2ecf20Sopenharmony_ci/* 
22128c2ecf20Sopenharmony_ci * jbd2_journal_invalidatepage 22138c2ecf20Sopenharmony_ci * 22148c2ecf20Sopenharmony_ci * This code is tricky. It has a number of cases to deal with. 22158c2ecf20Sopenharmony_ci * 22168c2ecf20Sopenharmony_ci * There are two invariants which this code relies on: 22178c2ecf20Sopenharmony_ci * 22188c2ecf20Sopenharmony_ci * i_size must be updated on disk before we start calling invalidatepage on the 22198c2ecf20Sopenharmony_ci * data. 22208c2ecf20Sopenharmony_ci * 22218c2ecf20Sopenharmony_ci * This is done in ext3 by defining an ext3_setattr method which 22228c2ecf20Sopenharmony_ci * updates i_size before truncate gets going. By maintaining this 22238c2ecf20Sopenharmony_ci * invariant, we can be sure that it is safe to throw away any buffers 22248c2ecf20Sopenharmony_ci * attached to the current transaction: once the transaction commits, 22258c2ecf20Sopenharmony_ci * we know that the data will not be needed. 22268c2ecf20Sopenharmony_ci * 22278c2ecf20Sopenharmony_ci * Note however that we can *not* throw away data belonging to the 22288c2ecf20Sopenharmony_ci * previous, committing transaction! 22298c2ecf20Sopenharmony_ci * 22308c2ecf20Sopenharmony_ci * Any disk blocks which *are* part of the previous, committing 22318c2ecf20Sopenharmony_ci * transaction (and which therefore cannot be discarded immediately) are 22328c2ecf20Sopenharmony_ci * not going to be reused in the new running transaction 22338c2ecf20Sopenharmony_ci * 22348c2ecf20Sopenharmony_ci * The bitmap committed_data images guarantee this: any block which is 22358c2ecf20Sopenharmony_ci * allocated in one transaction and removed in the next will be marked 22368c2ecf20Sopenharmony_ci * as in-use in the committed_data bitmap, so cannot be reused until 22378c2ecf20Sopenharmony_ci * the next transaction to delete the block commits. 
This means that 22388c2ecf20Sopenharmony_ci * leaving committing buffers dirty is quite safe: the disk blocks 22398c2ecf20Sopenharmony_ci * cannot be reallocated to a different file and so buffer aliasing is 22408c2ecf20Sopenharmony_ci * not possible. 22418c2ecf20Sopenharmony_ci * 22428c2ecf20Sopenharmony_ci * 22438c2ecf20Sopenharmony_ci * The above applies mainly to ordered data mode. In writeback mode we 22448c2ecf20Sopenharmony_ci * don't make guarantees about the order in which data hits disk --- in 22458c2ecf20Sopenharmony_ci * particular we don't guarantee that new dirty data is flushed before 22468c2ecf20Sopenharmony_ci * transaction commit --- so it is always safe just to discard data 22478c2ecf20Sopenharmony_ci * immediately in that mode. --sct 22488c2ecf20Sopenharmony_ci */ 22498c2ecf20Sopenharmony_ci 22508c2ecf20Sopenharmony_ci/* 22518c2ecf20Sopenharmony_ci * The journal_unmap_buffer helper function returns zero if the buffer 22528c2ecf20Sopenharmony_ci * concerned remains pinned as an anonymous buffer belonging to an older 22538c2ecf20Sopenharmony_ci * transaction. 22548c2ecf20Sopenharmony_ci * 22558c2ecf20Sopenharmony_ci * We're outside-transaction here. Either or both of j_running_transaction 22568c2ecf20Sopenharmony_ci * and j_committing_transaction may be NULL. 
22578c2ecf20Sopenharmony_ci */ 22588c2ecf20Sopenharmony_cistatic int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, 22598c2ecf20Sopenharmony_ci int partial_page) 22608c2ecf20Sopenharmony_ci{ 22618c2ecf20Sopenharmony_ci transaction_t *transaction; 22628c2ecf20Sopenharmony_ci struct journal_head *jh; 22638c2ecf20Sopenharmony_ci int may_free = 1; 22648c2ecf20Sopenharmony_ci 22658c2ecf20Sopenharmony_ci BUFFER_TRACE(bh, "entry"); 22668c2ecf20Sopenharmony_ci 22678c2ecf20Sopenharmony_ci /* 22688c2ecf20Sopenharmony_ci * It is safe to proceed here without the j_list_lock because the 22698c2ecf20Sopenharmony_ci * buffers cannot be stolen by try_to_free_buffers as long as we are 22708c2ecf20Sopenharmony_ci * holding the page lock. --sct 22718c2ecf20Sopenharmony_ci */ 22728c2ecf20Sopenharmony_ci 22738c2ecf20Sopenharmony_ci jh = jbd2_journal_grab_journal_head(bh); 22748c2ecf20Sopenharmony_ci if (!jh) 22758c2ecf20Sopenharmony_ci goto zap_buffer_unlocked; 22768c2ecf20Sopenharmony_ci 22778c2ecf20Sopenharmony_ci /* OK, we have data buffer in journaled mode */ 22788c2ecf20Sopenharmony_ci write_lock(&journal->j_state_lock); 22798c2ecf20Sopenharmony_ci spin_lock(&jh->b_state_lock); 22808c2ecf20Sopenharmony_ci spin_lock(&journal->j_list_lock); 22818c2ecf20Sopenharmony_ci 22828c2ecf20Sopenharmony_ci /* 22838c2ecf20Sopenharmony_ci * We cannot remove the buffer from checkpoint lists until the 22848c2ecf20Sopenharmony_ci * transaction adding inode to orphan list (let's call it T) 22858c2ecf20Sopenharmony_ci * is committed. Otherwise if the transaction changing the 22868c2ecf20Sopenharmony_ci * buffer would be cleaned from the journal before T is 22878c2ecf20Sopenharmony_ci * committed, a crash will cause that the correct contents of 22888c2ecf20Sopenharmony_ci * the buffer will be lost. 
On the other hand we have to 22898c2ecf20Sopenharmony_ci * clear the buffer dirty bit at latest at the moment when the 22908c2ecf20Sopenharmony_ci * transaction marking the buffer as freed in the filesystem 22918c2ecf20Sopenharmony_ci * structures is committed because from that moment on the 22928c2ecf20Sopenharmony_ci * block can be reallocated and used by a different page. 22938c2ecf20Sopenharmony_ci * Since the block hasn't been freed yet but the inode has 22948c2ecf20Sopenharmony_ci * already been added to orphan list, it is safe for us to add 22958c2ecf20Sopenharmony_ci * the buffer to BJ_Forget list of the newest transaction. 22968c2ecf20Sopenharmony_ci * 22978c2ecf20Sopenharmony_ci * Also we have to clear buffer_mapped flag of a truncated buffer 22988c2ecf20Sopenharmony_ci * because the buffer_head may be attached to the page straddling 22998c2ecf20Sopenharmony_ci * i_size (can happen only when blocksize < pagesize) and thus the 23008c2ecf20Sopenharmony_ci * buffer_head can be reused when the file is extended again. So we end 23018c2ecf20Sopenharmony_ci * up keeping around invalidated buffers attached to transactions' 23028c2ecf20Sopenharmony_ci * BJ_Forget list just to stop checkpointing code from cleaning up 23038c2ecf20Sopenharmony_ci * the transaction this buffer was modified in. 23048c2ecf20Sopenharmony_ci */ 23058c2ecf20Sopenharmony_ci transaction = jh->b_transaction; 23068c2ecf20Sopenharmony_ci if (transaction == NULL) { 23078c2ecf20Sopenharmony_ci /* First case: not on any transaction. If it 23088c2ecf20Sopenharmony_ci * has no checkpoint link, then we can zap it: 23098c2ecf20Sopenharmony_ci * it's a writeback-mode buffer so we don't care 23108c2ecf20Sopenharmony_ci * if it hits disk safely. 
*/ 23118c2ecf20Sopenharmony_ci if (!jh->b_cp_transaction) { 23128c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "not on any transaction: zap"); 23138c2ecf20Sopenharmony_ci goto zap_buffer; 23148c2ecf20Sopenharmony_ci } 23158c2ecf20Sopenharmony_ci 23168c2ecf20Sopenharmony_ci if (!buffer_dirty(bh)) { 23178c2ecf20Sopenharmony_ci /* bdflush has written it. We can drop it now */ 23188c2ecf20Sopenharmony_ci __jbd2_journal_remove_checkpoint(jh); 23198c2ecf20Sopenharmony_ci goto zap_buffer; 23208c2ecf20Sopenharmony_ci } 23218c2ecf20Sopenharmony_ci 23228c2ecf20Sopenharmony_ci /* OK, it must be in the journal but still not 23238c2ecf20Sopenharmony_ci * written fully to disk: it's metadata or 23248c2ecf20Sopenharmony_ci * journaled data... */ 23258c2ecf20Sopenharmony_ci 23268c2ecf20Sopenharmony_ci if (journal->j_running_transaction) { 23278c2ecf20Sopenharmony_ci /* ... and once the current transaction has 23288c2ecf20Sopenharmony_ci * committed, the buffer won't be needed any 23298c2ecf20Sopenharmony_ci * longer. */ 23308c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); 23318c2ecf20Sopenharmony_ci may_free = __dispose_buffer(jh, 23328c2ecf20Sopenharmony_ci journal->j_running_transaction); 23338c2ecf20Sopenharmony_ci goto zap_buffer; 23348c2ecf20Sopenharmony_ci } else { 23358c2ecf20Sopenharmony_ci /* There is no currently-running transaction. So the 23368c2ecf20Sopenharmony_ci * orphan record which we wrote for this file must have 23378c2ecf20Sopenharmony_ci * passed into commit. We must attach this buffer to 23388c2ecf20Sopenharmony_ci * the committing transaction, if it exists. 
*/ 23398c2ecf20Sopenharmony_ci if (journal->j_committing_transaction) { 23408c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "give to committing trans"); 23418c2ecf20Sopenharmony_ci may_free = __dispose_buffer(jh, 23428c2ecf20Sopenharmony_ci journal->j_committing_transaction); 23438c2ecf20Sopenharmony_ci goto zap_buffer; 23448c2ecf20Sopenharmony_ci } else { 23458c2ecf20Sopenharmony_ci /* The orphan record's transaction has 23468c2ecf20Sopenharmony_ci * committed. We can cleanse this buffer */ 23478c2ecf20Sopenharmony_ci clear_buffer_jbddirty(bh); 23488c2ecf20Sopenharmony_ci __jbd2_journal_remove_checkpoint(jh); 23498c2ecf20Sopenharmony_ci goto zap_buffer; 23508c2ecf20Sopenharmony_ci } 23518c2ecf20Sopenharmony_ci } 23528c2ecf20Sopenharmony_ci } else if (transaction == journal->j_committing_transaction) { 23538c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "on committing transaction"); 23548c2ecf20Sopenharmony_ci /* 23558c2ecf20Sopenharmony_ci * The buffer is committing, we simply cannot touch 23568c2ecf20Sopenharmony_ci * it. If the page is straddling i_size we have to wait 23578c2ecf20Sopenharmony_ci * for commit and try again. 23588c2ecf20Sopenharmony_ci */ 23598c2ecf20Sopenharmony_ci if (partial_page) { 23608c2ecf20Sopenharmony_ci spin_unlock(&journal->j_list_lock); 23618c2ecf20Sopenharmony_ci spin_unlock(&jh->b_state_lock); 23628c2ecf20Sopenharmony_ci write_unlock(&journal->j_state_lock); 23638c2ecf20Sopenharmony_ci jbd2_journal_put_journal_head(jh); 23648c2ecf20Sopenharmony_ci /* Already zapped buffer? Nothing to do... */ 23658c2ecf20Sopenharmony_ci if (!bh->b_bdev) 23668c2ecf20Sopenharmony_ci return 0; 23678c2ecf20Sopenharmony_ci return -EBUSY; 23688c2ecf20Sopenharmony_ci } 23698c2ecf20Sopenharmony_ci /* 23708c2ecf20Sopenharmony_ci * OK, buffer won't be reachable after truncate. 
We just clear 23718c2ecf20Sopenharmony_ci * b_modified to not confuse transaction credit accounting, and 23728c2ecf20Sopenharmony_ci * set j_next_transaction to the running transaction (if there 23738c2ecf20Sopenharmony_ci * is one) and mark buffer as freed so that commit code knows 23748c2ecf20Sopenharmony_ci * it should clear dirty bits when it is done with the buffer. 23758c2ecf20Sopenharmony_ci */ 23768c2ecf20Sopenharmony_ci set_buffer_freed(bh); 23778c2ecf20Sopenharmony_ci if (journal->j_running_transaction && buffer_jbddirty(bh)) 23788c2ecf20Sopenharmony_ci jh->b_next_transaction = journal->j_running_transaction; 23798c2ecf20Sopenharmony_ci jh->b_modified = 0; 23808c2ecf20Sopenharmony_ci spin_unlock(&journal->j_list_lock); 23818c2ecf20Sopenharmony_ci spin_unlock(&jh->b_state_lock); 23828c2ecf20Sopenharmony_ci write_unlock(&journal->j_state_lock); 23838c2ecf20Sopenharmony_ci jbd2_journal_put_journal_head(jh); 23848c2ecf20Sopenharmony_ci return 0; 23858c2ecf20Sopenharmony_ci } else { 23868c2ecf20Sopenharmony_ci /* Good, the buffer belongs to the running transaction. 23878c2ecf20Sopenharmony_ci * We are writing our own transaction's data, not any 23888c2ecf20Sopenharmony_ci * previous one's, so it is safe to throw it away 23898c2ecf20Sopenharmony_ci * (remember that we expect the filesystem to have set 23908c2ecf20Sopenharmony_ci * i_size already for this truncate so recovery will not 23918c2ecf20Sopenharmony_ci * expose the disk blocks we are discarding here.) */ 23928c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, transaction == journal->j_running_transaction); 23938c2ecf20Sopenharmony_ci JBUFFER_TRACE(jh, "on running transaction"); 23948c2ecf20Sopenharmony_ci may_free = __dispose_buffer(jh, transaction); 23958c2ecf20Sopenharmony_ci } 23968c2ecf20Sopenharmony_ci 23978c2ecf20Sopenharmony_cizap_buffer: 23988c2ecf20Sopenharmony_ci /* 23998c2ecf20Sopenharmony_ci * This is tricky. 
Although the buffer is truncated, it may be reused 24008c2ecf20Sopenharmony_ci * if blocksize < pagesize and it is attached to the page straddling 24018c2ecf20Sopenharmony_ci * EOF. Since the buffer might have been added to BJ_Forget list of the 24028c2ecf20Sopenharmony_ci * running transaction, journal_get_write_access() won't clear 24038c2ecf20Sopenharmony_ci * b_modified and credit accounting gets confused. So clear b_modified 24048c2ecf20Sopenharmony_ci * here. 24058c2ecf20Sopenharmony_ci */ 24068c2ecf20Sopenharmony_ci jh->b_modified = 0; 24078c2ecf20Sopenharmony_ci spin_unlock(&journal->j_list_lock); 24088c2ecf20Sopenharmony_ci spin_unlock(&jh->b_state_lock); 24098c2ecf20Sopenharmony_ci write_unlock(&journal->j_state_lock); 24108c2ecf20Sopenharmony_ci jbd2_journal_put_journal_head(jh); 24118c2ecf20Sopenharmony_cizap_buffer_unlocked: 24128c2ecf20Sopenharmony_ci clear_buffer_dirty(bh); 24138c2ecf20Sopenharmony_ci J_ASSERT_BH(bh, !buffer_jbddirty(bh)); 24148c2ecf20Sopenharmony_ci clear_buffer_mapped(bh); 24158c2ecf20Sopenharmony_ci clear_buffer_req(bh); 24168c2ecf20Sopenharmony_ci clear_buffer_new(bh); 24178c2ecf20Sopenharmony_ci clear_buffer_delay(bh); 24188c2ecf20Sopenharmony_ci clear_buffer_unwritten(bh); 24198c2ecf20Sopenharmony_ci bh->b_bdev = NULL; 24208c2ecf20Sopenharmony_ci return may_free; 24218c2ecf20Sopenharmony_ci} 24228c2ecf20Sopenharmony_ci 24238c2ecf20Sopenharmony_ci/** 24248c2ecf20Sopenharmony_ci * jbd2_journal_invalidatepage() 24258c2ecf20Sopenharmony_ci * @journal: journal to use for flush... 24268c2ecf20Sopenharmony_ci * @page: page to flush 24278c2ecf20Sopenharmony_ci * @offset: start of the range to invalidate 24288c2ecf20Sopenharmony_ci * @length: length of the range to invalidate 24298c2ecf20Sopenharmony_ci * 24308c2ecf20Sopenharmony_ci * Reap page buffers containing data after in the specified range in page. 
24318c2ecf20Sopenharmony_ci * Can return -EBUSY if buffers are part of the committing transaction and 24328c2ecf20Sopenharmony_ci * the page is straddling i_size. Caller then has to wait for current commit 24338c2ecf20Sopenharmony_ci * and try again. 24348c2ecf20Sopenharmony_ci */ 24358c2ecf20Sopenharmony_ciint jbd2_journal_invalidatepage(journal_t *journal, 24368c2ecf20Sopenharmony_ci struct page *page, 24378c2ecf20Sopenharmony_ci unsigned int offset, 24388c2ecf20Sopenharmony_ci unsigned int length) 24398c2ecf20Sopenharmony_ci{ 24408c2ecf20Sopenharmony_ci struct buffer_head *head, *bh, *next; 24418c2ecf20Sopenharmony_ci unsigned int stop = offset + length; 24428c2ecf20Sopenharmony_ci unsigned int curr_off = 0; 24438c2ecf20Sopenharmony_ci int partial_page = (offset || length < PAGE_SIZE); 24448c2ecf20Sopenharmony_ci int may_free = 1; 24458c2ecf20Sopenharmony_ci int ret = 0; 24468c2ecf20Sopenharmony_ci 24478c2ecf20Sopenharmony_ci if (!PageLocked(page)) 24488c2ecf20Sopenharmony_ci BUG(); 24498c2ecf20Sopenharmony_ci if (!page_has_buffers(page)) 24508c2ecf20Sopenharmony_ci return 0; 24518c2ecf20Sopenharmony_ci 24528c2ecf20Sopenharmony_ci BUG_ON(stop > PAGE_SIZE || stop < length); 24538c2ecf20Sopenharmony_ci 24548c2ecf20Sopenharmony_ci /* We will potentially be playing with lists other than just the 24558c2ecf20Sopenharmony_ci * data lists (especially for journaled data mode), so be 24568c2ecf20Sopenharmony_ci * cautious in our locking. 
*/ 24578c2ecf20Sopenharmony_ci 24588c2ecf20Sopenharmony_ci head = bh = page_buffers(page); 24598c2ecf20Sopenharmony_ci do { 24608c2ecf20Sopenharmony_ci unsigned int next_off = curr_off + bh->b_size; 24618c2ecf20Sopenharmony_ci next = bh->b_this_page; 24628c2ecf20Sopenharmony_ci 24638c2ecf20Sopenharmony_ci if (next_off > stop) 24648c2ecf20Sopenharmony_ci return 0; 24658c2ecf20Sopenharmony_ci 24668c2ecf20Sopenharmony_ci if (offset <= curr_off) { 24678c2ecf20Sopenharmony_ci /* This block is wholly outside the truncation point */ 24688c2ecf20Sopenharmony_ci lock_buffer(bh); 24698c2ecf20Sopenharmony_ci ret = journal_unmap_buffer(journal, bh, partial_page); 24708c2ecf20Sopenharmony_ci unlock_buffer(bh); 24718c2ecf20Sopenharmony_ci if (ret < 0) 24728c2ecf20Sopenharmony_ci return ret; 24738c2ecf20Sopenharmony_ci may_free &= ret; 24748c2ecf20Sopenharmony_ci } 24758c2ecf20Sopenharmony_ci curr_off = next_off; 24768c2ecf20Sopenharmony_ci bh = next; 24778c2ecf20Sopenharmony_ci 24788c2ecf20Sopenharmony_ci } while (bh != head); 24798c2ecf20Sopenharmony_ci 24808c2ecf20Sopenharmony_ci if (!partial_page) { 24818c2ecf20Sopenharmony_ci if (may_free && try_to_free_buffers(page)) 24828c2ecf20Sopenharmony_ci J_ASSERT(!page_has_buffers(page)); 24838c2ecf20Sopenharmony_ci } 24848c2ecf20Sopenharmony_ci return 0; 24858c2ecf20Sopenharmony_ci} 24868c2ecf20Sopenharmony_ci 24878c2ecf20Sopenharmony_ci/* 24888c2ecf20Sopenharmony_ci * File a buffer on the given transaction list. 
24898c2ecf20Sopenharmony_ci */ 24908c2ecf20Sopenharmony_civoid __jbd2_journal_file_buffer(struct journal_head *jh, 24918c2ecf20Sopenharmony_ci transaction_t *transaction, int jlist) 24928c2ecf20Sopenharmony_ci{ 24938c2ecf20Sopenharmony_ci struct journal_head **list = NULL; 24948c2ecf20Sopenharmony_ci int was_dirty = 0; 24958c2ecf20Sopenharmony_ci struct buffer_head *bh = jh2bh(jh); 24968c2ecf20Sopenharmony_ci 24978c2ecf20Sopenharmony_ci lockdep_assert_held(&jh->b_state_lock); 24988c2ecf20Sopenharmony_ci assert_spin_locked(&transaction->t_journal->j_list_lock); 24998c2ecf20Sopenharmony_ci 25008c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); 25018c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, jh->b_transaction == transaction || 25028c2ecf20Sopenharmony_ci jh->b_transaction == NULL); 25038c2ecf20Sopenharmony_ci 25048c2ecf20Sopenharmony_ci if (jh->b_transaction && jh->b_jlist == jlist) 25058c2ecf20Sopenharmony_ci return; 25068c2ecf20Sopenharmony_ci 25078c2ecf20Sopenharmony_ci if (jlist == BJ_Metadata || jlist == BJ_Reserved || 25088c2ecf20Sopenharmony_ci jlist == BJ_Shadow || jlist == BJ_Forget) { 25098c2ecf20Sopenharmony_ci /* 25108c2ecf20Sopenharmony_ci * For metadata buffers, we track dirty bit in buffer_jbddirty 25118c2ecf20Sopenharmony_ci * instead of buffer_dirty. We should not see a dirty bit set 25128c2ecf20Sopenharmony_ci * here because we clear it in do_get_write_access but e.g. 25138c2ecf20Sopenharmony_ci * tune2fs can modify the sb and set the dirty bit at any time 25148c2ecf20Sopenharmony_ci * so we try to gracefully handle that. 
25158c2ecf20Sopenharmony_ci */ 25168c2ecf20Sopenharmony_ci if (buffer_dirty(bh)) 25178c2ecf20Sopenharmony_ci warn_dirty_buffer(bh); 25188c2ecf20Sopenharmony_ci if (test_clear_buffer_dirty(bh) || 25198c2ecf20Sopenharmony_ci test_clear_buffer_jbddirty(bh)) 25208c2ecf20Sopenharmony_ci was_dirty = 1; 25218c2ecf20Sopenharmony_ci } 25228c2ecf20Sopenharmony_ci 25238c2ecf20Sopenharmony_ci if (jh->b_transaction) 25248c2ecf20Sopenharmony_ci __jbd2_journal_temp_unlink_buffer(jh); 25258c2ecf20Sopenharmony_ci else 25268c2ecf20Sopenharmony_ci jbd2_journal_grab_journal_head(bh); 25278c2ecf20Sopenharmony_ci jh->b_transaction = transaction; 25288c2ecf20Sopenharmony_ci 25298c2ecf20Sopenharmony_ci switch (jlist) { 25308c2ecf20Sopenharmony_ci case BJ_None: 25318c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, !jh->b_committed_data); 25328c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, !jh->b_frozen_data); 25338c2ecf20Sopenharmony_ci return; 25348c2ecf20Sopenharmony_ci case BJ_Metadata: 25358c2ecf20Sopenharmony_ci transaction->t_nr_buffers++; 25368c2ecf20Sopenharmony_ci list = &transaction->t_buffers; 25378c2ecf20Sopenharmony_ci break; 25388c2ecf20Sopenharmony_ci case BJ_Forget: 25398c2ecf20Sopenharmony_ci list = &transaction->t_forget; 25408c2ecf20Sopenharmony_ci break; 25418c2ecf20Sopenharmony_ci case BJ_Shadow: 25428c2ecf20Sopenharmony_ci list = &transaction->t_shadow_list; 25438c2ecf20Sopenharmony_ci break; 25448c2ecf20Sopenharmony_ci case BJ_Reserved: 25458c2ecf20Sopenharmony_ci list = &transaction->t_reserved_list; 25468c2ecf20Sopenharmony_ci break; 25478c2ecf20Sopenharmony_ci } 25488c2ecf20Sopenharmony_ci 25498c2ecf20Sopenharmony_ci __blist_add_buffer(list, jh); 25508c2ecf20Sopenharmony_ci jh->b_jlist = jlist; 25518c2ecf20Sopenharmony_ci 25528c2ecf20Sopenharmony_ci if (was_dirty) 25538c2ecf20Sopenharmony_ci set_buffer_jbddirty(bh); 25548c2ecf20Sopenharmony_ci} 25558c2ecf20Sopenharmony_ci 25568c2ecf20Sopenharmony_civoid jbd2_journal_file_buffer(struct journal_head *jh, 25578c2ecf20Sopenharmony_ci 
transaction_t *transaction, int jlist) 25588c2ecf20Sopenharmony_ci{ 25598c2ecf20Sopenharmony_ci spin_lock(&jh->b_state_lock); 25608c2ecf20Sopenharmony_ci spin_lock(&transaction->t_journal->j_list_lock); 25618c2ecf20Sopenharmony_ci __jbd2_journal_file_buffer(jh, transaction, jlist); 25628c2ecf20Sopenharmony_ci spin_unlock(&transaction->t_journal->j_list_lock); 25638c2ecf20Sopenharmony_ci spin_unlock(&jh->b_state_lock); 25648c2ecf20Sopenharmony_ci} 25658c2ecf20Sopenharmony_ci 25668c2ecf20Sopenharmony_ci/* 25678c2ecf20Sopenharmony_ci * Remove a buffer from its current buffer list in preparation for 25688c2ecf20Sopenharmony_ci * dropping it from its current transaction entirely. If the buffer has 25698c2ecf20Sopenharmony_ci * already started to be used by a subsequent transaction, refile the 25708c2ecf20Sopenharmony_ci * buffer on that transaction's metadata list. 25718c2ecf20Sopenharmony_ci * 25728c2ecf20Sopenharmony_ci * Called under j_list_lock 25738c2ecf20Sopenharmony_ci * Called under jh->b_state_lock 25748c2ecf20Sopenharmony_ci * 25758c2ecf20Sopenharmony_ci * When this function returns true, there's no next transaction to refile to 25768c2ecf20Sopenharmony_ci * and the caller has to drop jh reference through 25778c2ecf20Sopenharmony_ci * jbd2_journal_put_journal_head(). 25788c2ecf20Sopenharmony_ci */ 25798c2ecf20Sopenharmony_cibool __jbd2_journal_refile_buffer(struct journal_head *jh) 25808c2ecf20Sopenharmony_ci{ 25818c2ecf20Sopenharmony_ci int was_dirty, jlist; 25828c2ecf20Sopenharmony_ci struct buffer_head *bh = jh2bh(jh); 25838c2ecf20Sopenharmony_ci 25848c2ecf20Sopenharmony_ci lockdep_assert_held(&jh->b_state_lock); 25858c2ecf20Sopenharmony_ci if (jh->b_transaction) 25868c2ecf20Sopenharmony_ci assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock); 25878c2ecf20Sopenharmony_ci 25888c2ecf20Sopenharmony_ci /* If the buffer is now unused, just drop it. 
*/ 25898c2ecf20Sopenharmony_ci if (jh->b_next_transaction == NULL) { 25908c2ecf20Sopenharmony_ci __jbd2_journal_unfile_buffer(jh); 25918c2ecf20Sopenharmony_ci return true; 25928c2ecf20Sopenharmony_ci } 25938c2ecf20Sopenharmony_ci 25948c2ecf20Sopenharmony_ci /* 25958c2ecf20Sopenharmony_ci * It has been modified by a later transaction: add it to the new 25968c2ecf20Sopenharmony_ci * transaction's metadata list. 25978c2ecf20Sopenharmony_ci */ 25988c2ecf20Sopenharmony_ci 25998c2ecf20Sopenharmony_ci was_dirty = test_clear_buffer_jbddirty(bh); 26008c2ecf20Sopenharmony_ci __jbd2_journal_temp_unlink_buffer(jh); 26018c2ecf20Sopenharmony_ci 26028c2ecf20Sopenharmony_ci /* 26038c2ecf20Sopenharmony_ci * b_transaction must be set, otherwise the new b_transaction won't 26048c2ecf20Sopenharmony_ci * be holding jh reference 26058c2ecf20Sopenharmony_ci */ 26068c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, jh->b_transaction != NULL); 26078c2ecf20Sopenharmony_ci 26088c2ecf20Sopenharmony_ci /* 26098c2ecf20Sopenharmony_ci * We set b_transaction here because b_next_transaction will inherit 26108c2ecf20Sopenharmony_ci * our jh reference and thus __jbd2_journal_file_buffer() must not 26118c2ecf20Sopenharmony_ci * take a new one. 
26128c2ecf20Sopenharmony_ci */ 26138c2ecf20Sopenharmony_ci WRITE_ONCE(jh->b_transaction, jh->b_next_transaction); 26148c2ecf20Sopenharmony_ci WRITE_ONCE(jh->b_next_transaction, NULL); 26158c2ecf20Sopenharmony_ci if (buffer_freed(bh)) 26168c2ecf20Sopenharmony_ci jlist = BJ_Forget; 26178c2ecf20Sopenharmony_ci else if (jh->b_modified) 26188c2ecf20Sopenharmony_ci jlist = BJ_Metadata; 26198c2ecf20Sopenharmony_ci else 26208c2ecf20Sopenharmony_ci jlist = BJ_Reserved; 26218c2ecf20Sopenharmony_ci __jbd2_journal_file_buffer(jh, jh->b_transaction, jlist); 26228c2ecf20Sopenharmony_ci J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); 26238c2ecf20Sopenharmony_ci 26248c2ecf20Sopenharmony_ci if (was_dirty) 26258c2ecf20Sopenharmony_ci set_buffer_jbddirty(bh); 26268c2ecf20Sopenharmony_ci return false; 26278c2ecf20Sopenharmony_ci} 26288c2ecf20Sopenharmony_ci 26298c2ecf20Sopenharmony_ci/* 26308c2ecf20Sopenharmony_ci * __jbd2_journal_refile_buffer() with necessary locking added. We take our 26318c2ecf20Sopenharmony_ci * bh reference so that we can safely unlock bh. 26328c2ecf20Sopenharmony_ci * 26338c2ecf20Sopenharmony_ci * The jh and bh may be freed by this call. 
26348c2ecf20Sopenharmony_ci */ 26358c2ecf20Sopenharmony_civoid jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) 26368c2ecf20Sopenharmony_ci{ 26378c2ecf20Sopenharmony_ci bool drop; 26388c2ecf20Sopenharmony_ci 26398c2ecf20Sopenharmony_ci spin_lock(&jh->b_state_lock); 26408c2ecf20Sopenharmony_ci spin_lock(&journal->j_list_lock); 26418c2ecf20Sopenharmony_ci drop = __jbd2_journal_refile_buffer(jh); 26428c2ecf20Sopenharmony_ci spin_unlock(&jh->b_state_lock); 26438c2ecf20Sopenharmony_ci spin_unlock(&journal->j_list_lock); 26448c2ecf20Sopenharmony_ci if (drop) 26458c2ecf20Sopenharmony_ci jbd2_journal_put_journal_head(jh); 26468c2ecf20Sopenharmony_ci} 26478c2ecf20Sopenharmony_ci 26488c2ecf20Sopenharmony_ci/* 26498c2ecf20Sopenharmony_ci * File inode in the inode list of the handle's transaction 26508c2ecf20Sopenharmony_ci */ 26518c2ecf20Sopenharmony_cistatic int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode, 26528c2ecf20Sopenharmony_ci unsigned long flags, loff_t start_byte, loff_t end_byte) 26538c2ecf20Sopenharmony_ci{ 26548c2ecf20Sopenharmony_ci transaction_t *transaction = handle->h_transaction; 26558c2ecf20Sopenharmony_ci journal_t *journal; 26568c2ecf20Sopenharmony_ci 26578c2ecf20Sopenharmony_ci if (is_handle_aborted(handle)) 26588c2ecf20Sopenharmony_ci return -EROFS; 26598c2ecf20Sopenharmony_ci journal = transaction->t_journal; 26608c2ecf20Sopenharmony_ci 26618c2ecf20Sopenharmony_ci jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, 26628c2ecf20Sopenharmony_ci transaction->t_tid); 26638c2ecf20Sopenharmony_ci 26648c2ecf20Sopenharmony_ci spin_lock(&journal->j_list_lock); 26658c2ecf20Sopenharmony_ci jinode->i_flags |= flags; 26668c2ecf20Sopenharmony_ci 26678c2ecf20Sopenharmony_ci if (jinode->i_dirty_end) { 26688c2ecf20Sopenharmony_ci jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte); 26698c2ecf20Sopenharmony_ci jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte); 26708c2ecf20Sopenharmony_ci 
} else { 26718c2ecf20Sopenharmony_ci jinode->i_dirty_start = start_byte; 26728c2ecf20Sopenharmony_ci jinode->i_dirty_end = end_byte; 26738c2ecf20Sopenharmony_ci } 26748c2ecf20Sopenharmony_ci 26758c2ecf20Sopenharmony_ci /* Is inode already attached where we need it? */ 26768c2ecf20Sopenharmony_ci if (jinode->i_transaction == transaction || 26778c2ecf20Sopenharmony_ci jinode->i_next_transaction == transaction) 26788c2ecf20Sopenharmony_ci goto done; 26798c2ecf20Sopenharmony_ci 26808c2ecf20Sopenharmony_ci /* 26818c2ecf20Sopenharmony_ci * We only ever set this variable to 1 so the test is safe. Since 26828c2ecf20Sopenharmony_ci * t_need_data_flush is likely to be set, we do the test to save some 26838c2ecf20Sopenharmony_ci * cacheline bouncing 26848c2ecf20Sopenharmony_ci */ 26858c2ecf20Sopenharmony_ci if (!transaction->t_need_data_flush) 26868c2ecf20Sopenharmony_ci transaction->t_need_data_flush = 1; 26878c2ecf20Sopenharmony_ci /* On some different transaction's list - should be 26888c2ecf20Sopenharmony_ci * the committing one */ 26898c2ecf20Sopenharmony_ci if (jinode->i_transaction) { 26908c2ecf20Sopenharmony_ci J_ASSERT(jinode->i_next_transaction == NULL); 26918c2ecf20Sopenharmony_ci J_ASSERT(jinode->i_transaction == 26928c2ecf20Sopenharmony_ci journal->j_committing_transaction); 26938c2ecf20Sopenharmony_ci jinode->i_next_transaction = transaction; 26948c2ecf20Sopenharmony_ci goto done; 26958c2ecf20Sopenharmony_ci } 26968c2ecf20Sopenharmony_ci /* Not on any transaction list... 
*/ 26978c2ecf20Sopenharmony_ci J_ASSERT(!jinode->i_next_transaction); 26988c2ecf20Sopenharmony_ci jinode->i_transaction = transaction; 26998c2ecf20Sopenharmony_ci list_add(&jinode->i_list, &transaction->t_inode_list); 27008c2ecf20Sopenharmony_cidone: 27018c2ecf20Sopenharmony_ci spin_unlock(&journal->j_list_lock); 27028c2ecf20Sopenharmony_ci 27038c2ecf20Sopenharmony_ci return 0; 27048c2ecf20Sopenharmony_ci} 27058c2ecf20Sopenharmony_ci 27068c2ecf20Sopenharmony_ciint jbd2_journal_inode_ranged_write(handle_t *handle, 27078c2ecf20Sopenharmony_ci struct jbd2_inode *jinode, loff_t start_byte, loff_t length) 27088c2ecf20Sopenharmony_ci{ 27098c2ecf20Sopenharmony_ci return jbd2_journal_file_inode(handle, jinode, 27108c2ecf20Sopenharmony_ci JI_WRITE_DATA | JI_WAIT_DATA, start_byte, 27118c2ecf20Sopenharmony_ci start_byte + length - 1); 27128c2ecf20Sopenharmony_ci} 27138c2ecf20Sopenharmony_ci 27148c2ecf20Sopenharmony_ciint jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode, 27158c2ecf20Sopenharmony_ci loff_t start_byte, loff_t length) 27168c2ecf20Sopenharmony_ci{ 27178c2ecf20Sopenharmony_ci return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 27188c2ecf20Sopenharmony_ci start_byte, start_byte + length - 1); 27198c2ecf20Sopenharmony_ci} 27208c2ecf20Sopenharmony_ci 27218c2ecf20Sopenharmony_ci/* 27228c2ecf20Sopenharmony_ci * File truncate and transaction commit interact with each other in a 27238c2ecf20Sopenharmony_ci * non-trivial way. If a transaction writing data block A is 27248c2ecf20Sopenharmony_ci * committing, we cannot discard the data by truncate until we have 27258c2ecf20Sopenharmony_ci * written them. Otherwise if we crashed after the transaction with 27268c2ecf20Sopenharmony_ci * write has committed but before the transaction with truncate has 27278c2ecf20Sopenharmony_ci * committed, we could see stale data in block A. This function is a 27288c2ecf20Sopenharmony_ci * helper to solve this problem. 
It starts writeout of the truncated 27298c2ecf20Sopenharmony_ci * part in case it is in the committing transaction. 27308c2ecf20Sopenharmony_ci * 27318c2ecf20Sopenharmony_ci * Filesystem code must call this function when inode is journaled in 27328c2ecf20Sopenharmony_ci * ordered mode before truncation happens and after the inode has been 27338c2ecf20Sopenharmony_ci * placed on orphan list with the new inode size. The second condition 27348c2ecf20Sopenharmony_ci * avoids the race that someone writes new data and we start 27358c2ecf20Sopenharmony_ci * committing the transaction after this function has been called but 27368c2ecf20Sopenharmony_ci * before a transaction for truncate is started (and furthermore it 27378c2ecf20Sopenharmony_ci * allows us to optimize the case where the addition to orphan list 27388c2ecf20Sopenharmony_ci * happens in the same transaction as write --- we don't have to write 27398c2ecf20Sopenharmony_ci * any data in such case). 27408c2ecf20Sopenharmony_ci */ 27418c2ecf20Sopenharmony_ciint jbd2_journal_begin_ordered_truncate(journal_t *journal, 27428c2ecf20Sopenharmony_ci struct jbd2_inode *jinode, 27438c2ecf20Sopenharmony_ci loff_t new_size) 27448c2ecf20Sopenharmony_ci{ 27458c2ecf20Sopenharmony_ci transaction_t *inode_trans, *commit_trans; 27468c2ecf20Sopenharmony_ci int ret = 0; 27478c2ecf20Sopenharmony_ci 27488c2ecf20Sopenharmony_ci /* This is a quick check to avoid locking if not necessary */ 27498c2ecf20Sopenharmony_ci if (!jinode->i_transaction) 27508c2ecf20Sopenharmony_ci goto out; 27518c2ecf20Sopenharmony_ci /* Locks are here just to force reading of recent values, it is 27528c2ecf20Sopenharmony_ci * enough that the transaction was not committing before we started 27538c2ecf20Sopenharmony_ci * a transaction adding the inode to orphan list */ 27548c2ecf20Sopenharmony_ci read_lock(&journal->j_state_lock); 27558c2ecf20Sopenharmony_ci commit_trans = journal->j_committing_transaction; 27568c2ecf20Sopenharmony_ci 
read_unlock(&journal->j_state_lock); 27578c2ecf20Sopenharmony_ci spin_lock(&journal->j_list_lock); 27588c2ecf20Sopenharmony_ci inode_trans = jinode->i_transaction; 27598c2ecf20Sopenharmony_ci spin_unlock(&journal->j_list_lock); 27608c2ecf20Sopenharmony_ci if (inode_trans == commit_trans) { 27618c2ecf20Sopenharmony_ci ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping, 27628c2ecf20Sopenharmony_ci new_size, LLONG_MAX); 27638c2ecf20Sopenharmony_ci if (ret) 27648c2ecf20Sopenharmony_ci jbd2_journal_abort(journal, ret); 27658c2ecf20Sopenharmony_ci } 27668c2ecf20Sopenharmony_ciout: 27678c2ecf20Sopenharmony_ci return ret; 27688c2ecf20Sopenharmony_ci} 2769