162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0+ 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * linux/fs/jbd2/checkpoint.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * Copyright 1999 Red Hat Software --- All Rights Reserved 862306a36Sopenharmony_ci * 962306a36Sopenharmony_ci * Checkpoint routines for the generic filesystem journaling code. 1062306a36Sopenharmony_ci * Part of the ext2fs journaling system. 1162306a36Sopenharmony_ci * 1262306a36Sopenharmony_ci * Checkpointing is the process of ensuring that a section of the log is 1362306a36Sopenharmony_ci * committed fully to disk, so that that portion of the log can be 1462306a36Sopenharmony_ci * reused. 1562306a36Sopenharmony_ci */ 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ci#include <linux/time.h> 1862306a36Sopenharmony_ci#include <linux/fs.h> 1962306a36Sopenharmony_ci#include <linux/jbd2.h> 2062306a36Sopenharmony_ci#include <linux/errno.h> 2162306a36Sopenharmony_ci#include <linux/slab.h> 2262306a36Sopenharmony_ci#include <linux/blkdev.h> 2362306a36Sopenharmony_ci#include <trace/events/jbd2.h> 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci/* 2662306a36Sopenharmony_ci * Unlink a buffer from a transaction checkpoint list. 2762306a36Sopenharmony_ci * 2862306a36Sopenharmony_ci * Called with j_list_lock held. 2962306a36Sopenharmony_ci */ 3062306a36Sopenharmony_cistatic inline void __buffer_unlink(struct journal_head *jh) 3162306a36Sopenharmony_ci{ 3262306a36Sopenharmony_ci transaction_t *transaction = jh->b_cp_transaction; 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci jh->b_cpnext->b_cpprev = jh->b_cpprev; 3562306a36Sopenharmony_ci jh->b_cpprev->b_cpnext = jh->b_cpnext; 3662306a36Sopenharmony_ci if (transaction->t_checkpoint_list == jh) { 3762306a36Sopenharmony_ci transaction->t_checkpoint_list = jh->b_cpnext; 3862306a36Sopenharmony_ci if (transaction->t_checkpoint_list == jh) 3962306a36Sopenharmony_ci transaction->t_checkpoint_list = NULL; 4062306a36Sopenharmony_ci } 4162306a36Sopenharmony_ci} 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_ci/* 4462306a36Sopenharmony_ci * __jbd2_log_wait_for_space: wait until there is space in the journal. 4562306a36Sopenharmony_ci * 4662306a36Sopenharmony_ci * Called under j-state_lock *only*. It will be unlocked if we have to wait 4762306a36Sopenharmony_ci * for a checkpoint to free up some space in the log. 4862306a36Sopenharmony_ci */ 4962306a36Sopenharmony_civoid __jbd2_log_wait_for_space(journal_t *journal) 5062306a36Sopenharmony_ci__acquires(&journal->j_state_lock) 5162306a36Sopenharmony_ci__releases(&journal->j_state_lock) 5262306a36Sopenharmony_ci{ 5362306a36Sopenharmony_ci int nblocks, space_left; 5462306a36Sopenharmony_ci /* assert_spin_locked(&journal->j_state_lock); */ 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci nblocks = journal->j_max_transaction_buffers; 5762306a36Sopenharmony_ci while (jbd2_log_space_left(journal) < nblocks) { 5862306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 5962306a36Sopenharmony_ci mutex_lock_io(&journal->j_checkpoint_mutex); 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci /* 6262306a36Sopenharmony_ci * Test again, another process may have checkpointed while we 6362306a36Sopenharmony_ci * were waiting for the checkpoint lock. If there are no 6462306a36Sopenharmony_ci * transactions ready to be checkpointed, try to recover 6562306a36Sopenharmony_ci * journal space by calling cleanup_journal_tail(), and if 6662306a36Sopenharmony_ci * that doesn't work, by waiting for the currently committing 6762306a36Sopenharmony_ci * transaction to complete. If there is absolutely no way 6862306a36Sopenharmony_ci * to make progress, this is either a BUG or corrupted 6962306a36Sopenharmony_ci * filesystem, so abort the journal and leave a stack 7062306a36Sopenharmony_ci * trace for forensic evidence. 7162306a36Sopenharmony_ci */ 7262306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 7362306a36Sopenharmony_ci if (journal->j_flags & JBD2_ABORT) { 7462306a36Sopenharmony_ci mutex_unlock(&journal->j_checkpoint_mutex); 7562306a36Sopenharmony_ci return; 7662306a36Sopenharmony_ci } 7762306a36Sopenharmony_ci spin_lock(&journal->j_list_lock); 7862306a36Sopenharmony_ci space_left = jbd2_log_space_left(journal); 7962306a36Sopenharmony_ci if (space_left < nblocks) { 8062306a36Sopenharmony_ci int chkpt = journal->j_checkpoint_transactions != NULL; 8162306a36Sopenharmony_ci tid_t tid = 0; 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci if (journal->j_committing_transaction) 8462306a36Sopenharmony_ci tid = journal->j_committing_transaction->t_tid; 8562306a36Sopenharmony_ci spin_unlock(&journal->j_list_lock); 8662306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 8762306a36Sopenharmony_ci if (chkpt) { 8862306a36Sopenharmony_ci jbd2_log_do_checkpoint(journal); 8962306a36Sopenharmony_ci } else if (jbd2_cleanup_journal_tail(journal) == 0) { 9062306a36Sopenharmony_ci /* We were able to recover space; yay! */ 9162306a36Sopenharmony_ci ; 9262306a36Sopenharmony_ci } else if (tid) { 9362306a36Sopenharmony_ci /* 9462306a36Sopenharmony_ci * jbd2_journal_commit_transaction() may want 9562306a36Sopenharmony_ci * to take the checkpoint_mutex if JBD2_FLUSHED 9662306a36Sopenharmony_ci * is set. So we need to temporarily drop it. 9762306a36Sopenharmony_ci */ 9862306a36Sopenharmony_ci mutex_unlock(&journal->j_checkpoint_mutex); 9962306a36Sopenharmony_ci jbd2_log_wait_commit(journal, tid); 10062306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 10162306a36Sopenharmony_ci continue; 10262306a36Sopenharmony_ci } else { 10362306a36Sopenharmony_ci printk(KERN_ERR "%s: needed %d blocks and " 10462306a36Sopenharmony_ci "only had %d space available\n", 10562306a36Sopenharmony_ci __func__, nblocks, space_left); 10662306a36Sopenharmony_ci printk(KERN_ERR "%s: no way to get more " 10762306a36Sopenharmony_ci "journal space in %s\n", __func__, 10862306a36Sopenharmony_ci journal->j_devname); 10962306a36Sopenharmony_ci WARN_ON(1); 11062306a36Sopenharmony_ci jbd2_journal_abort(journal, -EIO); 11162306a36Sopenharmony_ci } 11262306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 11362306a36Sopenharmony_ci } else { 11462306a36Sopenharmony_ci spin_unlock(&journal->j_list_lock); 11562306a36Sopenharmony_ci } 11662306a36Sopenharmony_ci mutex_unlock(&journal->j_checkpoint_mutex); 11762306a36Sopenharmony_ci } 11862306a36Sopenharmony_ci} 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_cistatic void 12162306a36Sopenharmony_ci__flush_batch(journal_t *journal, int *batch_count) 12262306a36Sopenharmony_ci{ 12362306a36Sopenharmony_ci int i; 12462306a36Sopenharmony_ci struct blk_plug plug; 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_ci blk_start_plug(&plug); 12762306a36Sopenharmony_ci for (i = 0; i < *batch_count; i++) 12862306a36Sopenharmony_ci write_dirty_buffer(journal->j_chkpt_bhs[i], REQ_SYNC); 12962306a36Sopenharmony_ci blk_finish_plug(&plug); 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci for (i = 0; i < *batch_count; i++) { 13262306a36Sopenharmony_ci struct buffer_head *bh = journal->j_chkpt_bhs[i]; 13362306a36Sopenharmony_ci BUFFER_TRACE(bh, "brelse"); 13462306a36Sopenharmony_ci __brelse(bh); 13562306a36Sopenharmony_ci journal->j_chkpt_bhs[i] = NULL; 13662306a36Sopenharmony_ci } 13762306a36Sopenharmony_ci *batch_count = 0; 13862306a36Sopenharmony_ci} 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci/* 14162306a36Sopenharmony_ci * Perform an actual checkpoint. We take the first transaction on the 14262306a36Sopenharmony_ci * list of transactions to be checkpointed and send all its buffers 14362306a36Sopenharmony_ci * to disk. We submit larger chunks of data at once. 14462306a36Sopenharmony_ci * 14562306a36Sopenharmony_ci * The journal should be locked before calling this function. 14662306a36Sopenharmony_ci * Called with j_checkpoint_mutex held. 14762306a36Sopenharmony_ci */ 14862306a36Sopenharmony_ciint jbd2_log_do_checkpoint(journal_t *journal) 14962306a36Sopenharmony_ci{ 15062306a36Sopenharmony_ci struct journal_head *jh; 15162306a36Sopenharmony_ci struct buffer_head *bh; 15262306a36Sopenharmony_ci transaction_t *transaction; 15362306a36Sopenharmony_ci tid_t this_tid; 15462306a36Sopenharmony_ci int result, batch_count = 0; 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci jbd2_debug(1, "Start checkpoint\n"); 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci /* 15962306a36Sopenharmony_ci * First thing: if there are any transactions in the log which 16062306a36Sopenharmony_ci * don't need checkpointing, just eliminate them from the 16162306a36Sopenharmony_ci * journal straight away. 16262306a36Sopenharmony_ci */ 16362306a36Sopenharmony_ci result = jbd2_cleanup_journal_tail(journal); 16462306a36Sopenharmony_ci trace_jbd2_checkpoint(journal, result); 16562306a36Sopenharmony_ci jbd2_debug(1, "cleanup_journal_tail returned %d\n", result); 16662306a36Sopenharmony_ci if (result <= 0) 16762306a36Sopenharmony_ci return result; 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci /* 17062306a36Sopenharmony_ci * OK, we need to start writing disk blocks. Take one transaction 17162306a36Sopenharmony_ci * and write it. 17262306a36Sopenharmony_ci */ 17362306a36Sopenharmony_ci spin_lock(&journal->j_list_lock); 17462306a36Sopenharmony_ci if (!journal->j_checkpoint_transactions) 17562306a36Sopenharmony_ci goto out; 17662306a36Sopenharmony_ci transaction = journal->j_checkpoint_transactions; 17762306a36Sopenharmony_ci if (transaction->t_chp_stats.cs_chp_time == 0) 17862306a36Sopenharmony_ci transaction->t_chp_stats.cs_chp_time = jiffies; 17962306a36Sopenharmony_ci this_tid = transaction->t_tid; 18062306a36Sopenharmony_cirestart: 18162306a36Sopenharmony_ci /* 18262306a36Sopenharmony_ci * If someone cleaned up this transaction while we slept, we're 18362306a36Sopenharmony_ci * done (maybe it's a new transaction, but it fell at the same 18462306a36Sopenharmony_ci * address). 18562306a36Sopenharmony_ci */ 18662306a36Sopenharmony_ci if (journal->j_checkpoint_transactions != transaction || 18762306a36Sopenharmony_ci transaction->t_tid != this_tid) 18862306a36Sopenharmony_ci goto out; 18962306a36Sopenharmony_ci 19062306a36Sopenharmony_ci /* checkpoint all of the transaction's buffers */ 19162306a36Sopenharmony_ci while (transaction->t_checkpoint_list) { 19262306a36Sopenharmony_ci jh = transaction->t_checkpoint_list; 19362306a36Sopenharmony_ci bh = jh2bh(jh); 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci if (jh->b_transaction != NULL) { 19662306a36Sopenharmony_ci transaction_t *t = jh->b_transaction; 19762306a36Sopenharmony_ci tid_t tid = t->t_tid; 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci transaction->t_chp_stats.cs_forced_to_close++; 20062306a36Sopenharmony_ci spin_unlock(&journal->j_list_lock); 20162306a36Sopenharmony_ci if (unlikely(journal->j_flags & JBD2_UNMOUNT)) 20262306a36Sopenharmony_ci /* 20362306a36Sopenharmony_ci * The journal thread is dead; so 20462306a36Sopenharmony_ci * starting and waiting for a commit 20562306a36Sopenharmony_ci * to finish will cause us to wait for 20662306a36Sopenharmony_ci * a _very_ long time. 20762306a36Sopenharmony_ci */ 20862306a36Sopenharmony_ci printk(KERN_ERR 20962306a36Sopenharmony_ci "JBD2: %s: Waiting for Godot: block %llu\n", 21062306a36Sopenharmony_ci journal->j_devname, (unsigned long long) bh->b_blocknr); 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci if (batch_count) 21362306a36Sopenharmony_ci __flush_batch(journal, &batch_count); 21462306a36Sopenharmony_ci jbd2_log_start_commit(journal, tid); 21562306a36Sopenharmony_ci /* 21662306a36Sopenharmony_ci * jbd2_journal_commit_transaction() may want 21762306a36Sopenharmony_ci * to take the checkpoint_mutex if JBD2_FLUSHED 21862306a36Sopenharmony_ci * is set, jbd2_update_log_tail() called by 21962306a36Sopenharmony_ci * jbd2_journal_commit_transaction() may also take 22062306a36Sopenharmony_ci * checkpoint_mutex. So we need to temporarily 22162306a36Sopenharmony_ci * drop it. 22262306a36Sopenharmony_ci */ 22362306a36Sopenharmony_ci mutex_unlock(&journal->j_checkpoint_mutex); 22462306a36Sopenharmony_ci jbd2_log_wait_commit(journal, tid); 22562306a36Sopenharmony_ci mutex_lock_io(&journal->j_checkpoint_mutex); 22662306a36Sopenharmony_ci spin_lock(&journal->j_list_lock); 22762306a36Sopenharmony_ci goto restart; 22862306a36Sopenharmony_ci } 22962306a36Sopenharmony_ci if (!trylock_buffer(bh)) { 23062306a36Sopenharmony_ci /* 23162306a36Sopenharmony_ci * The buffer is locked, it may be writing back, or 23262306a36Sopenharmony_ci * flushing out in the last couple of cycles, or 23362306a36Sopenharmony_ci * re-adding into a new transaction, need to check 23462306a36Sopenharmony_ci * it again until it's unlocked. 23562306a36Sopenharmony_ci */ 23662306a36Sopenharmony_ci get_bh(bh); 23762306a36Sopenharmony_ci spin_unlock(&journal->j_list_lock); 23862306a36Sopenharmony_ci wait_on_buffer(bh); 23962306a36Sopenharmony_ci /* the journal_head may have gone by now */ 24062306a36Sopenharmony_ci BUFFER_TRACE(bh, "brelse"); 24162306a36Sopenharmony_ci __brelse(bh); 24262306a36Sopenharmony_ci goto retry; 24362306a36Sopenharmony_ci } else if (!buffer_dirty(bh)) { 24462306a36Sopenharmony_ci unlock_buffer(bh); 24562306a36Sopenharmony_ci BUFFER_TRACE(bh, "remove from checkpoint"); 24662306a36Sopenharmony_ci /* 24762306a36Sopenharmony_ci * If the transaction was released or the checkpoint 24862306a36Sopenharmony_ci * list was empty, we're done. 24962306a36Sopenharmony_ci */ 25062306a36Sopenharmony_ci if (__jbd2_journal_remove_checkpoint(jh) || 25162306a36Sopenharmony_ci !transaction->t_checkpoint_list) 25262306a36Sopenharmony_ci goto out; 25362306a36Sopenharmony_ci } else { 25462306a36Sopenharmony_ci unlock_buffer(bh); 25562306a36Sopenharmony_ci /* 25662306a36Sopenharmony_ci * We are about to write the buffer, it could be 25762306a36Sopenharmony_ci * raced by some other transaction shrink or buffer 25862306a36Sopenharmony_ci * re-log logic once we release the j_list_lock, 25962306a36Sopenharmony_ci * leave it on the checkpoint list and check status 26062306a36Sopenharmony_ci * again to make sure it's clean. 26162306a36Sopenharmony_ci */ 26262306a36Sopenharmony_ci BUFFER_TRACE(bh, "queue"); 26362306a36Sopenharmony_ci get_bh(bh); 26462306a36Sopenharmony_ci J_ASSERT_BH(bh, !buffer_jwrite(bh)); 26562306a36Sopenharmony_ci journal->j_chkpt_bhs[batch_count++] = bh; 26662306a36Sopenharmony_ci transaction->t_chp_stats.cs_written++; 26762306a36Sopenharmony_ci transaction->t_checkpoint_list = jh->b_cpnext; 26862306a36Sopenharmony_ci } 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci if ((batch_count == JBD2_NR_BATCH) || 27162306a36Sopenharmony_ci need_resched() || spin_needbreak(&journal->j_list_lock) || 27262306a36Sopenharmony_ci jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0]) 27362306a36Sopenharmony_ci goto unlock_and_flush; 27462306a36Sopenharmony_ci } 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci if (batch_count) { 27762306a36Sopenharmony_ci unlock_and_flush: 27862306a36Sopenharmony_ci spin_unlock(&journal->j_list_lock); 27962306a36Sopenharmony_ci retry: 28062306a36Sopenharmony_ci if (batch_count) 28162306a36Sopenharmony_ci __flush_batch(journal, &batch_count); 28262306a36Sopenharmony_ci spin_lock(&journal->j_list_lock); 28362306a36Sopenharmony_ci goto restart; 28462306a36Sopenharmony_ci } 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ciout: 28762306a36Sopenharmony_ci spin_unlock(&journal->j_list_lock); 28862306a36Sopenharmony_ci result = jbd2_cleanup_journal_tail(journal); 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci return (result < 0) ? result : 0; 29162306a36Sopenharmony_ci} 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci/* 29462306a36Sopenharmony_ci * Check the list of checkpoint transactions for the journal to see if 29562306a36Sopenharmony_ci * we have already got rid of any since the last update of the log tail 29662306a36Sopenharmony_ci * in the journal superblock. If so, we can instantly roll the 29762306a36Sopenharmony_ci * superblock forward to remove those transactions from the log. 29862306a36Sopenharmony_ci * 29962306a36Sopenharmony_ci * Return <0 on error, 0 on success, 1 if there was nothing to clean up. 30062306a36Sopenharmony_ci * 30162306a36Sopenharmony_ci * Called with the journal lock held. 30262306a36Sopenharmony_ci * 30362306a36Sopenharmony_ci * This is the only part of the journaling code which really needs to be 30462306a36Sopenharmony_ci * aware of transaction aborts. Checkpointing involves writing to the 30562306a36Sopenharmony_ci * main filesystem area rather than to the journal, so it can proceed 30662306a36Sopenharmony_ci * even in abort state, but we must not update the super block if 30762306a36Sopenharmony_ci * checkpointing may have failed. Otherwise, we would lose some metadata 30862306a36Sopenharmony_ci * buffers which should be written-back to the filesystem. 30962306a36Sopenharmony_ci */ 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ciint jbd2_cleanup_journal_tail(journal_t *journal) 31262306a36Sopenharmony_ci{ 31362306a36Sopenharmony_ci tid_t first_tid; 31462306a36Sopenharmony_ci unsigned long blocknr; 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci if (is_journal_aborted(journal)) 31762306a36Sopenharmony_ci return -EIO; 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr)) 32062306a36Sopenharmony_ci return 1; 32162306a36Sopenharmony_ci J_ASSERT(blocknr != 0); 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci /* 32462306a36Sopenharmony_ci * We need to make sure that any blocks that were recently written out 32562306a36Sopenharmony_ci * --- perhaps by jbd2_log_do_checkpoint() --- are flushed out before 32662306a36Sopenharmony_ci * we drop the transactions from the journal. It's unlikely this will 32762306a36Sopenharmony_ci * be necessary, especially with an appropriately sized journal, but we 32862306a36Sopenharmony_ci * need this to guarantee correctness. Fortunately 32962306a36Sopenharmony_ci * jbd2_cleanup_journal_tail() doesn't get called all that often. 33062306a36Sopenharmony_ci */ 33162306a36Sopenharmony_ci if (journal->j_flags & JBD2_BARRIER) 33262306a36Sopenharmony_ci blkdev_issue_flush(journal->j_fs_dev); 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci return __jbd2_update_log_tail(journal, first_tid, blocknr); 33562306a36Sopenharmony_ci} 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci/* Checkpoint list management */ 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_cienum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP}; 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_ci/* 34362306a36Sopenharmony_ci * journal_shrink_one_cp_list 34462306a36Sopenharmony_ci * 34562306a36Sopenharmony_ci * Find all the written-back checkpoint buffers in the given list 34662306a36Sopenharmony_ci * and try to release them. If the whole transaction is released, set 34762306a36Sopenharmony_ci * the 'released' parameter. Return the number of released checkpointed 34862306a36Sopenharmony_ci * buffers. 34962306a36Sopenharmony_ci * 35062306a36Sopenharmony_ci * Called with j_list_lock held. 35162306a36Sopenharmony_ci */ 35262306a36Sopenharmony_cistatic unsigned long journal_shrink_one_cp_list(struct journal_head *jh, 35362306a36Sopenharmony_ci enum shrink_type type, 35462306a36Sopenharmony_ci bool *released) 35562306a36Sopenharmony_ci{ 35662306a36Sopenharmony_ci struct journal_head *last_jh; 35762306a36Sopenharmony_ci struct journal_head *next_jh = jh; 35862306a36Sopenharmony_ci unsigned long nr_freed = 0; 35962306a36Sopenharmony_ci int ret; 36062306a36Sopenharmony_ci 36162306a36Sopenharmony_ci *released = false; 36262306a36Sopenharmony_ci if (!jh) 36362306a36Sopenharmony_ci return 0; 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ci last_jh = jh->b_cpprev; 36662306a36Sopenharmony_ci do { 36762306a36Sopenharmony_ci jh = next_jh; 36862306a36Sopenharmony_ci next_jh = jh->b_cpnext; 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_ci if (type == SHRINK_DESTROY) { 37162306a36Sopenharmony_ci ret = __jbd2_journal_remove_checkpoint(jh); 37262306a36Sopenharmony_ci } else { 37362306a36Sopenharmony_ci ret = jbd2_journal_try_remove_checkpoint(jh); 37462306a36Sopenharmony_ci if (ret < 0) { 37562306a36Sopenharmony_ci if (type == SHRINK_BUSY_SKIP) 37662306a36Sopenharmony_ci continue; 37762306a36Sopenharmony_ci break; 37862306a36Sopenharmony_ci } 37962306a36Sopenharmony_ci } 38062306a36Sopenharmony_ci 38162306a36Sopenharmony_ci nr_freed++; 38262306a36Sopenharmony_ci if (ret) { 38362306a36Sopenharmony_ci *released = true; 38462306a36Sopenharmony_ci break; 38562306a36Sopenharmony_ci } 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_ci if (need_resched()) 38862306a36Sopenharmony_ci break; 38962306a36Sopenharmony_ci } while (jh != last_jh); 39062306a36Sopenharmony_ci 39162306a36Sopenharmony_ci return nr_freed; 39262306a36Sopenharmony_ci} 39362306a36Sopenharmony_ci 39462306a36Sopenharmony_ci/* 39562306a36Sopenharmony_ci * jbd2_journal_shrink_checkpoint_list 39662306a36Sopenharmony_ci * 39762306a36Sopenharmony_ci * Find 'nr_to_scan' written-back checkpoint buffers in the journal 39862306a36Sopenharmony_ci * and try to release them. Return the number of released checkpointed 39962306a36Sopenharmony_ci * buffers. 40062306a36Sopenharmony_ci * 40162306a36Sopenharmony_ci * Called with j_list_lock held. 40262306a36Sopenharmony_ci */ 40362306a36Sopenharmony_ciunsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, 40462306a36Sopenharmony_ci unsigned long *nr_to_scan) 40562306a36Sopenharmony_ci{ 40662306a36Sopenharmony_ci transaction_t *transaction, *last_transaction, *next_transaction; 40762306a36Sopenharmony_ci bool __maybe_unused released; 40862306a36Sopenharmony_ci tid_t first_tid = 0, last_tid = 0, next_tid = 0; 40962306a36Sopenharmony_ci tid_t tid = 0; 41062306a36Sopenharmony_ci unsigned long nr_freed = 0; 41162306a36Sopenharmony_ci unsigned long freed; 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ciagain: 41462306a36Sopenharmony_ci spin_lock(&journal->j_list_lock); 41562306a36Sopenharmony_ci if (!journal->j_checkpoint_transactions) { 41662306a36Sopenharmony_ci spin_unlock(&journal->j_list_lock); 41762306a36Sopenharmony_ci goto out; 41862306a36Sopenharmony_ci } 41962306a36Sopenharmony_ci 42062306a36Sopenharmony_ci /* 42162306a36Sopenharmony_ci * Get next shrink transaction, resume previous scan or start 42262306a36Sopenharmony_ci * over again. If some others do checkpoint and drop transaction 42362306a36Sopenharmony_ci * from the checkpoint list, we ignore saved j_shrink_transaction 42462306a36Sopenharmony_ci * and start over unconditionally. 42562306a36Sopenharmony_ci */ 42662306a36Sopenharmony_ci if (journal->j_shrink_transaction) 42762306a36Sopenharmony_ci transaction = journal->j_shrink_transaction; 42862306a36Sopenharmony_ci else 42962306a36Sopenharmony_ci transaction = journal->j_checkpoint_transactions; 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_ci if (!first_tid) 43262306a36Sopenharmony_ci first_tid = transaction->t_tid; 43362306a36Sopenharmony_ci last_transaction = journal->j_checkpoint_transactions->t_cpprev; 43462306a36Sopenharmony_ci next_transaction = transaction; 43562306a36Sopenharmony_ci last_tid = last_transaction->t_tid; 43662306a36Sopenharmony_ci do { 43762306a36Sopenharmony_ci transaction = next_transaction; 43862306a36Sopenharmony_ci next_transaction = transaction->t_cpnext; 43962306a36Sopenharmony_ci tid = transaction->t_tid; 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list, 44262306a36Sopenharmony_ci SHRINK_BUSY_SKIP, &released); 44362306a36Sopenharmony_ci nr_freed += freed; 44462306a36Sopenharmony_ci (*nr_to_scan) -= min(*nr_to_scan, freed); 44562306a36Sopenharmony_ci if (*nr_to_scan == 0) 44662306a36Sopenharmony_ci break; 44762306a36Sopenharmony_ci if (need_resched() || spin_needbreak(&journal->j_list_lock)) 44862306a36Sopenharmony_ci break; 44962306a36Sopenharmony_ci } while (transaction != last_transaction); 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_ci if (transaction != last_transaction) { 45262306a36Sopenharmony_ci journal->j_shrink_transaction = next_transaction; 45362306a36Sopenharmony_ci next_tid = next_transaction->t_tid; 45462306a36Sopenharmony_ci } else { 45562306a36Sopenharmony_ci journal->j_shrink_transaction = NULL; 45662306a36Sopenharmony_ci next_tid = 0; 45762306a36Sopenharmony_ci } 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_ci spin_unlock(&journal->j_list_lock); 46062306a36Sopenharmony_ci cond_resched(); 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_ci if (*nr_to_scan && next_tid) 46362306a36Sopenharmony_ci goto again; 46462306a36Sopenharmony_ciout: 46562306a36Sopenharmony_ci trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid, 46662306a36Sopenharmony_ci nr_freed, next_tid); 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci return nr_freed; 46962306a36Sopenharmony_ci} 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_ci/* 47262306a36Sopenharmony_ci * journal_clean_checkpoint_list 47362306a36Sopenharmony_ci * 47462306a36Sopenharmony_ci * Find all the written-back checkpoint buffers in the journal and release them. 47562306a36Sopenharmony_ci * If 'destroy' is set, release all buffers unconditionally. 47662306a36Sopenharmony_ci * 47762306a36Sopenharmony_ci * Called with j_list_lock held. 47862306a36Sopenharmony_ci */ 47962306a36Sopenharmony_civoid __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy) 48062306a36Sopenharmony_ci{ 48162306a36Sopenharmony_ci transaction_t *transaction, *last_transaction, *next_transaction; 48262306a36Sopenharmony_ci enum shrink_type type; 48362306a36Sopenharmony_ci bool released; 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci transaction = journal->j_checkpoint_transactions; 48662306a36Sopenharmony_ci if (!transaction) 48762306a36Sopenharmony_ci return; 48862306a36Sopenharmony_ci 48962306a36Sopenharmony_ci type = destroy ? SHRINK_DESTROY : SHRINK_BUSY_STOP; 49062306a36Sopenharmony_ci last_transaction = transaction->t_cpprev; 49162306a36Sopenharmony_ci next_transaction = transaction; 49262306a36Sopenharmony_ci do { 49362306a36Sopenharmony_ci transaction = next_transaction; 49462306a36Sopenharmony_ci next_transaction = transaction->t_cpnext; 49562306a36Sopenharmony_ci journal_shrink_one_cp_list(transaction->t_checkpoint_list, 49662306a36Sopenharmony_ci type, &released); 49762306a36Sopenharmony_ci /* 49862306a36Sopenharmony_ci * This function only frees up some memory if possible so we 49962306a36Sopenharmony_ci * dont have an obligation to finish processing. Bail out if 50062306a36Sopenharmony_ci * preemption requested: 50162306a36Sopenharmony_ci */ 50262306a36Sopenharmony_ci if (need_resched()) 50362306a36Sopenharmony_ci return; 50462306a36Sopenharmony_ci /* 50562306a36Sopenharmony_ci * Stop scanning if we couldn't free the transaction. This 50662306a36Sopenharmony_ci * avoids pointless scanning of transactions which still 50762306a36Sopenharmony_ci * weren't checkpointed. 50862306a36Sopenharmony_ci */ 50962306a36Sopenharmony_ci if (!released) 51062306a36Sopenharmony_ci return; 51162306a36Sopenharmony_ci } while (transaction != last_transaction); 51262306a36Sopenharmony_ci} 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_ci/* 51562306a36Sopenharmony_ci * Remove buffers from all checkpoint lists as journal is aborted and we just 51662306a36Sopenharmony_ci * need to free memory 51762306a36Sopenharmony_ci */ 51862306a36Sopenharmony_civoid jbd2_journal_destroy_checkpoint(journal_t *journal) 51962306a36Sopenharmony_ci{ 52062306a36Sopenharmony_ci /* 52162306a36Sopenharmony_ci * We loop because __jbd2_journal_clean_checkpoint_list() may abort 52262306a36Sopenharmony_ci * early due to a need of rescheduling. 52362306a36Sopenharmony_ci */ 52462306a36Sopenharmony_ci while (1) { 52562306a36Sopenharmony_ci spin_lock(&journal->j_list_lock); 52662306a36Sopenharmony_ci if (!journal->j_checkpoint_transactions) { 52762306a36Sopenharmony_ci spin_unlock(&journal->j_list_lock); 52862306a36Sopenharmony_ci break; 52962306a36Sopenharmony_ci } 53062306a36Sopenharmony_ci __jbd2_journal_clean_checkpoint_list(journal, true); 53162306a36Sopenharmony_ci spin_unlock(&journal->j_list_lock); 53262306a36Sopenharmony_ci cond_resched(); 53362306a36Sopenharmony_ci } 53462306a36Sopenharmony_ci} 53562306a36Sopenharmony_ci 53662306a36Sopenharmony_ci/* 53762306a36Sopenharmony_ci * journal_remove_checkpoint: called after a buffer has been committed 53862306a36Sopenharmony_ci * to disk (either by being write-back flushed to disk, or being 53962306a36Sopenharmony_ci * committed to the log). 54062306a36Sopenharmony_ci * 54162306a36Sopenharmony_ci * We cannot safely clean a transaction out of the log until all of the 54262306a36Sopenharmony_ci * buffer updates committed in that transaction have safely been stored 54362306a36Sopenharmony_ci * elsewhere on disk. To achieve this, all of the buffers in a 54462306a36Sopenharmony_ci * transaction need to be maintained on the transaction's checkpoint 54562306a36Sopenharmony_ci * lists until they have been rewritten, at which point this function is 54662306a36Sopenharmony_ci * called to remove the buffer from the existing transaction's 54762306a36Sopenharmony_ci * checkpoint lists. 54862306a36Sopenharmony_ci * 54962306a36Sopenharmony_ci * The function returns 1 if it frees the transaction, 0 otherwise. 55062306a36Sopenharmony_ci * The function can free jh and bh. 55162306a36Sopenharmony_ci * 55262306a36Sopenharmony_ci * This function is called with j_list_lock held. 55362306a36Sopenharmony_ci */ 55462306a36Sopenharmony_ciint __jbd2_journal_remove_checkpoint(struct journal_head *jh) 55562306a36Sopenharmony_ci{ 55662306a36Sopenharmony_ci struct transaction_chp_stats_s *stats; 55762306a36Sopenharmony_ci transaction_t *transaction; 55862306a36Sopenharmony_ci journal_t *journal; 55962306a36Sopenharmony_ci struct buffer_head *bh = jh2bh(jh); 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_ci JBUFFER_TRACE(jh, "entry"); 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci transaction = jh->b_cp_transaction; 56462306a36Sopenharmony_ci if (!transaction) { 56562306a36Sopenharmony_ci JBUFFER_TRACE(jh, "not on transaction"); 56662306a36Sopenharmony_ci return 0; 56762306a36Sopenharmony_ci } 56862306a36Sopenharmony_ci journal = transaction->t_journal; 56962306a36Sopenharmony_ci 57062306a36Sopenharmony_ci JBUFFER_TRACE(jh, "removing from transaction"); 57162306a36Sopenharmony_ci 57262306a36Sopenharmony_ci /* 57362306a36Sopenharmony_ci * If we have failed to write the buffer out to disk, the filesystem 57462306a36Sopenharmony_ci * may become inconsistent. We cannot abort the journal here since 57562306a36Sopenharmony_ci * we hold j_list_lock and we have to be careful about races with 57662306a36Sopenharmony_ci * jbd2_journal_destroy(). So mark the writeback IO error in the 57762306a36Sopenharmony_ci * journal here and we abort the journal later from a better context. 57862306a36Sopenharmony_ci */ 57962306a36Sopenharmony_ci if (buffer_write_io_error(bh)) 58062306a36Sopenharmony_ci set_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags); 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_ci __buffer_unlink(jh); 58362306a36Sopenharmony_ci jh->b_cp_transaction = NULL; 58462306a36Sopenharmony_ci percpu_counter_dec(&journal->j_checkpoint_jh_count); 58562306a36Sopenharmony_ci jbd2_journal_put_journal_head(jh); 58662306a36Sopenharmony_ci 58762306a36Sopenharmony_ci /* Is this transaction empty? */ 58862306a36Sopenharmony_ci if (transaction->t_checkpoint_list) 58962306a36Sopenharmony_ci return 0; 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ci /* 59262306a36Sopenharmony_ci * There is one special case to worry about: if we have just pulled the 59362306a36Sopenharmony_ci * buffer off a running or committing transaction's checkpoing list, 59462306a36Sopenharmony_ci * then even if the checkpoint list is empty, the transaction obviously 59562306a36Sopenharmony_ci * cannot be dropped! 59662306a36Sopenharmony_ci * 59762306a36Sopenharmony_ci * The locking here around t_state is a bit sleazy. 59862306a36Sopenharmony_ci * See the comment at the end of jbd2_journal_commit_transaction(). 59962306a36Sopenharmony_ci */ 60062306a36Sopenharmony_ci if (transaction->t_state != T_FINISHED) 60162306a36Sopenharmony_ci return 0; 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_ci /* 60462306a36Sopenharmony_ci * OK, that was the last buffer for the transaction, we can now 60562306a36Sopenharmony_ci * safely remove this transaction from the log. 60662306a36Sopenharmony_ci */ 60762306a36Sopenharmony_ci stats = &transaction->t_chp_stats; 60862306a36Sopenharmony_ci if (stats->cs_chp_time) 60962306a36Sopenharmony_ci stats->cs_chp_time = jbd2_time_diff(stats->cs_chp_time, 61062306a36Sopenharmony_ci jiffies); 61162306a36Sopenharmony_ci trace_jbd2_checkpoint_stats(journal->j_fs_dev->bd_dev, 61262306a36Sopenharmony_ci transaction->t_tid, stats); 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci __jbd2_journal_drop_transaction(journal, transaction); 61562306a36Sopenharmony_ci jbd2_journal_free_transaction(transaction); 61662306a36Sopenharmony_ci return 1; 61762306a36Sopenharmony_ci} 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_ci/* 62062306a36Sopenharmony_ci * Check the checkpoint buffer and try to remove it from the checkpoint 62162306a36Sopenharmony_ci * list if it's clean. Returns -EBUSY if it is not clean, returns 1 if 62262306a36Sopenharmony_ci * it frees the transaction, 0 otherwise. 62362306a36Sopenharmony_ci * 62462306a36Sopenharmony_ci * This function is called with j_list_lock held. 62562306a36Sopenharmony_ci */ 62662306a36Sopenharmony_ciint jbd2_journal_try_remove_checkpoint(struct journal_head *jh) 62762306a36Sopenharmony_ci{ 62862306a36Sopenharmony_ci struct buffer_head *bh = jh2bh(jh); 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci if (jh->b_transaction) 63162306a36Sopenharmony_ci return -EBUSY; 63262306a36Sopenharmony_ci if (!trylock_buffer(bh)) 63362306a36Sopenharmony_ci return -EBUSY; 63462306a36Sopenharmony_ci if (buffer_dirty(bh)) { 63562306a36Sopenharmony_ci unlock_buffer(bh); 63662306a36Sopenharmony_ci return -EBUSY; 63762306a36Sopenharmony_ci } 63862306a36Sopenharmony_ci unlock_buffer(bh); 63962306a36Sopenharmony_ci 64062306a36Sopenharmony_ci /* 64162306a36Sopenharmony_ci * Buffer is clean and the IO has finished (we held the buffer 64262306a36Sopenharmony_ci * lock) so the checkpoint is done. We can safely remove the 64362306a36Sopenharmony_ci * buffer from this transaction. 64462306a36Sopenharmony_ci */ 64562306a36Sopenharmony_ci JBUFFER_TRACE(jh, "remove from checkpoint list"); 64662306a36Sopenharmony_ci return __jbd2_journal_remove_checkpoint(jh); 64762306a36Sopenharmony_ci} 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_ci/* 65062306a36Sopenharmony_ci * journal_insert_checkpoint: put a committed buffer onto a checkpoint 65162306a36Sopenharmony_ci * list so that we know when it is safe to clean the transaction out of 65262306a36Sopenharmony_ci * the log. 65362306a36Sopenharmony_ci * 65462306a36Sopenharmony_ci * Called with the journal locked. 65562306a36Sopenharmony_ci * Called with j_list_lock held. 65662306a36Sopenharmony_ci */ 65762306a36Sopenharmony_civoid __jbd2_journal_insert_checkpoint(struct journal_head *jh, 65862306a36Sopenharmony_ci transaction_t *transaction) 65962306a36Sopenharmony_ci{ 66062306a36Sopenharmony_ci JBUFFER_TRACE(jh, "entry"); 66162306a36Sopenharmony_ci J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); 66262306a36Sopenharmony_ci J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); 66362306a36Sopenharmony_ci 66462306a36Sopenharmony_ci /* Get reference for checkpointing transaction */ 66562306a36Sopenharmony_ci jbd2_journal_grab_journal_head(jh2bh(jh)); 66662306a36Sopenharmony_ci jh->b_cp_transaction = transaction; 66762306a36Sopenharmony_ci 66862306a36Sopenharmony_ci if (!transaction->t_checkpoint_list) { 66962306a36Sopenharmony_ci jh->b_cpnext = jh->b_cpprev = jh; 67062306a36Sopenharmony_ci } else { 67162306a36Sopenharmony_ci jh->b_cpnext = transaction->t_checkpoint_list; 67262306a36Sopenharmony_ci jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev; 67362306a36Sopenharmony_ci jh->b_cpprev->b_cpnext = jh; 67462306a36Sopenharmony_ci jh->b_cpnext->b_cpprev = jh; 67562306a36Sopenharmony_ci } 67662306a36Sopenharmony_ci transaction->t_checkpoint_list = jh; 67762306a36Sopenharmony_ci percpu_counter_inc(&transaction->t_journal->j_checkpoint_jh_count); 67862306a36Sopenharmony_ci} 67962306a36Sopenharmony_ci 68062306a36Sopenharmony_ci/* 68162306a36Sopenharmony_ci * We've finished with this transaction structure: adios... 68262306a36Sopenharmony_ci * 68362306a36Sopenharmony_ci * The transaction must have no links except for the checkpoint by this 68462306a36Sopenharmony_ci * point. 68562306a36Sopenharmony_ci * 68662306a36Sopenharmony_ci * Called with the journal locked. 68762306a36Sopenharmony_ci * Called with j_list_lock held. 68862306a36Sopenharmony_ci */ 68962306a36Sopenharmony_ci 69062306a36Sopenharmony_civoid __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction) 69162306a36Sopenharmony_ci{ 69262306a36Sopenharmony_ci assert_spin_locked(&journal->j_list_lock); 69362306a36Sopenharmony_ci 69462306a36Sopenharmony_ci journal->j_shrink_transaction = NULL; 69562306a36Sopenharmony_ci if (transaction->t_cpnext) { 69662306a36Sopenharmony_ci transaction->t_cpnext->t_cpprev = transaction->t_cpprev; 69762306a36Sopenharmony_ci transaction->t_cpprev->t_cpnext = transaction->t_cpnext; 69862306a36Sopenharmony_ci if (journal->j_checkpoint_transactions == transaction) 69962306a36Sopenharmony_ci journal->j_checkpoint_transactions = 70062306a36Sopenharmony_ci transaction->t_cpnext; 70162306a36Sopenharmony_ci if (journal->j_checkpoint_transactions == transaction) 70262306a36Sopenharmony_ci journal->j_checkpoint_transactions = NULL; 70362306a36Sopenharmony_ci } 70462306a36Sopenharmony_ci 70562306a36Sopenharmony_ci J_ASSERT(transaction->t_state == T_FINISHED); 70662306a36Sopenharmony_ci J_ASSERT(transaction->t_buffers == NULL); 70762306a36Sopenharmony_ci J_ASSERT(transaction->t_forget == NULL); 70862306a36Sopenharmony_ci J_ASSERT(transaction->t_shadow_list == NULL); 70962306a36Sopenharmony_ci J_ASSERT(transaction->t_checkpoint_list == NULL); 71062306a36Sopenharmony_ci J_ASSERT(atomic_read(&transaction->t_updates) == 0); 71162306a36Sopenharmony_ci J_ASSERT(journal->j_committing_transaction != transaction); 71262306a36Sopenharmony_ci J_ASSERT(journal->j_running_transaction != transaction); 71362306a36Sopenharmony_ci 71462306a36Sopenharmony_ci trace_jbd2_drop_transaction(journal, transaction); 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ci jbd2_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); 71762306a36Sopenharmony_ci} 718