// SPDX-License-Identifier: GPL-2.0+
/*
 * linux/fs/jbd2/journal.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
 *
 * Copyright 1998 Red Hat corp --- All Rights Reserved
 *
 * Generic filesystem journal-writing code; part of the ext2fs
 * journaling system.
 *
 * This file manages journals: areas of disk reserved for logging
 * transactional updates.  This includes the kernel journaling thread
 * which is responsible for scheduling updates to the log.
 *
 * We do not actually manage the physical storage of the journal in this
 * file: that is left to a per-journal policy function, which allows us
 * to store the journal within a filesystem-specified area for ext2
 * journaling (ext2 can use a reserved inode for storing the log).
2062306a36Sopenharmony_ci */ 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_ci#include <linux/module.h> 2362306a36Sopenharmony_ci#include <linux/time.h> 2462306a36Sopenharmony_ci#include <linux/fs.h> 2562306a36Sopenharmony_ci#include <linux/jbd2.h> 2662306a36Sopenharmony_ci#include <linux/errno.h> 2762306a36Sopenharmony_ci#include <linux/slab.h> 2862306a36Sopenharmony_ci#include <linux/init.h> 2962306a36Sopenharmony_ci#include <linux/mm.h> 3062306a36Sopenharmony_ci#include <linux/freezer.h> 3162306a36Sopenharmony_ci#include <linux/pagemap.h> 3262306a36Sopenharmony_ci#include <linux/kthread.h> 3362306a36Sopenharmony_ci#include <linux/poison.h> 3462306a36Sopenharmony_ci#include <linux/proc_fs.h> 3562306a36Sopenharmony_ci#include <linux/seq_file.h> 3662306a36Sopenharmony_ci#include <linux/math64.h> 3762306a36Sopenharmony_ci#include <linux/hash.h> 3862306a36Sopenharmony_ci#include <linux/log2.h> 3962306a36Sopenharmony_ci#include <linux/vmalloc.h> 4062306a36Sopenharmony_ci#include <linux/backing-dev.h> 4162306a36Sopenharmony_ci#include <linux/bitops.h> 4262306a36Sopenharmony_ci#include <linux/ratelimit.h> 4362306a36Sopenharmony_ci#include <linux/sched/mm.h> 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci#define CREATE_TRACE_POINTS 4662306a36Sopenharmony_ci#include <trace/events/jbd2.h> 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci#include <linux/uaccess.h> 4962306a36Sopenharmony_ci#include <asm/page.h> 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci#ifdef CONFIG_JBD2_DEBUG 5262306a36Sopenharmony_cistatic ushort jbd2_journal_enable_debug __read_mostly; 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_cimodule_param_named(jbd2_debug, jbd2_journal_enable_debug, ushort, 0644); 5562306a36Sopenharmony_ciMODULE_PARM_DESC(jbd2_debug, "Debugging level for jbd2"); 5662306a36Sopenharmony_ci#endif 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_extend); 5962306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_stop); 
6062306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_lock_updates); 6162306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_unlock_updates); 6262306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_get_write_access); 6362306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_get_create_access); 6462306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_get_undo_access); 6562306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_set_triggers); 6662306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_dirty_metadata); 6762306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_forget); 6862306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_flush); 6962306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_revoke); 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_init_dev); 7262306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_init_inode); 7362306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_check_used_features); 7462306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_check_available_features); 7562306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_set_features); 7662306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_load); 7762306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_destroy); 7862306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_abort); 7962306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_errno); 8062306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_ack_err); 8162306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_clear_err); 8262306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_log_wait_commit); 8362306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_start_commit); 8462306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_force_commit_nested); 8562306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_wipe); 8662306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_blocks_per_page); 8762306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_invalidate_folio); 8862306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); 8962306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_force_commit); 
9062306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_inode_ranged_write); 9162306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_inode_ranged_wait); 9262306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_finish_inode_data_buffers); 9362306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_init_jbd_inode); 9462306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_release_jbd_inode); 9562306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate); 9662306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_inode_cache); 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_cistatic int jbd2_journal_create_slab(size_t slab_size); 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci#ifdef CONFIG_JBD2_DEBUG 10162306a36Sopenharmony_civoid __jbd2_debug(int level, const char *file, const char *func, 10262306a36Sopenharmony_ci unsigned int line, const char *fmt, ...) 10362306a36Sopenharmony_ci{ 10462306a36Sopenharmony_ci struct va_format vaf; 10562306a36Sopenharmony_ci va_list args; 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci if (level > jbd2_journal_enable_debug) 10862306a36Sopenharmony_ci return; 10962306a36Sopenharmony_ci va_start(args, fmt); 11062306a36Sopenharmony_ci vaf.fmt = fmt; 11162306a36Sopenharmony_ci vaf.va = &args; 11262306a36Sopenharmony_ci printk(KERN_DEBUG "%s: (%s, %u): %pV", file, func, line, &vaf); 11362306a36Sopenharmony_ci va_end(args); 11462306a36Sopenharmony_ci} 11562306a36Sopenharmony_ci#endif 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci/* Checksumming functions */ 11862306a36Sopenharmony_cistatic __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) 11962306a36Sopenharmony_ci{ 12062306a36Sopenharmony_ci __u32 csum; 12162306a36Sopenharmony_ci __be32 old_csum; 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci old_csum = sb->s_checksum; 12462306a36Sopenharmony_ci sb->s_checksum = 0; 12562306a36Sopenharmony_ci csum = jbd2_chksum(j, ~0, (char *)sb, sizeof(journal_superblock_t)); 12662306a36Sopenharmony_ci sb->s_checksum = old_csum; 
12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci return cpu_to_be32(csum); 12962306a36Sopenharmony_ci} 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci/* 13262306a36Sopenharmony_ci * Helper function used to manage commit timeouts 13362306a36Sopenharmony_ci */ 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_cistatic void commit_timeout(struct timer_list *t) 13662306a36Sopenharmony_ci{ 13762306a36Sopenharmony_ci journal_t *journal = from_timer(journal, t, j_commit_timer); 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci wake_up_process(journal->j_task); 14062306a36Sopenharmony_ci} 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci/* 14362306a36Sopenharmony_ci * kjournald2: The main thread function used to manage a logging device 14462306a36Sopenharmony_ci * journal. 14562306a36Sopenharmony_ci * 14662306a36Sopenharmony_ci * This kernel thread is responsible for two things: 14762306a36Sopenharmony_ci * 14862306a36Sopenharmony_ci * 1) COMMIT: Every so often we need to commit the current state of the 14962306a36Sopenharmony_ci * filesystem to disk. The journal thread is responsible for writing 15062306a36Sopenharmony_ci * all of the metadata buffers to disk. If a fast commit is ongoing 15162306a36Sopenharmony_ci * journal thread waits until it's done and then continues from 15262306a36Sopenharmony_ci * there on. 15362306a36Sopenharmony_ci * 15462306a36Sopenharmony_ci * 2) CHECKPOINT: We cannot reuse a used section of the log file until all 15562306a36Sopenharmony_ci * of the data in that part of the log has been rewritten elsewhere on 15662306a36Sopenharmony_ci * the disk. Flushing these old buffers to reclaim space in the log is 15762306a36Sopenharmony_ci * known as checkpointing, and this thread is responsible for that job. 
15862306a36Sopenharmony_ci */ 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_cistatic int kjournald2(void *arg) 16162306a36Sopenharmony_ci{ 16262306a36Sopenharmony_ci journal_t *journal = arg; 16362306a36Sopenharmony_ci transaction_t *transaction; 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_ci /* 16662306a36Sopenharmony_ci * Set up an interval timer which can be used to trigger a commit wakeup 16762306a36Sopenharmony_ci * after the commit interval expires 16862306a36Sopenharmony_ci */ 16962306a36Sopenharmony_ci timer_setup(&journal->j_commit_timer, commit_timeout, 0); 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci set_freezable(); 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci /* Record that the journal thread is running */ 17462306a36Sopenharmony_ci journal->j_task = current; 17562306a36Sopenharmony_ci wake_up(&journal->j_wait_done_commit); 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci /* 17862306a36Sopenharmony_ci * Make sure that no allocations from this kernel thread will ever 17962306a36Sopenharmony_ci * recurse to the fs layer because we are responsible for the 18062306a36Sopenharmony_ci * transaction commit and any fs involvement might get stuck waiting for 18162306a36Sopenharmony_ci * the trasn. commit. 18262306a36Sopenharmony_ci */ 18362306a36Sopenharmony_ci memalloc_nofs_save(); 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci /* 18662306a36Sopenharmony_ci * And now, wait forever for commit wakeup events. 
18762306a36Sopenharmony_ci */ 18862306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 18962306a36Sopenharmony_ci 19062306a36Sopenharmony_ciloop: 19162306a36Sopenharmony_ci if (journal->j_flags & JBD2_UNMOUNT) 19262306a36Sopenharmony_ci goto end_loop; 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci jbd2_debug(1, "commit_sequence=%u, commit_request=%u\n", 19562306a36Sopenharmony_ci journal->j_commit_sequence, journal->j_commit_request); 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_ci if (journal->j_commit_sequence != journal->j_commit_request) { 19862306a36Sopenharmony_ci jbd2_debug(1, "OK, requests differ\n"); 19962306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 20062306a36Sopenharmony_ci del_timer_sync(&journal->j_commit_timer); 20162306a36Sopenharmony_ci jbd2_journal_commit_transaction(journal); 20262306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 20362306a36Sopenharmony_ci goto loop; 20462306a36Sopenharmony_ci } 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_ci wake_up(&journal->j_wait_done_commit); 20762306a36Sopenharmony_ci if (freezing(current)) { 20862306a36Sopenharmony_ci /* 20962306a36Sopenharmony_ci * The simpler the better. Flushing journal isn't a 21062306a36Sopenharmony_ci * good idea, because that depends on threads that may 21162306a36Sopenharmony_ci * be already stopped. 
21262306a36Sopenharmony_ci */ 21362306a36Sopenharmony_ci jbd2_debug(1, "Now suspending kjournald2\n"); 21462306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 21562306a36Sopenharmony_ci try_to_freeze(); 21662306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 21762306a36Sopenharmony_ci } else { 21862306a36Sopenharmony_ci /* 21962306a36Sopenharmony_ci * We assume on resume that commits are already there, 22062306a36Sopenharmony_ci * so we don't sleep 22162306a36Sopenharmony_ci */ 22262306a36Sopenharmony_ci DEFINE_WAIT(wait); 22362306a36Sopenharmony_ci int should_sleep = 1; 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci prepare_to_wait(&journal->j_wait_commit, &wait, 22662306a36Sopenharmony_ci TASK_INTERRUPTIBLE); 22762306a36Sopenharmony_ci if (journal->j_commit_sequence != journal->j_commit_request) 22862306a36Sopenharmony_ci should_sleep = 0; 22962306a36Sopenharmony_ci transaction = journal->j_running_transaction; 23062306a36Sopenharmony_ci if (transaction && time_after_eq(jiffies, 23162306a36Sopenharmony_ci transaction->t_expires)) 23262306a36Sopenharmony_ci should_sleep = 0; 23362306a36Sopenharmony_ci if (journal->j_flags & JBD2_UNMOUNT) 23462306a36Sopenharmony_ci should_sleep = 0; 23562306a36Sopenharmony_ci if (should_sleep) { 23662306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 23762306a36Sopenharmony_ci schedule(); 23862306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 23962306a36Sopenharmony_ci } 24062306a36Sopenharmony_ci finish_wait(&journal->j_wait_commit, &wait); 24162306a36Sopenharmony_ci } 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci jbd2_debug(1, "kjournald2 wakes\n"); 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci /* 24662306a36Sopenharmony_ci * Were we woken up by a commit wakeup event? 
24762306a36Sopenharmony_ci */ 24862306a36Sopenharmony_ci transaction = journal->j_running_transaction; 24962306a36Sopenharmony_ci if (transaction && time_after_eq(jiffies, transaction->t_expires)) { 25062306a36Sopenharmony_ci journal->j_commit_request = transaction->t_tid; 25162306a36Sopenharmony_ci jbd2_debug(1, "woke because of timeout\n"); 25262306a36Sopenharmony_ci } 25362306a36Sopenharmony_ci goto loop; 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_ciend_loop: 25662306a36Sopenharmony_ci del_timer_sync(&journal->j_commit_timer); 25762306a36Sopenharmony_ci journal->j_task = NULL; 25862306a36Sopenharmony_ci wake_up(&journal->j_wait_done_commit); 25962306a36Sopenharmony_ci jbd2_debug(1, "Journal thread exiting.\n"); 26062306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 26162306a36Sopenharmony_ci return 0; 26262306a36Sopenharmony_ci} 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_cistatic int jbd2_journal_start_thread(journal_t *journal) 26562306a36Sopenharmony_ci{ 26662306a36Sopenharmony_ci struct task_struct *t; 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_ci t = kthread_run(kjournald2, journal, "jbd2/%s", 26962306a36Sopenharmony_ci journal->j_devname); 27062306a36Sopenharmony_ci if (IS_ERR(t)) 27162306a36Sopenharmony_ci return PTR_ERR(t); 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci wait_event(journal->j_wait_done_commit, journal->j_task != NULL); 27462306a36Sopenharmony_ci return 0; 27562306a36Sopenharmony_ci} 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_cistatic void journal_kill_thread(journal_t *journal) 27862306a36Sopenharmony_ci{ 27962306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 28062306a36Sopenharmony_ci journal->j_flags |= JBD2_UNMOUNT; 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci while (journal->j_task) { 28362306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 28462306a36Sopenharmony_ci wake_up(&journal->j_wait_commit); 28562306a36Sopenharmony_ci 
wait_event(journal->j_wait_done_commit, journal->j_task == NULL); 28662306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 28762306a36Sopenharmony_ci } 28862306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 28962306a36Sopenharmony_ci} 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci/* 29262306a36Sopenharmony_ci * jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal. 29362306a36Sopenharmony_ci * 29462306a36Sopenharmony_ci * Writes a metadata buffer to a given disk block. The actual IO is not 29562306a36Sopenharmony_ci * performed but a new buffer_head is constructed which labels the data 29662306a36Sopenharmony_ci * to be written with the correct destination disk block. 29762306a36Sopenharmony_ci * 29862306a36Sopenharmony_ci * Any magic-number escaping which needs to be done will cause a 29962306a36Sopenharmony_ci * copy-out here. If the buffer happens to start with the 30062306a36Sopenharmony_ci * JBD2_MAGIC_NUMBER, then we can't write it to the log directly: the 30162306a36Sopenharmony_ci * magic number is only written to the log for descripter blocks. In 30262306a36Sopenharmony_ci * this case, we copy the data and replace the first word with 0, and we 30362306a36Sopenharmony_ci * return a result code which indicates that this buffer needs to be 30462306a36Sopenharmony_ci * marked as an escaped buffer in the corresponding log descriptor 30562306a36Sopenharmony_ci * block. The missing word can then be restored when the block is read 30662306a36Sopenharmony_ci * during recovery. 30762306a36Sopenharmony_ci * 30862306a36Sopenharmony_ci * If the source buffer has already been modified by a new transaction 30962306a36Sopenharmony_ci * since we took the last commit snapshot, we use the frozen copy of 31062306a36Sopenharmony_ci * that data for IO. 
If we end up using the existing buffer_head's data 31162306a36Sopenharmony_ci * for the write, then we have to make sure nobody modifies it while the 31262306a36Sopenharmony_ci * IO is in progress. do_get_write_access() handles this. 31362306a36Sopenharmony_ci * 31462306a36Sopenharmony_ci * The function returns a pointer to the buffer_head to be used for IO. 31562306a36Sopenharmony_ci * 31662306a36Sopenharmony_ci * 31762306a36Sopenharmony_ci * Return value: 31862306a36Sopenharmony_ci * <0: Error 31962306a36Sopenharmony_ci * >=0: Finished OK 32062306a36Sopenharmony_ci * 32162306a36Sopenharmony_ci * On success: 32262306a36Sopenharmony_ci * Bit 0 set == escape performed on the data 32362306a36Sopenharmony_ci * Bit 1 set == buffer copy-out performed (kfree the data after IO) 32462306a36Sopenharmony_ci */ 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ciint jbd2_journal_write_metadata_buffer(transaction_t *transaction, 32762306a36Sopenharmony_ci struct journal_head *jh_in, 32862306a36Sopenharmony_ci struct buffer_head **bh_out, 32962306a36Sopenharmony_ci sector_t blocknr) 33062306a36Sopenharmony_ci{ 33162306a36Sopenharmony_ci int need_copy_out = 0; 33262306a36Sopenharmony_ci int done_copy_out = 0; 33362306a36Sopenharmony_ci int do_escape = 0; 33462306a36Sopenharmony_ci char *mapped_data; 33562306a36Sopenharmony_ci struct buffer_head *new_bh; 33662306a36Sopenharmony_ci struct folio *new_folio; 33762306a36Sopenharmony_ci unsigned int new_offset; 33862306a36Sopenharmony_ci struct buffer_head *bh_in = jh2bh(jh_in); 33962306a36Sopenharmony_ci journal_t *journal = transaction->t_journal; 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci /* 34262306a36Sopenharmony_ci * The buffer really shouldn't be locked: only the current committing 34362306a36Sopenharmony_ci * transaction is allowed to write it, so nobody else is allowed 34462306a36Sopenharmony_ci * to do any IO. 
34562306a36Sopenharmony_ci * 34662306a36Sopenharmony_ci * akpm: except if we're journalling data, and write() output is 34762306a36Sopenharmony_ci * also part of a shared mapping, and another thread has 34862306a36Sopenharmony_ci * decided to launch a writepage() against this buffer. 34962306a36Sopenharmony_ci */ 35062306a36Sopenharmony_ci J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); 35162306a36Sopenharmony_ci 35262306a36Sopenharmony_ci new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci /* keep subsequent assertions sane */ 35562306a36Sopenharmony_ci atomic_set(&new_bh->b_count, 1); 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ci spin_lock(&jh_in->b_state_lock); 35862306a36Sopenharmony_cirepeat: 35962306a36Sopenharmony_ci /* 36062306a36Sopenharmony_ci * If a new transaction has already done a buffer copy-out, then 36162306a36Sopenharmony_ci * we use that version of the data for the commit. 36262306a36Sopenharmony_ci */ 36362306a36Sopenharmony_ci if (jh_in->b_frozen_data) { 36462306a36Sopenharmony_ci done_copy_out = 1; 36562306a36Sopenharmony_ci new_folio = virt_to_folio(jh_in->b_frozen_data); 36662306a36Sopenharmony_ci new_offset = offset_in_folio(new_folio, jh_in->b_frozen_data); 36762306a36Sopenharmony_ci } else { 36862306a36Sopenharmony_ci new_folio = jh2bh(jh_in)->b_folio; 36962306a36Sopenharmony_ci new_offset = offset_in_folio(new_folio, jh2bh(jh_in)->b_data); 37062306a36Sopenharmony_ci } 37162306a36Sopenharmony_ci 37262306a36Sopenharmony_ci mapped_data = kmap_local_folio(new_folio, new_offset); 37362306a36Sopenharmony_ci /* 37462306a36Sopenharmony_ci * Fire data frozen trigger if data already wasn't frozen. Do this 37562306a36Sopenharmony_ci * before checking for escaping, as the trigger may modify the magic 37662306a36Sopenharmony_ci * offset. If a copy-out happens afterwards, it will have the correct 37762306a36Sopenharmony_ci * data in the buffer. 
37862306a36Sopenharmony_ci */ 37962306a36Sopenharmony_ci if (!done_copy_out) 38062306a36Sopenharmony_ci jbd2_buffer_frozen_trigger(jh_in, mapped_data, 38162306a36Sopenharmony_ci jh_in->b_triggers); 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_ci /* 38462306a36Sopenharmony_ci * Check for escaping 38562306a36Sopenharmony_ci */ 38662306a36Sopenharmony_ci if (*((__be32 *)mapped_data) == cpu_to_be32(JBD2_MAGIC_NUMBER)) { 38762306a36Sopenharmony_ci need_copy_out = 1; 38862306a36Sopenharmony_ci do_escape = 1; 38962306a36Sopenharmony_ci } 39062306a36Sopenharmony_ci kunmap_local(mapped_data); 39162306a36Sopenharmony_ci 39262306a36Sopenharmony_ci /* 39362306a36Sopenharmony_ci * Do we need to do a data copy? 39462306a36Sopenharmony_ci */ 39562306a36Sopenharmony_ci if (need_copy_out && !done_copy_out) { 39662306a36Sopenharmony_ci char *tmp; 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_ci spin_unlock(&jh_in->b_state_lock); 39962306a36Sopenharmony_ci tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); 40062306a36Sopenharmony_ci if (!tmp) { 40162306a36Sopenharmony_ci brelse(new_bh); 40262306a36Sopenharmony_ci return -ENOMEM; 40362306a36Sopenharmony_ci } 40462306a36Sopenharmony_ci spin_lock(&jh_in->b_state_lock); 40562306a36Sopenharmony_ci if (jh_in->b_frozen_data) { 40662306a36Sopenharmony_ci jbd2_free(tmp, bh_in->b_size); 40762306a36Sopenharmony_ci goto repeat; 40862306a36Sopenharmony_ci } 40962306a36Sopenharmony_ci 41062306a36Sopenharmony_ci jh_in->b_frozen_data = tmp; 41162306a36Sopenharmony_ci memcpy_from_folio(tmp, new_folio, new_offset, bh_in->b_size); 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci new_folio = virt_to_folio(tmp); 41462306a36Sopenharmony_ci new_offset = offset_in_folio(new_folio, tmp); 41562306a36Sopenharmony_ci done_copy_out = 1; 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci /* 41862306a36Sopenharmony_ci * This isn't strictly necessary, as we're using frozen 41962306a36Sopenharmony_ci * data for the escaping, but it keeps consistency with 
42062306a36Sopenharmony_ci * b_frozen_data usage. 42162306a36Sopenharmony_ci */ 42262306a36Sopenharmony_ci jh_in->b_frozen_triggers = jh_in->b_triggers; 42362306a36Sopenharmony_ci } 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci /* 42662306a36Sopenharmony_ci * Did we need to do an escaping? Now we've done all the 42762306a36Sopenharmony_ci * copying, we can finally do so. 42862306a36Sopenharmony_ci */ 42962306a36Sopenharmony_ci if (do_escape) { 43062306a36Sopenharmony_ci mapped_data = kmap_local_folio(new_folio, new_offset); 43162306a36Sopenharmony_ci *((unsigned int *)mapped_data) = 0; 43262306a36Sopenharmony_ci kunmap_local(mapped_data); 43362306a36Sopenharmony_ci } 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_ci folio_set_bh(new_bh, new_folio, new_offset); 43662306a36Sopenharmony_ci new_bh->b_size = bh_in->b_size; 43762306a36Sopenharmony_ci new_bh->b_bdev = journal->j_dev; 43862306a36Sopenharmony_ci new_bh->b_blocknr = blocknr; 43962306a36Sopenharmony_ci new_bh->b_private = bh_in; 44062306a36Sopenharmony_ci set_buffer_mapped(new_bh); 44162306a36Sopenharmony_ci set_buffer_dirty(new_bh); 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci *bh_out = new_bh; 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_ci /* 44662306a36Sopenharmony_ci * The to-be-written buffer needs to get moved to the io queue, 44762306a36Sopenharmony_ci * and the original buffer whose contents we are shadowing or 44862306a36Sopenharmony_ci * copying is moved to the transaction's shadow queue. 
44962306a36Sopenharmony_ci */ 45062306a36Sopenharmony_ci JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); 45162306a36Sopenharmony_ci spin_lock(&journal->j_list_lock); 45262306a36Sopenharmony_ci __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); 45362306a36Sopenharmony_ci spin_unlock(&journal->j_list_lock); 45462306a36Sopenharmony_ci set_buffer_shadow(bh_in); 45562306a36Sopenharmony_ci spin_unlock(&jh_in->b_state_lock); 45662306a36Sopenharmony_ci 45762306a36Sopenharmony_ci return do_escape | (done_copy_out << 1); 45862306a36Sopenharmony_ci} 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci/* 46162306a36Sopenharmony_ci * Allocation code for the journal file. Manage the space left in the 46262306a36Sopenharmony_ci * journal, so that we can begin checkpointing when appropriate. 46362306a36Sopenharmony_ci */ 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_ci/* 46662306a36Sopenharmony_ci * Called with j_state_lock locked for writing. 46762306a36Sopenharmony_ci * Returns true if a transaction commit was started. 46862306a36Sopenharmony_ci */ 46962306a36Sopenharmony_cistatic int __jbd2_log_start_commit(journal_t *journal, tid_t target) 47062306a36Sopenharmony_ci{ 47162306a36Sopenharmony_ci /* Return if the txn has already requested to be committed */ 47262306a36Sopenharmony_ci if (journal->j_commit_request == target) 47362306a36Sopenharmony_ci return 0; 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci /* 47662306a36Sopenharmony_ci * The only transaction we can possibly wait upon is the 47762306a36Sopenharmony_ci * currently running transaction (if it exists). Otherwise, 47862306a36Sopenharmony_ci * the target tid must be an old one. 
47962306a36Sopenharmony_ci */ 48062306a36Sopenharmony_ci if (journal->j_running_transaction && 48162306a36Sopenharmony_ci journal->j_running_transaction->t_tid == target) { 48262306a36Sopenharmony_ci /* 48362306a36Sopenharmony_ci * We want a new commit: OK, mark the request and wakeup the 48462306a36Sopenharmony_ci * commit thread. We do _not_ do the commit ourselves. 48562306a36Sopenharmony_ci */ 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_ci journal->j_commit_request = target; 48862306a36Sopenharmony_ci jbd2_debug(1, "JBD2: requesting commit %u/%u\n", 48962306a36Sopenharmony_ci journal->j_commit_request, 49062306a36Sopenharmony_ci journal->j_commit_sequence); 49162306a36Sopenharmony_ci journal->j_running_transaction->t_requested = jiffies; 49262306a36Sopenharmony_ci wake_up(&journal->j_wait_commit); 49362306a36Sopenharmony_ci return 1; 49462306a36Sopenharmony_ci } else if (!tid_geq(journal->j_commit_request, target)) 49562306a36Sopenharmony_ci /* This should never happen, but if it does, preserve 49662306a36Sopenharmony_ci the evidence before kjournald goes into a loop and 49762306a36Sopenharmony_ci increments j_commit_sequence beyond all recognition. */ 49862306a36Sopenharmony_ci WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n", 49962306a36Sopenharmony_ci journal->j_commit_request, 50062306a36Sopenharmony_ci journal->j_commit_sequence, 50162306a36Sopenharmony_ci target, journal->j_running_transaction ? 
50262306a36Sopenharmony_ci journal->j_running_transaction->t_tid : 0); 50362306a36Sopenharmony_ci return 0; 50462306a36Sopenharmony_ci} 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_ciint jbd2_log_start_commit(journal_t *journal, tid_t tid) 50762306a36Sopenharmony_ci{ 50862306a36Sopenharmony_ci int ret; 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 51162306a36Sopenharmony_ci ret = __jbd2_log_start_commit(journal, tid); 51262306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 51362306a36Sopenharmony_ci return ret; 51462306a36Sopenharmony_ci} 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci/* 51762306a36Sopenharmony_ci * Force and wait any uncommitted transactions. We can only force the running 51862306a36Sopenharmony_ci * transaction if we don't have an active handle, otherwise, we will deadlock. 51962306a36Sopenharmony_ci * Returns: <0 in case of error, 52062306a36Sopenharmony_ci * 0 if nothing to commit, 52162306a36Sopenharmony_ci * 1 if transaction was successfully committed. 
52262306a36Sopenharmony_ci */ 52362306a36Sopenharmony_cistatic int __jbd2_journal_force_commit(journal_t *journal) 52462306a36Sopenharmony_ci{ 52562306a36Sopenharmony_ci transaction_t *transaction = NULL; 52662306a36Sopenharmony_ci tid_t tid; 52762306a36Sopenharmony_ci int need_to_start = 0, ret = 0; 52862306a36Sopenharmony_ci 52962306a36Sopenharmony_ci read_lock(&journal->j_state_lock); 53062306a36Sopenharmony_ci if (journal->j_running_transaction && !current->journal_info) { 53162306a36Sopenharmony_ci transaction = journal->j_running_transaction; 53262306a36Sopenharmony_ci if (!tid_geq(journal->j_commit_request, transaction->t_tid)) 53362306a36Sopenharmony_ci need_to_start = 1; 53462306a36Sopenharmony_ci } else if (journal->j_committing_transaction) 53562306a36Sopenharmony_ci transaction = journal->j_committing_transaction; 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci if (!transaction) { 53862306a36Sopenharmony_ci /* Nothing to commit */ 53962306a36Sopenharmony_ci read_unlock(&journal->j_state_lock); 54062306a36Sopenharmony_ci return 0; 54162306a36Sopenharmony_ci } 54262306a36Sopenharmony_ci tid = transaction->t_tid; 54362306a36Sopenharmony_ci read_unlock(&journal->j_state_lock); 54462306a36Sopenharmony_ci if (need_to_start) 54562306a36Sopenharmony_ci jbd2_log_start_commit(journal, tid); 54662306a36Sopenharmony_ci ret = jbd2_log_wait_commit(journal, tid); 54762306a36Sopenharmony_ci if (!ret) 54862306a36Sopenharmony_ci ret = 1; 54962306a36Sopenharmony_ci 55062306a36Sopenharmony_ci return ret; 55162306a36Sopenharmony_ci} 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_ci/** 55462306a36Sopenharmony_ci * jbd2_journal_force_commit_nested - Force and wait upon a commit if the 55562306a36Sopenharmony_ci * calling process is not within transaction. 55662306a36Sopenharmony_ci * 55762306a36Sopenharmony_ci * @journal: journal to force 55862306a36Sopenharmony_ci * Returns true if progress was made. 
55962306a36Sopenharmony_ci * 56062306a36Sopenharmony_ci * This is used for forcing out undo-protected data which contains 56162306a36Sopenharmony_ci * bitmaps, when the fs is running out of space. 56262306a36Sopenharmony_ci */ 56362306a36Sopenharmony_ciint jbd2_journal_force_commit_nested(journal_t *journal) 56462306a36Sopenharmony_ci{ 56562306a36Sopenharmony_ci int ret; 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_ci ret = __jbd2_journal_force_commit(journal); 56862306a36Sopenharmony_ci return ret > 0; 56962306a36Sopenharmony_ci} 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_ci/** 57262306a36Sopenharmony_ci * jbd2_journal_force_commit() - force any uncommitted transactions 57362306a36Sopenharmony_ci * @journal: journal to force 57462306a36Sopenharmony_ci * 57562306a36Sopenharmony_ci * Caller want unconditional commit. We can only force the running transaction 57662306a36Sopenharmony_ci * if we don't have an active handle, otherwise, we will deadlock. 57762306a36Sopenharmony_ci */ 57862306a36Sopenharmony_ciint jbd2_journal_force_commit(journal_t *journal) 57962306a36Sopenharmony_ci{ 58062306a36Sopenharmony_ci int ret; 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_ci J_ASSERT(!current->journal_info); 58362306a36Sopenharmony_ci ret = __jbd2_journal_force_commit(journal); 58462306a36Sopenharmony_ci if (ret > 0) 58562306a36Sopenharmony_ci ret = 0; 58662306a36Sopenharmony_ci return ret; 58762306a36Sopenharmony_ci} 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci/* 59062306a36Sopenharmony_ci * Start a commit of the current running transaction (if any). 
Returns true 59162306a36Sopenharmony_ci * if a transaction is going to be committed (or is currently already 59262306a36Sopenharmony_ci * committing), and fills its tid in at *ptid 59362306a36Sopenharmony_ci */ 59462306a36Sopenharmony_ciint jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) 59562306a36Sopenharmony_ci{ 59662306a36Sopenharmony_ci int ret = 0; 59762306a36Sopenharmony_ci 59862306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 59962306a36Sopenharmony_ci if (journal->j_running_transaction) { 60062306a36Sopenharmony_ci tid_t tid = journal->j_running_transaction->t_tid; 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_ci __jbd2_log_start_commit(journal, tid); 60362306a36Sopenharmony_ci /* There's a running transaction and we've just made sure 60462306a36Sopenharmony_ci * it's commit has been scheduled. */ 60562306a36Sopenharmony_ci if (ptid) 60662306a36Sopenharmony_ci *ptid = tid; 60762306a36Sopenharmony_ci ret = 1; 60862306a36Sopenharmony_ci } else if (journal->j_committing_transaction) { 60962306a36Sopenharmony_ci /* 61062306a36Sopenharmony_ci * If commit has been started, then we have to wait for 61162306a36Sopenharmony_ci * completion of that transaction. 61262306a36Sopenharmony_ci */ 61362306a36Sopenharmony_ci if (ptid) 61462306a36Sopenharmony_ci *ptid = journal->j_committing_transaction->t_tid; 61562306a36Sopenharmony_ci ret = 1; 61662306a36Sopenharmony_ci } 61762306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 61862306a36Sopenharmony_ci return ret; 61962306a36Sopenharmony_ci} 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci/* 62262306a36Sopenharmony_ci * Return 1 if a given transaction has not yet sent barrier request 62362306a36Sopenharmony_ci * connected with a transaction commit. If 0 is returned, transaction 62462306a36Sopenharmony_ci * may or may not have sent the barrier. Used to avoid sending barrier 62562306a36Sopenharmony_ci * twice in common cases. 
62662306a36Sopenharmony_ci */ 62762306a36Sopenharmony_ciint jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid) 62862306a36Sopenharmony_ci{ 62962306a36Sopenharmony_ci int ret = 0; 63062306a36Sopenharmony_ci transaction_t *commit_trans; 63162306a36Sopenharmony_ci 63262306a36Sopenharmony_ci if (!(journal->j_flags & JBD2_BARRIER)) 63362306a36Sopenharmony_ci return 0; 63462306a36Sopenharmony_ci read_lock(&journal->j_state_lock); 63562306a36Sopenharmony_ci /* Transaction already committed? */ 63662306a36Sopenharmony_ci if (tid_geq(journal->j_commit_sequence, tid)) 63762306a36Sopenharmony_ci goto out; 63862306a36Sopenharmony_ci commit_trans = journal->j_committing_transaction; 63962306a36Sopenharmony_ci if (!commit_trans || commit_trans->t_tid != tid) { 64062306a36Sopenharmony_ci ret = 1; 64162306a36Sopenharmony_ci goto out; 64262306a36Sopenharmony_ci } 64362306a36Sopenharmony_ci /* 64462306a36Sopenharmony_ci * Transaction is being committed and we already proceeded to 64562306a36Sopenharmony_ci * submitting a flush to fs partition? 64662306a36Sopenharmony_ci */ 64762306a36Sopenharmony_ci if (journal->j_fs_dev != journal->j_dev) { 64862306a36Sopenharmony_ci if (!commit_trans->t_need_data_flush || 64962306a36Sopenharmony_ci commit_trans->t_state >= T_COMMIT_DFLUSH) 65062306a36Sopenharmony_ci goto out; 65162306a36Sopenharmony_ci } else { 65262306a36Sopenharmony_ci if (commit_trans->t_state >= T_COMMIT_JFLUSH) 65362306a36Sopenharmony_ci goto out; 65462306a36Sopenharmony_ci } 65562306a36Sopenharmony_ci ret = 1; 65662306a36Sopenharmony_ciout: 65762306a36Sopenharmony_ci read_unlock(&journal->j_state_lock); 65862306a36Sopenharmony_ci return ret; 65962306a36Sopenharmony_ci} 66062306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_trans_will_send_data_barrier); 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_ci/* 66362306a36Sopenharmony_ci * Wait for a specified commit to complete. 66462306a36Sopenharmony_ci * The caller may not hold the journal lock. 
66562306a36Sopenharmony_ci */ 66662306a36Sopenharmony_ciint jbd2_log_wait_commit(journal_t *journal, tid_t tid) 66762306a36Sopenharmony_ci{ 66862306a36Sopenharmony_ci int err = 0; 66962306a36Sopenharmony_ci 67062306a36Sopenharmony_ci read_lock(&journal->j_state_lock); 67162306a36Sopenharmony_ci#ifdef CONFIG_PROVE_LOCKING 67262306a36Sopenharmony_ci /* 67362306a36Sopenharmony_ci * Some callers make sure transaction is already committing and in that 67462306a36Sopenharmony_ci * case we cannot block on open handles anymore. So don't warn in that 67562306a36Sopenharmony_ci * case. 67662306a36Sopenharmony_ci */ 67762306a36Sopenharmony_ci if (tid_gt(tid, journal->j_commit_sequence) && 67862306a36Sopenharmony_ci (!journal->j_committing_transaction || 67962306a36Sopenharmony_ci journal->j_committing_transaction->t_tid != tid)) { 68062306a36Sopenharmony_ci read_unlock(&journal->j_state_lock); 68162306a36Sopenharmony_ci jbd2_might_wait_for_commit(journal); 68262306a36Sopenharmony_ci read_lock(&journal->j_state_lock); 68362306a36Sopenharmony_ci } 68462306a36Sopenharmony_ci#endif 68562306a36Sopenharmony_ci#ifdef CONFIG_JBD2_DEBUG 68662306a36Sopenharmony_ci if (!tid_geq(journal->j_commit_request, tid)) { 68762306a36Sopenharmony_ci printk(KERN_ERR 68862306a36Sopenharmony_ci "%s: error: j_commit_request=%u, tid=%u\n", 68962306a36Sopenharmony_ci __func__, journal->j_commit_request, tid); 69062306a36Sopenharmony_ci } 69162306a36Sopenharmony_ci#endif 69262306a36Sopenharmony_ci while (tid_gt(tid, journal->j_commit_sequence)) { 69362306a36Sopenharmony_ci jbd2_debug(1, "JBD2: want %u, j_commit_sequence=%u\n", 69462306a36Sopenharmony_ci tid, journal->j_commit_sequence); 69562306a36Sopenharmony_ci read_unlock(&journal->j_state_lock); 69662306a36Sopenharmony_ci wake_up(&journal->j_wait_commit); 69762306a36Sopenharmony_ci wait_event(journal->j_wait_done_commit, 69862306a36Sopenharmony_ci !tid_gt(tid, journal->j_commit_sequence)); 69962306a36Sopenharmony_ci read_lock(&journal->j_state_lock); 
70062306a36Sopenharmony_ci } 70162306a36Sopenharmony_ci read_unlock(&journal->j_state_lock); 70262306a36Sopenharmony_ci 70362306a36Sopenharmony_ci if (unlikely(is_journal_aborted(journal))) 70462306a36Sopenharmony_ci err = -EIO; 70562306a36Sopenharmony_ci return err; 70662306a36Sopenharmony_ci} 70762306a36Sopenharmony_ci 70862306a36Sopenharmony_ci/* 70962306a36Sopenharmony_ci * Start a fast commit. If there's an ongoing fast or full commit wait for 71062306a36Sopenharmony_ci * it to complete. Returns 0 if a new fast commit was started. Returns -EALREADY 71162306a36Sopenharmony_ci * if a fast commit is not needed, either because there's an already a commit 71262306a36Sopenharmony_ci * going on or this tid has already been committed. Returns -EINVAL if no jbd2 71362306a36Sopenharmony_ci * commit has yet been performed. 71462306a36Sopenharmony_ci */ 71562306a36Sopenharmony_ciint jbd2_fc_begin_commit(journal_t *journal, tid_t tid) 71662306a36Sopenharmony_ci{ 71762306a36Sopenharmony_ci if (unlikely(is_journal_aborted(journal))) 71862306a36Sopenharmony_ci return -EIO; 71962306a36Sopenharmony_ci /* 72062306a36Sopenharmony_ci * Fast commits only allowed if at least one full commit has 72162306a36Sopenharmony_ci * been processed. 
72262306a36Sopenharmony_ci */ 72362306a36Sopenharmony_ci if (!journal->j_stats.ts_tid) 72462306a36Sopenharmony_ci return -EINVAL; 72562306a36Sopenharmony_ci 72662306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 72762306a36Sopenharmony_ci if (tid <= journal->j_commit_sequence) { 72862306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 72962306a36Sopenharmony_ci return -EALREADY; 73062306a36Sopenharmony_ci } 73162306a36Sopenharmony_ci 73262306a36Sopenharmony_ci if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING || 73362306a36Sopenharmony_ci (journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) { 73462306a36Sopenharmony_ci DEFINE_WAIT(wait); 73562306a36Sopenharmony_ci 73662306a36Sopenharmony_ci prepare_to_wait(&journal->j_fc_wait, &wait, 73762306a36Sopenharmony_ci TASK_UNINTERRUPTIBLE); 73862306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 73962306a36Sopenharmony_ci schedule(); 74062306a36Sopenharmony_ci finish_wait(&journal->j_fc_wait, &wait); 74162306a36Sopenharmony_ci return -EALREADY; 74262306a36Sopenharmony_ci } 74362306a36Sopenharmony_ci journal->j_flags |= JBD2_FAST_COMMIT_ONGOING; 74462306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 74562306a36Sopenharmony_ci jbd2_journal_lock_updates(journal); 74662306a36Sopenharmony_ci 74762306a36Sopenharmony_ci return 0; 74862306a36Sopenharmony_ci} 74962306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_fc_begin_commit); 75062306a36Sopenharmony_ci 75162306a36Sopenharmony_ci/* 75262306a36Sopenharmony_ci * Stop a fast commit. If fallback is set, this function starts commit of 75362306a36Sopenharmony_ci * TID tid before any other fast commit can start. 
75462306a36Sopenharmony_ci */ 75562306a36Sopenharmony_cistatic int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback) 75662306a36Sopenharmony_ci{ 75762306a36Sopenharmony_ci jbd2_journal_unlock_updates(journal); 75862306a36Sopenharmony_ci if (journal->j_fc_cleanup_callback) 75962306a36Sopenharmony_ci journal->j_fc_cleanup_callback(journal, 0, tid); 76062306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 76162306a36Sopenharmony_ci journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING; 76262306a36Sopenharmony_ci if (fallback) 76362306a36Sopenharmony_ci journal->j_flags |= JBD2_FULL_COMMIT_ONGOING; 76462306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 76562306a36Sopenharmony_ci wake_up(&journal->j_fc_wait); 76662306a36Sopenharmony_ci if (fallback) 76762306a36Sopenharmony_ci return jbd2_complete_transaction(journal, tid); 76862306a36Sopenharmony_ci return 0; 76962306a36Sopenharmony_ci} 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_ciint jbd2_fc_end_commit(journal_t *journal) 77262306a36Sopenharmony_ci{ 77362306a36Sopenharmony_ci return __jbd2_fc_end_commit(journal, 0, false); 77462306a36Sopenharmony_ci} 77562306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_fc_end_commit); 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_ciint jbd2_fc_end_commit_fallback(journal_t *journal) 77862306a36Sopenharmony_ci{ 77962306a36Sopenharmony_ci tid_t tid; 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci read_lock(&journal->j_state_lock); 78262306a36Sopenharmony_ci tid = journal->j_running_transaction ? 78362306a36Sopenharmony_ci journal->j_running_transaction->t_tid : 0; 78462306a36Sopenharmony_ci read_unlock(&journal->j_state_lock); 78562306a36Sopenharmony_ci return __jbd2_fc_end_commit(journal, tid, true); 78662306a36Sopenharmony_ci} 78762306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_fc_end_commit_fallback); 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_ci/* Return 1 when transaction with given tid has already committed. 
*/ 79062306a36Sopenharmony_ciint jbd2_transaction_committed(journal_t *journal, tid_t tid) 79162306a36Sopenharmony_ci{ 79262306a36Sopenharmony_ci int ret = 1; 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci read_lock(&journal->j_state_lock); 79562306a36Sopenharmony_ci if (journal->j_running_transaction && 79662306a36Sopenharmony_ci journal->j_running_transaction->t_tid == tid) 79762306a36Sopenharmony_ci ret = 0; 79862306a36Sopenharmony_ci if (journal->j_committing_transaction && 79962306a36Sopenharmony_ci journal->j_committing_transaction->t_tid == tid) 80062306a36Sopenharmony_ci ret = 0; 80162306a36Sopenharmony_ci read_unlock(&journal->j_state_lock); 80262306a36Sopenharmony_ci return ret; 80362306a36Sopenharmony_ci} 80462306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_transaction_committed); 80562306a36Sopenharmony_ci 80662306a36Sopenharmony_ci/* 80762306a36Sopenharmony_ci * When this function returns the transaction corresponding to tid 80862306a36Sopenharmony_ci * will be completed. If the transaction has currently running, start 80962306a36Sopenharmony_ci * committing that transaction before waiting for it to complete. If 81062306a36Sopenharmony_ci * the transaction id is stale, it is by definition already completed, 81162306a36Sopenharmony_ci * so just return SUCCESS. 
81262306a36Sopenharmony_ci */ 81362306a36Sopenharmony_ciint jbd2_complete_transaction(journal_t *journal, tid_t tid) 81462306a36Sopenharmony_ci{ 81562306a36Sopenharmony_ci int need_to_wait = 1; 81662306a36Sopenharmony_ci 81762306a36Sopenharmony_ci read_lock(&journal->j_state_lock); 81862306a36Sopenharmony_ci if (journal->j_running_transaction && 81962306a36Sopenharmony_ci journal->j_running_transaction->t_tid == tid) { 82062306a36Sopenharmony_ci if (journal->j_commit_request != tid) { 82162306a36Sopenharmony_ci /* transaction not yet started, so request it */ 82262306a36Sopenharmony_ci read_unlock(&journal->j_state_lock); 82362306a36Sopenharmony_ci jbd2_log_start_commit(journal, tid); 82462306a36Sopenharmony_ci goto wait_commit; 82562306a36Sopenharmony_ci } 82662306a36Sopenharmony_ci } else if (!(journal->j_committing_transaction && 82762306a36Sopenharmony_ci journal->j_committing_transaction->t_tid == tid)) 82862306a36Sopenharmony_ci need_to_wait = 0; 82962306a36Sopenharmony_ci read_unlock(&journal->j_state_lock); 83062306a36Sopenharmony_ci if (!need_to_wait) 83162306a36Sopenharmony_ci return 0; 83262306a36Sopenharmony_ciwait_commit: 83362306a36Sopenharmony_ci return jbd2_log_wait_commit(journal, tid); 83462306a36Sopenharmony_ci} 83562306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_complete_transaction); 83662306a36Sopenharmony_ci 83762306a36Sopenharmony_ci/* 83862306a36Sopenharmony_ci * Log buffer allocation routines: 83962306a36Sopenharmony_ci */ 84062306a36Sopenharmony_ci 84162306a36Sopenharmony_ciint jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp) 84262306a36Sopenharmony_ci{ 84362306a36Sopenharmony_ci unsigned long blocknr; 84462306a36Sopenharmony_ci 84562306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 84662306a36Sopenharmony_ci J_ASSERT(journal->j_free > 1); 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_ci blocknr = journal->j_head; 84962306a36Sopenharmony_ci journal->j_head++; 85062306a36Sopenharmony_ci journal->j_free--; 
85162306a36Sopenharmony_ci if (journal->j_head == journal->j_last) 85262306a36Sopenharmony_ci journal->j_head = journal->j_first; 85362306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 85462306a36Sopenharmony_ci return jbd2_journal_bmap(journal, blocknr, retp); 85562306a36Sopenharmony_ci} 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_ci/* Map one fast commit buffer for use by the file system */ 85862306a36Sopenharmony_ciint jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out) 85962306a36Sopenharmony_ci{ 86062306a36Sopenharmony_ci unsigned long long pblock; 86162306a36Sopenharmony_ci unsigned long blocknr; 86262306a36Sopenharmony_ci int ret = 0; 86362306a36Sopenharmony_ci struct buffer_head *bh; 86462306a36Sopenharmony_ci int fc_off; 86562306a36Sopenharmony_ci 86662306a36Sopenharmony_ci *bh_out = NULL; 86762306a36Sopenharmony_ci 86862306a36Sopenharmony_ci if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) { 86962306a36Sopenharmony_ci fc_off = journal->j_fc_off; 87062306a36Sopenharmony_ci blocknr = journal->j_fc_first + fc_off; 87162306a36Sopenharmony_ci journal->j_fc_off++; 87262306a36Sopenharmony_ci } else { 87362306a36Sopenharmony_ci ret = -EINVAL; 87462306a36Sopenharmony_ci } 87562306a36Sopenharmony_ci 87662306a36Sopenharmony_ci if (ret) 87762306a36Sopenharmony_ci return ret; 87862306a36Sopenharmony_ci 87962306a36Sopenharmony_ci ret = jbd2_journal_bmap(journal, blocknr, &pblock); 88062306a36Sopenharmony_ci if (ret) 88162306a36Sopenharmony_ci return ret; 88262306a36Sopenharmony_ci 88362306a36Sopenharmony_ci bh = __getblk(journal->j_dev, pblock, journal->j_blocksize); 88462306a36Sopenharmony_ci if (!bh) 88562306a36Sopenharmony_ci return -ENOMEM; 88662306a36Sopenharmony_ci 88762306a36Sopenharmony_ci 88862306a36Sopenharmony_ci journal->j_fc_wbuf[fc_off] = bh; 88962306a36Sopenharmony_ci 89062306a36Sopenharmony_ci *bh_out = bh; 89162306a36Sopenharmony_ci 89262306a36Sopenharmony_ci return 0; 89362306a36Sopenharmony_ci} 
89462306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_fc_get_buf); 89562306a36Sopenharmony_ci 89662306a36Sopenharmony_ci/* 89762306a36Sopenharmony_ci * Wait on fast commit buffers that were allocated by jbd2_fc_get_buf 89862306a36Sopenharmony_ci * for completion. 89962306a36Sopenharmony_ci */ 90062306a36Sopenharmony_ciint jbd2_fc_wait_bufs(journal_t *journal, int num_blks) 90162306a36Sopenharmony_ci{ 90262306a36Sopenharmony_ci struct buffer_head *bh; 90362306a36Sopenharmony_ci int i, j_fc_off; 90462306a36Sopenharmony_ci 90562306a36Sopenharmony_ci j_fc_off = journal->j_fc_off; 90662306a36Sopenharmony_ci 90762306a36Sopenharmony_ci /* 90862306a36Sopenharmony_ci * Wait in reverse order to minimize chances of us being woken up before 90962306a36Sopenharmony_ci * all IOs have completed 91062306a36Sopenharmony_ci */ 91162306a36Sopenharmony_ci for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) { 91262306a36Sopenharmony_ci bh = journal->j_fc_wbuf[i]; 91362306a36Sopenharmony_ci wait_on_buffer(bh); 91462306a36Sopenharmony_ci /* 91562306a36Sopenharmony_ci * Update j_fc_off so jbd2_fc_release_bufs can release remain 91662306a36Sopenharmony_ci * buffer head. 
91762306a36Sopenharmony_ci */ 91862306a36Sopenharmony_ci if (unlikely(!buffer_uptodate(bh))) { 91962306a36Sopenharmony_ci journal->j_fc_off = i + 1; 92062306a36Sopenharmony_ci return -EIO; 92162306a36Sopenharmony_ci } 92262306a36Sopenharmony_ci put_bh(bh); 92362306a36Sopenharmony_ci journal->j_fc_wbuf[i] = NULL; 92462306a36Sopenharmony_ci } 92562306a36Sopenharmony_ci 92662306a36Sopenharmony_ci return 0; 92762306a36Sopenharmony_ci} 92862306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_fc_wait_bufs); 92962306a36Sopenharmony_ci 93062306a36Sopenharmony_ciint jbd2_fc_release_bufs(journal_t *journal) 93162306a36Sopenharmony_ci{ 93262306a36Sopenharmony_ci struct buffer_head *bh; 93362306a36Sopenharmony_ci int i, j_fc_off; 93462306a36Sopenharmony_ci 93562306a36Sopenharmony_ci j_fc_off = journal->j_fc_off; 93662306a36Sopenharmony_ci 93762306a36Sopenharmony_ci for (i = j_fc_off - 1; i >= 0; i--) { 93862306a36Sopenharmony_ci bh = journal->j_fc_wbuf[i]; 93962306a36Sopenharmony_ci if (!bh) 94062306a36Sopenharmony_ci break; 94162306a36Sopenharmony_ci put_bh(bh); 94262306a36Sopenharmony_ci journal->j_fc_wbuf[i] = NULL; 94362306a36Sopenharmony_ci } 94462306a36Sopenharmony_ci 94562306a36Sopenharmony_ci return 0; 94662306a36Sopenharmony_ci} 94762306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_fc_release_bufs); 94862306a36Sopenharmony_ci 94962306a36Sopenharmony_ci/* 95062306a36Sopenharmony_ci * Conversion of logical to physical block numbers for the journal 95162306a36Sopenharmony_ci * 95262306a36Sopenharmony_ci * On external journals the journal blocks are identity-mapped, so 95362306a36Sopenharmony_ci * this is a no-op. If needed, we can use j_blk_offset - everything is 95462306a36Sopenharmony_ci * ready. 
95562306a36Sopenharmony_ci */ 95662306a36Sopenharmony_ciint jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, 95762306a36Sopenharmony_ci unsigned long long *retp) 95862306a36Sopenharmony_ci{ 95962306a36Sopenharmony_ci int err = 0; 96062306a36Sopenharmony_ci unsigned long long ret; 96162306a36Sopenharmony_ci sector_t block = blocknr; 96262306a36Sopenharmony_ci 96362306a36Sopenharmony_ci if (journal->j_bmap) { 96462306a36Sopenharmony_ci err = journal->j_bmap(journal, &block); 96562306a36Sopenharmony_ci if (err == 0) 96662306a36Sopenharmony_ci *retp = block; 96762306a36Sopenharmony_ci } else if (journal->j_inode) { 96862306a36Sopenharmony_ci ret = bmap(journal->j_inode, &block); 96962306a36Sopenharmony_ci 97062306a36Sopenharmony_ci if (ret || !block) { 97162306a36Sopenharmony_ci printk(KERN_ALERT "%s: journal block not found " 97262306a36Sopenharmony_ci "at offset %lu on %s\n", 97362306a36Sopenharmony_ci __func__, blocknr, journal->j_devname); 97462306a36Sopenharmony_ci err = -EIO; 97562306a36Sopenharmony_ci jbd2_journal_abort(journal, err); 97662306a36Sopenharmony_ci } else { 97762306a36Sopenharmony_ci *retp = block; 97862306a36Sopenharmony_ci } 97962306a36Sopenharmony_ci 98062306a36Sopenharmony_ci } else { 98162306a36Sopenharmony_ci *retp = blocknr; /* +journal->j_blk_offset */ 98262306a36Sopenharmony_ci } 98362306a36Sopenharmony_ci return err; 98462306a36Sopenharmony_ci} 98562306a36Sopenharmony_ci 98662306a36Sopenharmony_ci/* 98762306a36Sopenharmony_ci * We play buffer_head aliasing tricks to write data/metadata blocks to 98862306a36Sopenharmony_ci * the journal without copying their contents, but for journal 98962306a36Sopenharmony_ci * descriptor blocks we do need to generate bona fide buffers. 99062306a36Sopenharmony_ci * 99162306a36Sopenharmony_ci * After the caller of jbd2_journal_get_descriptor_buffer() has finished modifying 99262306a36Sopenharmony_ci * the buffer's contents they really should run flush_dcache_page(bh->b_page). 
99362306a36Sopenharmony_ci * But we don't bother doing that, so there will be coherency problems with 99462306a36Sopenharmony_ci * mmaps of blockdevs which hold live JBD-controlled filesystems. 99562306a36Sopenharmony_ci */ 99662306a36Sopenharmony_cistruct buffer_head * 99762306a36Sopenharmony_cijbd2_journal_get_descriptor_buffer(transaction_t *transaction, int type) 99862306a36Sopenharmony_ci{ 99962306a36Sopenharmony_ci journal_t *journal = transaction->t_journal; 100062306a36Sopenharmony_ci struct buffer_head *bh; 100162306a36Sopenharmony_ci unsigned long long blocknr; 100262306a36Sopenharmony_ci journal_header_t *header; 100362306a36Sopenharmony_ci int err; 100462306a36Sopenharmony_ci 100562306a36Sopenharmony_ci err = jbd2_journal_next_log_block(journal, &blocknr); 100662306a36Sopenharmony_ci 100762306a36Sopenharmony_ci if (err) 100862306a36Sopenharmony_ci return NULL; 100962306a36Sopenharmony_ci 101062306a36Sopenharmony_ci bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 101162306a36Sopenharmony_ci if (!bh) 101262306a36Sopenharmony_ci return NULL; 101362306a36Sopenharmony_ci atomic_dec(&transaction->t_outstanding_credits); 101462306a36Sopenharmony_ci lock_buffer(bh); 101562306a36Sopenharmony_ci memset(bh->b_data, 0, journal->j_blocksize); 101662306a36Sopenharmony_ci header = (journal_header_t *)bh->b_data; 101762306a36Sopenharmony_ci header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); 101862306a36Sopenharmony_ci header->h_blocktype = cpu_to_be32(type); 101962306a36Sopenharmony_ci header->h_sequence = cpu_to_be32(transaction->t_tid); 102062306a36Sopenharmony_ci set_buffer_uptodate(bh); 102162306a36Sopenharmony_ci unlock_buffer(bh); 102262306a36Sopenharmony_ci BUFFER_TRACE(bh, "return this buffer"); 102362306a36Sopenharmony_ci return bh; 102462306a36Sopenharmony_ci} 102562306a36Sopenharmony_ci 102662306a36Sopenharmony_civoid jbd2_descriptor_block_csum_set(journal_t *j, struct buffer_head *bh) 102762306a36Sopenharmony_ci{ 102862306a36Sopenharmony_ci 
struct jbd2_journal_block_tail *tail; 102962306a36Sopenharmony_ci __u32 csum; 103062306a36Sopenharmony_ci 103162306a36Sopenharmony_ci if (!jbd2_journal_has_csum_v2or3(j)) 103262306a36Sopenharmony_ci return; 103362306a36Sopenharmony_ci 103462306a36Sopenharmony_ci tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - 103562306a36Sopenharmony_ci sizeof(struct jbd2_journal_block_tail)); 103662306a36Sopenharmony_ci tail->t_checksum = 0; 103762306a36Sopenharmony_ci csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); 103862306a36Sopenharmony_ci tail->t_checksum = cpu_to_be32(csum); 103962306a36Sopenharmony_ci} 104062306a36Sopenharmony_ci 104162306a36Sopenharmony_ci/* 104262306a36Sopenharmony_ci * Return tid of the oldest transaction in the journal and block in the journal 104362306a36Sopenharmony_ci * where the transaction starts. 104462306a36Sopenharmony_ci * 104562306a36Sopenharmony_ci * If the journal is now empty, return which will be the next transaction ID 104662306a36Sopenharmony_ci * we will write and where will that transaction start. 104762306a36Sopenharmony_ci * 104862306a36Sopenharmony_ci * The return value is 0 if journal tail cannot be pushed any further, 1 if 104962306a36Sopenharmony_ci * it can. 
105062306a36Sopenharmony_ci */ 105162306a36Sopenharmony_ciint jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid, 105262306a36Sopenharmony_ci unsigned long *block) 105362306a36Sopenharmony_ci{ 105462306a36Sopenharmony_ci transaction_t *transaction; 105562306a36Sopenharmony_ci int ret; 105662306a36Sopenharmony_ci 105762306a36Sopenharmony_ci read_lock(&journal->j_state_lock); 105862306a36Sopenharmony_ci spin_lock(&journal->j_list_lock); 105962306a36Sopenharmony_ci transaction = journal->j_checkpoint_transactions; 106062306a36Sopenharmony_ci if (transaction) { 106162306a36Sopenharmony_ci *tid = transaction->t_tid; 106262306a36Sopenharmony_ci *block = transaction->t_log_start; 106362306a36Sopenharmony_ci } else if ((transaction = journal->j_committing_transaction) != NULL) { 106462306a36Sopenharmony_ci *tid = transaction->t_tid; 106562306a36Sopenharmony_ci *block = transaction->t_log_start; 106662306a36Sopenharmony_ci } else if ((transaction = journal->j_running_transaction) != NULL) { 106762306a36Sopenharmony_ci *tid = transaction->t_tid; 106862306a36Sopenharmony_ci *block = journal->j_head; 106962306a36Sopenharmony_ci } else { 107062306a36Sopenharmony_ci *tid = journal->j_transaction_sequence; 107162306a36Sopenharmony_ci *block = journal->j_head; 107262306a36Sopenharmony_ci } 107362306a36Sopenharmony_ci ret = tid_gt(*tid, journal->j_tail_sequence); 107462306a36Sopenharmony_ci spin_unlock(&journal->j_list_lock); 107562306a36Sopenharmony_ci read_unlock(&journal->j_state_lock); 107662306a36Sopenharmony_ci 107762306a36Sopenharmony_ci return ret; 107862306a36Sopenharmony_ci} 107962306a36Sopenharmony_ci 108062306a36Sopenharmony_ci/* 108162306a36Sopenharmony_ci * Update information in journal structure and in on disk journal superblock 108262306a36Sopenharmony_ci * about log tail. This function does not check whether information passed in 108362306a36Sopenharmony_ci * really pushes log tail further. 
It's responsibility of the caller to make 108462306a36Sopenharmony_ci * sure provided log tail information is valid (e.g. by holding 108562306a36Sopenharmony_ci * j_checkpoint_mutex all the time between computing log tail and calling this 108662306a36Sopenharmony_ci * function as is the case with jbd2_cleanup_journal_tail()). 108762306a36Sopenharmony_ci * 108862306a36Sopenharmony_ci * Requires j_checkpoint_mutex 108962306a36Sopenharmony_ci */ 109062306a36Sopenharmony_ciint __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) 109162306a36Sopenharmony_ci{ 109262306a36Sopenharmony_ci unsigned long freed; 109362306a36Sopenharmony_ci int ret; 109462306a36Sopenharmony_ci 109562306a36Sopenharmony_ci BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 109662306a36Sopenharmony_ci 109762306a36Sopenharmony_ci /* 109862306a36Sopenharmony_ci * We cannot afford for write to remain in drive's caches since as 109962306a36Sopenharmony_ci * soon as we update j_tail, next transaction can start reusing journal 110062306a36Sopenharmony_ci * space and if we lose sb update during power failure we'd replay 110162306a36Sopenharmony_ci * old transaction with possibly newly overwritten data. 
110262306a36Sopenharmony_ci */ 110362306a36Sopenharmony_ci ret = jbd2_journal_update_sb_log_tail(journal, tid, block, REQ_FUA); 110462306a36Sopenharmony_ci if (ret) 110562306a36Sopenharmony_ci goto out; 110662306a36Sopenharmony_ci 110762306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 110862306a36Sopenharmony_ci freed = block - journal->j_tail; 110962306a36Sopenharmony_ci if (block < journal->j_tail) 111062306a36Sopenharmony_ci freed += journal->j_last - journal->j_first; 111162306a36Sopenharmony_ci 111262306a36Sopenharmony_ci trace_jbd2_update_log_tail(journal, tid, block, freed); 111362306a36Sopenharmony_ci jbd2_debug(1, 111462306a36Sopenharmony_ci "Cleaning journal tail from %u to %u (offset %lu), " 111562306a36Sopenharmony_ci "freeing %lu\n", 111662306a36Sopenharmony_ci journal->j_tail_sequence, tid, block, freed); 111762306a36Sopenharmony_ci 111862306a36Sopenharmony_ci journal->j_free += freed; 111962306a36Sopenharmony_ci journal->j_tail_sequence = tid; 112062306a36Sopenharmony_ci journal->j_tail = block; 112162306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 112262306a36Sopenharmony_ci 112362306a36Sopenharmony_ciout: 112462306a36Sopenharmony_ci return ret; 112562306a36Sopenharmony_ci} 112662306a36Sopenharmony_ci 112762306a36Sopenharmony_ci/* 112862306a36Sopenharmony_ci * This is a variation of __jbd2_update_log_tail which checks for validity of 112962306a36Sopenharmony_ci * provided log tail and locks j_checkpoint_mutex. So it is safe against races 113062306a36Sopenharmony_ci * with other threads updating log tail. 
113162306a36Sopenharmony_ci */ 113262306a36Sopenharmony_civoid jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) 113362306a36Sopenharmony_ci{ 113462306a36Sopenharmony_ci mutex_lock_io(&journal->j_checkpoint_mutex); 113562306a36Sopenharmony_ci if (tid_gt(tid, journal->j_tail_sequence)) 113662306a36Sopenharmony_ci __jbd2_update_log_tail(journal, tid, block); 113762306a36Sopenharmony_ci mutex_unlock(&journal->j_checkpoint_mutex); 113862306a36Sopenharmony_ci} 113962306a36Sopenharmony_ci 114062306a36Sopenharmony_cistruct jbd2_stats_proc_session { 114162306a36Sopenharmony_ci journal_t *journal; 114262306a36Sopenharmony_ci struct transaction_stats_s *stats; 114362306a36Sopenharmony_ci int start; 114462306a36Sopenharmony_ci int max; 114562306a36Sopenharmony_ci}; 114662306a36Sopenharmony_ci 114762306a36Sopenharmony_cistatic void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos) 114862306a36Sopenharmony_ci{ 114962306a36Sopenharmony_ci return *pos ? NULL : SEQ_START_TOKEN; 115062306a36Sopenharmony_ci} 115162306a36Sopenharmony_ci 115262306a36Sopenharmony_cistatic void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos) 115362306a36Sopenharmony_ci{ 115462306a36Sopenharmony_ci (*pos)++; 115562306a36Sopenharmony_ci return NULL; 115662306a36Sopenharmony_ci} 115762306a36Sopenharmony_ci 115862306a36Sopenharmony_cistatic int jbd2_seq_info_show(struct seq_file *seq, void *v) 115962306a36Sopenharmony_ci{ 116062306a36Sopenharmony_ci struct jbd2_stats_proc_session *s = seq->private; 116162306a36Sopenharmony_ci 116262306a36Sopenharmony_ci if (v != SEQ_START_TOKEN) 116362306a36Sopenharmony_ci return 0; 116462306a36Sopenharmony_ci seq_printf(seq, "%lu transactions (%lu requested), " 116562306a36Sopenharmony_ci "each up to %u blocks\n", 116662306a36Sopenharmony_ci s->stats->ts_tid, s->stats->ts_requested, 116762306a36Sopenharmony_ci s->journal->j_max_transaction_buffers); 116862306a36Sopenharmony_ci if (s->stats->ts_tid == 0) 
116962306a36Sopenharmony_ci return 0; 117062306a36Sopenharmony_ci seq_printf(seq, "average: \n %ums waiting for transaction\n", 117162306a36Sopenharmony_ci jiffies_to_msecs(s->stats->run.rs_wait / s->stats->ts_tid)); 117262306a36Sopenharmony_ci seq_printf(seq, " %ums request delay\n", 117362306a36Sopenharmony_ci (s->stats->ts_requested == 0) ? 0 : 117462306a36Sopenharmony_ci jiffies_to_msecs(s->stats->run.rs_request_delay / 117562306a36Sopenharmony_ci s->stats->ts_requested)); 117662306a36Sopenharmony_ci seq_printf(seq, " %ums running transaction\n", 117762306a36Sopenharmony_ci jiffies_to_msecs(s->stats->run.rs_running / s->stats->ts_tid)); 117862306a36Sopenharmony_ci seq_printf(seq, " %ums transaction was being locked\n", 117962306a36Sopenharmony_ci jiffies_to_msecs(s->stats->run.rs_locked / s->stats->ts_tid)); 118062306a36Sopenharmony_ci seq_printf(seq, " %ums flushing data (in ordered mode)\n", 118162306a36Sopenharmony_ci jiffies_to_msecs(s->stats->run.rs_flushing / s->stats->ts_tid)); 118262306a36Sopenharmony_ci seq_printf(seq, " %ums logging transaction\n", 118362306a36Sopenharmony_ci jiffies_to_msecs(s->stats->run.rs_logging / s->stats->ts_tid)); 118462306a36Sopenharmony_ci seq_printf(seq, " %lluus average transaction commit time\n", 118562306a36Sopenharmony_ci div_u64(s->journal->j_average_commit_time, 1000)); 118662306a36Sopenharmony_ci seq_printf(seq, " %lu handles per transaction\n", 118762306a36Sopenharmony_ci s->stats->run.rs_handle_count / s->stats->ts_tid); 118862306a36Sopenharmony_ci seq_printf(seq, " %lu blocks per transaction\n", 118962306a36Sopenharmony_ci s->stats->run.rs_blocks / s->stats->ts_tid); 119062306a36Sopenharmony_ci seq_printf(seq, " %lu logged blocks per transaction\n", 119162306a36Sopenharmony_ci s->stats->run.rs_blocks_logged / s->stats->ts_tid); 119262306a36Sopenharmony_ci return 0; 119362306a36Sopenharmony_ci} 119462306a36Sopenharmony_ci 119562306a36Sopenharmony_cistatic void jbd2_seq_info_stop(struct seq_file *seq, void *v) 
119662306a36Sopenharmony_ci{ 119762306a36Sopenharmony_ci} 119862306a36Sopenharmony_ci 119962306a36Sopenharmony_cistatic const struct seq_operations jbd2_seq_info_ops = { 120062306a36Sopenharmony_ci .start = jbd2_seq_info_start, 120162306a36Sopenharmony_ci .next = jbd2_seq_info_next, 120262306a36Sopenharmony_ci .stop = jbd2_seq_info_stop, 120362306a36Sopenharmony_ci .show = jbd2_seq_info_show, 120462306a36Sopenharmony_ci}; 120562306a36Sopenharmony_ci 120662306a36Sopenharmony_cistatic int jbd2_seq_info_open(struct inode *inode, struct file *file) 120762306a36Sopenharmony_ci{ 120862306a36Sopenharmony_ci journal_t *journal = pde_data(inode); 120962306a36Sopenharmony_ci struct jbd2_stats_proc_session *s; 121062306a36Sopenharmony_ci int rc, size; 121162306a36Sopenharmony_ci 121262306a36Sopenharmony_ci s = kmalloc(sizeof(*s), GFP_KERNEL); 121362306a36Sopenharmony_ci if (s == NULL) 121462306a36Sopenharmony_ci return -ENOMEM; 121562306a36Sopenharmony_ci size = sizeof(struct transaction_stats_s); 121662306a36Sopenharmony_ci s->stats = kmalloc(size, GFP_KERNEL); 121762306a36Sopenharmony_ci if (s->stats == NULL) { 121862306a36Sopenharmony_ci kfree(s); 121962306a36Sopenharmony_ci return -ENOMEM; 122062306a36Sopenharmony_ci } 122162306a36Sopenharmony_ci spin_lock(&journal->j_history_lock); 122262306a36Sopenharmony_ci memcpy(s->stats, &journal->j_stats, size); 122362306a36Sopenharmony_ci s->journal = journal; 122462306a36Sopenharmony_ci spin_unlock(&journal->j_history_lock); 122562306a36Sopenharmony_ci 122662306a36Sopenharmony_ci rc = seq_open(file, &jbd2_seq_info_ops); 122762306a36Sopenharmony_ci if (rc == 0) { 122862306a36Sopenharmony_ci struct seq_file *m = file->private_data; 122962306a36Sopenharmony_ci m->private = s; 123062306a36Sopenharmony_ci } else { 123162306a36Sopenharmony_ci kfree(s->stats); 123262306a36Sopenharmony_ci kfree(s); 123362306a36Sopenharmony_ci } 123462306a36Sopenharmony_ci return rc; 123562306a36Sopenharmony_ci 123662306a36Sopenharmony_ci} 
123762306a36Sopenharmony_ci 123862306a36Sopenharmony_cistatic int jbd2_seq_info_release(struct inode *inode, struct file *file) 123962306a36Sopenharmony_ci{ 124062306a36Sopenharmony_ci struct seq_file *seq = file->private_data; 124162306a36Sopenharmony_ci struct jbd2_stats_proc_session *s = seq->private; 124262306a36Sopenharmony_ci kfree(s->stats); 124362306a36Sopenharmony_ci kfree(s); 124462306a36Sopenharmony_ci return seq_release(inode, file); 124562306a36Sopenharmony_ci} 124662306a36Sopenharmony_ci 124762306a36Sopenharmony_cistatic const struct proc_ops jbd2_info_proc_ops = { 124862306a36Sopenharmony_ci .proc_open = jbd2_seq_info_open, 124962306a36Sopenharmony_ci .proc_read = seq_read, 125062306a36Sopenharmony_ci .proc_lseek = seq_lseek, 125162306a36Sopenharmony_ci .proc_release = jbd2_seq_info_release, 125262306a36Sopenharmony_ci}; 125362306a36Sopenharmony_ci 125462306a36Sopenharmony_cistatic struct proc_dir_entry *proc_jbd2_stats; 125562306a36Sopenharmony_ci 125662306a36Sopenharmony_cistatic void jbd2_stats_proc_init(journal_t *journal) 125762306a36Sopenharmony_ci{ 125862306a36Sopenharmony_ci journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats); 125962306a36Sopenharmony_ci if (journal->j_proc_entry) { 126062306a36Sopenharmony_ci proc_create_data("info", S_IRUGO, journal->j_proc_entry, 126162306a36Sopenharmony_ci &jbd2_info_proc_ops, journal); 126262306a36Sopenharmony_ci } 126362306a36Sopenharmony_ci} 126462306a36Sopenharmony_ci 126562306a36Sopenharmony_cistatic void jbd2_stats_proc_exit(journal_t *journal) 126662306a36Sopenharmony_ci{ 126762306a36Sopenharmony_ci remove_proc_entry("info", journal->j_proc_entry); 126862306a36Sopenharmony_ci remove_proc_entry(journal->j_devname, proc_jbd2_stats); 126962306a36Sopenharmony_ci} 127062306a36Sopenharmony_ci 127162306a36Sopenharmony_ci/* Minimum size of descriptor tag */ 127262306a36Sopenharmony_cistatic int jbd2_min_tag_size(void) 127362306a36Sopenharmony_ci{ 127462306a36Sopenharmony_ci /* 
127562306a36Sopenharmony_ci * Tag with 32-bit block numbers does not use last four bytes of the 127662306a36Sopenharmony_ci * structure 127762306a36Sopenharmony_ci */ 127862306a36Sopenharmony_ci return sizeof(journal_block_tag_t) - 4; 127962306a36Sopenharmony_ci} 128062306a36Sopenharmony_ci 128162306a36Sopenharmony_ci/** 128262306a36Sopenharmony_ci * jbd2_journal_shrink_scan() 128362306a36Sopenharmony_ci * @shrink: shrinker to work on 128462306a36Sopenharmony_ci * @sc: reclaim request to process 128562306a36Sopenharmony_ci * 128662306a36Sopenharmony_ci * Scan the checkpointed buffer on the checkpoint list and release the 128762306a36Sopenharmony_ci * journal_head. 128862306a36Sopenharmony_ci */ 128962306a36Sopenharmony_cistatic unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink, 129062306a36Sopenharmony_ci struct shrink_control *sc) 129162306a36Sopenharmony_ci{ 129262306a36Sopenharmony_ci journal_t *journal = container_of(shrink, journal_t, j_shrinker); 129362306a36Sopenharmony_ci unsigned long nr_to_scan = sc->nr_to_scan; 129462306a36Sopenharmony_ci unsigned long nr_shrunk; 129562306a36Sopenharmony_ci unsigned long count; 129662306a36Sopenharmony_ci 129762306a36Sopenharmony_ci count = percpu_counter_read_positive(&journal->j_checkpoint_jh_count); 129862306a36Sopenharmony_ci trace_jbd2_shrink_scan_enter(journal, sc->nr_to_scan, count); 129962306a36Sopenharmony_ci 130062306a36Sopenharmony_ci nr_shrunk = jbd2_journal_shrink_checkpoint_list(journal, &nr_to_scan); 130162306a36Sopenharmony_ci 130262306a36Sopenharmony_ci count = percpu_counter_read_positive(&journal->j_checkpoint_jh_count); 130362306a36Sopenharmony_ci trace_jbd2_shrink_scan_exit(journal, nr_to_scan, nr_shrunk, count); 130462306a36Sopenharmony_ci 130562306a36Sopenharmony_ci return nr_shrunk; 130662306a36Sopenharmony_ci} 130762306a36Sopenharmony_ci 130862306a36Sopenharmony_ci/** 130962306a36Sopenharmony_ci * jbd2_journal_shrink_count() 131062306a36Sopenharmony_ci * @shrink: shrinker to work on 
131162306a36Sopenharmony_ci * @sc: reclaim request to process 131262306a36Sopenharmony_ci * 131362306a36Sopenharmony_ci * Count the number of checkpoint buffers on the checkpoint list. 131462306a36Sopenharmony_ci */ 131562306a36Sopenharmony_cistatic unsigned long jbd2_journal_shrink_count(struct shrinker *shrink, 131662306a36Sopenharmony_ci struct shrink_control *sc) 131762306a36Sopenharmony_ci{ 131862306a36Sopenharmony_ci journal_t *journal = container_of(shrink, journal_t, j_shrinker); 131962306a36Sopenharmony_ci unsigned long count; 132062306a36Sopenharmony_ci 132162306a36Sopenharmony_ci count = percpu_counter_read_positive(&journal->j_checkpoint_jh_count); 132262306a36Sopenharmony_ci trace_jbd2_shrink_count(journal, sc->nr_to_scan, count); 132362306a36Sopenharmony_ci 132462306a36Sopenharmony_ci return count; 132562306a36Sopenharmony_ci} 132662306a36Sopenharmony_ci 132762306a36Sopenharmony_ci/* 132862306a36Sopenharmony_ci * If the journal init or create aborts, we need to mark the journal 132962306a36Sopenharmony_ci * superblock as being NULL to prevent the journal destroy from writing 133062306a36Sopenharmony_ci * back a bogus superblock. 133162306a36Sopenharmony_ci */ 133262306a36Sopenharmony_cistatic void journal_fail_superblock(journal_t *journal) 133362306a36Sopenharmony_ci{ 133462306a36Sopenharmony_ci struct buffer_head *bh = journal->j_sb_buffer; 133562306a36Sopenharmony_ci brelse(bh); 133662306a36Sopenharmony_ci journal->j_sb_buffer = NULL; 133762306a36Sopenharmony_ci} 133862306a36Sopenharmony_ci 133962306a36Sopenharmony_ci/* 134062306a36Sopenharmony_ci * Check the superblock for a given journal, performing initial 134162306a36Sopenharmony_ci * validation of the format. 
134262306a36Sopenharmony_ci */ 134362306a36Sopenharmony_cistatic int journal_check_superblock(journal_t *journal) 134462306a36Sopenharmony_ci{ 134562306a36Sopenharmony_ci journal_superblock_t *sb = journal->j_superblock; 134662306a36Sopenharmony_ci int num_fc_blks; 134762306a36Sopenharmony_ci int err = -EINVAL; 134862306a36Sopenharmony_ci 134962306a36Sopenharmony_ci if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || 135062306a36Sopenharmony_ci sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { 135162306a36Sopenharmony_ci printk(KERN_WARNING "JBD2: no valid journal superblock found\n"); 135262306a36Sopenharmony_ci return err; 135362306a36Sopenharmony_ci } 135462306a36Sopenharmony_ci 135562306a36Sopenharmony_ci if (be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V1 && 135662306a36Sopenharmony_ci be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V2) { 135762306a36Sopenharmony_ci printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n"); 135862306a36Sopenharmony_ci return err; 135962306a36Sopenharmony_ci } 136062306a36Sopenharmony_ci 136162306a36Sopenharmony_ci if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) { 136262306a36Sopenharmony_ci printk(KERN_WARNING "JBD2: journal file too short\n"); 136362306a36Sopenharmony_ci return err; 136462306a36Sopenharmony_ci } 136562306a36Sopenharmony_ci 136662306a36Sopenharmony_ci if (be32_to_cpu(sb->s_first) == 0 || 136762306a36Sopenharmony_ci be32_to_cpu(sb->s_first) >= journal->j_total_len) { 136862306a36Sopenharmony_ci printk(KERN_WARNING 136962306a36Sopenharmony_ci "JBD2: Invalid start block of journal: %u\n", 137062306a36Sopenharmony_ci be32_to_cpu(sb->s_first)); 137162306a36Sopenharmony_ci return err; 137262306a36Sopenharmony_ci } 137362306a36Sopenharmony_ci 137462306a36Sopenharmony_ci /* 137562306a36Sopenharmony_ci * If this is a V2 superblock, then we have to check the 137662306a36Sopenharmony_ci * features flags on it. 
137762306a36Sopenharmony_ci */ 137862306a36Sopenharmony_ci if (!jbd2_format_support_feature(journal)) 137962306a36Sopenharmony_ci return 0; 138062306a36Sopenharmony_ci 138162306a36Sopenharmony_ci if ((sb->s_feature_ro_compat & 138262306a36Sopenharmony_ci ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || 138362306a36Sopenharmony_ci (sb->s_feature_incompat & 138462306a36Sopenharmony_ci ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { 138562306a36Sopenharmony_ci printk(KERN_WARNING "JBD2: Unrecognised features on journal\n"); 138662306a36Sopenharmony_ci return err; 138762306a36Sopenharmony_ci } 138862306a36Sopenharmony_ci 138962306a36Sopenharmony_ci num_fc_blks = jbd2_has_feature_fast_commit(journal) ? 139062306a36Sopenharmony_ci jbd2_journal_get_num_fc_blks(sb) : 0; 139162306a36Sopenharmony_ci if (be32_to_cpu(sb->s_maxlen) < JBD2_MIN_JOURNAL_BLOCKS || 139262306a36Sopenharmony_ci be32_to_cpu(sb->s_maxlen) - JBD2_MIN_JOURNAL_BLOCKS < num_fc_blks) { 139362306a36Sopenharmony_ci printk(KERN_ERR "JBD2: journal file too short %u,%d\n", 139462306a36Sopenharmony_ci be32_to_cpu(sb->s_maxlen), num_fc_blks); 139562306a36Sopenharmony_ci return err; 139662306a36Sopenharmony_ci } 139762306a36Sopenharmony_ci 139862306a36Sopenharmony_ci if (jbd2_has_feature_csum2(journal) && 139962306a36Sopenharmony_ci jbd2_has_feature_csum3(journal)) { 140062306a36Sopenharmony_ci /* Can't have checksum v2 and v3 at the same time! */ 140162306a36Sopenharmony_ci printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 " 140262306a36Sopenharmony_ci "at the same time!\n"); 140362306a36Sopenharmony_ci return err; 140462306a36Sopenharmony_ci } 140562306a36Sopenharmony_ci 140662306a36Sopenharmony_ci if (jbd2_journal_has_csum_v2or3_feature(journal) && 140762306a36Sopenharmony_ci jbd2_has_feature_checksum(journal)) { 140862306a36Sopenharmony_ci /* Can't have checksum v1 and v2 on at the same time! 
*/ 140962306a36Sopenharmony_ci printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " 141062306a36Sopenharmony_ci "at the same time!\n"); 141162306a36Sopenharmony_ci return err; 141262306a36Sopenharmony_ci } 141362306a36Sopenharmony_ci 141462306a36Sopenharmony_ci /* Load the checksum driver */ 141562306a36Sopenharmony_ci if (jbd2_journal_has_csum_v2or3_feature(journal)) { 141662306a36Sopenharmony_ci if (sb->s_checksum_type != JBD2_CRC32C_CHKSUM) { 141762306a36Sopenharmony_ci printk(KERN_ERR "JBD2: Unknown checksum type\n"); 141862306a36Sopenharmony_ci return err; 141962306a36Sopenharmony_ci } 142062306a36Sopenharmony_ci 142162306a36Sopenharmony_ci journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 142262306a36Sopenharmony_ci if (IS_ERR(journal->j_chksum_driver)) { 142362306a36Sopenharmony_ci printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); 142462306a36Sopenharmony_ci err = PTR_ERR(journal->j_chksum_driver); 142562306a36Sopenharmony_ci journal->j_chksum_driver = NULL; 142662306a36Sopenharmony_ci return err; 142762306a36Sopenharmony_ci } 142862306a36Sopenharmony_ci /* Check superblock checksum */ 142962306a36Sopenharmony_ci if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) { 143062306a36Sopenharmony_ci printk(KERN_ERR "JBD2: journal checksum error\n"); 143162306a36Sopenharmony_ci err = -EFSBADCRC; 143262306a36Sopenharmony_ci return err; 143362306a36Sopenharmony_ci } 143462306a36Sopenharmony_ci } 143562306a36Sopenharmony_ci 143662306a36Sopenharmony_ci return 0; 143762306a36Sopenharmony_ci} 143862306a36Sopenharmony_ci 143962306a36Sopenharmony_cistatic int journal_revoke_records_per_block(journal_t *journal) 144062306a36Sopenharmony_ci{ 144162306a36Sopenharmony_ci int record_size; 144262306a36Sopenharmony_ci int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t); 144362306a36Sopenharmony_ci 144462306a36Sopenharmony_ci if (jbd2_has_feature_64bit(journal)) 144562306a36Sopenharmony_ci record_size = 8; 
144662306a36Sopenharmony_ci else 144762306a36Sopenharmony_ci record_size = 4; 144862306a36Sopenharmony_ci 144962306a36Sopenharmony_ci if (jbd2_journal_has_csum_v2or3(journal)) 145062306a36Sopenharmony_ci space -= sizeof(struct jbd2_journal_block_tail); 145162306a36Sopenharmony_ci return space / record_size; 145262306a36Sopenharmony_ci} 145362306a36Sopenharmony_ci 145462306a36Sopenharmony_ci/* 145562306a36Sopenharmony_ci * Load the on-disk journal superblock and read the key fields into the 145662306a36Sopenharmony_ci * journal_t. 145762306a36Sopenharmony_ci */ 145862306a36Sopenharmony_cistatic int journal_load_superblock(journal_t *journal) 145962306a36Sopenharmony_ci{ 146062306a36Sopenharmony_ci int err; 146162306a36Sopenharmony_ci struct buffer_head *bh; 146262306a36Sopenharmony_ci journal_superblock_t *sb; 146362306a36Sopenharmony_ci 146462306a36Sopenharmony_ci bh = getblk_unmovable(journal->j_dev, journal->j_blk_offset, 146562306a36Sopenharmony_ci journal->j_blocksize); 146662306a36Sopenharmony_ci if (bh) 146762306a36Sopenharmony_ci err = bh_read(bh, 0); 146862306a36Sopenharmony_ci if (!bh || err < 0) { 146962306a36Sopenharmony_ci pr_err("%s: Cannot read journal superblock\n", __func__); 147062306a36Sopenharmony_ci brelse(bh); 147162306a36Sopenharmony_ci return -EIO; 147262306a36Sopenharmony_ci } 147362306a36Sopenharmony_ci 147462306a36Sopenharmony_ci journal->j_sb_buffer = bh; 147562306a36Sopenharmony_ci sb = (journal_superblock_t *)bh->b_data; 147662306a36Sopenharmony_ci journal->j_superblock = sb; 147762306a36Sopenharmony_ci err = journal_check_superblock(journal); 147862306a36Sopenharmony_ci if (err) { 147962306a36Sopenharmony_ci journal_fail_superblock(journal); 148062306a36Sopenharmony_ci return err; 148162306a36Sopenharmony_ci } 148262306a36Sopenharmony_ci 148362306a36Sopenharmony_ci journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); 148462306a36Sopenharmony_ci journal->j_tail = be32_to_cpu(sb->s_start); 148562306a36Sopenharmony_ci journal->j_first 
= be32_to_cpu(sb->s_first); 148662306a36Sopenharmony_ci journal->j_errno = be32_to_cpu(sb->s_errno); 148762306a36Sopenharmony_ci journal->j_last = be32_to_cpu(sb->s_maxlen); 148862306a36Sopenharmony_ci 148962306a36Sopenharmony_ci if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len) 149062306a36Sopenharmony_ci journal->j_total_len = be32_to_cpu(sb->s_maxlen); 149162306a36Sopenharmony_ci /* Precompute checksum seed for all metadata */ 149262306a36Sopenharmony_ci if (jbd2_journal_has_csum_v2or3(journal)) 149362306a36Sopenharmony_ci journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, 149462306a36Sopenharmony_ci sizeof(sb->s_uuid)); 149562306a36Sopenharmony_ci journal->j_revoke_records_per_block = 149662306a36Sopenharmony_ci journal_revoke_records_per_block(journal); 149762306a36Sopenharmony_ci 149862306a36Sopenharmony_ci if (jbd2_has_feature_fast_commit(journal)) { 149962306a36Sopenharmony_ci journal->j_fc_last = be32_to_cpu(sb->s_maxlen); 150062306a36Sopenharmony_ci journal->j_last = journal->j_fc_last - 150162306a36Sopenharmony_ci jbd2_journal_get_num_fc_blks(sb); 150262306a36Sopenharmony_ci journal->j_fc_first = journal->j_last + 1; 150362306a36Sopenharmony_ci journal->j_fc_off = 0; 150462306a36Sopenharmony_ci } 150562306a36Sopenharmony_ci 150662306a36Sopenharmony_ci return 0; 150762306a36Sopenharmony_ci} 150862306a36Sopenharmony_ci 150962306a36Sopenharmony_ci 151062306a36Sopenharmony_ci/* 151162306a36Sopenharmony_ci * Management for journal control blocks: functions to create and 151262306a36Sopenharmony_ci * destroy journal_t structures, and to initialise and read existing 151362306a36Sopenharmony_ci * journal blocks from disk. */ 151462306a36Sopenharmony_ci 151562306a36Sopenharmony_ci/* First: create and setup a journal_t object in memory. We initialise 151662306a36Sopenharmony_ci * very few fields yet: that has to wait until we have created the 151762306a36Sopenharmony_ci * journal structures from from scratch, or loaded them from disk. 
*/ 151862306a36Sopenharmony_ci 151962306a36Sopenharmony_cistatic journal_t *journal_init_common(struct block_device *bdev, 152062306a36Sopenharmony_ci struct block_device *fs_dev, 152162306a36Sopenharmony_ci unsigned long long start, int len, int blocksize) 152262306a36Sopenharmony_ci{ 152362306a36Sopenharmony_ci static struct lock_class_key jbd2_trans_commit_key; 152462306a36Sopenharmony_ci journal_t *journal; 152562306a36Sopenharmony_ci int err; 152662306a36Sopenharmony_ci int n; 152762306a36Sopenharmony_ci 152862306a36Sopenharmony_ci journal = kzalloc(sizeof(*journal), GFP_KERNEL); 152962306a36Sopenharmony_ci if (!journal) 153062306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 153162306a36Sopenharmony_ci 153262306a36Sopenharmony_ci journal->j_blocksize = blocksize; 153362306a36Sopenharmony_ci journal->j_dev = bdev; 153462306a36Sopenharmony_ci journal->j_fs_dev = fs_dev; 153562306a36Sopenharmony_ci journal->j_blk_offset = start; 153662306a36Sopenharmony_ci journal->j_total_len = len; 153762306a36Sopenharmony_ci 153862306a36Sopenharmony_ci err = journal_load_superblock(journal); 153962306a36Sopenharmony_ci if (err) 154062306a36Sopenharmony_ci goto err_cleanup; 154162306a36Sopenharmony_ci 154262306a36Sopenharmony_ci init_waitqueue_head(&journal->j_wait_transaction_locked); 154362306a36Sopenharmony_ci init_waitqueue_head(&journal->j_wait_done_commit); 154462306a36Sopenharmony_ci init_waitqueue_head(&journal->j_wait_commit); 154562306a36Sopenharmony_ci init_waitqueue_head(&journal->j_wait_updates); 154662306a36Sopenharmony_ci init_waitqueue_head(&journal->j_wait_reserved); 154762306a36Sopenharmony_ci init_waitqueue_head(&journal->j_fc_wait); 154862306a36Sopenharmony_ci mutex_init(&journal->j_abort_mutex); 154962306a36Sopenharmony_ci mutex_init(&journal->j_barrier); 155062306a36Sopenharmony_ci mutex_init(&journal->j_checkpoint_mutex); 155162306a36Sopenharmony_ci spin_lock_init(&journal->j_revoke_lock); 155262306a36Sopenharmony_ci spin_lock_init(&journal->j_list_lock); 
155362306a36Sopenharmony_ci spin_lock_init(&journal->j_history_lock); 155462306a36Sopenharmony_ci rwlock_init(&journal->j_state_lock); 155562306a36Sopenharmony_ci 155662306a36Sopenharmony_ci journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); 155762306a36Sopenharmony_ci journal->j_min_batch_time = 0; 155862306a36Sopenharmony_ci journal->j_max_batch_time = 15000; /* 15ms */ 155962306a36Sopenharmony_ci atomic_set(&journal->j_reserved_credits, 0); 156062306a36Sopenharmony_ci lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle", 156162306a36Sopenharmony_ci &jbd2_trans_commit_key, 0); 156262306a36Sopenharmony_ci 156362306a36Sopenharmony_ci /* The journal is marked for error until we succeed with recovery! */ 156462306a36Sopenharmony_ci journal->j_flags = JBD2_ABORT; 156562306a36Sopenharmony_ci 156662306a36Sopenharmony_ci /* Set up a default-sized revoke table for the new mount. */ 156762306a36Sopenharmony_ci err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH); 156862306a36Sopenharmony_ci if (err) 156962306a36Sopenharmony_ci goto err_cleanup; 157062306a36Sopenharmony_ci 157162306a36Sopenharmony_ci /* 157262306a36Sopenharmony_ci * journal descriptor can store up to n blocks, we need enough 157362306a36Sopenharmony_ci * buffers to write out full descriptor block. 
157462306a36Sopenharmony_ci */ 157562306a36Sopenharmony_ci err = -ENOMEM; 157662306a36Sopenharmony_ci n = journal->j_blocksize / jbd2_min_tag_size(); 157762306a36Sopenharmony_ci journal->j_wbufsize = n; 157862306a36Sopenharmony_ci journal->j_fc_wbuf = NULL; 157962306a36Sopenharmony_ci journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *), 158062306a36Sopenharmony_ci GFP_KERNEL); 158162306a36Sopenharmony_ci if (!journal->j_wbuf) 158262306a36Sopenharmony_ci goto err_cleanup; 158362306a36Sopenharmony_ci 158462306a36Sopenharmony_ci err = percpu_counter_init(&journal->j_checkpoint_jh_count, 0, 158562306a36Sopenharmony_ci GFP_KERNEL); 158662306a36Sopenharmony_ci if (err) 158762306a36Sopenharmony_ci goto err_cleanup; 158862306a36Sopenharmony_ci 158962306a36Sopenharmony_ci journal->j_shrink_transaction = NULL; 159062306a36Sopenharmony_ci journal->j_shrinker.scan_objects = jbd2_journal_shrink_scan; 159162306a36Sopenharmony_ci journal->j_shrinker.count_objects = jbd2_journal_shrink_count; 159262306a36Sopenharmony_ci journal->j_shrinker.seeks = DEFAULT_SEEKS; 159362306a36Sopenharmony_ci journal->j_shrinker.batch = journal->j_max_transaction_buffers; 159462306a36Sopenharmony_ci err = register_shrinker(&journal->j_shrinker, "jbd2-journal:(%u:%u)", 159562306a36Sopenharmony_ci MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); 159662306a36Sopenharmony_ci if (err) 159762306a36Sopenharmony_ci goto err_cleanup; 159862306a36Sopenharmony_ci 159962306a36Sopenharmony_ci return journal; 160062306a36Sopenharmony_ci 160162306a36Sopenharmony_cierr_cleanup: 160262306a36Sopenharmony_ci percpu_counter_destroy(&journal->j_checkpoint_jh_count); 160362306a36Sopenharmony_ci if (journal->j_chksum_driver) 160462306a36Sopenharmony_ci crypto_free_shash(journal->j_chksum_driver); 160562306a36Sopenharmony_ci kfree(journal->j_wbuf); 160662306a36Sopenharmony_ci jbd2_journal_destroy_revoke(journal); 160762306a36Sopenharmony_ci journal_fail_superblock(journal); 160862306a36Sopenharmony_ci kfree(journal); 
160962306a36Sopenharmony_ci return ERR_PTR(err); 161062306a36Sopenharmony_ci} 161162306a36Sopenharmony_ci 161262306a36Sopenharmony_ci/* jbd2_journal_init_dev and jbd2_journal_init_inode: 161362306a36Sopenharmony_ci * 161462306a36Sopenharmony_ci * Create a journal structure assigned some fixed set of disk blocks to 161562306a36Sopenharmony_ci * the journal. We don't actually touch those disk blocks yet, but we 161662306a36Sopenharmony_ci * need to set up all of the mapping information to tell the journaling 161762306a36Sopenharmony_ci * system where the journal blocks are. 161862306a36Sopenharmony_ci * 161962306a36Sopenharmony_ci */ 162062306a36Sopenharmony_ci 162162306a36Sopenharmony_ci/** 162262306a36Sopenharmony_ci * journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure 162362306a36Sopenharmony_ci * @bdev: Block device on which to create the journal 162462306a36Sopenharmony_ci * @fs_dev: Device which hold journalled filesystem for this journal. 162562306a36Sopenharmony_ci * @start: Block nr Start of journal. 162662306a36Sopenharmony_ci * @len: Length of the journal in blocks. 162762306a36Sopenharmony_ci * @blocksize: blocksize of journalling device 162862306a36Sopenharmony_ci * 162962306a36Sopenharmony_ci * Returns: a newly created journal_t * 163062306a36Sopenharmony_ci * 163162306a36Sopenharmony_ci * jbd2_journal_init_dev creates a journal which maps a fixed contiguous 163262306a36Sopenharmony_ci * range of blocks on an arbitrary block device. 
163362306a36Sopenharmony_ci * 163462306a36Sopenharmony_ci */ 163562306a36Sopenharmony_cijournal_t *jbd2_journal_init_dev(struct block_device *bdev, 163662306a36Sopenharmony_ci struct block_device *fs_dev, 163762306a36Sopenharmony_ci unsigned long long start, int len, int blocksize) 163862306a36Sopenharmony_ci{ 163962306a36Sopenharmony_ci journal_t *journal; 164062306a36Sopenharmony_ci 164162306a36Sopenharmony_ci journal = journal_init_common(bdev, fs_dev, start, len, blocksize); 164262306a36Sopenharmony_ci if (IS_ERR(journal)) 164362306a36Sopenharmony_ci return ERR_CAST(journal); 164462306a36Sopenharmony_ci 164562306a36Sopenharmony_ci snprintf(journal->j_devname, sizeof(journal->j_devname), 164662306a36Sopenharmony_ci "%pg", journal->j_dev); 164762306a36Sopenharmony_ci strreplace(journal->j_devname, '/', '!'); 164862306a36Sopenharmony_ci jbd2_stats_proc_init(journal); 164962306a36Sopenharmony_ci 165062306a36Sopenharmony_ci return journal; 165162306a36Sopenharmony_ci} 165262306a36Sopenharmony_ci 165362306a36Sopenharmony_ci/** 165462306a36Sopenharmony_ci * journal_t * jbd2_journal_init_inode () - creates a journal which maps to a inode. 165562306a36Sopenharmony_ci * @inode: An inode to create the journal in 165662306a36Sopenharmony_ci * 165762306a36Sopenharmony_ci * jbd2_journal_init_inode creates a journal which maps an on-disk inode as 165862306a36Sopenharmony_ci * the journal. The inode must exist already, must support bmap() and 165962306a36Sopenharmony_ci * must have all data blocks preallocated. 
166062306a36Sopenharmony_ci */ 166162306a36Sopenharmony_cijournal_t *jbd2_journal_init_inode(struct inode *inode) 166262306a36Sopenharmony_ci{ 166362306a36Sopenharmony_ci journal_t *journal; 166462306a36Sopenharmony_ci sector_t blocknr; 166562306a36Sopenharmony_ci int err = 0; 166662306a36Sopenharmony_ci 166762306a36Sopenharmony_ci blocknr = 0; 166862306a36Sopenharmony_ci err = bmap(inode, &blocknr); 166962306a36Sopenharmony_ci if (err || !blocknr) { 167062306a36Sopenharmony_ci pr_err("%s: Cannot locate journal superblock\n", __func__); 167162306a36Sopenharmony_ci return err ? ERR_PTR(err) : ERR_PTR(-EINVAL); 167262306a36Sopenharmony_ci } 167362306a36Sopenharmony_ci 167462306a36Sopenharmony_ci jbd2_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n", 167562306a36Sopenharmony_ci inode->i_sb->s_id, inode->i_ino, (long long) inode->i_size, 167662306a36Sopenharmony_ci inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize); 167762306a36Sopenharmony_ci 167862306a36Sopenharmony_ci journal = journal_init_common(inode->i_sb->s_bdev, inode->i_sb->s_bdev, 167962306a36Sopenharmony_ci blocknr, inode->i_size >> inode->i_sb->s_blocksize_bits, 168062306a36Sopenharmony_ci inode->i_sb->s_blocksize); 168162306a36Sopenharmony_ci if (IS_ERR(journal)) 168262306a36Sopenharmony_ci return ERR_CAST(journal); 168362306a36Sopenharmony_ci 168462306a36Sopenharmony_ci journal->j_inode = inode; 168562306a36Sopenharmony_ci snprintf(journal->j_devname, sizeof(journal->j_devname), 168662306a36Sopenharmony_ci "%pg-%lu", journal->j_dev, journal->j_inode->i_ino); 168762306a36Sopenharmony_ci strreplace(journal->j_devname, '/', '!'); 168862306a36Sopenharmony_ci jbd2_stats_proc_init(journal); 168962306a36Sopenharmony_ci 169062306a36Sopenharmony_ci return journal; 169162306a36Sopenharmony_ci} 169262306a36Sopenharmony_ci 169362306a36Sopenharmony_ci/* 169462306a36Sopenharmony_ci * Given a journal_t structure, initialise the various fields for 169562306a36Sopenharmony_ci * startup of a new 
journaling session. We use this both when creating 169662306a36Sopenharmony_ci * a journal, and after recovering an old journal to reset it for 169762306a36Sopenharmony_ci * subsequent use. 169862306a36Sopenharmony_ci */ 169962306a36Sopenharmony_ci 170062306a36Sopenharmony_cistatic int journal_reset(journal_t *journal) 170162306a36Sopenharmony_ci{ 170262306a36Sopenharmony_ci journal_superblock_t *sb = journal->j_superblock; 170362306a36Sopenharmony_ci unsigned long long first, last; 170462306a36Sopenharmony_ci 170562306a36Sopenharmony_ci first = be32_to_cpu(sb->s_first); 170662306a36Sopenharmony_ci last = be32_to_cpu(sb->s_maxlen); 170762306a36Sopenharmony_ci if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) { 170862306a36Sopenharmony_ci printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n", 170962306a36Sopenharmony_ci first, last); 171062306a36Sopenharmony_ci journal_fail_superblock(journal); 171162306a36Sopenharmony_ci return -EINVAL; 171262306a36Sopenharmony_ci } 171362306a36Sopenharmony_ci 171462306a36Sopenharmony_ci journal->j_first = first; 171562306a36Sopenharmony_ci journal->j_last = last; 171662306a36Sopenharmony_ci 171762306a36Sopenharmony_ci if (journal->j_head != 0 && journal->j_flags & JBD2_CYCLE_RECORD) { 171862306a36Sopenharmony_ci /* 171962306a36Sopenharmony_ci * Disable the cycled recording mode if the journal head block 172062306a36Sopenharmony_ci * number is not correct. 
172162306a36Sopenharmony_ci */ 172262306a36Sopenharmony_ci if (journal->j_head < first || journal->j_head >= last) { 172362306a36Sopenharmony_ci printk(KERN_WARNING "JBD2: Incorrect Journal head block %lu, " 172462306a36Sopenharmony_ci "disable journal_cycle_record\n", 172562306a36Sopenharmony_ci journal->j_head); 172662306a36Sopenharmony_ci journal->j_head = journal->j_first; 172762306a36Sopenharmony_ci } 172862306a36Sopenharmony_ci } else { 172962306a36Sopenharmony_ci journal->j_head = journal->j_first; 173062306a36Sopenharmony_ci } 173162306a36Sopenharmony_ci journal->j_tail = journal->j_head; 173262306a36Sopenharmony_ci journal->j_free = journal->j_last - journal->j_first; 173362306a36Sopenharmony_ci 173462306a36Sopenharmony_ci journal->j_tail_sequence = journal->j_transaction_sequence; 173562306a36Sopenharmony_ci journal->j_commit_sequence = journal->j_transaction_sequence - 1; 173662306a36Sopenharmony_ci journal->j_commit_request = journal->j_commit_sequence; 173762306a36Sopenharmony_ci 173862306a36Sopenharmony_ci journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal); 173962306a36Sopenharmony_ci 174062306a36Sopenharmony_ci /* 174162306a36Sopenharmony_ci * Now that journal recovery is done, turn fast commits off here. This 174262306a36Sopenharmony_ci * way, if fast commit was enabled before the crash but if now FS has 174362306a36Sopenharmony_ci * disabled it, we don't enable fast commits. 174462306a36Sopenharmony_ci */ 174562306a36Sopenharmony_ci jbd2_clear_feature_fast_commit(journal); 174662306a36Sopenharmony_ci 174762306a36Sopenharmony_ci /* 174862306a36Sopenharmony_ci * As a special case, if the on-disk copy is already marked as needing 174962306a36Sopenharmony_ci * no recovery (s_start == 0), then we can safely defer the superblock 175062306a36Sopenharmony_ci * update until the next commit by setting JBD2_FLUSHED. This avoids 175162306a36Sopenharmony_ci * attempting a write to a potential-readonly device. 
175262306a36Sopenharmony_ci */ 175362306a36Sopenharmony_ci if (sb->s_start == 0) { 175462306a36Sopenharmony_ci jbd2_debug(1, "JBD2: Skipping superblock update on recovered sb " 175562306a36Sopenharmony_ci "(start %ld, seq %u, errno %d)\n", 175662306a36Sopenharmony_ci journal->j_tail, journal->j_tail_sequence, 175762306a36Sopenharmony_ci journal->j_errno); 175862306a36Sopenharmony_ci journal->j_flags |= JBD2_FLUSHED; 175962306a36Sopenharmony_ci } else { 176062306a36Sopenharmony_ci /* Lock here to make assertions happy... */ 176162306a36Sopenharmony_ci mutex_lock_io(&journal->j_checkpoint_mutex); 176262306a36Sopenharmony_ci /* 176362306a36Sopenharmony_ci * Update log tail information. We use REQ_FUA since new 176462306a36Sopenharmony_ci * transaction will start reusing journal space and so we 176562306a36Sopenharmony_ci * must make sure information about current log tail is on 176662306a36Sopenharmony_ci * disk before that. 176762306a36Sopenharmony_ci */ 176862306a36Sopenharmony_ci jbd2_journal_update_sb_log_tail(journal, 176962306a36Sopenharmony_ci journal->j_tail_sequence, 177062306a36Sopenharmony_ci journal->j_tail, REQ_FUA); 177162306a36Sopenharmony_ci mutex_unlock(&journal->j_checkpoint_mutex); 177262306a36Sopenharmony_ci } 177362306a36Sopenharmony_ci return jbd2_journal_start_thread(journal); 177462306a36Sopenharmony_ci} 177562306a36Sopenharmony_ci 177662306a36Sopenharmony_ci/* 177762306a36Sopenharmony_ci * This function expects that the caller will have locked the journal 177862306a36Sopenharmony_ci * buffer head, and will return with it unlocked 177962306a36Sopenharmony_ci */ 178062306a36Sopenharmony_cistatic int jbd2_write_superblock(journal_t *journal, blk_opf_t write_flags) 178162306a36Sopenharmony_ci{ 178262306a36Sopenharmony_ci struct buffer_head *bh = journal->j_sb_buffer; 178362306a36Sopenharmony_ci journal_superblock_t *sb = journal->j_superblock; 178462306a36Sopenharmony_ci int ret = 0; 178562306a36Sopenharmony_ci 178662306a36Sopenharmony_ci /* 
Buffer got discarded which means block device got invalidated */ 178762306a36Sopenharmony_ci if (!buffer_mapped(bh)) { 178862306a36Sopenharmony_ci unlock_buffer(bh); 178962306a36Sopenharmony_ci return -EIO; 179062306a36Sopenharmony_ci } 179162306a36Sopenharmony_ci 179262306a36Sopenharmony_ci /* 179362306a36Sopenharmony_ci * Always set high priority flags to exempt from block layer's 179462306a36Sopenharmony_ci * QOS policies, e.g. writeback throttle. 179562306a36Sopenharmony_ci */ 179662306a36Sopenharmony_ci write_flags |= JBD2_JOURNAL_REQ_FLAGS; 179762306a36Sopenharmony_ci if (!(journal->j_flags & JBD2_BARRIER)) 179862306a36Sopenharmony_ci write_flags &= ~(REQ_FUA | REQ_PREFLUSH); 179962306a36Sopenharmony_ci 180062306a36Sopenharmony_ci trace_jbd2_write_superblock(journal, write_flags); 180162306a36Sopenharmony_ci 180262306a36Sopenharmony_ci if (buffer_write_io_error(bh)) { 180362306a36Sopenharmony_ci /* 180462306a36Sopenharmony_ci * Oh, dear. A previous attempt to write the journal 180562306a36Sopenharmony_ci * superblock failed. This could happen because the 180662306a36Sopenharmony_ci * USB device was yanked out. Or it could happen to 180762306a36Sopenharmony_ci * be a transient write error and maybe the block will 180862306a36Sopenharmony_ci * be remapped. Nothing we can do but to retry the 180962306a36Sopenharmony_ci * write and hope for the best. 
181062306a36Sopenharmony_ci */ 181162306a36Sopenharmony_ci printk(KERN_ERR "JBD2: previous I/O error detected " 181262306a36Sopenharmony_ci "for journal superblock update for %s.\n", 181362306a36Sopenharmony_ci journal->j_devname); 181462306a36Sopenharmony_ci clear_buffer_write_io_error(bh); 181562306a36Sopenharmony_ci set_buffer_uptodate(bh); 181662306a36Sopenharmony_ci } 181762306a36Sopenharmony_ci if (jbd2_journal_has_csum_v2or3(journal)) 181862306a36Sopenharmony_ci sb->s_checksum = jbd2_superblock_csum(journal, sb); 181962306a36Sopenharmony_ci get_bh(bh); 182062306a36Sopenharmony_ci bh->b_end_io = end_buffer_write_sync; 182162306a36Sopenharmony_ci submit_bh(REQ_OP_WRITE | write_flags, bh); 182262306a36Sopenharmony_ci wait_on_buffer(bh); 182362306a36Sopenharmony_ci if (buffer_write_io_error(bh)) { 182462306a36Sopenharmony_ci clear_buffer_write_io_error(bh); 182562306a36Sopenharmony_ci set_buffer_uptodate(bh); 182662306a36Sopenharmony_ci ret = -EIO; 182762306a36Sopenharmony_ci } 182862306a36Sopenharmony_ci if (ret) { 182962306a36Sopenharmony_ci printk(KERN_ERR "JBD2: I/O error when updating journal superblock for %s.\n", 183062306a36Sopenharmony_ci journal->j_devname); 183162306a36Sopenharmony_ci if (!is_journal_aborted(journal)) 183262306a36Sopenharmony_ci jbd2_journal_abort(journal, ret); 183362306a36Sopenharmony_ci } 183462306a36Sopenharmony_ci 183562306a36Sopenharmony_ci return ret; 183662306a36Sopenharmony_ci} 183762306a36Sopenharmony_ci 183862306a36Sopenharmony_ci/** 183962306a36Sopenharmony_ci * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk. 184062306a36Sopenharmony_ci * @journal: The journal to update. 
184162306a36Sopenharmony_ci * @tail_tid: TID of the new transaction at the tail of the log 184262306a36Sopenharmony_ci * @tail_block: The first block of the transaction at the tail of the log 184362306a36Sopenharmony_ci * @write_flags: Flags for the journal sb write operation 184462306a36Sopenharmony_ci * 184562306a36Sopenharmony_ci * Update a journal's superblock information about log tail and write it to 184662306a36Sopenharmony_ci * disk, waiting for the IO to complete. 184762306a36Sopenharmony_ci */ 184862306a36Sopenharmony_ciint jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, 184962306a36Sopenharmony_ci unsigned long tail_block, 185062306a36Sopenharmony_ci blk_opf_t write_flags) 185162306a36Sopenharmony_ci{ 185262306a36Sopenharmony_ci journal_superblock_t *sb = journal->j_superblock; 185362306a36Sopenharmony_ci int ret; 185462306a36Sopenharmony_ci 185562306a36Sopenharmony_ci if (is_journal_aborted(journal)) 185662306a36Sopenharmony_ci return -EIO; 185762306a36Sopenharmony_ci if (test_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags)) { 185862306a36Sopenharmony_ci jbd2_journal_abort(journal, -EIO); 185962306a36Sopenharmony_ci return -EIO; 186062306a36Sopenharmony_ci } 186162306a36Sopenharmony_ci 186262306a36Sopenharmony_ci BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 186362306a36Sopenharmony_ci jbd2_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", 186462306a36Sopenharmony_ci tail_block, tail_tid); 186562306a36Sopenharmony_ci 186662306a36Sopenharmony_ci lock_buffer(journal->j_sb_buffer); 186762306a36Sopenharmony_ci sb->s_sequence = cpu_to_be32(tail_tid); 186862306a36Sopenharmony_ci sb->s_start = cpu_to_be32(tail_block); 186962306a36Sopenharmony_ci 187062306a36Sopenharmony_ci ret = jbd2_write_superblock(journal, write_flags); 187162306a36Sopenharmony_ci if (ret) 187262306a36Sopenharmony_ci goto out; 187362306a36Sopenharmony_ci 187462306a36Sopenharmony_ci /* Log is no longer empty */ 187562306a36Sopenharmony_ci 
write_lock(&journal->j_state_lock); 187662306a36Sopenharmony_ci WARN_ON(!sb->s_sequence); 187762306a36Sopenharmony_ci journal->j_flags &= ~JBD2_FLUSHED; 187862306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 187962306a36Sopenharmony_ci 188062306a36Sopenharmony_ciout: 188162306a36Sopenharmony_ci return ret; 188262306a36Sopenharmony_ci} 188362306a36Sopenharmony_ci 188462306a36Sopenharmony_ci/** 188562306a36Sopenharmony_ci * jbd2_mark_journal_empty() - Mark on disk journal as empty. 188662306a36Sopenharmony_ci * @journal: The journal to update. 188762306a36Sopenharmony_ci * @write_flags: Flags for the journal sb write operation 188862306a36Sopenharmony_ci * 188962306a36Sopenharmony_ci * Update a journal's dynamic superblock fields to show that journal is empty. 189062306a36Sopenharmony_ci * Write updated superblock to disk waiting for IO to complete. 189162306a36Sopenharmony_ci */ 189262306a36Sopenharmony_cistatic void jbd2_mark_journal_empty(journal_t *journal, blk_opf_t write_flags) 189362306a36Sopenharmony_ci{ 189462306a36Sopenharmony_ci journal_superblock_t *sb = journal->j_superblock; 189562306a36Sopenharmony_ci bool had_fast_commit = false; 189662306a36Sopenharmony_ci 189762306a36Sopenharmony_ci BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 189862306a36Sopenharmony_ci lock_buffer(journal->j_sb_buffer); 189962306a36Sopenharmony_ci if (sb->s_start == 0) { /* Is it already empty? 
*/ 190062306a36Sopenharmony_ci unlock_buffer(journal->j_sb_buffer); 190162306a36Sopenharmony_ci return; 190262306a36Sopenharmony_ci } 190362306a36Sopenharmony_ci 190462306a36Sopenharmony_ci jbd2_debug(1, "JBD2: Marking journal as empty (seq %u)\n", 190562306a36Sopenharmony_ci journal->j_tail_sequence); 190662306a36Sopenharmony_ci 190762306a36Sopenharmony_ci sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 190862306a36Sopenharmony_ci sb->s_start = cpu_to_be32(0); 190962306a36Sopenharmony_ci sb->s_head = cpu_to_be32(journal->j_head); 191062306a36Sopenharmony_ci if (jbd2_has_feature_fast_commit(journal)) { 191162306a36Sopenharmony_ci /* 191262306a36Sopenharmony_ci * When journal is clean, no need to commit fast commit flag and 191362306a36Sopenharmony_ci * make file system incompatible with older kernels. 191462306a36Sopenharmony_ci */ 191562306a36Sopenharmony_ci jbd2_clear_feature_fast_commit(journal); 191662306a36Sopenharmony_ci had_fast_commit = true; 191762306a36Sopenharmony_ci } 191862306a36Sopenharmony_ci 191962306a36Sopenharmony_ci jbd2_write_superblock(journal, write_flags); 192062306a36Sopenharmony_ci 192162306a36Sopenharmony_ci if (had_fast_commit) 192262306a36Sopenharmony_ci jbd2_set_feature_fast_commit(journal); 192362306a36Sopenharmony_ci 192462306a36Sopenharmony_ci /* Log is no longer empty */ 192562306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 192662306a36Sopenharmony_ci journal->j_flags |= JBD2_FLUSHED; 192762306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 192862306a36Sopenharmony_ci} 192962306a36Sopenharmony_ci 193062306a36Sopenharmony_ci/** 193162306a36Sopenharmony_ci * __jbd2_journal_erase() - Discard or zeroout journal blocks (excluding superblock) 193262306a36Sopenharmony_ci * @journal: The journal to erase. 193362306a36Sopenharmony_ci * @flags: A discard/zeroout request is sent for each physically contigous 193462306a36Sopenharmony_ci * region of the journal. 
Either JBD2_JOURNAL_FLUSH_DISCARD or 193562306a36Sopenharmony_ci * JBD2_JOURNAL_FLUSH_ZEROOUT must be set to determine which operation 193662306a36Sopenharmony_ci * to perform. 193762306a36Sopenharmony_ci * 193862306a36Sopenharmony_ci * Note: JBD2_JOURNAL_FLUSH_ZEROOUT attempts to use hardware offload. Zeroes 193962306a36Sopenharmony_ci * will be explicitly written if no hardware offload is available, see 194062306a36Sopenharmony_ci * blkdev_issue_zeroout for more details. 194162306a36Sopenharmony_ci */ 194262306a36Sopenharmony_cistatic int __jbd2_journal_erase(journal_t *journal, unsigned int flags) 194362306a36Sopenharmony_ci{ 194462306a36Sopenharmony_ci int err = 0; 194562306a36Sopenharmony_ci unsigned long block, log_offset; /* logical */ 194662306a36Sopenharmony_ci unsigned long long phys_block, block_start, block_stop; /* physical */ 194762306a36Sopenharmony_ci loff_t byte_start, byte_stop, byte_count; 194862306a36Sopenharmony_ci 194962306a36Sopenharmony_ci /* flags must be set to either discard or zeroout */ 195062306a36Sopenharmony_ci if ((flags & ~JBD2_JOURNAL_FLUSH_VALID) || !flags || 195162306a36Sopenharmony_ci ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && 195262306a36Sopenharmony_ci (flags & JBD2_JOURNAL_FLUSH_ZEROOUT))) 195362306a36Sopenharmony_ci return -EINVAL; 195462306a36Sopenharmony_ci 195562306a36Sopenharmony_ci if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && 195662306a36Sopenharmony_ci !bdev_max_discard_sectors(journal->j_dev)) 195762306a36Sopenharmony_ci return -EOPNOTSUPP; 195862306a36Sopenharmony_ci 195962306a36Sopenharmony_ci /* 196062306a36Sopenharmony_ci * lookup block mapping and issue discard/zeroout for each 196162306a36Sopenharmony_ci * contiguous region 196262306a36Sopenharmony_ci */ 196362306a36Sopenharmony_ci log_offset = be32_to_cpu(journal->j_superblock->s_first); 196462306a36Sopenharmony_ci block_start = ~0ULL; 196562306a36Sopenharmony_ci for (block = log_offset; block < journal->j_total_len; block++) { 196662306a36Sopenharmony_ci err = 
jbd2_journal_bmap(journal, block, &phys_block); 196762306a36Sopenharmony_ci if (err) { 196862306a36Sopenharmony_ci pr_err("JBD2: bad block at offset %lu", block); 196962306a36Sopenharmony_ci return err; 197062306a36Sopenharmony_ci } 197162306a36Sopenharmony_ci 197262306a36Sopenharmony_ci if (block_start == ~0ULL) { 197362306a36Sopenharmony_ci block_start = phys_block; 197462306a36Sopenharmony_ci block_stop = block_start - 1; 197562306a36Sopenharmony_ci } 197662306a36Sopenharmony_ci 197762306a36Sopenharmony_ci /* 197862306a36Sopenharmony_ci * last block not contiguous with current block, 197962306a36Sopenharmony_ci * process last contiguous region and return to this block on 198062306a36Sopenharmony_ci * next loop 198162306a36Sopenharmony_ci */ 198262306a36Sopenharmony_ci if (phys_block != block_stop + 1) { 198362306a36Sopenharmony_ci block--; 198462306a36Sopenharmony_ci } else { 198562306a36Sopenharmony_ci block_stop++; 198662306a36Sopenharmony_ci /* 198762306a36Sopenharmony_ci * if this isn't the last block of journal, 198862306a36Sopenharmony_ci * no need to process now because next block may also 198962306a36Sopenharmony_ci * be part of this contiguous region 199062306a36Sopenharmony_ci */ 199162306a36Sopenharmony_ci if (block != journal->j_total_len - 1) 199262306a36Sopenharmony_ci continue; 199362306a36Sopenharmony_ci } 199462306a36Sopenharmony_ci 199562306a36Sopenharmony_ci /* 199662306a36Sopenharmony_ci * end of contiguous region or this is last block of journal, 199762306a36Sopenharmony_ci * take care of the region 199862306a36Sopenharmony_ci */ 199962306a36Sopenharmony_ci byte_start = block_start * journal->j_blocksize; 200062306a36Sopenharmony_ci byte_stop = block_stop * journal->j_blocksize; 200162306a36Sopenharmony_ci byte_count = (block_stop - block_start + 1) * 200262306a36Sopenharmony_ci journal->j_blocksize; 200362306a36Sopenharmony_ci 200462306a36Sopenharmony_ci truncate_inode_pages_range(journal->j_dev->bd_inode->i_mapping, 
200562306a36Sopenharmony_ci byte_start, byte_stop); 200662306a36Sopenharmony_ci 200762306a36Sopenharmony_ci if (flags & JBD2_JOURNAL_FLUSH_DISCARD) { 200862306a36Sopenharmony_ci err = blkdev_issue_discard(journal->j_dev, 200962306a36Sopenharmony_ci byte_start >> SECTOR_SHIFT, 201062306a36Sopenharmony_ci byte_count >> SECTOR_SHIFT, 201162306a36Sopenharmony_ci GFP_NOFS); 201262306a36Sopenharmony_ci } else if (flags & JBD2_JOURNAL_FLUSH_ZEROOUT) { 201362306a36Sopenharmony_ci err = blkdev_issue_zeroout(journal->j_dev, 201462306a36Sopenharmony_ci byte_start >> SECTOR_SHIFT, 201562306a36Sopenharmony_ci byte_count >> SECTOR_SHIFT, 201662306a36Sopenharmony_ci GFP_NOFS, 0); 201762306a36Sopenharmony_ci } 201862306a36Sopenharmony_ci 201962306a36Sopenharmony_ci if (unlikely(err != 0)) { 202062306a36Sopenharmony_ci pr_err("JBD2: (error %d) unable to wipe journal at physical blocks %llu - %llu", 202162306a36Sopenharmony_ci err, block_start, block_stop); 202262306a36Sopenharmony_ci return err; 202362306a36Sopenharmony_ci } 202462306a36Sopenharmony_ci 202562306a36Sopenharmony_ci /* reset start and stop after processing a region */ 202662306a36Sopenharmony_ci block_start = ~0ULL; 202762306a36Sopenharmony_ci } 202862306a36Sopenharmony_ci 202962306a36Sopenharmony_ci return blkdev_issue_flush(journal->j_dev); 203062306a36Sopenharmony_ci} 203162306a36Sopenharmony_ci 203262306a36Sopenharmony_ci/** 203362306a36Sopenharmony_ci * jbd2_journal_update_sb_errno() - Update error in the journal. 203462306a36Sopenharmony_ci * @journal: The journal to update. 203562306a36Sopenharmony_ci * 203662306a36Sopenharmony_ci * Update a journal's errno. Write updated superblock to disk waiting for IO 203762306a36Sopenharmony_ci * to complete. 
203862306a36Sopenharmony_ci */ 203962306a36Sopenharmony_civoid jbd2_journal_update_sb_errno(journal_t *journal) 204062306a36Sopenharmony_ci{ 204162306a36Sopenharmony_ci journal_superblock_t *sb = journal->j_superblock; 204262306a36Sopenharmony_ci int errcode; 204362306a36Sopenharmony_ci 204462306a36Sopenharmony_ci lock_buffer(journal->j_sb_buffer); 204562306a36Sopenharmony_ci errcode = journal->j_errno; 204662306a36Sopenharmony_ci if (errcode == -ESHUTDOWN) 204762306a36Sopenharmony_ci errcode = 0; 204862306a36Sopenharmony_ci jbd2_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode); 204962306a36Sopenharmony_ci sb->s_errno = cpu_to_be32(errcode); 205062306a36Sopenharmony_ci 205162306a36Sopenharmony_ci jbd2_write_superblock(journal, REQ_FUA); 205262306a36Sopenharmony_ci} 205362306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_update_sb_errno); 205462306a36Sopenharmony_ci 205562306a36Sopenharmony_ci/** 205662306a36Sopenharmony_ci * jbd2_journal_load() - Read journal from disk. 205762306a36Sopenharmony_ci * @journal: Journal to act on. 205862306a36Sopenharmony_ci * 205962306a36Sopenharmony_ci * Given a journal_t structure which tells us which disk blocks contain 206062306a36Sopenharmony_ci * a journal, read the journal from disk to initialise the in-memory 206162306a36Sopenharmony_ci * structures. 
206262306a36Sopenharmony_ci */ 206362306a36Sopenharmony_ciint jbd2_journal_load(journal_t *journal) 206462306a36Sopenharmony_ci{ 206562306a36Sopenharmony_ci int err; 206662306a36Sopenharmony_ci journal_superblock_t *sb = journal->j_superblock; 206762306a36Sopenharmony_ci 206862306a36Sopenharmony_ci /* 206962306a36Sopenharmony_ci * Create a slab for this blocksize 207062306a36Sopenharmony_ci */ 207162306a36Sopenharmony_ci err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize)); 207262306a36Sopenharmony_ci if (err) 207362306a36Sopenharmony_ci return err; 207462306a36Sopenharmony_ci 207562306a36Sopenharmony_ci /* Let the recovery code check whether it needs to recover any 207662306a36Sopenharmony_ci * data from the journal. */ 207762306a36Sopenharmony_ci err = jbd2_journal_recover(journal); 207862306a36Sopenharmony_ci if (err) { 207962306a36Sopenharmony_ci pr_warn("JBD2: journal recovery failed\n"); 208062306a36Sopenharmony_ci return err; 208162306a36Sopenharmony_ci } 208262306a36Sopenharmony_ci 208362306a36Sopenharmony_ci if (journal->j_failed_commit) { 208462306a36Sopenharmony_ci printk(KERN_ERR "JBD2: journal transaction %u on %s " 208562306a36Sopenharmony_ci "is corrupt.\n", journal->j_failed_commit, 208662306a36Sopenharmony_ci journal->j_devname); 208762306a36Sopenharmony_ci return -EFSCORRUPTED; 208862306a36Sopenharmony_ci } 208962306a36Sopenharmony_ci /* 209062306a36Sopenharmony_ci * clear JBD2_ABORT flag initialized in journal_init_common 209162306a36Sopenharmony_ci * here to update log tail information with the newest seq. 209262306a36Sopenharmony_ci */ 209362306a36Sopenharmony_ci journal->j_flags &= ~JBD2_ABORT; 209462306a36Sopenharmony_ci 209562306a36Sopenharmony_ci /* OK, we've finished with the dynamic journal bits: 209662306a36Sopenharmony_ci * reinitialise the dynamic contents of the superblock in memory 209762306a36Sopenharmony_ci * and reset them on disk. 
*/ 209862306a36Sopenharmony_ci err = journal_reset(journal); 209962306a36Sopenharmony_ci if (err) { 210062306a36Sopenharmony_ci pr_warn("JBD2: journal reset failed\n"); 210162306a36Sopenharmony_ci return err; 210262306a36Sopenharmony_ci } 210362306a36Sopenharmony_ci 210462306a36Sopenharmony_ci journal->j_flags |= JBD2_LOADED; 210562306a36Sopenharmony_ci return 0; 210662306a36Sopenharmony_ci} 210762306a36Sopenharmony_ci 210862306a36Sopenharmony_ci/** 210962306a36Sopenharmony_ci * jbd2_journal_destroy() - Release a journal_t structure. 211062306a36Sopenharmony_ci * @journal: Journal to act on. 211162306a36Sopenharmony_ci * 211262306a36Sopenharmony_ci * Release a journal_t structure once it is no longer in use by the 211362306a36Sopenharmony_ci * journaled object. 211462306a36Sopenharmony_ci * Return <0 if we couldn't clean up the journal. 211562306a36Sopenharmony_ci */ 211662306a36Sopenharmony_ciint jbd2_journal_destroy(journal_t *journal) 211762306a36Sopenharmony_ci{ 211862306a36Sopenharmony_ci int err = 0; 211962306a36Sopenharmony_ci 212062306a36Sopenharmony_ci /* Wait for the commit thread to wake up and die. */ 212162306a36Sopenharmony_ci journal_kill_thread(journal); 212262306a36Sopenharmony_ci 212362306a36Sopenharmony_ci /* Force a final log commit */ 212462306a36Sopenharmony_ci if (journal->j_running_transaction) 212562306a36Sopenharmony_ci jbd2_journal_commit_transaction(journal); 212662306a36Sopenharmony_ci 212762306a36Sopenharmony_ci /* Force any old transactions to disk */ 212862306a36Sopenharmony_ci 212962306a36Sopenharmony_ci /* Totally anal locking here... 
*/ 213062306a36Sopenharmony_ci spin_lock(&journal->j_list_lock); 213162306a36Sopenharmony_ci while (journal->j_checkpoint_transactions != NULL) { 213262306a36Sopenharmony_ci spin_unlock(&journal->j_list_lock); 213362306a36Sopenharmony_ci mutex_lock_io(&journal->j_checkpoint_mutex); 213462306a36Sopenharmony_ci err = jbd2_log_do_checkpoint(journal); 213562306a36Sopenharmony_ci mutex_unlock(&journal->j_checkpoint_mutex); 213662306a36Sopenharmony_ci /* 213762306a36Sopenharmony_ci * If checkpointing failed, just free the buffers to avoid 213862306a36Sopenharmony_ci * looping forever 213962306a36Sopenharmony_ci */ 214062306a36Sopenharmony_ci if (err) { 214162306a36Sopenharmony_ci jbd2_journal_destroy_checkpoint(journal); 214262306a36Sopenharmony_ci spin_lock(&journal->j_list_lock); 214362306a36Sopenharmony_ci break; 214462306a36Sopenharmony_ci } 214562306a36Sopenharmony_ci spin_lock(&journal->j_list_lock); 214662306a36Sopenharmony_ci } 214762306a36Sopenharmony_ci 214862306a36Sopenharmony_ci J_ASSERT(journal->j_running_transaction == NULL); 214962306a36Sopenharmony_ci J_ASSERT(journal->j_committing_transaction == NULL); 215062306a36Sopenharmony_ci J_ASSERT(journal->j_checkpoint_transactions == NULL); 215162306a36Sopenharmony_ci spin_unlock(&journal->j_list_lock); 215262306a36Sopenharmony_ci 215362306a36Sopenharmony_ci /* 215462306a36Sopenharmony_ci * OK, all checkpoint transactions have been checked, now check the 215562306a36Sopenharmony_ci * write out io error flag and abort the journal if some buffer failed 215662306a36Sopenharmony_ci * to write back to the original location, otherwise the filesystem 215762306a36Sopenharmony_ci * may become inconsistent. 
215862306a36Sopenharmony_ci */ 215962306a36Sopenharmony_ci if (!is_journal_aborted(journal) && 216062306a36Sopenharmony_ci test_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags)) 216162306a36Sopenharmony_ci jbd2_journal_abort(journal, -EIO); 216262306a36Sopenharmony_ci 216362306a36Sopenharmony_ci if (journal->j_sb_buffer) { 216462306a36Sopenharmony_ci if (!is_journal_aborted(journal)) { 216562306a36Sopenharmony_ci mutex_lock_io(&journal->j_checkpoint_mutex); 216662306a36Sopenharmony_ci 216762306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 216862306a36Sopenharmony_ci journal->j_tail_sequence = 216962306a36Sopenharmony_ci ++journal->j_transaction_sequence; 217062306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 217162306a36Sopenharmony_ci 217262306a36Sopenharmony_ci jbd2_mark_journal_empty(journal, REQ_PREFLUSH | REQ_FUA); 217362306a36Sopenharmony_ci mutex_unlock(&journal->j_checkpoint_mutex); 217462306a36Sopenharmony_ci } else 217562306a36Sopenharmony_ci err = -EIO; 217662306a36Sopenharmony_ci brelse(journal->j_sb_buffer); 217762306a36Sopenharmony_ci } 217862306a36Sopenharmony_ci 217962306a36Sopenharmony_ci if (journal->j_shrinker.flags & SHRINKER_REGISTERED) { 218062306a36Sopenharmony_ci percpu_counter_destroy(&journal->j_checkpoint_jh_count); 218162306a36Sopenharmony_ci unregister_shrinker(&journal->j_shrinker); 218262306a36Sopenharmony_ci } 218362306a36Sopenharmony_ci if (journal->j_proc_entry) 218462306a36Sopenharmony_ci jbd2_stats_proc_exit(journal); 218562306a36Sopenharmony_ci iput(journal->j_inode); 218662306a36Sopenharmony_ci if (journal->j_revoke) 218762306a36Sopenharmony_ci jbd2_journal_destroy_revoke(journal); 218862306a36Sopenharmony_ci if (journal->j_chksum_driver) 218962306a36Sopenharmony_ci crypto_free_shash(journal->j_chksum_driver); 219062306a36Sopenharmony_ci kfree(journal->j_fc_wbuf); 219162306a36Sopenharmony_ci kfree(journal->j_wbuf); 219262306a36Sopenharmony_ci kfree(journal); 219362306a36Sopenharmony_ci 
219462306a36Sopenharmony_ci return err; 219562306a36Sopenharmony_ci} 219662306a36Sopenharmony_ci 219762306a36Sopenharmony_ci 219862306a36Sopenharmony_ci/** 219962306a36Sopenharmony_ci * jbd2_journal_check_used_features() - Check if features specified are used. 220062306a36Sopenharmony_ci * @journal: Journal to check. 220162306a36Sopenharmony_ci * @compat: bitmask of compatible features 220262306a36Sopenharmony_ci * @ro: bitmask of features that force read-only mount 220362306a36Sopenharmony_ci * @incompat: bitmask of incompatible features 220462306a36Sopenharmony_ci * 220562306a36Sopenharmony_ci * Check whether the journal uses all of a given set of 220662306a36Sopenharmony_ci * features. Return true (non-zero) if it does. 220762306a36Sopenharmony_ci **/ 220862306a36Sopenharmony_ci 220962306a36Sopenharmony_ciint jbd2_journal_check_used_features(journal_t *journal, unsigned long compat, 221062306a36Sopenharmony_ci unsigned long ro, unsigned long incompat) 221162306a36Sopenharmony_ci{ 221262306a36Sopenharmony_ci journal_superblock_t *sb; 221362306a36Sopenharmony_ci 221462306a36Sopenharmony_ci if (!compat && !ro && !incompat) 221562306a36Sopenharmony_ci return 1; 221662306a36Sopenharmony_ci if (!jbd2_format_support_feature(journal)) 221762306a36Sopenharmony_ci return 0; 221862306a36Sopenharmony_ci 221962306a36Sopenharmony_ci sb = journal->j_superblock; 222062306a36Sopenharmony_ci 222162306a36Sopenharmony_ci if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) && 222262306a36Sopenharmony_ci ((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) && 222362306a36Sopenharmony_ci ((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat)) 222462306a36Sopenharmony_ci return 1; 222562306a36Sopenharmony_ci 222662306a36Sopenharmony_ci return 0; 222762306a36Sopenharmony_ci} 222862306a36Sopenharmony_ci 222962306a36Sopenharmony_ci/** 223062306a36Sopenharmony_ci * jbd2_journal_check_available_features() - Check feature set in journalling layer 223162306a36Sopenharmony_ci * 
@journal: Journal to check. 223262306a36Sopenharmony_ci * @compat: bitmask of compatible features 223362306a36Sopenharmony_ci * @ro: bitmask of features that force read-only mount 223462306a36Sopenharmony_ci * @incompat: bitmask of incompatible features 223562306a36Sopenharmony_ci * 223662306a36Sopenharmony_ci * Check whether the journaling code supports the use of 223762306a36Sopenharmony_ci * all of a given set of features on this journal. Return true 223862306a36Sopenharmony_ci * (non-zero) if it can. */ 223962306a36Sopenharmony_ci 224062306a36Sopenharmony_ciint jbd2_journal_check_available_features(journal_t *journal, unsigned long compat, 224162306a36Sopenharmony_ci unsigned long ro, unsigned long incompat) 224262306a36Sopenharmony_ci{ 224362306a36Sopenharmony_ci if (!compat && !ro && !incompat) 224462306a36Sopenharmony_ci return 1; 224562306a36Sopenharmony_ci 224662306a36Sopenharmony_ci if (!jbd2_format_support_feature(journal)) 224762306a36Sopenharmony_ci return 0; 224862306a36Sopenharmony_ci 224962306a36Sopenharmony_ci if ((compat & JBD2_KNOWN_COMPAT_FEATURES) == compat && 225062306a36Sopenharmony_ci (ro & JBD2_KNOWN_ROCOMPAT_FEATURES) == ro && 225162306a36Sopenharmony_ci (incompat & JBD2_KNOWN_INCOMPAT_FEATURES) == incompat) 225262306a36Sopenharmony_ci return 1; 225362306a36Sopenharmony_ci 225462306a36Sopenharmony_ci return 0; 225562306a36Sopenharmony_ci} 225662306a36Sopenharmony_ci 225762306a36Sopenharmony_cistatic int 225862306a36Sopenharmony_cijbd2_journal_initialize_fast_commit(journal_t *journal) 225962306a36Sopenharmony_ci{ 226062306a36Sopenharmony_ci journal_superblock_t *sb = journal->j_superblock; 226162306a36Sopenharmony_ci unsigned long long num_fc_blks; 226262306a36Sopenharmony_ci 226362306a36Sopenharmony_ci num_fc_blks = jbd2_journal_get_num_fc_blks(sb); 226462306a36Sopenharmony_ci if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS) 226562306a36Sopenharmony_ci return -ENOSPC; 226662306a36Sopenharmony_ci 226762306a36Sopenharmony_ci /* 
Are we called twice? */ 226862306a36Sopenharmony_ci WARN_ON(journal->j_fc_wbuf != NULL); 226962306a36Sopenharmony_ci journal->j_fc_wbuf = kmalloc_array(num_fc_blks, 227062306a36Sopenharmony_ci sizeof(struct buffer_head *), GFP_KERNEL); 227162306a36Sopenharmony_ci if (!journal->j_fc_wbuf) 227262306a36Sopenharmony_ci return -ENOMEM; 227362306a36Sopenharmony_ci 227462306a36Sopenharmony_ci journal->j_fc_wbufsize = num_fc_blks; 227562306a36Sopenharmony_ci journal->j_fc_last = journal->j_last; 227662306a36Sopenharmony_ci journal->j_last = journal->j_fc_last - num_fc_blks; 227762306a36Sopenharmony_ci journal->j_fc_first = journal->j_last + 1; 227862306a36Sopenharmony_ci journal->j_fc_off = 0; 227962306a36Sopenharmony_ci journal->j_free = journal->j_last - journal->j_first; 228062306a36Sopenharmony_ci journal->j_max_transaction_buffers = 228162306a36Sopenharmony_ci jbd2_journal_get_max_txn_bufs(journal); 228262306a36Sopenharmony_ci 228362306a36Sopenharmony_ci return 0; 228462306a36Sopenharmony_ci} 228562306a36Sopenharmony_ci 228662306a36Sopenharmony_ci/** 228762306a36Sopenharmony_ci * jbd2_journal_set_features() - Mark a given journal feature in the superblock 228862306a36Sopenharmony_ci * @journal: Journal to act on. 228962306a36Sopenharmony_ci * @compat: bitmask of compatible features 229062306a36Sopenharmony_ci * @ro: bitmask of features that force read-only mount 229162306a36Sopenharmony_ci * @incompat: bitmask of incompatible features 229262306a36Sopenharmony_ci * 229362306a36Sopenharmony_ci * Mark a given journal feature as present on the 229462306a36Sopenharmony_ci * superblock. Returns true if the requested features could be set. 
229562306a36Sopenharmony_ci * 229662306a36Sopenharmony_ci */ 229762306a36Sopenharmony_ci 229862306a36Sopenharmony_ciint jbd2_journal_set_features(journal_t *journal, unsigned long compat, 229962306a36Sopenharmony_ci unsigned long ro, unsigned long incompat) 230062306a36Sopenharmony_ci{ 230162306a36Sopenharmony_ci#define INCOMPAT_FEATURE_ON(f) \ 230262306a36Sopenharmony_ci ((incompat & (f)) && !(sb->s_feature_incompat & cpu_to_be32(f))) 230362306a36Sopenharmony_ci#define COMPAT_FEATURE_ON(f) \ 230462306a36Sopenharmony_ci ((compat & (f)) && !(sb->s_feature_compat & cpu_to_be32(f))) 230562306a36Sopenharmony_ci journal_superblock_t *sb; 230662306a36Sopenharmony_ci 230762306a36Sopenharmony_ci if (jbd2_journal_check_used_features(journal, compat, ro, incompat)) 230862306a36Sopenharmony_ci return 1; 230962306a36Sopenharmony_ci 231062306a36Sopenharmony_ci if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) 231162306a36Sopenharmony_ci return 0; 231262306a36Sopenharmony_ci 231362306a36Sopenharmony_ci /* If enabling v2 checksums, turn on v3 instead */ 231462306a36Sopenharmony_ci if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) { 231562306a36Sopenharmony_ci incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2; 231662306a36Sopenharmony_ci incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3; 231762306a36Sopenharmony_ci } 231862306a36Sopenharmony_ci 231962306a36Sopenharmony_ci /* Asking for checksumming v3 and v1? Only give them v3. 
*/ 232062306a36Sopenharmony_ci if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 && 232162306a36Sopenharmony_ci compat & JBD2_FEATURE_COMPAT_CHECKSUM) 232262306a36Sopenharmony_ci compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; 232362306a36Sopenharmony_ci 232462306a36Sopenharmony_ci jbd2_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n", 232562306a36Sopenharmony_ci compat, ro, incompat); 232662306a36Sopenharmony_ci 232762306a36Sopenharmony_ci sb = journal->j_superblock; 232862306a36Sopenharmony_ci 232962306a36Sopenharmony_ci if (incompat & JBD2_FEATURE_INCOMPAT_FAST_COMMIT) { 233062306a36Sopenharmony_ci if (jbd2_journal_initialize_fast_commit(journal)) { 233162306a36Sopenharmony_ci pr_err("JBD2: Cannot enable fast commits.\n"); 233262306a36Sopenharmony_ci return 0; 233362306a36Sopenharmony_ci } 233462306a36Sopenharmony_ci } 233562306a36Sopenharmony_ci 233662306a36Sopenharmony_ci /* Load the checksum driver if necessary */ 233762306a36Sopenharmony_ci if ((journal->j_chksum_driver == NULL) && 233862306a36Sopenharmony_ci INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { 233962306a36Sopenharmony_ci journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 234062306a36Sopenharmony_ci if (IS_ERR(journal->j_chksum_driver)) { 234162306a36Sopenharmony_ci printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); 234262306a36Sopenharmony_ci journal->j_chksum_driver = NULL; 234362306a36Sopenharmony_ci return 0; 234462306a36Sopenharmony_ci } 234562306a36Sopenharmony_ci /* Precompute checksum seed for all metadata */ 234662306a36Sopenharmony_ci journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, 234762306a36Sopenharmony_ci sizeof(sb->s_uuid)); 234862306a36Sopenharmony_ci } 234962306a36Sopenharmony_ci 235062306a36Sopenharmony_ci lock_buffer(journal->j_sb_buffer); 235162306a36Sopenharmony_ci 235262306a36Sopenharmony_ci /* If enabling v3 checksums, update superblock */ 235362306a36Sopenharmony_ci if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { 
235462306a36Sopenharmony_ci sb->s_checksum_type = JBD2_CRC32C_CHKSUM; 235562306a36Sopenharmony_ci sb->s_feature_compat &= 235662306a36Sopenharmony_ci ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); 235762306a36Sopenharmony_ci } 235862306a36Sopenharmony_ci 235962306a36Sopenharmony_ci /* If enabling v1 checksums, downgrade superblock */ 236062306a36Sopenharmony_ci if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) 236162306a36Sopenharmony_ci sb->s_feature_incompat &= 236262306a36Sopenharmony_ci ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 | 236362306a36Sopenharmony_ci JBD2_FEATURE_INCOMPAT_CSUM_V3); 236462306a36Sopenharmony_ci 236562306a36Sopenharmony_ci sb->s_feature_compat |= cpu_to_be32(compat); 236662306a36Sopenharmony_ci sb->s_feature_ro_compat |= cpu_to_be32(ro); 236762306a36Sopenharmony_ci sb->s_feature_incompat |= cpu_to_be32(incompat); 236862306a36Sopenharmony_ci unlock_buffer(journal->j_sb_buffer); 236962306a36Sopenharmony_ci journal->j_revoke_records_per_block = 237062306a36Sopenharmony_ci journal_revoke_records_per_block(journal); 237162306a36Sopenharmony_ci 237262306a36Sopenharmony_ci return 1; 237362306a36Sopenharmony_ci#undef COMPAT_FEATURE_ON 237462306a36Sopenharmony_ci#undef INCOMPAT_FEATURE_ON 237562306a36Sopenharmony_ci} 237662306a36Sopenharmony_ci 237762306a36Sopenharmony_ci/* 237862306a36Sopenharmony_ci * jbd2_journal_clear_features() - Clear a given journal feature in the 237962306a36Sopenharmony_ci * superblock 238062306a36Sopenharmony_ci * @journal: Journal to act on. 238162306a36Sopenharmony_ci * @compat: bitmask of compatible features 238262306a36Sopenharmony_ci * @ro: bitmask of features that force read-only mount 238362306a36Sopenharmony_ci * @incompat: bitmask of incompatible features 238462306a36Sopenharmony_ci * 238562306a36Sopenharmony_ci * Clear a given journal feature as present on the 238662306a36Sopenharmony_ci * superblock. 
238762306a36Sopenharmony_ci */ 238862306a36Sopenharmony_civoid jbd2_journal_clear_features(journal_t *journal, unsigned long compat, 238962306a36Sopenharmony_ci unsigned long ro, unsigned long incompat) 239062306a36Sopenharmony_ci{ 239162306a36Sopenharmony_ci journal_superblock_t *sb; 239262306a36Sopenharmony_ci 239362306a36Sopenharmony_ci jbd2_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n", 239462306a36Sopenharmony_ci compat, ro, incompat); 239562306a36Sopenharmony_ci 239662306a36Sopenharmony_ci sb = journal->j_superblock; 239762306a36Sopenharmony_ci 239862306a36Sopenharmony_ci sb->s_feature_compat &= ~cpu_to_be32(compat); 239962306a36Sopenharmony_ci sb->s_feature_ro_compat &= ~cpu_to_be32(ro); 240062306a36Sopenharmony_ci sb->s_feature_incompat &= ~cpu_to_be32(incompat); 240162306a36Sopenharmony_ci journal->j_revoke_records_per_block = 240262306a36Sopenharmony_ci journal_revoke_records_per_block(journal); 240362306a36Sopenharmony_ci} 240462306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_clear_features); 240562306a36Sopenharmony_ci 240662306a36Sopenharmony_ci/** 240762306a36Sopenharmony_ci * jbd2_journal_flush() - Flush journal 240862306a36Sopenharmony_ci * @journal: Journal to act on. 240962306a36Sopenharmony_ci * @flags: optional operation on the journal blocks after the flush (see below) 241062306a36Sopenharmony_ci * 241162306a36Sopenharmony_ci * Flush all data for a given journal to disk and empty the journal. 241262306a36Sopenharmony_ci * Filesystems can use this when remounting readonly to ensure that 241362306a36Sopenharmony_ci * recovery does not need to happen on remount. Optionally, a discard or zeroout 241462306a36Sopenharmony_ci * can be issued on the journal blocks after flushing. 
241562306a36Sopenharmony_ci * 241662306a36Sopenharmony_ci * flags: 241762306a36Sopenharmony_ci * JBD2_JOURNAL_FLUSH_DISCARD: issues discards for the journal blocks 241862306a36Sopenharmony_ci * JBD2_JOURNAL_FLUSH_ZEROOUT: issues zeroouts for the journal blocks 241962306a36Sopenharmony_ci */ 242062306a36Sopenharmony_ciint jbd2_journal_flush(journal_t *journal, unsigned int flags) 242162306a36Sopenharmony_ci{ 242262306a36Sopenharmony_ci int err = 0; 242362306a36Sopenharmony_ci transaction_t *transaction = NULL; 242462306a36Sopenharmony_ci 242562306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 242662306a36Sopenharmony_ci 242762306a36Sopenharmony_ci /* Force everything buffered to the log... */ 242862306a36Sopenharmony_ci if (journal->j_running_transaction) { 242962306a36Sopenharmony_ci transaction = journal->j_running_transaction; 243062306a36Sopenharmony_ci __jbd2_log_start_commit(journal, transaction->t_tid); 243162306a36Sopenharmony_ci } else if (journal->j_committing_transaction) 243262306a36Sopenharmony_ci transaction = journal->j_committing_transaction; 243362306a36Sopenharmony_ci 243462306a36Sopenharmony_ci /* Wait for the log commit to complete... */ 243562306a36Sopenharmony_ci if (transaction) { 243662306a36Sopenharmony_ci tid_t tid = transaction->t_tid; 243762306a36Sopenharmony_ci 243862306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 243962306a36Sopenharmony_ci jbd2_log_wait_commit(journal, tid); 244062306a36Sopenharmony_ci } else { 244162306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 244262306a36Sopenharmony_ci } 244362306a36Sopenharmony_ci 244462306a36Sopenharmony_ci /* ...and flush everything in the log out to disk. 
*/ 244562306a36Sopenharmony_ci spin_lock(&journal->j_list_lock); 244662306a36Sopenharmony_ci while (!err && journal->j_checkpoint_transactions != NULL) { 244762306a36Sopenharmony_ci spin_unlock(&journal->j_list_lock); 244862306a36Sopenharmony_ci mutex_lock_io(&journal->j_checkpoint_mutex); 244962306a36Sopenharmony_ci err = jbd2_log_do_checkpoint(journal); 245062306a36Sopenharmony_ci mutex_unlock(&journal->j_checkpoint_mutex); 245162306a36Sopenharmony_ci spin_lock(&journal->j_list_lock); 245262306a36Sopenharmony_ci } 245362306a36Sopenharmony_ci spin_unlock(&journal->j_list_lock); 245462306a36Sopenharmony_ci 245562306a36Sopenharmony_ci if (is_journal_aborted(journal)) 245662306a36Sopenharmony_ci return -EIO; 245762306a36Sopenharmony_ci 245862306a36Sopenharmony_ci mutex_lock_io(&journal->j_checkpoint_mutex); 245962306a36Sopenharmony_ci if (!err) { 246062306a36Sopenharmony_ci err = jbd2_cleanup_journal_tail(journal); 246162306a36Sopenharmony_ci if (err < 0) { 246262306a36Sopenharmony_ci mutex_unlock(&journal->j_checkpoint_mutex); 246362306a36Sopenharmony_ci goto out; 246462306a36Sopenharmony_ci } 246562306a36Sopenharmony_ci err = 0; 246662306a36Sopenharmony_ci } 246762306a36Sopenharmony_ci 246862306a36Sopenharmony_ci /* Finally, mark the journal as really needing no recovery. 246962306a36Sopenharmony_ci * This sets s_start==0 in the underlying superblock, which is 247062306a36Sopenharmony_ci * the magic code for a fully-recovered superblock. Any future 247162306a36Sopenharmony_ci * commits of data to the journal will restore the current 247262306a36Sopenharmony_ci * s_start value. 
*/ 247362306a36Sopenharmony_ci jbd2_mark_journal_empty(journal, REQ_FUA); 247462306a36Sopenharmony_ci 247562306a36Sopenharmony_ci if (flags) 247662306a36Sopenharmony_ci err = __jbd2_journal_erase(journal, flags); 247762306a36Sopenharmony_ci 247862306a36Sopenharmony_ci mutex_unlock(&journal->j_checkpoint_mutex); 247962306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 248062306a36Sopenharmony_ci J_ASSERT(!journal->j_running_transaction); 248162306a36Sopenharmony_ci J_ASSERT(!journal->j_committing_transaction); 248262306a36Sopenharmony_ci J_ASSERT(!journal->j_checkpoint_transactions); 248362306a36Sopenharmony_ci J_ASSERT(journal->j_head == journal->j_tail); 248462306a36Sopenharmony_ci J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); 248562306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 248662306a36Sopenharmony_ciout: 248762306a36Sopenharmony_ci return err; 248862306a36Sopenharmony_ci} 248962306a36Sopenharmony_ci 249062306a36Sopenharmony_ci/** 249162306a36Sopenharmony_ci * jbd2_journal_wipe() - Wipe journal contents 249262306a36Sopenharmony_ci * @journal: Journal to act on. 249362306a36Sopenharmony_ci * @write: flag (see below) 249462306a36Sopenharmony_ci * 249562306a36Sopenharmony_ci * Wipe out all of the contents of a journal, safely. This will produce 249662306a36Sopenharmony_ci * a warning if the journal contains any valid recovery information. 249762306a36Sopenharmony_ci * Must be called between journal_init_*() and jbd2_journal_load(). 249862306a36Sopenharmony_ci * 249962306a36Sopenharmony_ci * If 'write' is non-zero, then we wipe out the journal on disk; otherwise 250062306a36Sopenharmony_ci * we merely suppress recovery. 
250162306a36Sopenharmony_ci */ 250262306a36Sopenharmony_ci 250362306a36Sopenharmony_ciint jbd2_journal_wipe(journal_t *journal, int write) 250462306a36Sopenharmony_ci{ 250562306a36Sopenharmony_ci int err; 250662306a36Sopenharmony_ci 250762306a36Sopenharmony_ci J_ASSERT (!(journal->j_flags & JBD2_LOADED)); 250862306a36Sopenharmony_ci 250962306a36Sopenharmony_ci if (!journal->j_tail) 251062306a36Sopenharmony_ci return 0; 251162306a36Sopenharmony_ci 251262306a36Sopenharmony_ci printk(KERN_WARNING "JBD2: %s recovery information on journal\n", 251362306a36Sopenharmony_ci write ? "Clearing" : "Ignoring"); 251462306a36Sopenharmony_ci 251562306a36Sopenharmony_ci err = jbd2_journal_skip_recovery(journal); 251662306a36Sopenharmony_ci if (write) { 251762306a36Sopenharmony_ci /* Lock to make assertions happy... */ 251862306a36Sopenharmony_ci mutex_lock_io(&journal->j_checkpoint_mutex); 251962306a36Sopenharmony_ci jbd2_mark_journal_empty(journal, REQ_FUA); 252062306a36Sopenharmony_ci mutex_unlock(&journal->j_checkpoint_mutex); 252162306a36Sopenharmony_ci } 252262306a36Sopenharmony_ci 252362306a36Sopenharmony_ci return err; 252462306a36Sopenharmony_ci} 252562306a36Sopenharmony_ci 252662306a36Sopenharmony_ci/** 252762306a36Sopenharmony_ci * jbd2_journal_abort () - Shutdown the journal immediately. 252862306a36Sopenharmony_ci * @journal: the journal to shutdown. 252962306a36Sopenharmony_ci * @errno: an error number to record in the journal indicating 253062306a36Sopenharmony_ci * the reason for the shutdown. 253162306a36Sopenharmony_ci * 253262306a36Sopenharmony_ci * Perform a complete, immediate shutdown of the ENTIRE 253362306a36Sopenharmony_ci * journal (not of a single transaction). This operation cannot be 253462306a36Sopenharmony_ci * undone without closing and reopening the journal. 
 *
 * The jbd2_journal_abort function is intended to support higher level error
 * recovery mechanisms such as the ext2/ext3 remount-readonly error
 * mode.
 *
 * Journal abort has very specific semantics.  Any existing dirty,
 * unjournaled buffers in the main filesystem will still be written to
 * disk by bdflush, but the journaling mechanism will be suspended
 * immediately and no further transaction commits will be honoured.
 *
 * Any dirty, journaled buffers will be written back to disk without
 * hitting the journal.  Atomicity cannot be guaranteed on an aborted
 * filesystem, but we _do_ attempt to leave as much data as possible
 * behind for fsck to use for cleanup.
 *
 * Any attempt to get a new transaction handle on a journal which is in
 * ABORT state will just result in an -EROFS error return.  A
 * jbd2_journal_stop on an existing handle will return -EIO if we have
 * entered abort state during the update.
 *
 * Recursive transactions are not disturbed by journal abort until the
 * final jbd2_journal_stop, which will receive the -EIO error.
 *
 * Finally, the jbd2_journal_abort call allows the caller to supply an errno
 * which will be recorded (if possible) in the journal superblock.
This 256062306a36Sopenharmony_ci * allows a client to record failure conditions in the middle of a 256162306a36Sopenharmony_ci * transaction without having to complete the transaction to record the 256262306a36Sopenharmony_ci * failure to disk. ext3_error, for example, now uses this 256362306a36Sopenharmony_ci * functionality. 256462306a36Sopenharmony_ci * 256562306a36Sopenharmony_ci */ 256662306a36Sopenharmony_ci 256762306a36Sopenharmony_civoid jbd2_journal_abort(journal_t *journal, int errno) 256862306a36Sopenharmony_ci{ 256962306a36Sopenharmony_ci transaction_t *transaction; 257062306a36Sopenharmony_ci 257162306a36Sopenharmony_ci /* 257262306a36Sopenharmony_ci * Lock the aborting procedure until everything is done, this avoid 257362306a36Sopenharmony_ci * races between filesystem's error handling flow (e.g. ext4_abort()), 257462306a36Sopenharmony_ci * ensure panic after the error info is written into journal's 257562306a36Sopenharmony_ci * superblock. 257662306a36Sopenharmony_ci */ 257762306a36Sopenharmony_ci mutex_lock(&journal->j_abort_mutex); 257862306a36Sopenharmony_ci /* 257962306a36Sopenharmony_ci * ESHUTDOWN always takes precedence because a file system check 258062306a36Sopenharmony_ci * caused by any other journal abort error is not required after 258162306a36Sopenharmony_ci * a shutdown triggered. 
258262306a36Sopenharmony_ci */ 258362306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 258462306a36Sopenharmony_ci if (journal->j_flags & JBD2_ABORT) { 258562306a36Sopenharmony_ci int old_errno = journal->j_errno; 258662306a36Sopenharmony_ci 258762306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 258862306a36Sopenharmony_ci if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) { 258962306a36Sopenharmony_ci journal->j_errno = errno; 259062306a36Sopenharmony_ci jbd2_journal_update_sb_errno(journal); 259162306a36Sopenharmony_ci } 259262306a36Sopenharmony_ci mutex_unlock(&journal->j_abort_mutex); 259362306a36Sopenharmony_ci return; 259462306a36Sopenharmony_ci } 259562306a36Sopenharmony_ci 259662306a36Sopenharmony_ci /* 259762306a36Sopenharmony_ci * Mark the abort as occurred and start current running transaction 259862306a36Sopenharmony_ci * to release all journaled buffer. 259962306a36Sopenharmony_ci */ 260062306a36Sopenharmony_ci pr_err("Aborting journal on device %s.\n", journal->j_devname); 260162306a36Sopenharmony_ci 260262306a36Sopenharmony_ci journal->j_flags |= JBD2_ABORT; 260362306a36Sopenharmony_ci journal->j_errno = errno; 260462306a36Sopenharmony_ci transaction = journal->j_running_transaction; 260562306a36Sopenharmony_ci if (transaction) 260662306a36Sopenharmony_ci __jbd2_log_start_commit(journal, transaction->t_tid); 260762306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 260862306a36Sopenharmony_ci 260962306a36Sopenharmony_ci /* 261062306a36Sopenharmony_ci * Record errno to the journal super block, so that fsck and jbd2 261162306a36Sopenharmony_ci * layer could realise that a filesystem check is needed. 
261262306a36Sopenharmony_ci */ 261362306a36Sopenharmony_ci jbd2_journal_update_sb_errno(journal); 261462306a36Sopenharmony_ci mutex_unlock(&journal->j_abort_mutex); 261562306a36Sopenharmony_ci} 261662306a36Sopenharmony_ci 261762306a36Sopenharmony_ci/** 261862306a36Sopenharmony_ci * jbd2_journal_errno() - returns the journal's error state. 261962306a36Sopenharmony_ci * @journal: journal to examine. 262062306a36Sopenharmony_ci * 262162306a36Sopenharmony_ci * This is the errno number set with jbd2_journal_abort(), the last 262262306a36Sopenharmony_ci * time the journal was mounted - if the journal was stopped 262362306a36Sopenharmony_ci * without calling abort this will be 0. 262462306a36Sopenharmony_ci * 262562306a36Sopenharmony_ci * If the journal has been aborted on this mount time -EROFS will 262662306a36Sopenharmony_ci * be returned. 262762306a36Sopenharmony_ci */ 262862306a36Sopenharmony_ciint jbd2_journal_errno(journal_t *journal) 262962306a36Sopenharmony_ci{ 263062306a36Sopenharmony_ci int err; 263162306a36Sopenharmony_ci 263262306a36Sopenharmony_ci read_lock(&journal->j_state_lock); 263362306a36Sopenharmony_ci if (journal->j_flags & JBD2_ABORT) 263462306a36Sopenharmony_ci err = -EROFS; 263562306a36Sopenharmony_ci else 263662306a36Sopenharmony_ci err = journal->j_errno; 263762306a36Sopenharmony_ci read_unlock(&journal->j_state_lock); 263862306a36Sopenharmony_ci return err; 263962306a36Sopenharmony_ci} 264062306a36Sopenharmony_ci 264162306a36Sopenharmony_ci/** 264262306a36Sopenharmony_ci * jbd2_journal_clear_err() - clears the journal's error state 264362306a36Sopenharmony_ci * @journal: journal to act on. 264462306a36Sopenharmony_ci * 264562306a36Sopenharmony_ci * An error must be cleared or acked to take a FS out of readonly 264662306a36Sopenharmony_ci * mode. 
264762306a36Sopenharmony_ci */ 264862306a36Sopenharmony_ciint jbd2_journal_clear_err(journal_t *journal) 264962306a36Sopenharmony_ci{ 265062306a36Sopenharmony_ci int err = 0; 265162306a36Sopenharmony_ci 265262306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 265362306a36Sopenharmony_ci if (journal->j_flags & JBD2_ABORT) 265462306a36Sopenharmony_ci err = -EROFS; 265562306a36Sopenharmony_ci else 265662306a36Sopenharmony_ci journal->j_errno = 0; 265762306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 265862306a36Sopenharmony_ci return err; 265962306a36Sopenharmony_ci} 266062306a36Sopenharmony_ci 266162306a36Sopenharmony_ci/** 266262306a36Sopenharmony_ci * jbd2_journal_ack_err() - Ack journal err. 266362306a36Sopenharmony_ci * @journal: journal to act on. 266462306a36Sopenharmony_ci * 266562306a36Sopenharmony_ci * An error must be cleared or acked to take a FS out of readonly 266662306a36Sopenharmony_ci * mode. 266762306a36Sopenharmony_ci */ 266862306a36Sopenharmony_civoid jbd2_journal_ack_err(journal_t *journal) 266962306a36Sopenharmony_ci{ 267062306a36Sopenharmony_ci write_lock(&journal->j_state_lock); 267162306a36Sopenharmony_ci if (journal->j_errno) 267262306a36Sopenharmony_ci journal->j_flags |= JBD2_ACK_ERR; 267362306a36Sopenharmony_ci write_unlock(&journal->j_state_lock); 267462306a36Sopenharmony_ci} 267562306a36Sopenharmony_ci 267662306a36Sopenharmony_ciint jbd2_journal_blocks_per_page(struct inode *inode) 267762306a36Sopenharmony_ci{ 267862306a36Sopenharmony_ci return 1 << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits); 267962306a36Sopenharmony_ci} 268062306a36Sopenharmony_ci 268162306a36Sopenharmony_ci/* 268262306a36Sopenharmony_ci * helper functions to deal with 32 or 64bit block numbers. 
268362306a36Sopenharmony_ci */ 268462306a36Sopenharmony_cisize_t journal_tag_bytes(journal_t *journal) 268562306a36Sopenharmony_ci{ 268662306a36Sopenharmony_ci size_t sz; 268762306a36Sopenharmony_ci 268862306a36Sopenharmony_ci if (jbd2_has_feature_csum3(journal)) 268962306a36Sopenharmony_ci return sizeof(journal_block_tag3_t); 269062306a36Sopenharmony_ci 269162306a36Sopenharmony_ci sz = sizeof(journal_block_tag_t); 269262306a36Sopenharmony_ci 269362306a36Sopenharmony_ci if (jbd2_has_feature_csum2(journal)) 269462306a36Sopenharmony_ci sz += sizeof(__u16); 269562306a36Sopenharmony_ci 269662306a36Sopenharmony_ci if (jbd2_has_feature_64bit(journal)) 269762306a36Sopenharmony_ci return sz; 269862306a36Sopenharmony_ci else 269962306a36Sopenharmony_ci return sz - sizeof(__u32); 270062306a36Sopenharmony_ci} 270162306a36Sopenharmony_ci 270262306a36Sopenharmony_ci/* 270362306a36Sopenharmony_ci * JBD memory management 270462306a36Sopenharmony_ci * 270562306a36Sopenharmony_ci * These functions are used to allocate block-sized chunks of memory 270662306a36Sopenharmony_ci * used for making copies of buffer_head data. Very often it will be 270762306a36Sopenharmony_ci * page-sized chunks of data, but sometimes it will be in 270862306a36Sopenharmony_ci * sub-page-size chunks. (For example, 16k pages on Power systems 270962306a36Sopenharmony_ci * with a 4k block file system.) For blocks smaller than a page, we 271062306a36Sopenharmony_ci * use a SLAB allocator. There are slab caches for each block size, 271162306a36Sopenharmony_ci * which are allocated at mount time, if necessary, and we only free 271262306a36Sopenharmony_ci * (all of) the slab caches when/if the jbd2 module is unloaded. For 271362306a36Sopenharmony_ci * this reason we don't need to a mutex to protect access to 271462306a36Sopenharmony_ci * jbd2_slab[] allocating or releasing memory; only in 271562306a36Sopenharmony_ci * jbd2_journal_create_slab(). 
271662306a36Sopenharmony_ci */ 271762306a36Sopenharmony_ci#define JBD2_MAX_SLABS 8 271862306a36Sopenharmony_cistatic struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS]; 271962306a36Sopenharmony_ci 272062306a36Sopenharmony_cistatic const char *jbd2_slab_names[JBD2_MAX_SLABS] = { 272162306a36Sopenharmony_ci "jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k", 272262306a36Sopenharmony_ci "jbd2_16k", "jbd2_32k", "jbd2_64k", "jbd2_128k" 272362306a36Sopenharmony_ci}; 272462306a36Sopenharmony_ci 272562306a36Sopenharmony_ci 272662306a36Sopenharmony_cistatic void jbd2_journal_destroy_slabs(void) 272762306a36Sopenharmony_ci{ 272862306a36Sopenharmony_ci int i; 272962306a36Sopenharmony_ci 273062306a36Sopenharmony_ci for (i = 0; i < JBD2_MAX_SLABS; i++) { 273162306a36Sopenharmony_ci kmem_cache_destroy(jbd2_slab[i]); 273262306a36Sopenharmony_ci jbd2_slab[i] = NULL; 273362306a36Sopenharmony_ci } 273462306a36Sopenharmony_ci} 273562306a36Sopenharmony_ci 273662306a36Sopenharmony_cistatic int jbd2_journal_create_slab(size_t size) 273762306a36Sopenharmony_ci{ 273862306a36Sopenharmony_ci static DEFINE_MUTEX(jbd2_slab_create_mutex); 273962306a36Sopenharmony_ci int i = order_base_2(size) - 10; 274062306a36Sopenharmony_ci size_t slab_size; 274162306a36Sopenharmony_ci 274262306a36Sopenharmony_ci if (size == PAGE_SIZE) 274362306a36Sopenharmony_ci return 0; 274462306a36Sopenharmony_ci 274562306a36Sopenharmony_ci if (i >= JBD2_MAX_SLABS) 274662306a36Sopenharmony_ci return -EINVAL; 274762306a36Sopenharmony_ci 274862306a36Sopenharmony_ci if (unlikely(i < 0)) 274962306a36Sopenharmony_ci i = 0; 275062306a36Sopenharmony_ci mutex_lock(&jbd2_slab_create_mutex); 275162306a36Sopenharmony_ci if (jbd2_slab[i]) { 275262306a36Sopenharmony_ci mutex_unlock(&jbd2_slab_create_mutex); 275362306a36Sopenharmony_ci return 0; /* Already created */ 275462306a36Sopenharmony_ci } 275562306a36Sopenharmony_ci 275662306a36Sopenharmony_ci slab_size = 1 << (i+10); 275762306a36Sopenharmony_ci jbd2_slab[i] = 
kmem_cache_create(jbd2_slab_names[i], slab_size, 275862306a36Sopenharmony_ci slab_size, 0, NULL); 275962306a36Sopenharmony_ci mutex_unlock(&jbd2_slab_create_mutex); 276062306a36Sopenharmony_ci if (!jbd2_slab[i]) { 276162306a36Sopenharmony_ci printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n"); 276262306a36Sopenharmony_ci return -ENOMEM; 276362306a36Sopenharmony_ci } 276462306a36Sopenharmony_ci return 0; 276562306a36Sopenharmony_ci} 276662306a36Sopenharmony_ci 276762306a36Sopenharmony_cistatic struct kmem_cache *get_slab(size_t size) 276862306a36Sopenharmony_ci{ 276962306a36Sopenharmony_ci int i = order_base_2(size) - 10; 277062306a36Sopenharmony_ci 277162306a36Sopenharmony_ci BUG_ON(i >= JBD2_MAX_SLABS); 277262306a36Sopenharmony_ci if (unlikely(i < 0)) 277362306a36Sopenharmony_ci i = 0; 277462306a36Sopenharmony_ci BUG_ON(jbd2_slab[i] == NULL); 277562306a36Sopenharmony_ci return jbd2_slab[i]; 277662306a36Sopenharmony_ci} 277762306a36Sopenharmony_ci 277862306a36Sopenharmony_civoid *jbd2_alloc(size_t size, gfp_t flags) 277962306a36Sopenharmony_ci{ 278062306a36Sopenharmony_ci void *ptr; 278162306a36Sopenharmony_ci 278262306a36Sopenharmony_ci BUG_ON(size & (size-1)); /* Must be a power of 2 */ 278362306a36Sopenharmony_ci 278462306a36Sopenharmony_ci if (size < PAGE_SIZE) 278562306a36Sopenharmony_ci ptr = kmem_cache_alloc(get_slab(size), flags); 278662306a36Sopenharmony_ci else 278762306a36Sopenharmony_ci ptr = (void *)__get_free_pages(flags, get_order(size)); 278862306a36Sopenharmony_ci 278962306a36Sopenharmony_ci /* Check alignment; SLUB has gotten this wrong in the past, 279062306a36Sopenharmony_ci * and this can lead to user data corruption! 
*/ 279162306a36Sopenharmony_ci BUG_ON(((unsigned long) ptr) & (size-1)); 279262306a36Sopenharmony_ci 279362306a36Sopenharmony_ci return ptr; 279462306a36Sopenharmony_ci} 279562306a36Sopenharmony_ci 279662306a36Sopenharmony_civoid jbd2_free(void *ptr, size_t size) 279762306a36Sopenharmony_ci{ 279862306a36Sopenharmony_ci if (size < PAGE_SIZE) 279962306a36Sopenharmony_ci kmem_cache_free(get_slab(size), ptr); 280062306a36Sopenharmony_ci else 280162306a36Sopenharmony_ci free_pages((unsigned long)ptr, get_order(size)); 280262306a36Sopenharmony_ci}; 280362306a36Sopenharmony_ci 280462306a36Sopenharmony_ci/* 280562306a36Sopenharmony_ci * Journal_head storage management 280662306a36Sopenharmony_ci */ 280762306a36Sopenharmony_cistatic struct kmem_cache *jbd2_journal_head_cache; 280862306a36Sopenharmony_ci#ifdef CONFIG_JBD2_DEBUG 280962306a36Sopenharmony_cistatic atomic_t nr_journal_heads = ATOMIC_INIT(0); 281062306a36Sopenharmony_ci#endif 281162306a36Sopenharmony_ci 281262306a36Sopenharmony_cistatic int __init jbd2_journal_init_journal_head_cache(void) 281362306a36Sopenharmony_ci{ 281462306a36Sopenharmony_ci J_ASSERT(!jbd2_journal_head_cache); 281562306a36Sopenharmony_ci jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", 281662306a36Sopenharmony_ci sizeof(struct journal_head), 281762306a36Sopenharmony_ci 0, /* offset */ 281862306a36Sopenharmony_ci SLAB_TEMPORARY | SLAB_TYPESAFE_BY_RCU, 281962306a36Sopenharmony_ci NULL); /* ctor */ 282062306a36Sopenharmony_ci if (!jbd2_journal_head_cache) { 282162306a36Sopenharmony_ci printk(KERN_EMERG "JBD2: no memory for journal_head cache\n"); 282262306a36Sopenharmony_ci return -ENOMEM; 282362306a36Sopenharmony_ci } 282462306a36Sopenharmony_ci return 0; 282562306a36Sopenharmony_ci} 282662306a36Sopenharmony_ci 282762306a36Sopenharmony_cistatic void jbd2_journal_destroy_journal_head_cache(void) 282862306a36Sopenharmony_ci{ 282962306a36Sopenharmony_ci kmem_cache_destroy(jbd2_journal_head_cache); 283062306a36Sopenharmony_ci 
jbd2_journal_head_cache = NULL; 283162306a36Sopenharmony_ci} 283262306a36Sopenharmony_ci 283362306a36Sopenharmony_ci/* 283462306a36Sopenharmony_ci * journal_head splicing and dicing 283562306a36Sopenharmony_ci */ 283662306a36Sopenharmony_cistatic struct journal_head *journal_alloc_journal_head(void) 283762306a36Sopenharmony_ci{ 283862306a36Sopenharmony_ci struct journal_head *ret; 283962306a36Sopenharmony_ci 284062306a36Sopenharmony_ci#ifdef CONFIG_JBD2_DEBUG 284162306a36Sopenharmony_ci atomic_inc(&nr_journal_heads); 284262306a36Sopenharmony_ci#endif 284362306a36Sopenharmony_ci ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS); 284462306a36Sopenharmony_ci if (!ret) { 284562306a36Sopenharmony_ci jbd2_debug(1, "out of memory for journal_head\n"); 284662306a36Sopenharmony_ci pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__); 284762306a36Sopenharmony_ci ret = kmem_cache_zalloc(jbd2_journal_head_cache, 284862306a36Sopenharmony_ci GFP_NOFS | __GFP_NOFAIL); 284962306a36Sopenharmony_ci } 285062306a36Sopenharmony_ci if (ret) 285162306a36Sopenharmony_ci spin_lock_init(&ret->b_state_lock); 285262306a36Sopenharmony_ci return ret; 285362306a36Sopenharmony_ci} 285462306a36Sopenharmony_ci 285562306a36Sopenharmony_cistatic void journal_free_journal_head(struct journal_head *jh) 285662306a36Sopenharmony_ci{ 285762306a36Sopenharmony_ci#ifdef CONFIG_JBD2_DEBUG 285862306a36Sopenharmony_ci atomic_dec(&nr_journal_heads); 285962306a36Sopenharmony_ci memset(jh, JBD2_POISON_FREE, sizeof(*jh)); 286062306a36Sopenharmony_ci#endif 286162306a36Sopenharmony_ci kmem_cache_free(jbd2_journal_head_cache, jh); 286262306a36Sopenharmony_ci} 286362306a36Sopenharmony_ci 286462306a36Sopenharmony_ci/* 286562306a36Sopenharmony_ci * A journal_head is attached to a buffer_head whenever JBD has an 286662306a36Sopenharmony_ci * interest in the buffer. 
 *
 * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit
 * is set.  This bit is tested in core kernel code where we need to take
 * JBD-specific actions.  Testing the zeroness of ->b_private is not reliable
 * there.
 *
 * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one.
 *
 * When a buffer has its BH_JBD bit set it is immune from being released by
 * core kernel code, mainly via ->b_count.
 *
 * A journal_head is detached from its buffer_head when the journal_head's
 * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
 * transaction (b_cp_transaction) hold their references to b_jcount.
 *
 * Various places in the kernel want to attach a journal_head to a buffer_head
 * _before_ attaching the journal_head to a transaction.  To protect the
 * journal_head in this situation, jbd2_journal_add_journal_head elevates the
 * journal_head's b_jcount refcount by one.  The caller must call
 * jbd2_journal_put_journal_head() to undo this.
 *
 * So the typical usage would be:
 *
 *	(Attach a journal_head if needed.  Increments b_jcount)
 *	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
 *	...
289362306a36Sopenharmony_ci * (Get another reference for transaction) 289462306a36Sopenharmony_ci * jbd2_journal_grab_journal_head(bh); 289562306a36Sopenharmony_ci * jh->b_transaction = xxx; 289662306a36Sopenharmony_ci * (Put original reference) 289762306a36Sopenharmony_ci * jbd2_journal_put_journal_head(jh); 289862306a36Sopenharmony_ci */ 289962306a36Sopenharmony_ci 290062306a36Sopenharmony_ci/* 290162306a36Sopenharmony_ci * Give a buffer_head a journal_head. 290262306a36Sopenharmony_ci * 290362306a36Sopenharmony_ci * May sleep. 290462306a36Sopenharmony_ci */ 290562306a36Sopenharmony_cistruct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) 290662306a36Sopenharmony_ci{ 290762306a36Sopenharmony_ci struct journal_head *jh; 290862306a36Sopenharmony_ci struct journal_head *new_jh = NULL; 290962306a36Sopenharmony_ci 291062306a36Sopenharmony_cirepeat: 291162306a36Sopenharmony_ci if (!buffer_jbd(bh)) 291262306a36Sopenharmony_ci new_jh = journal_alloc_journal_head(); 291362306a36Sopenharmony_ci 291462306a36Sopenharmony_ci jbd_lock_bh_journal_head(bh); 291562306a36Sopenharmony_ci if (buffer_jbd(bh)) { 291662306a36Sopenharmony_ci jh = bh2jh(bh); 291762306a36Sopenharmony_ci } else { 291862306a36Sopenharmony_ci J_ASSERT_BH(bh, 291962306a36Sopenharmony_ci (atomic_read(&bh->b_count) > 0) || 292062306a36Sopenharmony_ci (bh->b_folio && bh->b_folio->mapping)); 292162306a36Sopenharmony_ci 292262306a36Sopenharmony_ci if (!new_jh) { 292362306a36Sopenharmony_ci jbd_unlock_bh_journal_head(bh); 292462306a36Sopenharmony_ci goto repeat; 292562306a36Sopenharmony_ci } 292662306a36Sopenharmony_ci 292762306a36Sopenharmony_ci jh = new_jh; 292862306a36Sopenharmony_ci new_jh = NULL; /* We consumed it */ 292962306a36Sopenharmony_ci set_buffer_jbd(bh); 293062306a36Sopenharmony_ci bh->b_private = jh; 293162306a36Sopenharmony_ci jh->b_bh = bh; 293262306a36Sopenharmony_ci get_bh(bh); 293362306a36Sopenharmony_ci BUFFER_TRACE(bh, "added journal_head"); 293462306a36Sopenharmony_ci } 
jh->b_jcount++;
	jbd_unlock_bh_journal_head(bh);
	if (new_jh)
		journal_free_journal_head(new_jh);
	return bh->b_private;
}

/*
 * Grab a ref against this buffer_head's journal_head.  If it ended up not
 * having a journal_head, return NULL.
 *
 * The check of buffer_jbd(bh) and the increment of b_jcount are both done
 * with the buffer's journal_head lock held.  A non-NULL return carries one
 * b_jcount reference; jbd2_journal_put_journal_head() is the function in
 * this file that decrements it.
 */
struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh)
{
	struct journal_head *jh = NULL;

	jbd_lock_bh_journal_head(bh);
	if (buffer_jbd(bh)) {
		jh = bh2jh(bh);
		jh->b_jcount++;
	}
	jbd_unlock_bh_journal_head(bh);
	return jh;
}
EXPORT_SYMBOL(jbd2_journal_grab_journal_head);

/*
 * Detach the journal_head from @bh without freeing it.
 *
 * Asserts that the journal_head is not attached to any transaction
 * (b_transaction, b_next_transaction and b_cp_transaction are all NULL),
 * is on no list (b_jlist == BJ_None), and that @bh and the journal_head
 * point at each other.  Then clears bh->b_private, jh->b_bh and the
 * buffer's jbd flag.
 *
 * The only caller in this part of the file,
 * jbd2_journal_put_journal_head(), invokes this with the buffer's
 * journal_head lock held and frees the journal_head afterwards.
 */
static void __journal_remove_journal_head(struct buffer_head *bh)
{
	struct journal_head *jh = bh2jh(bh);

	J_ASSERT_JH(jh, jh->b_transaction == NULL);
	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
	J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
	J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
	J_ASSERT_BH(bh, buffer_jbd(bh));
	J_ASSERT_BH(bh, jh2bh(jh) == bh);
	BUFFER_TRACE(bh, "remove journal_head");

	/* Unlink before dropping the lock */
	bh->b_private = NULL;
	jh->b_bh = NULL;	/* debug, really */
	clear_buffer_jbd(bh);
}

/*
 * Free a journal_head that has already been detached from its buffer.
 *
 * @jh:     journal_head to free
 * @b_size: size passed to jbd2_free() for any data copy still attached
 *
 * If b_frozen_data or b_committed_data is still set, a warning is printed
 * and the copy is freed before the journal_head itself is released.
 */
static void journal_release_journal_head(struct journal_head *jh, size_t b_size)
{
	if (jh->b_frozen_data) {
		printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
		jbd2_free(jh->b_frozen_data, b_size);
	}
	if (jh->b_committed_data) {
		printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
		jbd2_free(jh->b_committed_data, b_size);
	}
	journal_free_journal_head(jh);
}

/*
 * Drop a reference on the passed journal_head.  If it fell to zero then
 * release the journal_head from the buffer_head.
 *
 * The decrement and the detach are done under the buffer's journal_head
 * lock; the journal_head is freed only after that lock has been dropped.
 */
void jbd2_journal_put_journal_head(struct journal_head *jh)
{
	/*
	 * Look up the buffer first: __journal_remove_journal_head() sets
	 * jh->b_bh to NULL, and bh is still needed after that call.
	 */
	struct buffer_head *bh = jh2bh(jh);

	jbd_lock_bh_journal_head(bh);
	J_ASSERT_JH(jh, jh->b_jcount > 0);
	--jh->b_jcount;
	if (!jh->b_jcount) {
		/* Last reference: unlink under the lock, free outside it. */
		__journal_remove_journal_head(bh);
		jbd_unlock_bh_journal_head(bh);
		journal_release_journal_head(jh, bh->b_size);
		/*
		 * NOTE(review): presumably balances a buffer reference taken
		 * when the journal_head was attached; the attaching code is
		 * not fully visible here - confirm.
		 */
		__brelse(bh);
	} else {
		jbd_unlock_bh_journal_head(bh);
	}
}
EXPORT_SYMBOL(jbd2_journal_put_journal_head);

/*
 * Initialize jbd inode head
 *
 * Clears both transaction pointers, the flags and the dirty range
 * (i_dirty_start / i_dirty_end), records @inode in i_vfs_inode and sets
 * up i_list as an empty list head.
 */
void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
{
	jinode->i_transaction = NULL;
	jinode->i_next_transaction = NULL;
	jinode->i_vfs_inode = inode;
	jinode->i_flags = 0;
	jinode->i_dirty_start = 0;
	jinode->i_dirty_end = 0;
	INIT_LIST_HEAD(&jinode->i_list);
}

/*
 * Function to be called before we start removing inode from memory (i.e.,
 * clear_inode() is a fine place to be called from). It removes inode from
 * transaction's lists.
 *
 * Does nothing when @journal is NULL.  Otherwise, while JI_COMMIT_RUNNING
 * is set in jinode->i_flags, the caller sleeps uninterruptibly on the bit
 * waitqueue for that flag and re-checks from the top; j_list_lock is not
 * held across the sleep.  Once the flag is clear, the jinode is unlinked
 * from i_list and i_transaction is reset, if it was set.
 */
void jbd2_journal_release_jbd_inode(journal_t *journal,
				    struct jbd2_inode *jinode)
{
	if (!journal)
		return;
restart:
	spin_lock(&journal->j_list_lock);
	/* Is commit writing out inode - we have to wait */
	if (jinode->i_flags & JI_COMMIT_RUNNING) {
		wait_queue_head_t *wq;
		DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
		wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
		/*
		 * Queue ourselves while still holding j_list_lock, then drop
		 * the lock before sleeping.  NOTE(review): presumably this
		 * ordering is what prevents a missed wakeup from the commit
		 * side, which is not visible here - confirm.
		 */
		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
		spin_unlock(&journal->j_list_lock);
		schedule();
		finish_wait(wq, &wait.wq_entry);
		goto restart;
	}

	if (jinode->i_transaction) {
		list_del(&jinode->i_list);
		jinode->i_transaction = NULL;
	}
	spin_unlock(&journal->j_list_lock);
}


#ifdef CONFIG_PROC_FS

#define JBD2_STATS_PROC_NAME "fs/jbd2"

/*
 * Create the "fs/jbd2" procfs directory and remember it in
 * proc_jbd2_stats.  The result of proc_mkdir() is not checked here.
 */
static void __init jbd2_create_jbd_stats_proc_entry(void)
{
	proc_jbd2_stats = proc_mkdir(JBD2_STATS_PROC_NAME, NULL);
}

/*
 * Remove the "fs/jbd2" procfs directory, but only when proc_jbd2_stats
 * is non-NULL.
 */
static void __exit jbd2_remove_jbd_stats_proc_entry(void)
{
	if (proc_jbd2_stats)
		remove_proc_entry(JBD2_STATS_PROC_NAME, NULL);
}

#else

/* Without CONFIG_PROC_FS both helpers expand to empty statements. */
#define jbd2_create_jbd_stats_proc_entry() do {} while (0)
#define jbd2_remove_jbd_stats_proc_entry() do {} while (0)

#endif

/* Slab caches created and destroyed by the init/destroy helpers below. */
struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache;

/*
 * Create the slab cache for struct jbd2_inode.
 *
 * Asserts that the cache does not exist yet.  Returns 0 on success or
 * -ENOMEM (after logging at emergency level) when creation fails.
 */
static int __init jbd2_journal_init_inode_cache(void)
{
	J_ASSERT(!jbd2_inode_cache);
	jbd2_inode_cache = KMEM_CACHE(jbd2_inode, 0);
	if (!jbd2_inode_cache) {
		pr_emerg("JBD2: failed to create inode cache\n");
		return -ENOMEM;
	}
	return 0;
}

/*
 * Create the slab cache for struct jbd2_journal_handle, passing
 * SLAB_TEMPORARY as the cache flags.
 *
 * Asserts that the cache does not exist yet.  Returns 0 on success or
 * -ENOMEM (after logging at emergency level) when creation fails.
 */
static int __init jbd2_journal_init_handle_cache(void)
{
	J_ASSERT(!jbd2_handle_cache);
	jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
	if (!jbd2_handle_cache) {
		printk(KERN_EMERG "JBD2: failed to create handle cache\n");
		return -ENOMEM;
	}
	return 0;
}

/*
 * Destroy the jbd2_inode slab cache and reset the pointer to NULL, the
 * state jbd2_journal_init_inode_cache() asserts on entry.
 */
static void jbd2_journal_destroy_inode_cache(void)
{
	kmem_cache_destroy(jbd2_inode_cache);
	jbd2_inode_cache = NULL;
}
311062306a36Sopenharmony_ci 311162306a36Sopenharmony_cistatic void jbd2_journal_destroy_handle_cache(void) 311262306a36Sopenharmony_ci{ 311362306a36Sopenharmony_ci kmem_cache_destroy(jbd2_handle_cache); 311462306a36Sopenharmony_ci jbd2_handle_cache = NULL; 311562306a36Sopenharmony_ci} 311662306a36Sopenharmony_ci 311762306a36Sopenharmony_ci/* 311862306a36Sopenharmony_ci * Module startup and shutdown 311962306a36Sopenharmony_ci */ 312062306a36Sopenharmony_ci 312162306a36Sopenharmony_cistatic int __init journal_init_caches(void) 312262306a36Sopenharmony_ci{ 312362306a36Sopenharmony_ci int ret; 312462306a36Sopenharmony_ci 312562306a36Sopenharmony_ci ret = jbd2_journal_init_revoke_record_cache(); 312662306a36Sopenharmony_ci if (ret == 0) 312762306a36Sopenharmony_ci ret = jbd2_journal_init_revoke_table_cache(); 312862306a36Sopenharmony_ci if (ret == 0) 312962306a36Sopenharmony_ci ret = jbd2_journal_init_journal_head_cache(); 313062306a36Sopenharmony_ci if (ret == 0) 313162306a36Sopenharmony_ci ret = jbd2_journal_init_handle_cache(); 313262306a36Sopenharmony_ci if (ret == 0) 313362306a36Sopenharmony_ci ret = jbd2_journal_init_inode_cache(); 313462306a36Sopenharmony_ci if (ret == 0) 313562306a36Sopenharmony_ci ret = jbd2_journal_init_transaction_cache(); 313662306a36Sopenharmony_ci return ret; 313762306a36Sopenharmony_ci} 313862306a36Sopenharmony_ci 313962306a36Sopenharmony_cistatic void jbd2_journal_destroy_caches(void) 314062306a36Sopenharmony_ci{ 314162306a36Sopenharmony_ci jbd2_journal_destroy_revoke_record_cache(); 314262306a36Sopenharmony_ci jbd2_journal_destroy_revoke_table_cache(); 314362306a36Sopenharmony_ci jbd2_journal_destroy_journal_head_cache(); 314462306a36Sopenharmony_ci jbd2_journal_destroy_handle_cache(); 314562306a36Sopenharmony_ci jbd2_journal_destroy_inode_cache(); 314662306a36Sopenharmony_ci jbd2_journal_destroy_transaction_cache(); 314762306a36Sopenharmony_ci jbd2_journal_destroy_slabs(); 314862306a36Sopenharmony_ci} 314962306a36Sopenharmony_ci 
315062306a36Sopenharmony_cistatic int __init journal_init(void) 315162306a36Sopenharmony_ci{ 315262306a36Sopenharmony_ci int ret; 315362306a36Sopenharmony_ci 315462306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); 315562306a36Sopenharmony_ci 315662306a36Sopenharmony_ci ret = journal_init_caches(); 315762306a36Sopenharmony_ci if (ret == 0) { 315862306a36Sopenharmony_ci jbd2_create_jbd_stats_proc_entry(); 315962306a36Sopenharmony_ci } else { 316062306a36Sopenharmony_ci jbd2_journal_destroy_caches(); 316162306a36Sopenharmony_ci } 316262306a36Sopenharmony_ci return ret; 316362306a36Sopenharmony_ci} 316462306a36Sopenharmony_ci 316562306a36Sopenharmony_cistatic void __exit journal_exit(void) 316662306a36Sopenharmony_ci{ 316762306a36Sopenharmony_ci#ifdef CONFIG_JBD2_DEBUG 316862306a36Sopenharmony_ci int n = atomic_read(&nr_journal_heads); 316962306a36Sopenharmony_ci if (n) 317062306a36Sopenharmony_ci printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n); 317162306a36Sopenharmony_ci#endif 317262306a36Sopenharmony_ci jbd2_remove_jbd_stats_proc_entry(); 317362306a36Sopenharmony_ci jbd2_journal_destroy_caches(); 317462306a36Sopenharmony_ci} 317562306a36Sopenharmony_ci 317662306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 317762306a36Sopenharmony_cimodule_init(journal_init); 317862306a36Sopenharmony_cimodule_exit(journal_exit); 317962306a36Sopenharmony_ci 3180