18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0+
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * linux/fs/jbd2/transaction.c
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * Copyright 1998 Red Hat corp --- All Rights Reserved
88c2ecf20Sopenharmony_ci *
98c2ecf20Sopenharmony_ci * Generic filesystem transaction handling code; part of the ext2fs
108c2ecf20Sopenharmony_ci * journaling system.
118c2ecf20Sopenharmony_ci *
128c2ecf20Sopenharmony_ci * This file manages transactions (compound commits managed by the
138c2ecf20Sopenharmony_ci * journaling code) and handles (individual atomic operations by the
148c2ecf20Sopenharmony_ci * filesystem).
158c2ecf20Sopenharmony_ci */
168c2ecf20Sopenharmony_ci
178c2ecf20Sopenharmony_ci#include <linux/time.h>
188c2ecf20Sopenharmony_ci#include <linux/fs.h>
198c2ecf20Sopenharmony_ci#include <linux/jbd2.h>
208c2ecf20Sopenharmony_ci#include <linux/errno.h>
218c2ecf20Sopenharmony_ci#include <linux/slab.h>
228c2ecf20Sopenharmony_ci#include <linux/timer.h>
238c2ecf20Sopenharmony_ci#include <linux/mm.h>
248c2ecf20Sopenharmony_ci#include <linux/highmem.h>
258c2ecf20Sopenharmony_ci#include <linux/hrtimer.h>
268c2ecf20Sopenharmony_ci#include <linux/backing-dev.h>
278c2ecf20Sopenharmony_ci#include <linux/bug.h>
288c2ecf20Sopenharmony_ci#include <linux/module.h>
298c2ecf20Sopenharmony_ci#include <linux/sched/mm.h>
308c2ecf20Sopenharmony_ci
318c2ecf20Sopenharmony_ci#include <trace/events/jbd2.h>
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_cistatic void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
348c2ecf20Sopenharmony_cistatic void __jbd2_journal_unfile_buffer(struct journal_head *jh);
358c2ecf20Sopenharmony_ci
368c2ecf20Sopenharmony_cistatic struct kmem_cache *transaction_cache;
378c2ecf20Sopenharmony_ciint __init jbd2_journal_init_transaction_cache(void)
388c2ecf20Sopenharmony_ci{
398c2ecf20Sopenharmony_ci	J_ASSERT(!transaction_cache);
408c2ecf20Sopenharmony_ci	transaction_cache = kmem_cache_create("jbd2_transaction_s",
418c2ecf20Sopenharmony_ci					sizeof(transaction_t),
428c2ecf20Sopenharmony_ci					0,
438c2ecf20Sopenharmony_ci					SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
448c2ecf20Sopenharmony_ci					NULL);
458c2ecf20Sopenharmony_ci	if (!transaction_cache) {
468c2ecf20Sopenharmony_ci		pr_emerg("JBD2: failed to create transaction cache\n");
478c2ecf20Sopenharmony_ci		return -ENOMEM;
488c2ecf20Sopenharmony_ci	}
498c2ecf20Sopenharmony_ci	return 0;
508c2ecf20Sopenharmony_ci}
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_civoid jbd2_journal_destroy_transaction_cache(void)
538c2ecf20Sopenharmony_ci{
548c2ecf20Sopenharmony_ci	kmem_cache_destroy(transaction_cache);
558c2ecf20Sopenharmony_ci	transaction_cache = NULL;
568c2ecf20Sopenharmony_ci}
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_civoid jbd2_journal_free_transaction(transaction_t *transaction)
598c2ecf20Sopenharmony_ci{
608c2ecf20Sopenharmony_ci	if (unlikely(ZERO_OR_NULL_PTR(transaction)))
618c2ecf20Sopenharmony_ci		return;
628c2ecf20Sopenharmony_ci	kmem_cache_free(transaction_cache, transaction);
638c2ecf20Sopenharmony_ci}
648c2ecf20Sopenharmony_ci
658c2ecf20Sopenharmony_ci/*
668c2ecf20Sopenharmony_ci * Base amount of descriptor blocks we reserve for each transaction.
678c2ecf20Sopenharmony_ci */
688c2ecf20Sopenharmony_cistatic int jbd2_descriptor_blocks_per_trans(journal_t *journal)
698c2ecf20Sopenharmony_ci{
708c2ecf20Sopenharmony_ci	int tag_space = journal->j_blocksize - sizeof(journal_header_t);
718c2ecf20Sopenharmony_ci	int tags_per_block;
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci	/* Subtract UUID */
748c2ecf20Sopenharmony_ci	tag_space -= 16;
758c2ecf20Sopenharmony_ci	if (jbd2_journal_has_csum_v2or3(journal))
768c2ecf20Sopenharmony_ci		tag_space -= sizeof(struct jbd2_journal_block_tail);
778c2ecf20Sopenharmony_ci	/* Commit code leaves a slack space of 16 bytes at the end of block */
788c2ecf20Sopenharmony_ci	tags_per_block = (tag_space - 16) / journal_tag_bytes(journal);
798c2ecf20Sopenharmony_ci	/*
808c2ecf20Sopenharmony_ci	 * Revoke descriptors are accounted separately so we need to reserve
818c2ecf20Sopenharmony_ci	 * space for commit block and normal transaction descriptor blocks.
828c2ecf20Sopenharmony_ci	 */
838c2ecf20Sopenharmony_ci	return 1 + DIV_ROUND_UP(journal->j_max_transaction_buffers,
848c2ecf20Sopenharmony_ci				tags_per_block);
858c2ecf20Sopenharmony_ci}
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_ci/*
888c2ecf20Sopenharmony_ci * jbd2_get_transaction: obtain a new transaction_t object.
898c2ecf20Sopenharmony_ci *
908c2ecf20Sopenharmony_ci * Simply initialise a new transaction. Initialize it in
918c2ecf20Sopenharmony_ci * RUNNING state and add it to the current journal (which should not
928c2ecf20Sopenharmony_ci * have an existing running transaction: we only make a new transaction
938c2ecf20Sopenharmony_ci * once we have started to commit the old one).
948c2ecf20Sopenharmony_ci *
958c2ecf20Sopenharmony_ci * Preconditions:
968c2ecf20Sopenharmony_ci *	The journal MUST be locked.  We don't perform atomic mallocs on the
978c2ecf20Sopenharmony_ci *	new transaction	and we can't block without protecting against other
988c2ecf20Sopenharmony_ci *	processes trying to touch the journal while it is in transition.
998c2ecf20Sopenharmony_ci *
1008c2ecf20Sopenharmony_ci */
1018c2ecf20Sopenharmony_ci
1028c2ecf20Sopenharmony_cistatic void jbd2_get_transaction(journal_t *journal,
1038c2ecf20Sopenharmony_ci				transaction_t *transaction)
1048c2ecf20Sopenharmony_ci{
1058c2ecf20Sopenharmony_ci	transaction->t_journal = journal;
1068c2ecf20Sopenharmony_ci	transaction->t_state = T_RUNNING;
1078c2ecf20Sopenharmony_ci	transaction->t_start_time = ktime_get();
1088c2ecf20Sopenharmony_ci	transaction->t_tid = journal->j_transaction_sequence++;
1098c2ecf20Sopenharmony_ci	transaction->t_expires = jiffies + journal->j_commit_interval;
1108c2ecf20Sopenharmony_ci	spin_lock_init(&transaction->t_handle_lock);
1118c2ecf20Sopenharmony_ci	atomic_set(&transaction->t_updates, 0);
1128c2ecf20Sopenharmony_ci	atomic_set(&transaction->t_outstanding_credits,
1138c2ecf20Sopenharmony_ci		   jbd2_descriptor_blocks_per_trans(journal) +
1148c2ecf20Sopenharmony_ci		   atomic_read(&journal->j_reserved_credits));
1158c2ecf20Sopenharmony_ci	atomic_set(&transaction->t_outstanding_revokes, 0);
1168c2ecf20Sopenharmony_ci	atomic_set(&transaction->t_handle_count, 0);
1178c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&transaction->t_inode_list);
1188c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&transaction->t_private_list);
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci	/* Set up the commit timer for the new transaction. */
1218c2ecf20Sopenharmony_ci	journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires);
1228c2ecf20Sopenharmony_ci	add_timer(&journal->j_commit_timer);
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci	J_ASSERT(journal->j_running_transaction == NULL);
1258c2ecf20Sopenharmony_ci	journal->j_running_transaction = transaction;
1268c2ecf20Sopenharmony_ci	transaction->t_max_wait = 0;
1278c2ecf20Sopenharmony_ci	transaction->t_start = jiffies;
1288c2ecf20Sopenharmony_ci	transaction->t_requested = 0;
1298c2ecf20Sopenharmony_ci}
1308c2ecf20Sopenharmony_ci
1318c2ecf20Sopenharmony_ci/*
1328c2ecf20Sopenharmony_ci * Handle management.
1338c2ecf20Sopenharmony_ci *
1348c2ecf20Sopenharmony_ci * A handle_t is an object which represents a single atomic update to a
1358c2ecf20Sopenharmony_ci * filesystem, and which tracks all of the modifications which form part
1368c2ecf20Sopenharmony_ci * of that one update.
1378c2ecf20Sopenharmony_ci */
1388c2ecf20Sopenharmony_ci
1398c2ecf20Sopenharmony_ci/*
1408c2ecf20Sopenharmony_ci * Update transaction's maximum wait time, if debugging is enabled.
1418c2ecf20Sopenharmony_ci *
1428c2ecf20Sopenharmony_ci * In order for t_max_wait to be reliable, it must be protected by a
1438c2ecf20Sopenharmony_ci * lock.  But doing so will mean that start_this_handle() can not be
1448c2ecf20Sopenharmony_ci * run in parallel on SMP systems, which limits our scalability.  So
1458c2ecf20Sopenharmony_ci * unless debugging is enabled, we no longer update t_max_wait, which
1468c2ecf20Sopenharmony_ci * means that maximum wait time reported by the jbd2_run_stats
1478c2ecf20Sopenharmony_ci * tracepoint will always be zero.
1488c2ecf20Sopenharmony_ci */
1498c2ecf20Sopenharmony_cistatic inline void update_t_max_wait(transaction_t *transaction,
1508c2ecf20Sopenharmony_ci				     unsigned long ts)
1518c2ecf20Sopenharmony_ci{
1528c2ecf20Sopenharmony_ci#ifdef CONFIG_JBD2_DEBUG
1538c2ecf20Sopenharmony_ci	if (jbd2_journal_enable_debug &&
1548c2ecf20Sopenharmony_ci	    time_after(transaction->t_start, ts)) {
1558c2ecf20Sopenharmony_ci		ts = jbd2_time_diff(ts, transaction->t_start);
1568c2ecf20Sopenharmony_ci		spin_lock(&transaction->t_handle_lock);
1578c2ecf20Sopenharmony_ci		if (ts > transaction->t_max_wait)
1588c2ecf20Sopenharmony_ci			transaction->t_max_wait = ts;
1598c2ecf20Sopenharmony_ci		spin_unlock(&transaction->t_handle_lock);
1608c2ecf20Sopenharmony_ci	}
1618c2ecf20Sopenharmony_ci#endif
1628c2ecf20Sopenharmony_ci}
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_ci/*
1658c2ecf20Sopenharmony_ci * Wait until running transaction passes to T_FLUSH state and new transaction
1668c2ecf20Sopenharmony_ci * can thus be started. Also starts the commit if needed. The function expects
1678c2ecf20Sopenharmony_ci * running transaction to exist and releases j_state_lock.
1688c2ecf20Sopenharmony_ci */
1698c2ecf20Sopenharmony_cistatic void wait_transaction_locked(journal_t *journal)
1708c2ecf20Sopenharmony_ci	__releases(journal->j_state_lock)
1718c2ecf20Sopenharmony_ci{
1728c2ecf20Sopenharmony_ci	DEFINE_WAIT(wait);
1738c2ecf20Sopenharmony_ci	int need_to_start;
1748c2ecf20Sopenharmony_ci	tid_t tid = journal->j_running_transaction->t_tid;
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci	prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait,
1778c2ecf20Sopenharmony_ci			TASK_UNINTERRUPTIBLE);
1788c2ecf20Sopenharmony_ci	need_to_start = !tid_geq(journal->j_commit_request, tid);
1798c2ecf20Sopenharmony_ci	read_unlock(&journal->j_state_lock);
1808c2ecf20Sopenharmony_ci	if (need_to_start)
1818c2ecf20Sopenharmony_ci		jbd2_log_start_commit(journal, tid);
1828c2ecf20Sopenharmony_ci	jbd2_might_wait_for_commit(journal);
1838c2ecf20Sopenharmony_ci	schedule();
1848c2ecf20Sopenharmony_ci	finish_wait(&journal->j_wait_transaction_locked, &wait);
1858c2ecf20Sopenharmony_ci}
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_ci/*
1888c2ecf20Sopenharmony_ci * Wait until running transaction transitions from T_SWITCH to T_FLUSH
1898c2ecf20Sopenharmony_ci * state and new transaction can thus be started. The function releases
1908c2ecf20Sopenharmony_ci * j_state_lock.
1918c2ecf20Sopenharmony_ci */
1928c2ecf20Sopenharmony_cistatic void wait_transaction_switching(journal_t *journal)
1938c2ecf20Sopenharmony_ci	__releases(journal->j_state_lock)
1948c2ecf20Sopenharmony_ci{
1958c2ecf20Sopenharmony_ci	DEFINE_WAIT(wait);
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_ci	if (WARN_ON(!journal->j_running_transaction ||
1988c2ecf20Sopenharmony_ci		    journal->j_running_transaction->t_state != T_SWITCH)) {
1998c2ecf20Sopenharmony_ci		read_unlock(&journal->j_state_lock);
2008c2ecf20Sopenharmony_ci		return;
2018c2ecf20Sopenharmony_ci	}
2028c2ecf20Sopenharmony_ci	prepare_to_wait_exclusive(&journal->j_wait_transaction_locked, &wait,
2038c2ecf20Sopenharmony_ci			TASK_UNINTERRUPTIBLE);
2048c2ecf20Sopenharmony_ci	read_unlock(&journal->j_state_lock);
2058c2ecf20Sopenharmony_ci	/*
2068c2ecf20Sopenharmony_ci	 * We don't call jbd2_might_wait_for_commit() here as there's no
2078c2ecf20Sopenharmony_ci	 * waiting for outstanding handles happening anymore in T_SWITCH state
2088c2ecf20Sopenharmony_ci	 * and handling of reserved handles actually relies on that for
2098c2ecf20Sopenharmony_ci	 * correctness.
2108c2ecf20Sopenharmony_ci	 */
2118c2ecf20Sopenharmony_ci	schedule();
2128c2ecf20Sopenharmony_ci	finish_wait(&journal->j_wait_transaction_locked, &wait);
2138c2ecf20Sopenharmony_ci}
2148c2ecf20Sopenharmony_ci
2158c2ecf20Sopenharmony_cistatic void sub_reserved_credits(journal_t *journal, int blocks)
2168c2ecf20Sopenharmony_ci{
2178c2ecf20Sopenharmony_ci	atomic_sub(blocks, &journal->j_reserved_credits);
2188c2ecf20Sopenharmony_ci	wake_up(&journal->j_wait_reserved);
2198c2ecf20Sopenharmony_ci}
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci/*
2228c2ecf20Sopenharmony_ci * Wait until we can add credits for handle to the running transaction.  Called
2238c2ecf20Sopenharmony_ci * with j_state_lock held for reading. Returns 0 if handle joined the running
2248c2ecf20Sopenharmony_ci * transaction. Returns 1 if we had to wait, j_state_lock is dropped, and
2258c2ecf20Sopenharmony_ci * caller must retry.
2268c2ecf20Sopenharmony_ci */
2278c2ecf20Sopenharmony_cistatic int add_transaction_credits(journal_t *journal, int blocks,
2288c2ecf20Sopenharmony_ci				   int rsv_blocks)
2298c2ecf20Sopenharmony_ci{
2308c2ecf20Sopenharmony_ci	transaction_t *t = journal->j_running_transaction;
2318c2ecf20Sopenharmony_ci	int needed;
2328c2ecf20Sopenharmony_ci	int total = blocks + rsv_blocks;
2338c2ecf20Sopenharmony_ci
2348c2ecf20Sopenharmony_ci	/*
2358c2ecf20Sopenharmony_ci	 * If the current transaction is locked down for commit, wait
2368c2ecf20Sopenharmony_ci	 * for the lock to be released.
2378c2ecf20Sopenharmony_ci	 */
2388c2ecf20Sopenharmony_ci	if (t->t_state != T_RUNNING) {
2398c2ecf20Sopenharmony_ci		WARN_ON_ONCE(t->t_state >= T_FLUSH);
2408c2ecf20Sopenharmony_ci		wait_transaction_locked(journal);
2418c2ecf20Sopenharmony_ci		return 1;
2428c2ecf20Sopenharmony_ci	}
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_ci	/*
2458c2ecf20Sopenharmony_ci	 * If there is not enough space left in the log to write all
2468c2ecf20Sopenharmony_ci	 * potential buffers requested by this operation, we need to
2478c2ecf20Sopenharmony_ci	 * stall pending a log checkpoint to free some more log space.
2488c2ecf20Sopenharmony_ci	 */
2498c2ecf20Sopenharmony_ci	needed = atomic_add_return(total, &t->t_outstanding_credits);
2508c2ecf20Sopenharmony_ci	if (needed > journal->j_max_transaction_buffers) {
2518c2ecf20Sopenharmony_ci		/*
2528c2ecf20Sopenharmony_ci		 * If the current transaction is already too large,
2538c2ecf20Sopenharmony_ci		 * then start to commit it: we can then go back and
2548c2ecf20Sopenharmony_ci		 * attach this handle to a new transaction.
2558c2ecf20Sopenharmony_ci		 */
2568c2ecf20Sopenharmony_ci		atomic_sub(total, &t->t_outstanding_credits);
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_ci		/*
2598c2ecf20Sopenharmony_ci		 * Is the number of reserved credits in the current transaction too
2608c2ecf20Sopenharmony_ci		 * big to fit this handle? Wait until reserved credits are freed.
2618c2ecf20Sopenharmony_ci		 */
2628c2ecf20Sopenharmony_ci		if (atomic_read(&journal->j_reserved_credits) + total >
2638c2ecf20Sopenharmony_ci		    journal->j_max_transaction_buffers) {
2648c2ecf20Sopenharmony_ci			read_unlock(&journal->j_state_lock);
2658c2ecf20Sopenharmony_ci			jbd2_might_wait_for_commit(journal);
2668c2ecf20Sopenharmony_ci			wait_event(journal->j_wait_reserved,
2678c2ecf20Sopenharmony_ci				   atomic_read(&journal->j_reserved_credits) + total <=
2688c2ecf20Sopenharmony_ci				   journal->j_max_transaction_buffers);
2698c2ecf20Sopenharmony_ci			return 1;
2708c2ecf20Sopenharmony_ci		}
2718c2ecf20Sopenharmony_ci
2728c2ecf20Sopenharmony_ci		wait_transaction_locked(journal);
2738c2ecf20Sopenharmony_ci		return 1;
2748c2ecf20Sopenharmony_ci	}
2758c2ecf20Sopenharmony_ci
2768c2ecf20Sopenharmony_ci	/*
2778c2ecf20Sopenharmony_ci	 * The commit code assumes that it can get enough log space
2788c2ecf20Sopenharmony_ci	 * without forcing a checkpoint.  This is *critical* for
2798c2ecf20Sopenharmony_ci	 * correctness: a checkpoint of a buffer which is also
2808c2ecf20Sopenharmony_ci	 * associated with a committing transaction creates a deadlock,
2818c2ecf20Sopenharmony_ci	 * so commit simply cannot force through checkpoints.
2828c2ecf20Sopenharmony_ci	 *
2838c2ecf20Sopenharmony_ci	 * We must therefore ensure the necessary space in the journal
2848c2ecf20Sopenharmony_ci	 * *before* starting to dirty potentially checkpointed buffers
2858c2ecf20Sopenharmony_ci	 * in the new transaction.
2868c2ecf20Sopenharmony_ci	 */
2878c2ecf20Sopenharmony_ci	if (jbd2_log_space_left(journal) < journal->j_max_transaction_buffers) {
2888c2ecf20Sopenharmony_ci		atomic_sub(total, &t->t_outstanding_credits);
2898c2ecf20Sopenharmony_ci		read_unlock(&journal->j_state_lock);
2908c2ecf20Sopenharmony_ci		jbd2_might_wait_for_commit(journal);
2918c2ecf20Sopenharmony_ci		write_lock(&journal->j_state_lock);
2928c2ecf20Sopenharmony_ci		if (jbd2_log_space_left(journal) <
2938c2ecf20Sopenharmony_ci					journal->j_max_transaction_buffers)
2948c2ecf20Sopenharmony_ci			__jbd2_log_wait_for_space(journal);
2958c2ecf20Sopenharmony_ci		write_unlock(&journal->j_state_lock);
2968c2ecf20Sopenharmony_ci		return 1;
2978c2ecf20Sopenharmony_ci	}
2988c2ecf20Sopenharmony_ci
2998c2ecf20Sopenharmony_ci	/* No reservation? We are done... */
3008c2ecf20Sopenharmony_ci	if (!rsv_blocks)
3018c2ecf20Sopenharmony_ci		return 0;
3028c2ecf20Sopenharmony_ci
3038c2ecf20Sopenharmony_ci	needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits);
3048c2ecf20Sopenharmony_ci	/* We allow at most half of a transaction to be reserved */
3058c2ecf20Sopenharmony_ci	if (needed > journal->j_max_transaction_buffers / 2) {
3068c2ecf20Sopenharmony_ci		sub_reserved_credits(journal, rsv_blocks);
3078c2ecf20Sopenharmony_ci		atomic_sub(total, &t->t_outstanding_credits);
3088c2ecf20Sopenharmony_ci		read_unlock(&journal->j_state_lock);
3098c2ecf20Sopenharmony_ci		jbd2_might_wait_for_commit(journal);
3108c2ecf20Sopenharmony_ci		wait_event(journal->j_wait_reserved,
3118c2ecf20Sopenharmony_ci			 atomic_read(&journal->j_reserved_credits) + rsv_blocks
3128c2ecf20Sopenharmony_ci			 <= journal->j_max_transaction_buffers / 2);
3138c2ecf20Sopenharmony_ci		return 1;
3148c2ecf20Sopenharmony_ci	}
3158c2ecf20Sopenharmony_ci	return 0;
3168c2ecf20Sopenharmony_ci}
3178c2ecf20Sopenharmony_ci
3188c2ecf20Sopenharmony_ci/*
3198c2ecf20Sopenharmony_ci * start_this_handle: Given a handle, deal with any locking or stalling
3208c2ecf20Sopenharmony_ci * needed to make sure that there is enough journal space for the handle
3218c2ecf20Sopenharmony_ci * to begin.  Attach the handle to a transaction and set up the
3228c2ecf20Sopenharmony_ci * transaction's buffer credits.
3238c2ecf20Sopenharmony_ci */
3248c2ecf20Sopenharmony_ci
3258c2ecf20Sopenharmony_cistatic int start_this_handle(journal_t *journal, handle_t *handle,
3268c2ecf20Sopenharmony_ci			     gfp_t gfp_mask)
3278c2ecf20Sopenharmony_ci{
3288c2ecf20Sopenharmony_ci	transaction_t	*transaction, *new_transaction = NULL;
3298c2ecf20Sopenharmony_ci	int		blocks = handle->h_total_credits;
3308c2ecf20Sopenharmony_ci	int		rsv_blocks = 0;
3318c2ecf20Sopenharmony_ci	unsigned long ts = jiffies;
3328c2ecf20Sopenharmony_ci
3338c2ecf20Sopenharmony_ci	if (handle->h_rsv_handle)
3348c2ecf20Sopenharmony_ci		rsv_blocks = handle->h_rsv_handle->h_total_credits;
3358c2ecf20Sopenharmony_ci
3368c2ecf20Sopenharmony_ci	/*
3378c2ecf20Sopenharmony_ci	 * Limit the number of reserved credits to 1/2 of maximum transaction
3388c2ecf20Sopenharmony_ci	 * size and limit the number of total credits to not exceed maximum
3398c2ecf20Sopenharmony_ci	 * transaction size per operation.
3408c2ecf20Sopenharmony_ci	 */
3418c2ecf20Sopenharmony_ci	if ((rsv_blocks > journal->j_max_transaction_buffers / 2) ||
3428c2ecf20Sopenharmony_ci	    (rsv_blocks + blocks > journal->j_max_transaction_buffers)) {
3438c2ecf20Sopenharmony_ci		printk(KERN_ERR "JBD2: %s wants too many credits "
3448c2ecf20Sopenharmony_ci		       "credits:%d rsv_credits:%d max:%d\n",
3458c2ecf20Sopenharmony_ci		       current->comm, blocks, rsv_blocks,
3468c2ecf20Sopenharmony_ci		       journal->j_max_transaction_buffers);
3478c2ecf20Sopenharmony_ci		WARN_ON(1);
3488c2ecf20Sopenharmony_ci		return -ENOSPC;
3498c2ecf20Sopenharmony_ci	}
3508c2ecf20Sopenharmony_ci
3518c2ecf20Sopenharmony_cialloc_transaction:
3528c2ecf20Sopenharmony_ci	/*
3538c2ecf20Sopenharmony_ci	 * This check is racy but it is just an optimization of allocating new
3548c2ecf20Sopenharmony_ci	 * transaction early if there are high chances we'll need it. If we
3558c2ecf20Sopenharmony_ci	 * guess wrong, we'll retry or free unused transaction.
3568c2ecf20Sopenharmony_ci	 */
3578c2ecf20Sopenharmony_ci	if (!data_race(journal->j_running_transaction)) {
3588c2ecf20Sopenharmony_ci		/*
3598c2ecf20Sopenharmony_ci		 * If __GFP_FS is not present, then we may be being called from
3608c2ecf20Sopenharmony_ci		 * inside the fs writeback layer, so we MUST NOT fail.
3618c2ecf20Sopenharmony_ci		 */
3628c2ecf20Sopenharmony_ci		if ((gfp_mask & __GFP_FS) == 0)
3638c2ecf20Sopenharmony_ci			gfp_mask |= __GFP_NOFAIL;
3648c2ecf20Sopenharmony_ci		new_transaction = kmem_cache_zalloc(transaction_cache,
3658c2ecf20Sopenharmony_ci						    gfp_mask);
3668c2ecf20Sopenharmony_ci		if (!new_transaction)
3678c2ecf20Sopenharmony_ci			return -ENOMEM;
3688c2ecf20Sopenharmony_ci	}
3698c2ecf20Sopenharmony_ci
3708c2ecf20Sopenharmony_ci	jbd_debug(3, "New handle %p going live.\n", handle);
3718c2ecf20Sopenharmony_ci
3728c2ecf20Sopenharmony_ci	/*
3738c2ecf20Sopenharmony_ci	 * We need to hold j_state_lock until t_updates has been incremented,
3748c2ecf20Sopenharmony_ci	 * for proper journal barrier handling
3758c2ecf20Sopenharmony_ci	 */
3768c2ecf20Sopenharmony_cirepeat:
3778c2ecf20Sopenharmony_ci	read_lock(&journal->j_state_lock);
3788c2ecf20Sopenharmony_ci	BUG_ON(journal->j_flags & JBD2_UNMOUNT);
3798c2ecf20Sopenharmony_ci	if (is_journal_aborted(journal) ||
3808c2ecf20Sopenharmony_ci	    (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
3818c2ecf20Sopenharmony_ci		read_unlock(&journal->j_state_lock);
3828c2ecf20Sopenharmony_ci		jbd2_journal_free_transaction(new_transaction);
3838c2ecf20Sopenharmony_ci		return -EROFS;
3848c2ecf20Sopenharmony_ci	}
3858c2ecf20Sopenharmony_ci
3868c2ecf20Sopenharmony_ci	/*
3878c2ecf20Sopenharmony_ci	 * Wait on the journal's transaction barrier if necessary. Specifically
3888c2ecf20Sopenharmony_ci	 * we allow reserved handles to proceed because otherwise commit could
3898c2ecf20Sopenharmony_ci	 * deadlock on page writeback not being able to complete.
3908c2ecf20Sopenharmony_ci	 */
3918c2ecf20Sopenharmony_ci	if (!handle->h_reserved && journal->j_barrier_count) {
3928c2ecf20Sopenharmony_ci		read_unlock(&journal->j_state_lock);
3938c2ecf20Sopenharmony_ci		wait_event(journal->j_wait_transaction_locked,
3948c2ecf20Sopenharmony_ci				journal->j_barrier_count == 0);
3958c2ecf20Sopenharmony_ci		goto repeat;
3968c2ecf20Sopenharmony_ci	}
3978c2ecf20Sopenharmony_ci
3988c2ecf20Sopenharmony_ci	if (!journal->j_running_transaction) {
3998c2ecf20Sopenharmony_ci		read_unlock(&journal->j_state_lock);
4008c2ecf20Sopenharmony_ci		if (!new_transaction)
4018c2ecf20Sopenharmony_ci			goto alloc_transaction;
4028c2ecf20Sopenharmony_ci		write_lock(&journal->j_state_lock);
4038c2ecf20Sopenharmony_ci		if (!journal->j_running_transaction &&
4048c2ecf20Sopenharmony_ci		    (handle->h_reserved || !journal->j_barrier_count)) {
4058c2ecf20Sopenharmony_ci			jbd2_get_transaction(journal, new_transaction);
4068c2ecf20Sopenharmony_ci			new_transaction = NULL;
4078c2ecf20Sopenharmony_ci		}
4088c2ecf20Sopenharmony_ci		write_unlock(&journal->j_state_lock);
4098c2ecf20Sopenharmony_ci		goto repeat;
4108c2ecf20Sopenharmony_ci	}
4118c2ecf20Sopenharmony_ci
4128c2ecf20Sopenharmony_ci	transaction = journal->j_running_transaction;
4138c2ecf20Sopenharmony_ci
4148c2ecf20Sopenharmony_ci	if (!handle->h_reserved) {
4158c2ecf20Sopenharmony_ci		/* We may have dropped j_state_lock - restart in that case */
4168c2ecf20Sopenharmony_ci		if (add_transaction_credits(journal, blocks, rsv_blocks))
4178c2ecf20Sopenharmony_ci			goto repeat;
4188c2ecf20Sopenharmony_ci	} else {
4198c2ecf20Sopenharmony_ci		/*
4208c2ecf20Sopenharmony_ci		 * We have handle reserved so we are allowed to join T_LOCKED
4218c2ecf20Sopenharmony_ci		 * transaction and we don't have to check for transaction size
4228c2ecf20Sopenharmony_ci		 * and journal space. But we still have to wait while running
4238c2ecf20Sopenharmony_ci		 * transaction is being switched to a committing one as it
4248c2ecf20Sopenharmony_ci		 * won't wait for any handles anymore.
4258c2ecf20Sopenharmony_ci		 */
4268c2ecf20Sopenharmony_ci		if (transaction->t_state == T_SWITCH) {
4278c2ecf20Sopenharmony_ci			wait_transaction_switching(journal);
4288c2ecf20Sopenharmony_ci			goto repeat;
4298c2ecf20Sopenharmony_ci		}
4308c2ecf20Sopenharmony_ci		sub_reserved_credits(journal, blocks);
4318c2ecf20Sopenharmony_ci		handle->h_reserved = 0;
4328c2ecf20Sopenharmony_ci	}
4338c2ecf20Sopenharmony_ci
4348c2ecf20Sopenharmony_ci	/* OK, account for the buffers that this operation expects to
4358c2ecf20Sopenharmony_ci	 * use and add the handle to the running transaction.
4368c2ecf20Sopenharmony_ci	 */
4378c2ecf20Sopenharmony_ci	update_t_max_wait(transaction, ts);
4388c2ecf20Sopenharmony_ci	handle->h_transaction = transaction;
4398c2ecf20Sopenharmony_ci	handle->h_requested_credits = blocks;
4408c2ecf20Sopenharmony_ci	handle->h_revoke_credits_requested = handle->h_revoke_credits;
4418c2ecf20Sopenharmony_ci	handle->h_start_jiffies = jiffies;
4428c2ecf20Sopenharmony_ci	atomic_inc(&transaction->t_updates);
4438c2ecf20Sopenharmony_ci	atomic_inc(&transaction->t_handle_count);
4448c2ecf20Sopenharmony_ci	jbd_debug(4, "Handle %p given %d credits (total %d, free %lu)\n",
4458c2ecf20Sopenharmony_ci		  handle, blocks,
4468c2ecf20Sopenharmony_ci		  atomic_read(&transaction->t_outstanding_credits),
4478c2ecf20Sopenharmony_ci		  jbd2_log_space_left(journal));
4488c2ecf20Sopenharmony_ci	read_unlock(&journal->j_state_lock);
4498c2ecf20Sopenharmony_ci	current->journal_info = handle;
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_ci	rwsem_acquire_read(&journal->j_trans_commit_map, 0, 0, _THIS_IP_);
4528c2ecf20Sopenharmony_ci	jbd2_journal_free_transaction(new_transaction);
4538c2ecf20Sopenharmony_ci	/*
4548c2ecf20Sopenharmony_ci	 * Ensure that no allocations done while the transaction is open are
4558c2ecf20Sopenharmony_ci	 * going to recurse back to the fs layer.
4568c2ecf20Sopenharmony_ci	 */
4578c2ecf20Sopenharmony_ci	handle->saved_alloc_context = memalloc_nofs_save();
4588c2ecf20Sopenharmony_ci	return 0;
4598c2ecf20Sopenharmony_ci}
4608c2ecf20Sopenharmony_ci
4618c2ecf20Sopenharmony_ci/* Allocate a new handle.  This should probably be in a slab... */
4628c2ecf20Sopenharmony_cistatic handle_t *new_handle(int nblocks)
4638c2ecf20Sopenharmony_ci{
4648c2ecf20Sopenharmony_ci	handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
4658c2ecf20Sopenharmony_ci	if (!handle)
4668c2ecf20Sopenharmony_ci		return NULL;
4678c2ecf20Sopenharmony_ci	handle->h_total_credits = nblocks;
4688c2ecf20Sopenharmony_ci	handle->h_ref = 1;
4698c2ecf20Sopenharmony_ci
4708c2ecf20Sopenharmony_ci	return handle;
4718c2ecf20Sopenharmony_ci}
4728c2ecf20Sopenharmony_ci
4738c2ecf20Sopenharmony_cihandle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
4748c2ecf20Sopenharmony_ci			      int revoke_records, gfp_t gfp_mask,
4758c2ecf20Sopenharmony_ci			      unsigned int type, unsigned int line_no)
4768c2ecf20Sopenharmony_ci{
4778c2ecf20Sopenharmony_ci	handle_t *handle = journal_current_handle();
4788c2ecf20Sopenharmony_ci	int err;
4798c2ecf20Sopenharmony_ci
4808c2ecf20Sopenharmony_ci	if (!journal)
4818c2ecf20Sopenharmony_ci		return ERR_PTR(-EROFS);
4828c2ecf20Sopenharmony_ci
4838c2ecf20Sopenharmony_ci	if (handle) {
4848c2ecf20Sopenharmony_ci		J_ASSERT(handle->h_transaction->t_journal == journal);
4858c2ecf20Sopenharmony_ci		handle->h_ref++;
4868c2ecf20Sopenharmony_ci		return handle;
4878c2ecf20Sopenharmony_ci	}
4888c2ecf20Sopenharmony_ci
4898c2ecf20Sopenharmony_ci	nblocks += DIV_ROUND_UP(revoke_records,
4908c2ecf20Sopenharmony_ci				journal->j_revoke_records_per_block);
4918c2ecf20Sopenharmony_ci	handle = new_handle(nblocks);
4928c2ecf20Sopenharmony_ci	if (!handle)
4938c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
4948c2ecf20Sopenharmony_ci	if (rsv_blocks) {
4958c2ecf20Sopenharmony_ci		handle_t *rsv_handle;
4968c2ecf20Sopenharmony_ci
4978c2ecf20Sopenharmony_ci		rsv_handle = new_handle(rsv_blocks);
4988c2ecf20Sopenharmony_ci		if (!rsv_handle) {
4998c2ecf20Sopenharmony_ci			jbd2_free_handle(handle);
5008c2ecf20Sopenharmony_ci			return ERR_PTR(-ENOMEM);
5018c2ecf20Sopenharmony_ci		}
5028c2ecf20Sopenharmony_ci		rsv_handle->h_reserved = 1;
5038c2ecf20Sopenharmony_ci		rsv_handle->h_journal = journal;
5048c2ecf20Sopenharmony_ci		handle->h_rsv_handle = rsv_handle;
5058c2ecf20Sopenharmony_ci	}
5068c2ecf20Sopenharmony_ci	handle->h_revoke_credits = revoke_records;
5078c2ecf20Sopenharmony_ci
5088c2ecf20Sopenharmony_ci	err = start_this_handle(journal, handle, gfp_mask);
5098c2ecf20Sopenharmony_ci	if (err < 0) {
5108c2ecf20Sopenharmony_ci		if (handle->h_rsv_handle)
5118c2ecf20Sopenharmony_ci			jbd2_free_handle(handle->h_rsv_handle);
5128c2ecf20Sopenharmony_ci		jbd2_free_handle(handle);
5138c2ecf20Sopenharmony_ci		return ERR_PTR(err);
5148c2ecf20Sopenharmony_ci	}
5158c2ecf20Sopenharmony_ci	handle->h_type = type;
5168c2ecf20Sopenharmony_ci	handle->h_line_no = line_no;
5178c2ecf20Sopenharmony_ci	trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
5188c2ecf20Sopenharmony_ci				handle->h_transaction->t_tid, type,
5198c2ecf20Sopenharmony_ci				line_no, nblocks);
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci	return handle;
5228c2ecf20Sopenharmony_ci}
5238c2ecf20Sopenharmony_ciEXPORT_SYMBOL(jbd2__journal_start);
5248c2ecf20Sopenharmony_ci
5258c2ecf20Sopenharmony_ci
5268c2ecf20Sopenharmony_ci/**
5278c2ecf20Sopenharmony_ci * jbd2_journal_start() - Obtain a new handle.
5288c2ecf20Sopenharmony_ci * @journal: Journal to start transaction on.
5298c2ecf20Sopenharmony_ci * @nblocks: number of block buffer we might modify
5308c2ecf20Sopenharmony_ci *
5318c2ecf20Sopenharmony_ci * We make sure that the transaction can guarantee at least nblocks of
5328c2ecf20Sopenharmony_ci * modified buffers in the log.  We block until the log can guarantee
5338c2ecf20Sopenharmony_ci * that much space. Additionally, if rsv_blocks > 0, we also create another
5348c2ecf20Sopenharmony_ci * handle with rsv_blocks reserved blocks in the journal. This handle is
5358c2ecf20Sopenharmony_ci * stored in h_rsv_handle. It is not attached to any particular transaction
5368c2ecf20Sopenharmony_ci * and thus doesn't block transaction commit. If the caller uses this reserved
5378c2ecf20Sopenharmony_ci * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop()
5388c2ecf20Sopenharmony_ci * on the parent handle will dispose the reserved one. Reserved handle has to
5398c2ecf20Sopenharmony_ci * be converted to a normal handle using jbd2_journal_start_reserved() before
5408c2ecf20Sopenharmony_ci * it can be used.
5418c2ecf20Sopenharmony_ci *
5428c2ecf20Sopenharmony_ci * Return a pointer to a newly allocated handle, or an ERR_PTR() value
5438c2ecf20Sopenharmony_ci * on failure.
5448c2ecf20Sopenharmony_ci */
5458c2ecf20Sopenharmony_cihandle_t *jbd2_journal_start(journal_t *journal, int nblocks)
5468c2ecf20Sopenharmony_ci{
5478c2ecf20Sopenharmony_ci	return jbd2__journal_start(journal, nblocks, 0, 0, GFP_NOFS, 0, 0);
5488c2ecf20Sopenharmony_ci}
5498c2ecf20Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_start);
5508c2ecf20Sopenharmony_ci
5518c2ecf20Sopenharmony_cistatic void __jbd2_journal_unreserve_handle(handle_t *handle, transaction_t *t)
5528c2ecf20Sopenharmony_ci{
5538c2ecf20Sopenharmony_ci	journal_t *journal = handle->h_journal;
5548c2ecf20Sopenharmony_ci
5558c2ecf20Sopenharmony_ci	WARN_ON(!handle->h_reserved);
5568c2ecf20Sopenharmony_ci	sub_reserved_credits(journal, handle->h_total_credits);
5578c2ecf20Sopenharmony_ci	if (t)
5588c2ecf20Sopenharmony_ci		atomic_sub(handle->h_total_credits, &t->t_outstanding_credits);
5598c2ecf20Sopenharmony_ci}
5608c2ecf20Sopenharmony_ci
5618c2ecf20Sopenharmony_civoid jbd2_journal_free_reserved(handle_t *handle)
5628c2ecf20Sopenharmony_ci{
5638c2ecf20Sopenharmony_ci	journal_t *journal = handle->h_journal;
5648c2ecf20Sopenharmony_ci
5658c2ecf20Sopenharmony_ci	/* Get j_state_lock to pin running transaction if it exists */
5668c2ecf20Sopenharmony_ci	read_lock(&journal->j_state_lock);
5678c2ecf20Sopenharmony_ci	__jbd2_journal_unreserve_handle(handle, journal->j_running_transaction);
5688c2ecf20Sopenharmony_ci	read_unlock(&journal->j_state_lock);
5698c2ecf20Sopenharmony_ci	jbd2_free_handle(handle);
5708c2ecf20Sopenharmony_ci}
5718c2ecf20Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_free_reserved);
5728c2ecf20Sopenharmony_ci
5738c2ecf20Sopenharmony_ci/**
5748c2ecf20Sopenharmony_ci * jbd2_journal_start_reserved() - start reserved handle
5758c2ecf20Sopenharmony_ci * @handle: handle to start
5768c2ecf20Sopenharmony_ci * @type: for handle statistics
5778c2ecf20Sopenharmony_ci * @line_no: for handle statistics
5788c2ecf20Sopenharmony_ci *
5798c2ecf20Sopenharmony_ci * Start handle that has been previously reserved with jbd2_journal_reserve().
5808c2ecf20Sopenharmony_ci * This attaches @handle to the running transaction (or creates one if there's
5818c2ecf20Sopenharmony_ci * not transaction running). Unlike jbd2_journal_start() this function cannot
5828c2ecf20Sopenharmony_ci * block on journal commit, checkpointing, or similar stuff. It can block on
5838c2ecf20Sopenharmony_ci * memory allocation or frozen journal though.
5848c2ecf20Sopenharmony_ci *
5858c2ecf20Sopenharmony_ci * Return 0 on success, non-zero on error - handle is freed in that case.
5868c2ecf20Sopenharmony_ci */
5878c2ecf20Sopenharmony_ciint jbd2_journal_start_reserved(handle_t *handle, unsigned int type,
5888c2ecf20Sopenharmony_ci				unsigned int line_no)
5898c2ecf20Sopenharmony_ci{
5908c2ecf20Sopenharmony_ci	journal_t *journal = handle->h_journal;
5918c2ecf20Sopenharmony_ci	int ret = -EIO;
5928c2ecf20Sopenharmony_ci
5938c2ecf20Sopenharmony_ci	if (WARN_ON(!handle->h_reserved)) {
5948c2ecf20Sopenharmony_ci		/* Someone passed in normal handle? Just stop it. */
5958c2ecf20Sopenharmony_ci		jbd2_journal_stop(handle);
5968c2ecf20Sopenharmony_ci		return ret;
5978c2ecf20Sopenharmony_ci	}
5988c2ecf20Sopenharmony_ci	/*
5998c2ecf20Sopenharmony_ci	 * Usefulness of mixing of reserved and unreserved handles is
6008c2ecf20Sopenharmony_ci	 * questionable. So far nobody seems to need it so just error out.
6018c2ecf20Sopenharmony_ci	 */
6028c2ecf20Sopenharmony_ci	if (WARN_ON(current->journal_info)) {
6038c2ecf20Sopenharmony_ci		jbd2_journal_free_reserved(handle);
6048c2ecf20Sopenharmony_ci		return ret;
6058c2ecf20Sopenharmony_ci	}
6068c2ecf20Sopenharmony_ci
6078c2ecf20Sopenharmony_ci	handle->h_journal = NULL;
6088c2ecf20Sopenharmony_ci	/*
6098c2ecf20Sopenharmony_ci	 * GFP_NOFS is here because callers are likely from writeback or
6108c2ecf20Sopenharmony_ci	 * similarly constrained call sites
6118c2ecf20Sopenharmony_ci	 */
6128c2ecf20Sopenharmony_ci	ret = start_this_handle(journal, handle, GFP_NOFS);
6138c2ecf20Sopenharmony_ci	if (ret < 0) {
6148c2ecf20Sopenharmony_ci		handle->h_journal = journal;
6158c2ecf20Sopenharmony_ci		jbd2_journal_free_reserved(handle);
6168c2ecf20Sopenharmony_ci		return ret;
6178c2ecf20Sopenharmony_ci	}
6188c2ecf20Sopenharmony_ci	handle->h_type = type;
6198c2ecf20Sopenharmony_ci	handle->h_line_no = line_no;
6208c2ecf20Sopenharmony_ci	trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
6218c2ecf20Sopenharmony_ci				handle->h_transaction->t_tid, type,
6228c2ecf20Sopenharmony_ci				line_no, handle->h_total_credits);
6238c2ecf20Sopenharmony_ci	return 0;
6248c2ecf20Sopenharmony_ci}
6258c2ecf20Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_start_reserved);
6268c2ecf20Sopenharmony_ci
6278c2ecf20Sopenharmony_ci/**
6288c2ecf20Sopenharmony_ci * jbd2_journal_extend() - extend buffer credits.
6298c2ecf20Sopenharmony_ci * @handle:  handle to 'extend'
6308c2ecf20Sopenharmony_ci * @nblocks: nr blocks to try to extend by.
6318c2ecf20Sopenharmony_ci * @revoke_records: number of revoke records to try to extend by.
6328c2ecf20Sopenharmony_ci *
6338c2ecf20Sopenharmony_ci * Some transactions, such as large extends and truncates, can be done
6348c2ecf20Sopenharmony_ci * atomically all at once or in several stages.  The operation requests
6358c2ecf20Sopenharmony_ci * a credit for a number of buffer modifications in advance, but can
6368c2ecf20Sopenharmony_ci * extend its credit if it needs more.
6378c2ecf20Sopenharmony_ci *
6388c2ecf20Sopenharmony_ci * jbd2_journal_extend tries to give the running handle more buffer credits.
6398c2ecf20Sopenharmony_ci * It does not guarantee that allocation - this is a best-effort only.
6408c2ecf20Sopenharmony_ci * The calling process MUST be able to deal cleanly with a failure to
6418c2ecf20Sopenharmony_ci * extend here.
6428c2ecf20Sopenharmony_ci *
6438c2ecf20Sopenharmony_ci * Return 0 on success, non-zero on failure.
6448c2ecf20Sopenharmony_ci *
6458c2ecf20Sopenharmony_ci * return code < 0 implies an error
6468c2ecf20Sopenharmony_ci * return code > 0 implies normal transaction-full status.
6478c2ecf20Sopenharmony_ci */
6488c2ecf20Sopenharmony_ciint jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
6498c2ecf20Sopenharmony_ci{
6508c2ecf20Sopenharmony_ci	transaction_t *transaction = handle->h_transaction;
6518c2ecf20Sopenharmony_ci	journal_t *journal;
6528c2ecf20Sopenharmony_ci	int result;
6538c2ecf20Sopenharmony_ci	int wanted;
6548c2ecf20Sopenharmony_ci
6558c2ecf20Sopenharmony_ci	if (is_handle_aborted(handle))
6568c2ecf20Sopenharmony_ci		return -EROFS;
6578c2ecf20Sopenharmony_ci	journal = transaction->t_journal;
6588c2ecf20Sopenharmony_ci
6598c2ecf20Sopenharmony_ci	result = 1;
6608c2ecf20Sopenharmony_ci
6618c2ecf20Sopenharmony_ci	read_lock(&journal->j_state_lock);
6628c2ecf20Sopenharmony_ci
6638c2ecf20Sopenharmony_ci	/* Don't extend a locked-down transaction! */
6648c2ecf20Sopenharmony_ci	if (transaction->t_state != T_RUNNING) {
6658c2ecf20Sopenharmony_ci		jbd_debug(3, "denied handle %p %d blocks: "
6668c2ecf20Sopenharmony_ci			  "transaction not running\n", handle, nblocks);
6678c2ecf20Sopenharmony_ci		goto error_out;
6688c2ecf20Sopenharmony_ci	}
6698c2ecf20Sopenharmony_ci
6708c2ecf20Sopenharmony_ci	nblocks += DIV_ROUND_UP(
6718c2ecf20Sopenharmony_ci			handle->h_revoke_credits_requested + revoke_records,
6728c2ecf20Sopenharmony_ci			journal->j_revoke_records_per_block) -
6738c2ecf20Sopenharmony_ci		DIV_ROUND_UP(
6748c2ecf20Sopenharmony_ci			handle->h_revoke_credits_requested,
6758c2ecf20Sopenharmony_ci			journal->j_revoke_records_per_block);
6768c2ecf20Sopenharmony_ci	spin_lock(&transaction->t_handle_lock);
6778c2ecf20Sopenharmony_ci	wanted = atomic_add_return(nblocks,
6788c2ecf20Sopenharmony_ci				   &transaction->t_outstanding_credits);
6798c2ecf20Sopenharmony_ci
6808c2ecf20Sopenharmony_ci	if (wanted > journal->j_max_transaction_buffers) {
6818c2ecf20Sopenharmony_ci		jbd_debug(3, "denied handle %p %d blocks: "
6828c2ecf20Sopenharmony_ci			  "transaction too large\n", handle, nblocks);
6838c2ecf20Sopenharmony_ci		atomic_sub(nblocks, &transaction->t_outstanding_credits);
6848c2ecf20Sopenharmony_ci		goto unlock;
6858c2ecf20Sopenharmony_ci	}
6868c2ecf20Sopenharmony_ci
6878c2ecf20Sopenharmony_ci	trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
6888c2ecf20Sopenharmony_ci				 transaction->t_tid,
6898c2ecf20Sopenharmony_ci				 handle->h_type, handle->h_line_no,
6908c2ecf20Sopenharmony_ci				 handle->h_total_credits,
6918c2ecf20Sopenharmony_ci				 nblocks);
6928c2ecf20Sopenharmony_ci
6938c2ecf20Sopenharmony_ci	handle->h_total_credits += nblocks;
6948c2ecf20Sopenharmony_ci	handle->h_requested_credits += nblocks;
6958c2ecf20Sopenharmony_ci	handle->h_revoke_credits += revoke_records;
6968c2ecf20Sopenharmony_ci	handle->h_revoke_credits_requested += revoke_records;
6978c2ecf20Sopenharmony_ci	result = 0;
6988c2ecf20Sopenharmony_ci
6998c2ecf20Sopenharmony_ci	jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
7008c2ecf20Sopenharmony_ciunlock:
7018c2ecf20Sopenharmony_ci	spin_unlock(&transaction->t_handle_lock);
7028c2ecf20Sopenharmony_cierror_out:
7038c2ecf20Sopenharmony_ci	read_unlock(&journal->j_state_lock);
7048c2ecf20Sopenharmony_ci	return result;
7058c2ecf20Sopenharmony_ci}
7068c2ecf20Sopenharmony_ci
7078c2ecf20Sopenharmony_cistatic void stop_this_handle(handle_t *handle)
7088c2ecf20Sopenharmony_ci{
7098c2ecf20Sopenharmony_ci	transaction_t *transaction = handle->h_transaction;
7108c2ecf20Sopenharmony_ci	journal_t *journal = transaction->t_journal;
7118c2ecf20Sopenharmony_ci	int revokes;
7128c2ecf20Sopenharmony_ci
7138c2ecf20Sopenharmony_ci	J_ASSERT(journal_current_handle() == handle);
7148c2ecf20Sopenharmony_ci	J_ASSERT(atomic_read(&transaction->t_updates) > 0);
7158c2ecf20Sopenharmony_ci	current->journal_info = NULL;
7168c2ecf20Sopenharmony_ci	/*
7178c2ecf20Sopenharmony_ci	 * Subtract necessary revoke descriptor blocks from handle credits. We
7188c2ecf20Sopenharmony_ci	 * take care to account only for revoke descriptor blocks the
7198c2ecf20Sopenharmony_ci	 * transaction will really need as large sequences of transactions with
7208c2ecf20Sopenharmony_ci	 * small numbers of revokes are relatively common.
7218c2ecf20Sopenharmony_ci	 */
7228c2ecf20Sopenharmony_ci	revokes = handle->h_revoke_credits_requested - handle->h_revoke_credits;
7238c2ecf20Sopenharmony_ci	if (revokes) {
7248c2ecf20Sopenharmony_ci		int t_revokes, revoke_descriptors;
7258c2ecf20Sopenharmony_ci		int rr_per_blk = journal->j_revoke_records_per_block;
7268c2ecf20Sopenharmony_ci
7278c2ecf20Sopenharmony_ci		WARN_ON_ONCE(DIV_ROUND_UP(revokes, rr_per_blk)
7288c2ecf20Sopenharmony_ci				> handle->h_total_credits);
7298c2ecf20Sopenharmony_ci		t_revokes = atomic_add_return(revokes,
7308c2ecf20Sopenharmony_ci				&transaction->t_outstanding_revokes);
7318c2ecf20Sopenharmony_ci		revoke_descriptors =
7328c2ecf20Sopenharmony_ci			DIV_ROUND_UP(t_revokes, rr_per_blk) -
7338c2ecf20Sopenharmony_ci			DIV_ROUND_UP(t_revokes - revokes, rr_per_blk);
7348c2ecf20Sopenharmony_ci		handle->h_total_credits -= revoke_descriptors;
7358c2ecf20Sopenharmony_ci	}
7368c2ecf20Sopenharmony_ci	atomic_sub(handle->h_total_credits,
7378c2ecf20Sopenharmony_ci		   &transaction->t_outstanding_credits);
7388c2ecf20Sopenharmony_ci	if (handle->h_rsv_handle)
7398c2ecf20Sopenharmony_ci		__jbd2_journal_unreserve_handle(handle->h_rsv_handle,
7408c2ecf20Sopenharmony_ci						transaction);
7418c2ecf20Sopenharmony_ci	if (atomic_dec_and_test(&transaction->t_updates))
7428c2ecf20Sopenharmony_ci		wake_up(&journal->j_wait_updates);
7438c2ecf20Sopenharmony_ci
7448c2ecf20Sopenharmony_ci	rwsem_release(&journal->j_trans_commit_map, _THIS_IP_);
7458c2ecf20Sopenharmony_ci	/*
7468c2ecf20Sopenharmony_ci	 * Scope of the GFP_NOFS context is over here and so we can restore the
7478c2ecf20Sopenharmony_ci	 * original alloc context.
7488c2ecf20Sopenharmony_ci	 */
7498c2ecf20Sopenharmony_ci	memalloc_nofs_restore(handle->saved_alloc_context);
7508c2ecf20Sopenharmony_ci}
7518c2ecf20Sopenharmony_ci
7528c2ecf20Sopenharmony_ci/**
7538c2ecf20Sopenharmony_ci * jbd2__journal_restart() - restart a handle .
7548c2ecf20Sopenharmony_ci * @handle:  handle to restart
7558c2ecf20Sopenharmony_ci * @nblocks: nr credits requested
7568c2ecf20Sopenharmony_ci * @revoke_records: number of revoke record credits requested
7578c2ecf20Sopenharmony_ci * @gfp_mask: memory allocation flags (for start_this_handle)
7588c2ecf20Sopenharmony_ci *
7598c2ecf20Sopenharmony_ci * Restart a handle for a multi-transaction filesystem
7608c2ecf20Sopenharmony_ci * operation.
7618c2ecf20Sopenharmony_ci *
7628c2ecf20Sopenharmony_ci * If the jbd2_journal_extend() call above fails to grant new buffer credits
7638c2ecf20Sopenharmony_ci * to a running handle, a call to jbd2_journal_restart will commit the
7648c2ecf20Sopenharmony_ci * handle's transaction so far and reattach the handle to a new
7658c2ecf20Sopenharmony_ci * transaction capable of guaranteeing the requested number of
7668c2ecf20Sopenharmony_ci * credits. We preserve reserved handle if there's any attached to the
7678c2ecf20Sopenharmony_ci * passed in handle.
7688c2ecf20Sopenharmony_ci */
7698c2ecf20Sopenharmony_ciint jbd2__journal_restart(handle_t *handle, int nblocks, int revoke_records,
7708c2ecf20Sopenharmony_ci			  gfp_t gfp_mask)
7718c2ecf20Sopenharmony_ci{
7728c2ecf20Sopenharmony_ci	transaction_t *transaction = handle->h_transaction;
7738c2ecf20Sopenharmony_ci	journal_t *journal;
7748c2ecf20Sopenharmony_ci	tid_t		tid;
7758c2ecf20Sopenharmony_ci	int		need_to_start;
7768c2ecf20Sopenharmony_ci	int		ret;
7778c2ecf20Sopenharmony_ci
7788c2ecf20Sopenharmony_ci	/* If we've had an abort of any type, don't even think about
7798c2ecf20Sopenharmony_ci	 * actually doing the restart! */
7808c2ecf20Sopenharmony_ci	if (is_handle_aborted(handle))
7818c2ecf20Sopenharmony_ci		return 0;
7828c2ecf20Sopenharmony_ci	journal = transaction->t_journal;
7838c2ecf20Sopenharmony_ci	tid = transaction->t_tid;
7848c2ecf20Sopenharmony_ci
7858c2ecf20Sopenharmony_ci	/*
7868c2ecf20Sopenharmony_ci	 * First unlink the handle from its current transaction, and start the
7878c2ecf20Sopenharmony_ci	 * commit on that.
7888c2ecf20Sopenharmony_ci	 */
7898c2ecf20Sopenharmony_ci	jbd_debug(2, "restarting handle %p\n", handle);
7908c2ecf20Sopenharmony_ci	stop_this_handle(handle);
7918c2ecf20Sopenharmony_ci	handle->h_transaction = NULL;
7928c2ecf20Sopenharmony_ci
7938c2ecf20Sopenharmony_ci	/*
7948c2ecf20Sopenharmony_ci	 * TODO: If we use READ_ONCE / WRITE_ONCE for j_commit_request we can
7958c2ecf20Sopenharmony_ci 	 * get rid of pointless j_state_lock traffic like this.
7968c2ecf20Sopenharmony_ci	 */
7978c2ecf20Sopenharmony_ci	read_lock(&journal->j_state_lock);
7988c2ecf20Sopenharmony_ci	need_to_start = !tid_geq(journal->j_commit_request, tid);
7998c2ecf20Sopenharmony_ci	read_unlock(&journal->j_state_lock);
8008c2ecf20Sopenharmony_ci	if (need_to_start)
8018c2ecf20Sopenharmony_ci		jbd2_log_start_commit(journal, tid);
8028c2ecf20Sopenharmony_ci	handle->h_total_credits = nblocks +
8038c2ecf20Sopenharmony_ci		DIV_ROUND_UP(revoke_records,
8048c2ecf20Sopenharmony_ci			     journal->j_revoke_records_per_block);
8058c2ecf20Sopenharmony_ci	handle->h_revoke_credits = revoke_records;
8068c2ecf20Sopenharmony_ci	ret = start_this_handle(journal, handle, gfp_mask);
8078c2ecf20Sopenharmony_ci	trace_jbd2_handle_restart(journal->j_fs_dev->bd_dev,
8088c2ecf20Sopenharmony_ci				 ret ? 0 : handle->h_transaction->t_tid,
8098c2ecf20Sopenharmony_ci				 handle->h_type, handle->h_line_no,
8108c2ecf20Sopenharmony_ci				 handle->h_total_credits);
8118c2ecf20Sopenharmony_ci	return ret;
8128c2ecf20Sopenharmony_ci}
8138c2ecf20Sopenharmony_ciEXPORT_SYMBOL(jbd2__journal_restart);
8148c2ecf20Sopenharmony_ci
8158c2ecf20Sopenharmony_ci
8168c2ecf20Sopenharmony_ciint jbd2_journal_restart(handle_t *handle, int nblocks)
8178c2ecf20Sopenharmony_ci{
8188c2ecf20Sopenharmony_ci	return jbd2__journal_restart(handle, nblocks, 0, GFP_NOFS);
8198c2ecf20Sopenharmony_ci}
8208c2ecf20Sopenharmony_ciEXPORT_SYMBOL(jbd2_journal_restart);
8218c2ecf20Sopenharmony_ci
8228c2ecf20Sopenharmony_ci/**
8238c2ecf20Sopenharmony_ci * jbd2_journal_lock_updates () - establish a transaction barrier.
8248c2ecf20Sopenharmony_ci * @journal:  Journal to establish a barrier on.
8258c2ecf20Sopenharmony_ci *
8268c2ecf20Sopenharmony_ci * This locks out any further updates from being started, and blocks
8278c2ecf20Sopenharmony_ci * until all existing updates have completed, returning only once the
8288c2ecf20Sopenharmony_ci * journal is in a quiescent state with no updates running.
8298c2ecf20Sopenharmony_ci *
8308c2ecf20Sopenharmony_ci * The journal lock should not be held on entry.
8318c2ecf20Sopenharmony_ci */
8328c2ecf20Sopenharmony_civoid jbd2_journal_lock_updates(journal_t *journal)
8338c2ecf20Sopenharmony_ci{
8348c2ecf20Sopenharmony_ci	DEFINE_WAIT(wait);
8358c2ecf20Sopenharmony_ci
8368c2ecf20Sopenharmony_ci	jbd2_might_wait_for_commit(journal);
8378c2ecf20Sopenharmony_ci
8388c2ecf20Sopenharmony_ci	write_lock(&journal->j_state_lock);
8398c2ecf20Sopenharmony_ci	++journal->j_barrier_count;
8408c2ecf20Sopenharmony_ci
8418c2ecf20Sopenharmony_ci	/* Wait until there are no reserved handles */
8428c2ecf20Sopenharmony_ci	if (atomic_read(&journal->j_reserved_credits)) {
8438c2ecf20Sopenharmony_ci		write_unlock(&journal->j_state_lock);
8448c2ecf20Sopenharmony_ci		wait_event(journal->j_wait_reserved,
8458c2ecf20Sopenharmony_ci			   atomic_read(&journal->j_reserved_credits) == 0);
8468c2ecf20Sopenharmony_ci		write_lock(&journal->j_state_lock);
8478c2ecf20Sopenharmony_ci	}
8488c2ecf20Sopenharmony_ci
8498c2ecf20Sopenharmony_ci	/* Wait until there are no running updates */
8508c2ecf20Sopenharmony_ci	while (1) {
8518c2ecf20Sopenharmony_ci		transaction_t *transaction = journal->j_running_transaction;
8528c2ecf20Sopenharmony_ci
8538c2ecf20Sopenharmony_ci		if (!transaction)
8548c2ecf20Sopenharmony_ci			break;
8558c2ecf20Sopenharmony_ci
8568c2ecf20Sopenharmony_ci		spin_lock(&transaction->t_handle_lock);
8578c2ecf20Sopenharmony_ci		prepare_to_wait(&journal->j_wait_updates, &wait,
8588c2ecf20Sopenharmony_ci				TASK_UNINTERRUPTIBLE);
8598c2ecf20Sopenharmony_ci		if (!atomic_read(&transaction->t_updates)) {
8608c2ecf20Sopenharmony_ci			spin_unlock(&transaction->t_handle_lock);
8618c2ecf20Sopenharmony_ci			finish_wait(&journal->j_wait_updates, &wait);
8628c2ecf20Sopenharmony_ci			break;
8638c2ecf20Sopenharmony_ci		}
8648c2ecf20Sopenharmony_ci		spin_unlock(&transaction->t_handle_lock);
8658c2ecf20Sopenharmony_ci		write_unlock(&journal->j_state_lock);
8668c2ecf20Sopenharmony_ci		schedule();
8678c2ecf20Sopenharmony_ci		finish_wait(&journal->j_wait_updates, &wait);
8688c2ecf20Sopenharmony_ci		write_lock(&journal->j_state_lock);
8698c2ecf20Sopenharmony_ci	}
8708c2ecf20Sopenharmony_ci	write_unlock(&journal->j_state_lock);
8718c2ecf20Sopenharmony_ci
8728c2ecf20Sopenharmony_ci	/*
8738c2ecf20Sopenharmony_ci	 * We have now established a barrier against other normal updates, but
8748c2ecf20Sopenharmony_ci	 * we also need to barrier against other jbd2_journal_lock_updates() calls
8758c2ecf20Sopenharmony_ci	 * to make sure that we serialise special journal-locked operations
8768c2ecf20Sopenharmony_ci	 * too.
8778c2ecf20Sopenharmony_ci	 */
8788c2ecf20Sopenharmony_ci	mutex_lock(&journal->j_barrier);
8798c2ecf20Sopenharmony_ci}
8808c2ecf20Sopenharmony_ci
8818c2ecf20Sopenharmony_ci/**
8828c2ecf20Sopenharmony_ci * jbd2_journal_unlock_updates () - release barrier
8838c2ecf20Sopenharmony_ci * @journal:  Journal to release the barrier on.
8848c2ecf20Sopenharmony_ci *
8858c2ecf20Sopenharmony_ci * Release a transaction barrier obtained with jbd2_journal_lock_updates().
8868c2ecf20Sopenharmony_ci *
8878c2ecf20Sopenharmony_ci * Should be called without the journal lock held.
8888c2ecf20Sopenharmony_ci */
8898c2ecf20Sopenharmony_civoid jbd2_journal_unlock_updates (journal_t *journal)
8908c2ecf20Sopenharmony_ci{
8918c2ecf20Sopenharmony_ci	J_ASSERT(journal->j_barrier_count != 0);
8928c2ecf20Sopenharmony_ci
8938c2ecf20Sopenharmony_ci	mutex_unlock(&journal->j_barrier);
8948c2ecf20Sopenharmony_ci	write_lock(&journal->j_state_lock);
8958c2ecf20Sopenharmony_ci	--journal->j_barrier_count;
8968c2ecf20Sopenharmony_ci	write_unlock(&journal->j_state_lock);
8978c2ecf20Sopenharmony_ci	wake_up_all(&journal->j_wait_transaction_locked);
8988c2ecf20Sopenharmony_ci}
8998c2ecf20Sopenharmony_ci
9008c2ecf20Sopenharmony_cistatic void warn_dirty_buffer(struct buffer_head *bh)
9018c2ecf20Sopenharmony_ci{
9028c2ecf20Sopenharmony_ci	printk(KERN_WARNING
9038c2ecf20Sopenharmony_ci	       "JBD2: Spotted dirty metadata buffer (dev = %pg, blocknr = %llu). "
9048c2ecf20Sopenharmony_ci	       "There's a risk of filesystem corruption in case of system "
9058c2ecf20Sopenharmony_ci	       "crash.\n",
9068c2ecf20Sopenharmony_ci	       bh->b_bdev, (unsigned long long)bh->b_blocknr);
9078c2ecf20Sopenharmony_ci}
9088c2ecf20Sopenharmony_ci
9098c2ecf20Sopenharmony_ci/* Call t_frozen trigger and copy buffer data into jh->b_frozen_data. */
9108c2ecf20Sopenharmony_cistatic void jbd2_freeze_jh_data(struct journal_head *jh)
9118c2ecf20Sopenharmony_ci{
9128c2ecf20Sopenharmony_ci	struct page *page;
9138c2ecf20Sopenharmony_ci	int offset;
9148c2ecf20Sopenharmony_ci	char *source;
9158c2ecf20Sopenharmony_ci	struct buffer_head *bh = jh2bh(jh);
9168c2ecf20Sopenharmony_ci
9178c2ecf20Sopenharmony_ci	J_EXPECT_JH(jh, buffer_uptodate(bh), "Possible IO failure.\n");
9188c2ecf20Sopenharmony_ci	page = bh->b_page;
9198c2ecf20Sopenharmony_ci	offset = offset_in_page(bh->b_data);
9208c2ecf20Sopenharmony_ci	source = kmap_atomic(page);
9218c2ecf20Sopenharmony_ci	/* Fire data frozen trigger just before we copy the data */
9228c2ecf20Sopenharmony_ci	jbd2_buffer_frozen_trigger(jh, source + offset, jh->b_triggers);
9238c2ecf20Sopenharmony_ci	memcpy(jh->b_frozen_data, source + offset, bh->b_size);
9248c2ecf20Sopenharmony_ci	kunmap_atomic(source);
9258c2ecf20Sopenharmony_ci
9268c2ecf20Sopenharmony_ci	/*
9278c2ecf20Sopenharmony_ci	 * Now that the frozen data is saved off, we need to store any matching
9288c2ecf20Sopenharmony_ci	 * triggers.
9298c2ecf20Sopenharmony_ci	 */
9308c2ecf20Sopenharmony_ci	jh->b_frozen_triggers = jh->b_triggers;
9318c2ecf20Sopenharmony_ci}
9328c2ecf20Sopenharmony_ci
9338c2ecf20Sopenharmony_ci/*
9348c2ecf20Sopenharmony_ci * If the buffer is already part of the current transaction, then there
9358c2ecf20Sopenharmony_ci * is nothing we need to do.  If it is already part of a prior
9368c2ecf20Sopenharmony_ci * transaction which we are still committing to disk, then we need to
9378c2ecf20Sopenharmony_ci * make sure that we do not overwrite the old copy: we do copy-out to
9388c2ecf20Sopenharmony_ci * preserve the copy going to disk.  We also account the buffer against
9398c2ecf20Sopenharmony_ci * the handle's metadata buffer credits (unless the buffer is already
9408c2ecf20Sopenharmony_ci * part of the transaction, that is).
9418c2ecf20Sopenharmony_ci *
9428c2ecf20Sopenharmony_ci */
9438c2ecf20Sopenharmony_cistatic int
9448c2ecf20Sopenharmony_cido_get_write_access(handle_t *handle, struct journal_head *jh,
9458c2ecf20Sopenharmony_ci			int force_copy)
9468c2ecf20Sopenharmony_ci{
9478c2ecf20Sopenharmony_ci	struct buffer_head *bh;
9488c2ecf20Sopenharmony_ci	transaction_t *transaction = handle->h_transaction;
9498c2ecf20Sopenharmony_ci	journal_t *journal;
9508c2ecf20Sopenharmony_ci	int error;
9518c2ecf20Sopenharmony_ci	char *frozen_buffer = NULL;
9528c2ecf20Sopenharmony_ci	unsigned long start_lock, time_lock;
9538c2ecf20Sopenharmony_ci
9548c2ecf20Sopenharmony_ci	journal = transaction->t_journal;
9558c2ecf20Sopenharmony_ci
9568c2ecf20Sopenharmony_ci	jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
9578c2ecf20Sopenharmony_ci
9588c2ecf20Sopenharmony_ci	JBUFFER_TRACE(jh, "entry");
9598c2ecf20Sopenharmony_cirepeat:
9608c2ecf20Sopenharmony_ci	bh = jh2bh(jh);
9618c2ecf20Sopenharmony_ci
9628c2ecf20Sopenharmony_ci	/* @@@ Need to check for errors here at some point. */
9638c2ecf20Sopenharmony_ci
9648c2ecf20Sopenharmony_ci 	start_lock = jiffies;
9658c2ecf20Sopenharmony_ci	lock_buffer(bh);
9668c2ecf20Sopenharmony_ci	spin_lock(&jh->b_state_lock);
9678c2ecf20Sopenharmony_ci
9688c2ecf20Sopenharmony_ci	/* If it takes too long to lock the buffer, trace it */
9698c2ecf20Sopenharmony_ci	time_lock = jbd2_time_diff(start_lock, jiffies);
9708c2ecf20Sopenharmony_ci	if (time_lock > HZ/10)
9718c2ecf20Sopenharmony_ci		trace_jbd2_lock_buffer_stall(bh->b_bdev->bd_dev,
9728c2ecf20Sopenharmony_ci			jiffies_to_msecs(time_lock));
9738c2ecf20Sopenharmony_ci
9748c2ecf20Sopenharmony_ci	/* We now hold the buffer lock so it is safe to query the buffer
9758c2ecf20Sopenharmony_ci	 * state.  Is the buffer dirty?
9768c2ecf20Sopenharmony_ci	 *
9778c2ecf20Sopenharmony_ci	 * If so, there are two possibilities.  The buffer may be
9788c2ecf20Sopenharmony_ci	 * non-journaled, and undergoing a quite legitimate writeback.
9798c2ecf20Sopenharmony_ci	 * Otherwise, it is journaled, and we don't expect dirty buffers
9808c2ecf20Sopenharmony_ci	 * in that state (the buffers should be marked JBD_Dirty
9818c2ecf20Sopenharmony_ci	 * instead.)  So either the IO is being done under our own
9828c2ecf20Sopenharmony_ci	 * control and this is a bug, or it's a third party IO such as
9838c2ecf20Sopenharmony_ci	 * dump(8) (which may leave the buffer scheduled for read ---
9848c2ecf20Sopenharmony_ci	 * ie. locked but not dirty) or tune2fs (which may actually have
9858c2ecf20Sopenharmony_ci	 * the buffer dirtied, ugh.)  */
9868c2ecf20Sopenharmony_ci
9878c2ecf20Sopenharmony_ci	if (buffer_dirty(bh) && jh->b_transaction) {
9888c2ecf20Sopenharmony_ci		warn_dirty_buffer(bh);
9898c2ecf20Sopenharmony_ci		/*
9908c2ecf20Sopenharmony_ci		 * We need to clean the dirty flag and we must do it under the
9918c2ecf20Sopenharmony_ci		 * buffer lock to be sure we don't race with running write-out.
9928c2ecf20Sopenharmony_ci		 */
9938c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "Journalling dirty buffer");
9948c2ecf20Sopenharmony_ci		clear_buffer_dirty(bh);
9958c2ecf20Sopenharmony_ci		/*
9968c2ecf20Sopenharmony_ci		 * The buffer is going to be added to BJ_Reserved list now and
9978c2ecf20Sopenharmony_ci		 * nothing guarantees jbd2_journal_dirty_metadata() will be
9988c2ecf20Sopenharmony_ci		 * ever called for it. So we need to set jbddirty bit here to
9998c2ecf20Sopenharmony_ci		 * make sure the buffer is dirtied and written out when the
10008c2ecf20Sopenharmony_ci		 * journaling machinery is done with it.
10018c2ecf20Sopenharmony_ci		 */
10028c2ecf20Sopenharmony_ci		set_buffer_jbddirty(bh);
10038c2ecf20Sopenharmony_ci	}
10048c2ecf20Sopenharmony_ci
10058c2ecf20Sopenharmony_ci	error = -EROFS;
10068c2ecf20Sopenharmony_ci	if (is_handle_aborted(handle)) {
10078c2ecf20Sopenharmony_ci		spin_unlock(&jh->b_state_lock);
10088c2ecf20Sopenharmony_ci		unlock_buffer(bh);
10098c2ecf20Sopenharmony_ci		goto out;
10108c2ecf20Sopenharmony_ci	}
10118c2ecf20Sopenharmony_ci	error = 0;
10128c2ecf20Sopenharmony_ci
10138c2ecf20Sopenharmony_ci	/*
10148c2ecf20Sopenharmony_ci	 * The buffer is already part of this transaction if b_transaction or
10158c2ecf20Sopenharmony_ci	 * b_next_transaction points to it
10168c2ecf20Sopenharmony_ci	 */
10178c2ecf20Sopenharmony_ci	if (jh->b_transaction == transaction ||
10188c2ecf20Sopenharmony_ci	    jh->b_next_transaction == transaction) {
10198c2ecf20Sopenharmony_ci		unlock_buffer(bh);
10208c2ecf20Sopenharmony_ci		goto done;
10218c2ecf20Sopenharmony_ci	}
10228c2ecf20Sopenharmony_ci
10238c2ecf20Sopenharmony_ci	/*
10248c2ecf20Sopenharmony_ci	 * this is the first time this transaction is touching this buffer,
10258c2ecf20Sopenharmony_ci	 * reset the modified flag
10268c2ecf20Sopenharmony_ci	 */
10278c2ecf20Sopenharmony_ci	jh->b_modified = 0;
10288c2ecf20Sopenharmony_ci
10298c2ecf20Sopenharmony_ci	/*
10308c2ecf20Sopenharmony_ci	 * If the buffer is not journaled right now, we need to make sure it
10318c2ecf20Sopenharmony_ci	 * doesn't get written to disk before the caller actually commits the
10328c2ecf20Sopenharmony_ci	 * new data
10338c2ecf20Sopenharmony_ci	 */
10348c2ecf20Sopenharmony_ci	if (!jh->b_transaction) {
10358c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "no transaction");
10368c2ecf20Sopenharmony_ci		J_ASSERT_JH(jh, !jh->b_next_transaction);
10378c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "file as BJ_Reserved");
10388c2ecf20Sopenharmony_ci		/*
10398c2ecf20Sopenharmony_ci		 * Make sure all stores to jh (b_modified, b_frozen_data) are
10408c2ecf20Sopenharmony_ci		 * visible before attaching it to the running transaction.
10418c2ecf20Sopenharmony_ci		 * Paired with barrier in jbd2_write_access_granted()
10428c2ecf20Sopenharmony_ci		 */
10438c2ecf20Sopenharmony_ci		smp_wmb();
10448c2ecf20Sopenharmony_ci		spin_lock(&journal->j_list_lock);
10458c2ecf20Sopenharmony_ci		if (test_clear_buffer_dirty(bh)) {
10468c2ecf20Sopenharmony_ci			/*
10478c2ecf20Sopenharmony_ci			 * Execute buffer dirty clearing and jh->b_transaction
10488c2ecf20Sopenharmony_ci			 * assignment under journal->j_list_lock locked to
10498c2ecf20Sopenharmony_ci			 * prevent bh being removed from checkpoint list if
10508c2ecf20Sopenharmony_ci			 * the buffer is in an intermediate state (not dirty
10518c2ecf20Sopenharmony_ci			 * and jh->b_transaction is NULL).
10528c2ecf20Sopenharmony_ci			 */
10538c2ecf20Sopenharmony_ci			JBUFFER_TRACE(jh, "Journalling dirty buffer");
10548c2ecf20Sopenharmony_ci			set_buffer_jbddirty(bh);
10558c2ecf20Sopenharmony_ci		}
10568c2ecf20Sopenharmony_ci		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
10578c2ecf20Sopenharmony_ci		spin_unlock(&journal->j_list_lock);
10588c2ecf20Sopenharmony_ci		unlock_buffer(bh);
10598c2ecf20Sopenharmony_ci		goto done;
10608c2ecf20Sopenharmony_ci	}
10618c2ecf20Sopenharmony_ci	unlock_buffer(bh);
10628c2ecf20Sopenharmony_ci
10638c2ecf20Sopenharmony_ci	/*
10648c2ecf20Sopenharmony_ci	 * If there is already a copy-out version of this buffer, then we don't
10658c2ecf20Sopenharmony_ci	 * need to make another one
10668c2ecf20Sopenharmony_ci	 */
10678c2ecf20Sopenharmony_ci	if (jh->b_frozen_data) {
10688c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "has frozen data");
10698c2ecf20Sopenharmony_ci		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
10708c2ecf20Sopenharmony_ci		goto attach_next;
10718c2ecf20Sopenharmony_ci	}
10728c2ecf20Sopenharmony_ci
10738c2ecf20Sopenharmony_ci	JBUFFER_TRACE(jh, "owned by older transaction");
10748c2ecf20Sopenharmony_ci	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
10758c2ecf20Sopenharmony_ci	J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction);
10768c2ecf20Sopenharmony_ci
10778c2ecf20Sopenharmony_ci	/*
10788c2ecf20Sopenharmony_ci	 * There is one case we have to be very careful about.  If the
10798c2ecf20Sopenharmony_ci	 * committing transaction is currently writing this buffer out to disk
10808c2ecf20Sopenharmony_ci	 * and has NOT made a copy-out, then we cannot modify the buffer
10818c2ecf20Sopenharmony_ci	 * contents at all right now.  The essence of copy-out is that it is
10828c2ecf20Sopenharmony_ci	 * the extra copy, not the primary copy, which gets journaled.  If the
10838c2ecf20Sopenharmony_ci	 * primary copy is already going to disk then we cannot do copy-out
10848c2ecf20Sopenharmony_ci	 * here.
10858c2ecf20Sopenharmony_ci	 */
10868c2ecf20Sopenharmony_ci	if (buffer_shadow(bh)) {
10878c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "on shadow: sleep");
10888c2ecf20Sopenharmony_ci		spin_unlock(&jh->b_state_lock);
10898c2ecf20Sopenharmony_ci		wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
10908c2ecf20Sopenharmony_ci		goto repeat;
10918c2ecf20Sopenharmony_ci	}
10928c2ecf20Sopenharmony_ci
10938c2ecf20Sopenharmony_ci	/*
10948c2ecf20Sopenharmony_ci	 * Only do the copy if the currently-owning transaction still needs it.
10958c2ecf20Sopenharmony_ci	 * If buffer isn't on BJ_Metadata list, the committing transaction is
10968c2ecf20Sopenharmony_ci	 * past that stage (here we use the fact that BH_Shadow is set under
10978c2ecf20Sopenharmony_ci	 * bh_state lock together with refiling to BJ_Shadow list and at this
10988c2ecf20Sopenharmony_ci	 * point we know the buffer doesn't have BH_Shadow set).
10998c2ecf20Sopenharmony_ci	 *
11008c2ecf20Sopenharmony_ci	 * Subtle point, though: if this is a get_undo_access, then we will be
11018c2ecf20Sopenharmony_ci	 * relying on the frozen_data to contain the new value of the
11028c2ecf20Sopenharmony_ci	 * committed_data record after the transaction, so we HAVE to force the
11038c2ecf20Sopenharmony_ci	 * frozen_data copy in that case.
11048c2ecf20Sopenharmony_ci	 */
11058c2ecf20Sopenharmony_ci	if (jh->b_jlist == BJ_Metadata || force_copy) {
11068c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "generate frozen data");
11078c2ecf20Sopenharmony_ci		if (!frozen_buffer) {
11088c2ecf20Sopenharmony_ci			JBUFFER_TRACE(jh, "allocate memory for buffer");
11098c2ecf20Sopenharmony_ci			spin_unlock(&jh->b_state_lock);
11108c2ecf20Sopenharmony_ci			frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
11118c2ecf20Sopenharmony_ci						   GFP_NOFS | __GFP_NOFAIL);
11128c2ecf20Sopenharmony_ci			goto repeat;
11138c2ecf20Sopenharmony_ci		}
11148c2ecf20Sopenharmony_ci		jh->b_frozen_data = frozen_buffer;
11158c2ecf20Sopenharmony_ci		frozen_buffer = NULL;
11168c2ecf20Sopenharmony_ci		jbd2_freeze_jh_data(jh);
11178c2ecf20Sopenharmony_ci	}
11188c2ecf20Sopenharmony_ciattach_next:
11198c2ecf20Sopenharmony_ci	/*
11208c2ecf20Sopenharmony_ci	 * Make sure all stores to jh (b_modified, b_frozen_data) are visible
11218c2ecf20Sopenharmony_ci	 * before attaching it to the running transaction. Paired with barrier
11228c2ecf20Sopenharmony_ci	 * in jbd2_write_access_granted()
11238c2ecf20Sopenharmony_ci	 */
11248c2ecf20Sopenharmony_ci	smp_wmb();
11258c2ecf20Sopenharmony_ci	jh->b_next_transaction = transaction;
11268c2ecf20Sopenharmony_ci
11278c2ecf20Sopenharmony_cidone:
11288c2ecf20Sopenharmony_ci	spin_unlock(&jh->b_state_lock);
11298c2ecf20Sopenharmony_ci
11308c2ecf20Sopenharmony_ci	/*
11318c2ecf20Sopenharmony_ci	 * If we are about to journal a buffer, then any revoke pending on it is
11328c2ecf20Sopenharmony_ci	 * no longer valid
11338c2ecf20Sopenharmony_ci	 */
11348c2ecf20Sopenharmony_ci	jbd2_journal_cancel_revoke(handle, jh);
11358c2ecf20Sopenharmony_ci
11368c2ecf20Sopenharmony_ciout:
11378c2ecf20Sopenharmony_ci	if (unlikely(frozen_buffer))	/* It's usually NULL */
11388c2ecf20Sopenharmony_ci		jbd2_free(frozen_buffer, bh->b_size);
11398c2ecf20Sopenharmony_ci
11408c2ecf20Sopenharmony_ci	JBUFFER_TRACE(jh, "exit");
11418c2ecf20Sopenharmony_ci	return error;
11428c2ecf20Sopenharmony_ci}
11438c2ecf20Sopenharmony_ci
11448c2ecf20Sopenharmony_ci/*
 * Fast, lockless check whether buffer is already attached to the required
 * transaction.
 *
 * Returns true only when the journal_head found through bh->b_private has
 * b_transaction or b_next_transaction equal to handle->h_transaction, still
 * points back at @bh, and (when @undo is set) already has b_committed_data.
 * Dirty buffers and buffers without a journal_head always yield false, in
 * which case the callers in this file fall back to do_get_write_access().
 * No spinlock is taken here; the checks run under rcu_read_lock().
 */
11458c2ecf20Sopenharmony_cistatic bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh,
11468c2ecf20Sopenharmony_ci							bool undo)
11478c2ecf20Sopenharmony_ci{
11488c2ecf20Sopenharmony_ci	struct journal_head *jh;
11498c2ecf20Sopenharmony_ci	bool ret = false;
11508c2ecf20Sopenharmony_ci
11518c2ecf20Sopenharmony_ci	/* Dirty buffers require special handling... */
11528c2ecf20Sopenharmony_ci	if (buffer_dirty(bh))
11538c2ecf20Sopenharmony_ci		return false;
11548c2ecf20Sopenharmony_ci
11558c2ecf20Sopenharmony_ci	/*
11568c2ecf20Sopenharmony_ci	 * RCU protects us from dereferencing freed pages. So the checks we do
11578c2ecf20Sopenharmony_ci	 * are guaranteed not to oops. However the jh slab object can get freed
11588c2ecf20Sopenharmony_ci	 * & reallocated while we work with it. So we have to be careful. When
11598c2ecf20Sopenharmony_ci	 * we see jh attached to the running transaction, we know it must stay
11608c2ecf20Sopenharmony_ci	 * so until the transaction is committed. Thus jh won't be freed and
11618c2ecf20Sopenharmony_ci	 * will be attached to the same bh while we run.  However it can
11628c2ecf20Sopenharmony_ci	 * happen jh gets freed, reallocated, and attached to the transaction
11638c2ecf20Sopenharmony_ci	 * just after we get pointer to it from bh. So we have to be careful
11648c2ecf20Sopenharmony_ci	 * and recheck jh still belongs to our bh before we return success.
11658c2ecf20Sopenharmony_ci	 */
11668c2ecf20Sopenharmony_ci	rcu_read_lock();
11678c2ecf20Sopenharmony_ci	if (!buffer_jbd(bh))
11688c2ecf20Sopenharmony_ci		goto out;
11698c2ecf20Sopenharmony_ci	/* This should be bh2jh() but that doesn't work with inline functions */
11708c2ecf20Sopenharmony_ci	jh = READ_ONCE(bh->b_private);
11718c2ecf20Sopenharmony_ci	if (!jh)
11728c2ecf20Sopenharmony_ci		goto out;
11738c2ecf20Sopenharmony_ci	/* For undo access buffer must have data copied */
11748c2ecf20Sopenharmony_ci	if (undo && !jh->b_committed_data)
11758c2ecf20Sopenharmony_ci		goto out;
	/* The jh must be owned (now or next) by this handle's transaction. */
11768c2ecf20Sopenharmony_ci	if (READ_ONCE(jh->b_transaction) != handle->h_transaction &&
11778c2ecf20Sopenharmony_ci	    READ_ONCE(jh->b_next_transaction) != handle->h_transaction)
11788c2ecf20Sopenharmony_ci		goto out;
11798c2ecf20Sopenharmony_ci	/*
11808c2ecf20Sopenharmony_ci	 * There are two reasons for the barrier here:
11818c2ecf20Sopenharmony_ci	 * 1) Make sure to fetch b_bh after we did previous checks so that we
11828c2ecf20Sopenharmony_ci	 * detect when jh went through free, realloc, attach to transaction
11838c2ecf20Sopenharmony_ci	 * while we were checking. Paired with implicit barrier in that path.
11848c2ecf20Sopenharmony_ci	 * 2) So that access to bh done after jbd2_write_access_granted()
11858c2ecf20Sopenharmony_ci	 * doesn't get reordered and see inconsistent state of concurrent
11868c2ecf20Sopenharmony_ci	 * do_get_write_access().
11878c2ecf20Sopenharmony_ci	 */
11888c2ecf20Sopenharmony_ci	smp_mb();
	/* Recheck the back pointer: a recycled jh would point at another bh. */
11898c2ecf20Sopenharmony_ci	if (unlikely(jh->b_bh != bh))
11908c2ecf20Sopenharmony_ci		goto out;
11918c2ecf20Sopenharmony_ci	ret = true;
11928c2ecf20Sopenharmony_ciout:
11938c2ecf20Sopenharmony_ci	rcu_read_unlock();
11948c2ecf20Sopenharmony_ci	return ret;
11958c2ecf20Sopenharmony_ci}
11968c2ecf20Sopenharmony_ci
11978c2ecf20Sopenharmony_ci/**
11988c2ecf20Sopenharmony_ci * jbd2_journal_get_write_access() - notify intent to modify a buffer
11998c2ecf20Sopenharmony_ci *				     for metadata (not data) update.
12008c2ecf20Sopenharmony_ci * @handle: transaction to add buffer modifications to
12018c2ecf20Sopenharmony_ci * @bh:     bh to be used for metadata writes
12028c2ecf20Sopenharmony_ci *
12038c2ecf20Sopenharmony_ci * Returns: error code or 0 on success.
12048c2ecf20Sopenharmony_ci *
12058c2ecf20Sopenharmony_ci * In full data journalling mode the buffer may be of type BJ_AsyncData,
12068c2ecf20Sopenharmony_ci * because we're ``write()ing`` a buffer which is also part of a shared mapping.
12078c2ecf20Sopenharmony_ci */
12088c2ecf20Sopenharmony_ci
12098c2ecf20Sopenharmony_ciint jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
12108c2ecf20Sopenharmony_ci{
12118c2ecf20Sopenharmony_ci	struct journal_head *jh;
12128c2ecf20Sopenharmony_ci	int rc;
12138c2ecf20Sopenharmony_ci
12148c2ecf20Sopenharmony_ci	if (is_handle_aborted(handle))
12158c2ecf20Sopenharmony_ci		return -EROFS;
12168c2ecf20Sopenharmony_ci
12178c2ecf20Sopenharmony_ci	if (jbd2_write_access_granted(handle, bh, false))
12188c2ecf20Sopenharmony_ci		return 0;
12198c2ecf20Sopenharmony_ci
12208c2ecf20Sopenharmony_ci	jh = jbd2_journal_add_journal_head(bh);
12218c2ecf20Sopenharmony_ci	/* We do not want to get caught playing with fields which the
12228c2ecf20Sopenharmony_ci	 * log thread also manipulates.  Make sure that the buffer
12238c2ecf20Sopenharmony_ci	 * completes any outstanding IO before proceeding. */
12248c2ecf20Sopenharmony_ci	rc = do_get_write_access(handle, jh, 0);
12258c2ecf20Sopenharmony_ci	jbd2_journal_put_journal_head(jh);
12268c2ecf20Sopenharmony_ci	return rc;
12278c2ecf20Sopenharmony_ci}
12288c2ecf20Sopenharmony_ci
12298c2ecf20Sopenharmony_ci
12308c2ecf20Sopenharmony_ci/*
12318c2ecf20Sopenharmony_ci * When the user wants to journal a newly created buffer_head
12328c2ecf20Sopenharmony_ci * (ie. getblk() returned a new buffer and we are going to populate it
12338c2ecf20Sopenharmony_ci * manually rather than reading off disk), then we need to keep the
12348c2ecf20Sopenharmony_ci * buffer_head locked until it has been completely filled with new
12358c2ecf20Sopenharmony_ci * data.  In this case, we should be able to make the assertion that
12368c2ecf20Sopenharmony_ci * the bh is not already part of an existing transaction.
12378c2ecf20Sopenharmony_ci *
12388c2ecf20Sopenharmony_ci * The buffer should already be locked by the caller by this point.
12398c2ecf20Sopenharmony_ci * There is no lock ranking violation: it was a newly created,
12408c2ecf20Sopenharmony_ci * unlocked buffer beforehand. */
12418c2ecf20Sopenharmony_ci
12428c2ecf20Sopenharmony_ci/**
12438c2ecf20Sopenharmony_ci * jbd2_journal_get_create_access () - notify intent to use newly created bh
12448c2ecf20Sopenharmony_ci * @handle: transaction to new buffer to
12458c2ecf20Sopenharmony_ci * @bh: new buffer.
12468c2ecf20Sopenharmony_ci *
12478c2ecf20Sopenharmony_ci * Call this if you create a new bh.
12488c2ecf20Sopenharmony_ci */
12498c2ecf20Sopenharmony_ciint jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
12508c2ecf20Sopenharmony_ci{
12518c2ecf20Sopenharmony_ci	transaction_t *transaction = handle->h_transaction;
12528c2ecf20Sopenharmony_ci	journal_t *journal;
12538c2ecf20Sopenharmony_ci	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
12548c2ecf20Sopenharmony_ci	int err;
12558c2ecf20Sopenharmony_ci
12568c2ecf20Sopenharmony_ci	jbd_debug(5, "journal_head %p\n", jh);
12578c2ecf20Sopenharmony_ci	err = -EROFS;
12588c2ecf20Sopenharmony_ci	if (is_handle_aborted(handle))
12598c2ecf20Sopenharmony_ci		goto out;
12608c2ecf20Sopenharmony_ci	journal = transaction->t_journal;
12618c2ecf20Sopenharmony_ci	err = 0;
12628c2ecf20Sopenharmony_ci
12638c2ecf20Sopenharmony_ci	JBUFFER_TRACE(jh, "entry");
12648c2ecf20Sopenharmony_ci	/*
12658c2ecf20Sopenharmony_ci	 * The buffer may already belong to this transaction due to pre-zeroing
12668c2ecf20Sopenharmony_ci	 * in the filesystem's new_block code.  It may also be on the previous,
12678c2ecf20Sopenharmony_ci	 * committing transaction's lists, but it HAS to be in Forget state in
12688c2ecf20Sopenharmony_ci	 * that case: the transaction must have deleted the buffer for it to be
12698c2ecf20Sopenharmony_ci	 * reused here.
12708c2ecf20Sopenharmony_ci	 */
12718c2ecf20Sopenharmony_ci	spin_lock(&jh->b_state_lock);
12728c2ecf20Sopenharmony_ci	J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
12738c2ecf20Sopenharmony_ci		jh->b_transaction == NULL ||
12748c2ecf20Sopenharmony_ci		(jh->b_transaction == journal->j_committing_transaction &&
12758c2ecf20Sopenharmony_ci			  jh->b_jlist == BJ_Forget)));
12768c2ecf20Sopenharmony_ci
12778c2ecf20Sopenharmony_ci	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
12788c2ecf20Sopenharmony_ci	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
12798c2ecf20Sopenharmony_ci
12808c2ecf20Sopenharmony_ci	if (jh->b_transaction == NULL) {
12818c2ecf20Sopenharmony_ci		/*
12828c2ecf20Sopenharmony_ci		 * Previous jbd2_journal_forget() could have left the buffer
12838c2ecf20Sopenharmony_ci		 * with jbddirty bit set because it was being committed. When
12848c2ecf20Sopenharmony_ci		 * the commit finished, we've filed the buffer for
12858c2ecf20Sopenharmony_ci		 * checkpointing and marked it dirty. Now we are reallocating
12868c2ecf20Sopenharmony_ci		 * the buffer so the transaction freeing it must have
12878c2ecf20Sopenharmony_ci		 * committed and so it's safe to clear the dirty bit.
12888c2ecf20Sopenharmony_ci		 */
12898c2ecf20Sopenharmony_ci		clear_buffer_dirty(jh2bh(jh));
12908c2ecf20Sopenharmony_ci		/* first access by this transaction */
12918c2ecf20Sopenharmony_ci		jh->b_modified = 0;
12928c2ecf20Sopenharmony_ci
12938c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "file as BJ_Reserved");
12948c2ecf20Sopenharmony_ci		spin_lock(&journal->j_list_lock);
12958c2ecf20Sopenharmony_ci		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
12968c2ecf20Sopenharmony_ci		spin_unlock(&journal->j_list_lock);
12978c2ecf20Sopenharmony_ci	} else if (jh->b_transaction == journal->j_committing_transaction) {
12988c2ecf20Sopenharmony_ci		/* first access by this transaction */
12998c2ecf20Sopenharmony_ci		jh->b_modified = 0;
13008c2ecf20Sopenharmony_ci
13018c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "set next transaction");
13028c2ecf20Sopenharmony_ci		spin_lock(&journal->j_list_lock);
13038c2ecf20Sopenharmony_ci		jh->b_next_transaction = transaction;
13048c2ecf20Sopenharmony_ci		spin_unlock(&journal->j_list_lock);
13058c2ecf20Sopenharmony_ci	}
13068c2ecf20Sopenharmony_ci	spin_unlock(&jh->b_state_lock);
13078c2ecf20Sopenharmony_ci
13088c2ecf20Sopenharmony_ci	/*
13098c2ecf20Sopenharmony_ci	 * akpm: I added this.  ext3_alloc_branch can pick up new indirect
13108c2ecf20Sopenharmony_ci	 * blocks which contain freed but then revoked metadata.  We need
13118c2ecf20Sopenharmony_ci	 * to cancel the revoke in case we end up freeing it yet again
13128c2ecf20Sopenharmony_ci	 * and the reallocating as data - this would cause a second revoke,
13138c2ecf20Sopenharmony_ci	 * which hits an assertion error.
13148c2ecf20Sopenharmony_ci	 */
13158c2ecf20Sopenharmony_ci	JBUFFER_TRACE(jh, "cancelling revoke");
13168c2ecf20Sopenharmony_ci	jbd2_journal_cancel_revoke(handle, jh);
13178c2ecf20Sopenharmony_ciout:
13188c2ecf20Sopenharmony_ci	jbd2_journal_put_journal_head(jh);
13198c2ecf20Sopenharmony_ci	return err;
13208c2ecf20Sopenharmony_ci}
13218c2ecf20Sopenharmony_ci
13228c2ecf20Sopenharmony_ci/**
13238c2ecf20Sopenharmony_ci * jbd2_journal_get_undo_access() -  Notify intent to modify metadata with
13248c2ecf20Sopenharmony_ci *     non-rewindable consequences
13258c2ecf20Sopenharmony_ci * @handle: transaction
13268c2ecf20Sopenharmony_ci * @bh: buffer to undo
13278c2ecf20Sopenharmony_ci *
13288c2ecf20Sopenharmony_ci * Sometimes there is a need to distinguish between metadata which has
13298c2ecf20Sopenharmony_ci * been committed to disk and that which has not.  The ext3fs code uses
13308c2ecf20Sopenharmony_ci * this for freeing and allocating space, we have to make sure that we
13318c2ecf20Sopenharmony_ci * do not reuse freed space until the deallocation has been committed,
13328c2ecf20Sopenharmony_ci * since if we overwrote that space we would make the delete
13338c2ecf20Sopenharmony_ci * un-rewindable in case of a crash.
13348c2ecf20Sopenharmony_ci *
13358c2ecf20Sopenharmony_ci * To deal with that, jbd2_journal_get_undo_access requests write access to a
13368c2ecf20Sopenharmony_ci * buffer for parts of non-rewindable operations such as delete
13378c2ecf20Sopenharmony_ci * operations on the bitmaps.  The journaling code must keep a copy of
13388c2ecf20Sopenharmony_ci * the buffer's contents prior to the undo_access call until such time
13398c2ecf20Sopenharmony_ci * as we know that the buffer has definitely been committed to disk.
13408c2ecf20Sopenharmony_ci *
13418c2ecf20Sopenharmony_ci * We never need to know which transaction the committed data is part
13428c2ecf20Sopenharmony_ci * of, buffers touched here are guaranteed to be dirtied later and so
13438c2ecf20Sopenharmony_ci * will be committed to a new transaction in due course, at which point
13448c2ecf20Sopenharmony_ci * we can discard the old committed data pointer.
13458c2ecf20Sopenharmony_ci *
13468c2ecf20Sopenharmony_ci * Returns error number or 0 on success.
13478c2ecf20Sopenharmony_ci */
13488c2ecf20Sopenharmony_ciint jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
13498c2ecf20Sopenharmony_ci{
13508c2ecf20Sopenharmony_ci	int err;
13518c2ecf20Sopenharmony_ci	struct journal_head *jh;
13528c2ecf20Sopenharmony_ci	char *committed_data = NULL;
13538c2ecf20Sopenharmony_ci
13548c2ecf20Sopenharmony_ci	if (is_handle_aborted(handle))
13558c2ecf20Sopenharmony_ci		return -EROFS;
13568c2ecf20Sopenharmony_ci
13578c2ecf20Sopenharmony_ci	if (jbd2_write_access_granted(handle, bh, true))
13588c2ecf20Sopenharmony_ci		return 0;
13598c2ecf20Sopenharmony_ci
13608c2ecf20Sopenharmony_ci	jh = jbd2_journal_add_journal_head(bh);
13618c2ecf20Sopenharmony_ci	JBUFFER_TRACE(jh, "entry");
13628c2ecf20Sopenharmony_ci
13638c2ecf20Sopenharmony_ci	/*
13648c2ecf20Sopenharmony_ci	 * Do this first --- it can drop the journal lock, so we want to
13658c2ecf20Sopenharmony_ci	 * make sure that obtaining the committed_data is done
13668c2ecf20Sopenharmony_ci	 * atomically wrt. completion of any outstanding commits.
13678c2ecf20Sopenharmony_ci	 */
13688c2ecf20Sopenharmony_ci	err = do_get_write_access(handle, jh, 1);
13698c2ecf20Sopenharmony_ci	if (err)
13708c2ecf20Sopenharmony_ci		goto out;
13718c2ecf20Sopenharmony_ci
13728c2ecf20Sopenharmony_cirepeat:
13738c2ecf20Sopenharmony_ci	if (!jh->b_committed_data)
13748c2ecf20Sopenharmony_ci		committed_data = jbd2_alloc(jh2bh(jh)->b_size,
13758c2ecf20Sopenharmony_ci					    GFP_NOFS|__GFP_NOFAIL);
13768c2ecf20Sopenharmony_ci
13778c2ecf20Sopenharmony_ci	spin_lock(&jh->b_state_lock);
13788c2ecf20Sopenharmony_ci	if (!jh->b_committed_data) {
13798c2ecf20Sopenharmony_ci		/* Copy out the current buffer contents into the
13808c2ecf20Sopenharmony_ci		 * preserved, committed copy. */
13818c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "generate b_committed data");
13828c2ecf20Sopenharmony_ci		if (!committed_data) {
13838c2ecf20Sopenharmony_ci			spin_unlock(&jh->b_state_lock);
13848c2ecf20Sopenharmony_ci			goto repeat;
13858c2ecf20Sopenharmony_ci		}
13868c2ecf20Sopenharmony_ci
13878c2ecf20Sopenharmony_ci		jh->b_committed_data = committed_data;
13888c2ecf20Sopenharmony_ci		committed_data = NULL;
13898c2ecf20Sopenharmony_ci		memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
13908c2ecf20Sopenharmony_ci	}
13918c2ecf20Sopenharmony_ci	spin_unlock(&jh->b_state_lock);
13928c2ecf20Sopenharmony_ciout:
13938c2ecf20Sopenharmony_ci	jbd2_journal_put_journal_head(jh);
13948c2ecf20Sopenharmony_ci	if (unlikely(committed_data))
13958c2ecf20Sopenharmony_ci		jbd2_free(committed_data, bh->b_size);
13968c2ecf20Sopenharmony_ci	return err;
13978c2ecf20Sopenharmony_ci}
13988c2ecf20Sopenharmony_ci
13998c2ecf20Sopenharmony_ci/**
14008c2ecf20Sopenharmony_ci * jbd2_journal_set_triggers() - Add triggers for commit writeout
14018c2ecf20Sopenharmony_ci * @bh: buffer to trigger on
14028c2ecf20Sopenharmony_ci * @type: struct jbd2_buffer_trigger_type containing the trigger(s).
14038c2ecf20Sopenharmony_ci *
14048c2ecf20Sopenharmony_ci * Set any triggers on this journal_head.  This is always safe, because
14058c2ecf20Sopenharmony_ci * triggers for a committing buffer will be saved off, and triggers for
14068c2ecf20Sopenharmony_ci * a running transaction will match the buffer in that transaction.
14078c2ecf20Sopenharmony_ci *
14088c2ecf20Sopenharmony_ci * Call with NULL to clear the triggers.
14098c2ecf20Sopenharmony_ci */
14108c2ecf20Sopenharmony_civoid jbd2_journal_set_triggers(struct buffer_head *bh,
14118c2ecf20Sopenharmony_ci			       struct jbd2_buffer_trigger_type *type)
14128c2ecf20Sopenharmony_ci{
14138c2ecf20Sopenharmony_ci	struct journal_head *jh = jbd2_journal_grab_journal_head(bh);
14148c2ecf20Sopenharmony_ci
14158c2ecf20Sopenharmony_ci	if (WARN_ON(!jh))
14168c2ecf20Sopenharmony_ci		return;
14178c2ecf20Sopenharmony_ci	jh->b_triggers = type;
14188c2ecf20Sopenharmony_ci	jbd2_journal_put_journal_head(jh);
14198c2ecf20Sopenharmony_ci}
14208c2ecf20Sopenharmony_ci
14218c2ecf20Sopenharmony_civoid jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
14228c2ecf20Sopenharmony_ci				struct jbd2_buffer_trigger_type *triggers)
14238c2ecf20Sopenharmony_ci{
14248c2ecf20Sopenharmony_ci	struct buffer_head *bh = jh2bh(jh);
14258c2ecf20Sopenharmony_ci
14268c2ecf20Sopenharmony_ci	if (!triggers || !triggers->t_frozen)
14278c2ecf20Sopenharmony_ci		return;
14288c2ecf20Sopenharmony_ci
14298c2ecf20Sopenharmony_ci	triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
14308c2ecf20Sopenharmony_ci}
14318c2ecf20Sopenharmony_ci
14328c2ecf20Sopenharmony_civoid jbd2_buffer_abort_trigger(struct journal_head *jh,
14338c2ecf20Sopenharmony_ci			       struct jbd2_buffer_trigger_type *triggers)
14348c2ecf20Sopenharmony_ci{
14358c2ecf20Sopenharmony_ci	if (!triggers || !triggers->t_abort)
14368c2ecf20Sopenharmony_ci		return;
14378c2ecf20Sopenharmony_ci
14388c2ecf20Sopenharmony_ci	triggers->t_abort(triggers, jh2bh(jh));
14398c2ecf20Sopenharmony_ci}
14408c2ecf20Sopenharmony_ci
14418c2ecf20Sopenharmony_ci/**
14428c2ecf20Sopenharmony_ci * jbd2_journal_dirty_metadata() -  mark a buffer as containing dirty metadata
14438c2ecf20Sopenharmony_ci * @handle: transaction to add buffer to.
14448c2ecf20Sopenharmony_ci * @bh: buffer to mark
14458c2ecf20Sopenharmony_ci *
14468c2ecf20Sopenharmony_ci * mark dirty metadata which needs to be journaled as part of the current
14478c2ecf20Sopenharmony_ci * transaction.
14488c2ecf20Sopenharmony_ci *
14498c2ecf20Sopenharmony_ci * The buffer must have previously had jbd2_journal_get_write_access()
14508c2ecf20Sopenharmony_ci * called so that it has a valid journal_head attached to the buffer
14518c2ecf20Sopenharmony_ci * head.
14528c2ecf20Sopenharmony_ci *
14538c2ecf20Sopenharmony_ci * The buffer is placed on the transaction's metadata list and is marked
14548c2ecf20Sopenharmony_ci * as belonging to the transaction.
14558c2ecf20Sopenharmony_ci *
14568c2ecf20Sopenharmony_ci * Returns error number or 0 on success.
 * The error values produced by this function are:
 *   -EUCLEAN  @bh has no journal_head attached (buffer_jbd() is false),
 *   -EROFS    @handle is aborted (checked with b_state_lock held),
 *   -ENOSPC   the handle has no buffer credits left for a first modification,
 *   -EINVAL   the journal_head is attached to an unexpected transaction.
14578c2ecf20Sopenharmony_ci *
14588c2ecf20Sopenharmony_ci * Special care needs to be taken if the buffer already belongs to the
14598c2ecf20Sopenharmony_ci * current committing transaction (in which case we should have frozen
14608c2ecf20Sopenharmony_ci * data present for that commit).  In that case, we don't relink the
14618c2ecf20Sopenharmony_ci * buffer: that only gets done when the old transaction finally
14628c2ecf20Sopenharmony_ci * completes its commit.
14638c2ecf20Sopenharmony_ci */
14648c2ecf20Sopenharmony_ciint jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
14658c2ecf20Sopenharmony_ci{
14668c2ecf20Sopenharmony_ci	transaction_t *transaction = handle->h_transaction;
14678c2ecf20Sopenharmony_ci	journal_t *journal;
14688c2ecf20Sopenharmony_ci	struct journal_head *jh;
14698c2ecf20Sopenharmony_ci	int ret = 0;
14708c2ecf20Sopenharmony_ci
14718c2ecf20Sopenharmony_ci	if (!buffer_jbd(bh))
14728c2ecf20Sopenharmony_ci		return -EUCLEAN;
14738c2ecf20Sopenharmony_ci
14748c2ecf20Sopenharmony_ci	/*
14758c2ecf20Sopenharmony_ci	 * We don't grab jh reference here since the buffer must be part
14768c2ecf20Sopenharmony_ci	 * of the running transaction.
14778c2ecf20Sopenharmony_ci	 */
14788c2ecf20Sopenharmony_ci	jh = bh2jh(bh);
14798c2ecf20Sopenharmony_ci	jbd_debug(5, "journal_head %p\n", jh);
14808c2ecf20Sopenharmony_ci	JBUFFER_TRACE(jh, "entry");
14818c2ecf20Sopenharmony_ci
14828c2ecf20Sopenharmony_ci	/*
14838c2ecf20Sopenharmony_ci	 * This and the following assertions are unreliable since we may see jh
14848c2ecf20Sopenharmony_ci	 * in inconsistent state unless we grab bh_state lock. But this is
14858c2ecf20Sopenharmony_ci	 * crucial to catch bugs so let's do a reliable check until the
14868c2ecf20Sopenharmony_ci	 * lockless handling is fully proven.
14878c2ecf20Sopenharmony_ci	 */
14888c2ecf20Sopenharmony_ci	if (data_race(jh->b_transaction != transaction &&
14898c2ecf20Sopenharmony_ci	    jh->b_next_transaction != transaction)) {
14908c2ecf20Sopenharmony_ci		spin_lock(&jh->b_state_lock);
14918c2ecf20Sopenharmony_ci		J_ASSERT_JH(jh, jh->b_transaction == transaction ||
14928c2ecf20Sopenharmony_ci				jh->b_next_transaction == transaction);
14938c2ecf20Sopenharmony_ci		spin_unlock(&jh->b_state_lock);
14948c2ecf20Sopenharmony_ci	}
	/*
	 * b_modified == 1: the credit for this buffer was already consumed
	 * (see the b_modified == 0 branch below), so only the list-membership
	 * sanity check is repeated under the lock before leaving.
	 */
14958c2ecf20Sopenharmony_ci	if (jh->b_modified == 1) {
14968c2ecf20Sopenharmony_ci		/* If it's in our transaction it must be in BJ_Metadata list. */
14978c2ecf20Sopenharmony_ci		if (data_race(jh->b_transaction == transaction &&
14988c2ecf20Sopenharmony_ci		    jh->b_jlist != BJ_Metadata)) {
14998c2ecf20Sopenharmony_ci			spin_lock(&jh->b_state_lock);
15008c2ecf20Sopenharmony_ci			if (jh->b_transaction == transaction &&
15018c2ecf20Sopenharmony_ci			    jh->b_jlist != BJ_Metadata)
15028c2ecf20Sopenharmony_ci				pr_err("JBD2: assertion failure: h_type=%u "
15038c2ecf20Sopenharmony_ci				       "h_line_no=%u block_no=%llu jlist=%u\n",
15048c2ecf20Sopenharmony_ci				       handle->h_type, handle->h_line_no,
15058c2ecf20Sopenharmony_ci				       (unsigned long long) bh->b_blocknr,
15068c2ecf20Sopenharmony_ci				       jh->b_jlist);
15078c2ecf20Sopenharmony_ci			J_ASSERT_JH(jh, jh->b_transaction != transaction ||
15088c2ecf20Sopenharmony_ci					jh->b_jlist == BJ_Metadata);
15098c2ecf20Sopenharmony_ci			spin_unlock(&jh->b_state_lock);
15108c2ecf20Sopenharmony_ci		}
15118c2ecf20Sopenharmony_ci		goto out;
15128c2ecf20Sopenharmony_ci	}
15138c2ecf20Sopenharmony_ci
	/*
	 * NOTE(review): transaction is dereferenced here before the
	 * is_handle_aborted() check below -- confirm handle->h_transaction
	 * cannot be NULL when this point is reached.
	 */
15148c2ecf20Sopenharmony_ci	journal = transaction->t_journal;
15158c2ecf20Sopenharmony_ci	spin_lock(&jh->b_state_lock);
15168c2ecf20Sopenharmony_ci
15178c2ecf20Sopenharmony_ci	if (is_handle_aborted(handle)) {
15188c2ecf20Sopenharmony_ci		/*
15198c2ecf20Sopenharmony_ci		 * Check journal aborting with @jh->b_state_lock locked,
15208c2ecf20Sopenharmony_ci		 * since 'jh->b_transaction' could be replaced with
15218c2ecf20Sopenharmony_ci		 * 'jh->b_next_transaction' during old transaction
15228c2ecf20Sopenharmony_ci		 * committing if journal aborted, which may fail
15238c2ecf20Sopenharmony_ci		 * assertion on 'jh->b_frozen_data == NULL'.
15248c2ecf20Sopenharmony_ci		 */
15258c2ecf20Sopenharmony_ci		ret = -EROFS;
15268c2ecf20Sopenharmony_ci		goto out_unlock_bh;
15278c2ecf20Sopenharmony_ci	}
15288c2ecf20Sopenharmony_ci
15298c2ecf20Sopenharmony_ci	if (jh->b_modified == 0) {
15308c2ecf20Sopenharmony_ci		/*
15318c2ecf20Sopenharmony_ci		 * This buffer's got modified and becoming part
15328c2ecf20Sopenharmony_ci		 * of the transaction. This needs to be done
15338c2ecf20Sopenharmony_ci		 * once a transaction -bzzz
15348c2ecf20Sopenharmony_ci		 */
15358c2ecf20Sopenharmony_ci		if (WARN_ON_ONCE(jbd2_handle_buffer_credits(handle) <= 0)) {
15368c2ecf20Sopenharmony_ci			ret = -ENOSPC;
15378c2ecf20Sopenharmony_ci			goto out_unlock_bh;
15388c2ecf20Sopenharmony_ci		}
		/* Charge one credit of the handle for this buffer. */
15398c2ecf20Sopenharmony_ci		jh->b_modified = 1;
15408c2ecf20Sopenharmony_ci		handle->h_total_credits--;
15418c2ecf20Sopenharmony_ci	}
15428c2ecf20Sopenharmony_ci
15438c2ecf20Sopenharmony_ci	/*
15448c2ecf20Sopenharmony_ci	 * fastpath, to avoid expensive locking.  If this buffer is already
15458c2ecf20Sopenharmony_ci	 * on the running transaction's metadata list there is nothing to do.
15468c2ecf20Sopenharmony_ci	 * Nobody can take it off again because there is a handle open.
15478c2ecf20Sopenharmony_ci	 * I _think_ we're OK here with SMP barriers - a mistaken decision will
15488c2ecf20Sopenharmony_ci	 * result in this test being false, so we go in and take the locks.
	 *
	 * NOTE(review): b_state_lock is already held at this point, so the
	 * locking being avoided is presumably only j_list_lock taken further
	 * down -- confirm and refresh this comment.
15498c2ecf20Sopenharmony_ci	 */
15508c2ecf20Sopenharmony_ci	if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
15518c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "fastpath");
15528c2ecf20Sopenharmony_ci		if (unlikely(jh->b_transaction !=
15538c2ecf20Sopenharmony_ci			     journal->j_running_transaction)) {
15548c2ecf20Sopenharmony_ci			printk(KERN_ERR "JBD2: %s: "
15558c2ecf20Sopenharmony_ci			       "jh->b_transaction (%llu, %p, %u) != "
15568c2ecf20Sopenharmony_ci			       "journal->j_running_transaction (%p, %u)\n",
15578c2ecf20Sopenharmony_ci			       journal->j_devname,
15588c2ecf20Sopenharmony_ci			       (unsigned long long) bh->b_blocknr,
15598c2ecf20Sopenharmony_ci			       jh->b_transaction,
15608c2ecf20Sopenharmony_ci			       jh->b_transaction ? jh->b_transaction->t_tid : 0,
15618c2ecf20Sopenharmony_ci			       journal->j_running_transaction,
15628c2ecf20Sopenharmony_ci			       journal->j_running_transaction ?
15638c2ecf20Sopenharmony_ci			       journal->j_running_transaction->t_tid : 0);
15648c2ecf20Sopenharmony_ci			ret = -EINVAL;
15658c2ecf20Sopenharmony_ci		}
15668c2ecf20Sopenharmony_ci		goto out_unlock_bh;
15678c2ecf20Sopenharmony_ci	}
15688c2ecf20Sopenharmony_ci
15698c2ecf20Sopenharmony_ci	set_buffer_jbddirty(bh);
15708c2ecf20Sopenharmony_ci
15718c2ecf20Sopenharmony_ci	/*
15728c2ecf20Sopenharmony_ci	 * Metadata already on the current transaction list doesn't
15738c2ecf20Sopenharmony_ci	 * need to be filed.  Metadata on another transaction's list must
15748c2ecf20Sopenharmony_ci	 * be committing, and will be refiled once the commit completes:
15758c2ecf20Sopenharmony_ci	 * leave it alone for now.
15768c2ecf20Sopenharmony_ci	 */
15778c2ecf20Sopenharmony_ci	if (jh->b_transaction != transaction) {
15788c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "already on other transaction");
15798c2ecf20Sopenharmony_ci		if (unlikely(((jh->b_transaction !=
15808c2ecf20Sopenharmony_ci			       journal->j_committing_transaction)) ||
15818c2ecf20Sopenharmony_ci			     (jh->b_next_transaction != transaction))) {
15828c2ecf20Sopenharmony_ci			printk(KERN_ERR "jbd2_journal_dirty_metadata: %s: "
15838c2ecf20Sopenharmony_ci			       "bad jh for block %llu: "
15848c2ecf20Sopenharmony_ci			       "transaction (%p, %u), "
15858c2ecf20Sopenharmony_ci			       "jh->b_transaction (%p, %u), "
15868c2ecf20Sopenharmony_ci			       "jh->b_next_transaction (%p, %u), jlist %u\n",
15878c2ecf20Sopenharmony_ci			       journal->j_devname,
15888c2ecf20Sopenharmony_ci			       (unsigned long long) bh->b_blocknr,
15898c2ecf20Sopenharmony_ci			       transaction, transaction->t_tid,
15908c2ecf20Sopenharmony_ci			       jh->b_transaction,
15918c2ecf20Sopenharmony_ci			       jh->b_transaction ?
15928c2ecf20Sopenharmony_ci			       jh->b_transaction->t_tid : 0,
15938c2ecf20Sopenharmony_ci			       jh->b_next_transaction,
15948c2ecf20Sopenharmony_ci			       jh->b_next_transaction ?
15958c2ecf20Sopenharmony_ci			       jh->b_next_transaction->t_tid : 0,
15968c2ecf20Sopenharmony_ci			       jh->b_jlist);
15978c2ecf20Sopenharmony_ci			WARN_ON(1);
15988c2ecf20Sopenharmony_ci			ret = -EINVAL;
15998c2ecf20Sopenharmony_ci		}
16008c2ecf20Sopenharmony_ci		/* And this case is illegal: we can't reuse another
16018c2ecf20Sopenharmony_ci		 * transaction's data buffer, ever. */
16028c2ecf20Sopenharmony_ci		goto out_unlock_bh;
16038c2ecf20Sopenharmony_ci	}
16048c2ecf20Sopenharmony_ci
16058c2ecf20Sopenharmony_ci	/* That test should have eliminated the following case: */
16068c2ecf20Sopenharmony_ci	J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
16078c2ecf20Sopenharmony_ci
16088c2ecf20Sopenharmony_ci	JBUFFER_TRACE(jh, "file as BJ_Metadata");
16098c2ecf20Sopenharmony_ci	spin_lock(&journal->j_list_lock);
16108c2ecf20Sopenharmony_ci	__jbd2_journal_file_buffer(jh, transaction, BJ_Metadata);
16118c2ecf20Sopenharmony_ci	spin_unlock(&journal->j_list_lock);
16128c2ecf20Sopenharmony_ciout_unlock_bh:
16138c2ecf20Sopenharmony_ci	spin_unlock(&jh->b_state_lock);
16148c2ecf20Sopenharmony_ciout:
16158c2ecf20Sopenharmony_ci	JBUFFER_TRACE(jh, "exit");
16168c2ecf20Sopenharmony_ci	return ret;
16178c2ecf20Sopenharmony_ci}
16188c2ecf20Sopenharmony_ci
16198c2ecf20Sopenharmony_ci/**
16208c2ecf20Sopenharmony_ci * jbd2_journal_forget() - bforget() for potentially-journaled buffers.
16218c2ecf20Sopenharmony_ci * @handle: transaction handle
16228c2ecf20Sopenharmony_ci * @bh:     bh to 'forget'
16238c2ecf20Sopenharmony_ci *
16248c2ecf20Sopenharmony_ci * We can only do the bforget if there are no commits pending against the
16258c2ecf20Sopenharmony_ci * buffer.  If the buffer is dirty in the current running transaction we
16268c2ecf20Sopenharmony_ci * can safely unlink it.
16278c2ecf20Sopenharmony_ci *
16288c2ecf20Sopenharmony_ci * bh may not be a journalled buffer at all - it may be a non-JBD
16298c2ecf20Sopenharmony_ci * buffer which came off the hashtable.  Check for this.
16308c2ecf20Sopenharmony_ci *
16318c2ecf20Sopenharmony_ci * Decrements bh->b_count by one.
16328c2ecf20Sopenharmony_ci *
16338c2ecf20Sopenharmony_ci * Allow this call even if the handle has aborted --- it may be part of
16348c2ecf20Sopenharmony_ci * the caller's cleanup after an abort.
16358c2ecf20Sopenharmony_ci */
16368c2ecf20Sopenharmony_ciint jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
16378c2ecf20Sopenharmony_ci{
16388c2ecf20Sopenharmony_ci	transaction_t *transaction = handle->h_transaction;
16398c2ecf20Sopenharmony_ci	journal_t *journal;
16408c2ecf20Sopenharmony_ci	struct journal_head *jh;
16418c2ecf20Sopenharmony_ci	int drop_reserve = 0;
16428c2ecf20Sopenharmony_ci	int err = 0;
16438c2ecf20Sopenharmony_ci	int was_modified = 0;
16448c2ecf20Sopenharmony_ci
16458c2ecf20Sopenharmony_ci	if (is_handle_aborted(handle))
16468c2ecf20Sopenharmony_ci		return -EROFS;
16478c2ecf20Sopenharmony_ci	journal = transaction->t_journal;
16488c2ecf20Sopenharmony_ci
16498c2ecf20Sopenharmony_ci	BUFFER_TRACE(bh, "entry");
16508c2ecf20Sopenharmony_ci
16518c2ecf20Sopenharmony_ci	jh = jbd2_journal_grab_journal_head(bh);
16528c2ecf20Sopenharmony_ci	if (!jh) {
16538c2ecf20Sopenharmony_ci		__bforget(bh);
16548c2ecf20Sopenharmony_ci		return 0;
16558c2ecf20Sopenharmony_ci	}
16568c2ecf20Sopenharmony_ci
16578c2ecf20Sopenharmony_ci	spin_lock(&jh->b_state_lock);
16588c2ecf20Sopenharmony_ci
16598c2ecf20Sopenharmony_ci	/* Critical error: attempting to delete a bitmap buffer, maybe?
16608c2ecf20Sopenharmony_ci	 * Don't do any jbd operations, and return an error. */
16618c2ecf20Sopenharmony_ci	if (!J_EXPECT_JH(jh, !jh->b_committed_data,
16628c2ecf20Sopenharmony_ci			 "inconsistent data on disk")) {
16638c2ecf20Sopenharmony_ci		err = -EIO;
16648c2ecf20Sopenharmony_ci		goto drop;
16658c2ecf20Sopenharmony_ci	}
16668c2ecf20Sopenharmony_ci
16678c2ecf20Sopenharmony_ci	/* keep track of whether or not this transaction modified us */
16688c2ecf20Sopenharmony_ci	was_modified = jh->b_modified;
16698c2ecf20Sopenharmony_ci
16708c2ecf20Sopenharmony_ci	/*
16718c2ecf20Sopenharmony_ci	 * The buffer's going from the transaction, we must drop
16728c2ecf20Sopenharmony_ci	 * all references -bzzz
16738c2ecf20Sopenharmony_ci	 */
16748c2ecf20Sopenharmony_ci	jh->b_modified = 0;
16758c2ecf20Sopenharmony_ci
16768c2ecf20Sopenharmony_ci	if (jh->b_transaction == transaction) {
16778c2ecf20Sopenharmony_ci		J_ASSERT_JH(jh, !jh->b_frozen_data);
16788c2ecf20Sopenharmony_ci
16798c2ecf20Sopenharmony_ci		/* If we are forgetting a buffer which is already part
16808c2ecf20Sopenharmony_ci		 * of this transaction, then we can just drop it from
16818c2ecf20Sopenharmony_ci		 * the transaction immediately. */
16828c2ecf20Sopenharmony_ci		clear_buffer_dirty(bh);
16838c2ecf20Sopenharmony_ci		clear_buffer_jbddirty(bh);
16848c2ecf20Sopenharmony_ci
16858c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
16868c2ecf20Sopenharmony_ci
16878c2ecf20Sopenharmony_ci		/*
16888c2ecf20Sopenharmony_ci		 * we only want to drop a reference if this transaction
16898c2ecf20Sopenharmony_ci		 * modified the buffer
16908c2ecf20Sopenharmony_ci		 */
16918c2ecf20Sopenharmony_ci		if (was_modified)
16928c2ecf20Sopenharmony_ci			drop_reserve = 1;
16938c2ecf20Sopenharmony_ci
16948c2ecf20Sopenharmony_ci		/*
16958c2ecf20Sopenharmony_ci		 * We are no longer going to journal this buffer.
16968c2ecf20Sopenharmony_ci		 * However, the commit of this transaction is still
16978c2ecf20Sopenharmony_ci		 * important to the buffer: the delete that we are now
16988c2ecf20Sopenharmony_ci		 * processing might obsolete an old log entry, so by
16998c2ecf20Sopenharmony_ci		 * committing, we can satisfy the buffer's checkpoint.
17008c2ecf20Sopenharmony_ci		 *
17018c2ecf20Sopenharmony_ci		 * So, if we have a checkpoint on the buffer, we should
17028c2ecf20Sopenharmony_ci		 * now refile the buffer on our BJ_Forget list so that
17038c2ecf20Sopenharmony_ci		 * we know to remove the checkpoint after we commit.
17048c2ecf20Sopenharmony_ci		 */
17058c2ecf20Sopenharmony_ci
17068c2ecf20Sopenharmony_ci		spin_lock(&journal->j_list_lock);
17078c2ecf20Sopenharmony_ci		if (jh->b_cp_transaction) {
17088c2ecf20Sopenharmony_ci			__jbd2_journal_temp_unlink_buffer(jh);
17098c2ecf20Sopenharmony_ci			__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
17108c2ecf20Sopenharmony_ci		} else {
17118c2ecf20Sopenharmony_ci			__jbd2_journal_unfile_buffer(jh);
17128c2ecf20Sopenharmony_ci			jbd2_journal_put_journal_head(jh);
17138c2ecf20Sopenharmony_ci		}
17148c2ecf20Sopenharmony_ci		spin_unlock(&journal->j_list_lock);
17158c2ecf20Sopenharmony_ci	} else if (jh->b_transaction) {
17168c2ecf20Sopenharmony_ci		J_ASSERT_JH(jh, (jh->b_transaction ==
17178c2ecf20Sopenharmony_ci				 journal->j_committing_transaction));
17188c2ecf20Sopenharmony_ci		/* However, if the buffer is still owned by a prior
17198c2ecf20Sopenharmony_ci		 * (committing) transaction, we can't drop it yet... */
17208c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "belongs to older transaction");
17218c2ecf20Sopenharmony_ci		/* ... but we CAN drop it from the new transaction through
17228c2ecf20Sopenharmony_ci		 * marking the buffer as freed and set j_next_transaction to
17238c2ecf20Sopenharmony_ci		 * the new transaction, so that not only the commit code
17248c2ecf20Sopenharmony_ci		 * knows it should clear dirty bits when it is done with the
17258c2ecf20Sopenharmony_ci		 * buffer, but also the buffer can be checkpointed only
17268c2ecf20Sopenharmony_ci		 * after the new transaction commits. */
17278c2ecf20Sopenharmony_ci
17288c2ecf20Sopenharmony_ci		set_buffer_freed(bh);
17298c2ecf20Sopenharmony_ci
17308c2ecf20Sopenharmony_ci		if (!jh->b_next_transaction) {
17318c2ecf20Sopenharmony_ci			spin_lock(&journal->j_list_lock);
17328c2ecf20Sopenharmony_ci			jh->b_next_transaction = transaction;
17338c2ecf20Sopenharmony_ci			spin_unlock(&journal->j_list_lock);
17348c2ecf20Sopenharmony_ci		} else {
17358c2ecf20Sopenharmony_ci			J_ASSERT(jh->b_next_transaction == transaction);
17368c2ecf20Sopenharmony_ci
17378c2ecf20Sopenharmony_ci			/*
17388c2ecf20Sopenharmony_ci			 * only drop a reference if this transaction modified
17398c2ecf20Sopenharmony_ci			 * the buffer
17408c2ecf20Sopenharmony_ci			 */
17418c2ecf20Sopenharmony_ci			if (was_modified)
17428c2ecf20Sopenharmony_ci				drop_reserve = 1;
17438c2ecf20Sopenharmony_ci		}
17448c2ecf20Sopenharmony_ci	} else {
17458c2ecf20Sopenharmony_ci		/*
17468c2ecf20Sopenharmony_ci		 * Finally, if the buffer is not belongs to any
17478c2ecf20Sopenharmony_ci		 * transaction, we can just drop it now if it has no
17488c2ecf20Sopenharmony_ci		 * checkpoint.
17498c2ecf20Sopenharmony_ci		 */
17508c2ecf20Sopenharmony_ci		spin_lock(&journal->j_list_lock);
17518c2ecf20Sopenharmony_ci		if (!jh->b_cp_transaction) {
17528c2ecf20Sopenharmony_ci			JBUFFER_TRACE(jh, "belongs to none transaction");
17538c2ecf20Sopenharmony_ci			spin_unlock(&journal->j_list_lock);
17548c2ecf20Sopenharmony_ci			goto drop;
17558c2ecf20Sopenharmony_ci		}
17568c2ecf20Sopenharmony_ci
17578c2ecf20Sopenharmony_ci		/*
17588c2ecf20Sopenharmony_ci		 * Otherwise, if the buffer has been written to disk,
17598c2ecf20Sopenharmony_ci		 * it is safe to remove the checkpoint and drop it.
17608c2ecf20Sopenharmony_ci		 */
17618c2ecf20Sopenharmony_ci		if (!buffer_dirty(bh)) {
17628c2ecf20Sopenharmony_ci			__jbd2_journal_remove_checkpoint(jh);
17638c2ecf20Sopenharmony_ci			spin_unlock(&journal->j_list_lock);
17648c2ecf20Sopenharmony_ci			goto drop;
17658c2ecf20Sopenharmony_ci		}
17668c2ecf20Sopenharmony_ci
17678c2ecf20Sopenharmony_ci		/*
17688c2ecf20Sopenharmony_ci		 * The buffer is still not written to disk, we should
17698c2ecf20Sopenharmony_ci		 * attach this buffer to current transaction so that the
17708c2ecf20Sopenharmony_ci		 * buffer can be checkpointed only after the current
17718c2ecf20Sopenharmony_ci		 * transaction commits.
17728c2ecf20Sopenharmony_ci		 */
17738c2ecf20Sopenharmony_ci		clear_buffer_dirty(bh);
17748c2ecf20Sopenharmony_ci		__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
17758c2ecf20Sopenharmony_ci		spin_unlock(&journal->j_list_lock);
17768c2ecf20Sopenharmony_ci	}
17778c2ecf20Sopenharmony_cidrop:
17788c2ecf20Sopenharmony_ci	__brelse(bh);
17798c2ecf20Sopenharmony_ci	spin_unlock(&jh->b_state_lock);
17808c2ecf20Sopenharmony_ci	jbd2_journal_put_journal_head(jh);
17818c2ecf20Sopenharmony_ci	if (drop_reserve) {
17828c2ecf20Sopenharmony_ci		/* no need to reserve log space for this block -bzzz */
17838c2ecf20Sopenharmony_ci		handle->h_total_credits++;
17848c2ecf20Sopenharmony_ci	}
17858c2ecf20Sopenharmony_ci	return err;
17868c2ecf20Sopenharmony_ci}
17878c2ecf20Sopenharmony_ci
17888c2ecf20Sopenharmony_ci/**
17898c2ecf20Sopenharmony_ci * jbd2_journal_stop() - complete a transaction
17908c2ecf20Sopenharmony_ci * @handle: transaction to complete.
17918c2ecf20Sopenharmony_ci *
17928c2ecf20Sopenharmony_ci * All done for a particular handle.
17938c2ecf20Sopenharmony_ci *
17948c2ecf20Sopenharmony_ci * There is not much action needed here.  We just return any remaining
17958c2ecf20Sopenharmony_ci * buffer credits to the transaction and remove the handle.  The only
17968c2ecf20Sopenharmony_ci * complication is that we need to start a commit operation if the
17978c2ecf20Sopenharmony_ci * filesystem is marked for synchronous update.
17988c2ecf20Sopenharmony_ci *
17998c2ecf20Sopenharmony_ci * jbd2_journal_stop itself will not usually return an error, but it may
18008c2ecf20Sopenharmony_ci * do so in unusual circumstances.  In particular, expect it to
18018c2ecf20Sopenharmony_ci * return -EIO if a jbd2_journal_abort has been executed since the
18028c2ecf20Sopenharmony_ci * transaction began.
18038c2ecf20Sopenharmony_ci */
18048c2ecf20Sopenharmony_ciint jbd2_journal_stop(handle_t *handle)
18058c2ecf20Sopenharmony_ci{
18068c2ecf20Sopenharmony_ci	transaction_t *transaction = handle->h_transaction;
18078c2ecf20Sopenharmony_ci	journal_t *journal;
18088c2ecf20Sopenharmony_ci	int err = 0, wait_for_commit = 0;
18098c2ecf20Sopenharmony_ci	tid_t tid;
18108c2ecf20Sopenharmony_ci	pid_t pid;
18118c2ecf20Sopenharmony_ci
18128c2ecf20Sopenharmony_ci	if (--handle->h_ref > 0) {
18138c2ecf20Sopenharmony_ci		jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
18148c2ecf20Sopenharmony_ci						 handle->h_ref);
18158c2ecf20Sopenharmony_ci		if (is_handle_aborted(handle))
18168c2ecf20Sopenharmony_ci			return -EIO;
18178c2ecf20Sopenharmony_ci		return 0;
18188c2ecf20Sopenharmony_ci	}
18198c2ecf20Sopenharmony_ci	if (!transaction) {
18208c2ecf20Sopenharmony_ci		/*
18218c2ecf20Sopenharmony_ci		 * Handle is already detached from the transaction so there is
18228c2ecf20Sopenharmony_ci		 * nothing to do other than free the handle.
18238c2ecf20Sopenharmony_ci		 */
18248c2ecf20Sopenharmony_ci		memalloc_nofs_restore(handle->saved_alloc_context);
18258c2ecf20Sopenharmony_ci		goto free_and_exit;
18268c2ecf20Sopenharmony_ci	}
18278c2ecf20Sopenharmony_ci	journal = transaction->t_journal;
18288c2ecf20Sopenharmony_ci	tid = transaction->t_tid;
18298c2ecf20Sopenharmony_ci
18308c2ecf20Sopenharmony_ci	if (is_handle_aborted(handle))
18318c2ecf20Sopenharmony_ci		err = -EIO;
18328c2ecf20Sopenharmony_ci
18338c2ecf20Sopenharmony_ci	jbd_debug(4, "Handle %p going down\n", handle);
18348c2ecf20Sopenharmony_ci	trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
18358c2ecf20Sopenharmony_ci				tid, handle->h_type, handle->h_line_no,
18368c2ecf20Sopenharmony_ci				jiffies - handle->h_start_jiffies,
18378c2ecf20Sopenharmony_ci				handle->h_sync, handle->h_requested_credits,
18388c2ecf20Sopenharmony_ci				(handle->h_requested_credits -
18398c2ecf20Sopenharmony_ci				 handle->h_total_credits));
18408c2ecf20Sopenharmony_ci
18418c2ecf20Sopenharmony_ci	/*
18428c2ecf20Sopenharmony_ci	 * Implement synchronous transaction batching.  If the handle
18438c2ecf20Sopenharmony_ci	 * was synchronous, don't force a commit immediately.  Let's
18448c2ecf20Sopenharmony_ci	 * yield and let another thread piggyback onto this
18458c2ecf20Sopenharmony_ci	 * transaction.  Keep doing that while new threads continue to
18468c2ecf20Sopenharmony_ci	 * arrive.  It doesn't cost much - we're about to run a commit
18478c2ecf20Sopenharmony_ci	 * and sleep on IO anyway.  Speeds up many-threaded, many-dir
18488c2ecf20Sopenharmony_ci	 * operations by 30x or more...
18498c2ecf20Sopenharmony_ci	 *
18508c2ecf20Sopenharmony_ci	 * We try and optimize the sleep time against what the
18518c2ecf20Sopenharmony_ci	 * underlying disk can do, instead of having a static sleep
18528c2ecf20Sopenharmony_ci	 * time.  This is useful for the case where our storage is so
18538c2ecf20Sopenharmony_ci	 * fast that it is more optimal to go ahead and force a flush
18548c2ecf20Sopenharmony_ci	 * and wait for the transaction to be committed than it is to
18558c2ecf20Sopenharmony_ci	 * wait for an arbitrary amount of time for new writers to
18568c2ecf20Sopenharmony_ci	 * join the transaction.  We achieve this by measuring how
18578c2ecf20Sopenharmony_ci	 * long it takes to commit a transaction, and compare it with
18588c2ecf20Sopenharmony_ci	 * how long this transaction has been running, and if run time
18598c2ecf20Sopenharmony_ci	 * < commit time then we sleep for the delta and commit.  This
18608c2ecf20Sopenharmony_ci	 * greatly helps super fast disks that would see slowdowns as
18618c2ecf20Sopenharmony_ci	 * more threads started doing fsyncs.
18628c2ecf20Sopenharmony_ci	 *
18638c2ecf20Sopenharmony_ci	 * But don't do this if this process was the most recent one
18648c2ecf20Sopenharmony_ci	 * to perform a synchronous write.  We do this to detect the
18658c2ecf20Sopenharmony_ci	 * case where a single process is doing a stream of sync
18668c2ecf20Sopenharmony_ci	 * writes.  No point in waiting for joiners in that case.
18678c2ecf20Sopenharmony_ci	 *
18688c2ecf20Sopenharmony_ci	 * Setting max_batch_time to 0 disables this completely.
18698c2ecf20Sopenharmony_ci	 */
18708c2ecf20Sopenharmony_ci	pid = current->pid;
18718c2ecf20Sopenharmony_ci	if (handle->h_sync && journal->j_last_sync_writer != pid &&
18728c2ecf20Sopenharmony_ci	    journal->j_max_batch_time) {
18738c2ecf20Sopenharmony_ci		u64 commit_time, trans_time;
18748c2ecf20Sopenharmony_ci
18758c2ecf20Sopenharmony_ci		journal->j_last_sync_writer = pid;
18768c2ecf20Sopenharmony_ci
18778c2ecf20Sopenharmony_ci		read_lock(&journal->j_state_lock);
18788c2ecf20Sopenharmony_ci		commit_time = journal->j_average_commit_time;
18798c2ecf20Sopenharmony_ci		read_unlock(&journal->j_state_lock);
18808c2ecf20Sopenharmony_ci
18818c2ecf20Sopenharmony_ci		trans_time = ktime_to_ns(ktime_sub(ktime_get(),
18828c2ecf20Sopenharmony_ci						   transaction->t_start_time));
18838c2ecf20Sopenharmony_ci
18848c2ecf20Sopenharmony_ci		commit_time = max_t(u64, commit_time,
18858c2ecf20Sopenharmony_ci				    1000*journal->j_min_batch_time);
18868c2ecf20Sopenharmony_ci		commit_time = min_t(u64, commit_time,
18878c2ecf20Sopenharmony_ci				    1000*journal->j_max_batch_time);
18888c2ecf20Sopenharmony_ci
18898c2ecf20Sopenharmony_ci		if (trans_time < commit_time) {
18908c2ecf20Sopenharmony_ci			ktime_t expires = ktime_add_ns(ktime_get(),
18918c2ecf20Sopenharmony_ci						       commit_time);
18928c2ecf20Sopenharmony_ci			set_current_state(TASK_UNINTERRUPTIBLE);
18938c2ecf20Sopenharmony_ci			schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
18948c2ecf20Sopenharmony_ci		}
18958c2ecf20Sopenharmony_ci	}
18968c2ecf20Sopenharmony_ci
18978c2ecf20Sopenharmony_ci	if (handle->h_sync)
18988c2ecf20Sopenharmony_ci		transaction->t_synchronous_commit = 1;
18998c2ecf20Sopenharmony_ci
19008c2ecf20Sopenharmony_ci	/*
19018c2ecf20Sopenharmony_ci	 * If the handle is marked SYNC, we need to set another commit
19028c2ecf20Sopenharmony_ci	 * going!  We also want to force a commit if the transaction is too
19038c2ecf20Sopenharmony_ci	 * old now.
19048c2ecf20Sopenharmony_ci	 */
19058c2ecf20Sopenharmony_ci	if (handle->h_sync ||
19068c2ecf20Sopenharmony_ci	    time_after_eq(jiffies, transaction->t_expires)) {
19078c2ecf20Sopenharmony_ci		/* Do this even for aborted journals: an abort still
19088c2ecf20Sopenharmony_ci		 * completes the commit thread, it just doesn't write
19098c2ecf20Sopenharmony_ci		 * anything to disk. */
19108c2ecf20Sopenharmony_ci
19118c2ecf20Sopenharmony_ci		jbd_debug(2, "transaction too old, requesting commit for "
19128c2ecf20Sopenharmony_ci					"handle %p\n", handle);
19138c2ecf20Sopenharmony_ci		/* This is non-blocking */
19148c2ecf20Sopenharmony_ci		jbd2_log_start_commit(journal, tid);
19158c2ecf20Sopenharmony_ci
19168c2ecf20Sopenharmony_ci		/*
19178c2ecf20Sopenharmony_ci		 * Special case: JBD2_SYNC synchronous updates require us
19188c2ecf20Sopenharmony_ci		 * to wait for the commit to complete.
19198c2ecf20Sopenharmony_ci		 */
19208c2ecf20Sopenharmony_ci		if (handle->h_sync && !(current->flags & PF_MEMALLOC))
19218c2ecf20Sopenharmony_ci			wait_for_commit = 1;
19228c2ecf20Sopenharmony_ci	}
19238c2ecf20Sopenharmony_ci
19248c2ecf20Sopenharmony_ci	/*
19258c2ecf20Sopenharmony_ci	 * Once stop_this_handle() drops t_updates, the transaction could start
19268c2ecf20Sopenharmony_ci	 * committing on us and eventually disappear.  So we must not
19278c2ecf20Sopenharmony_ci	 * dereference transaction pointer again after calling
19288c2ecf20Sopenharmony_ci	 * stop_this_handle().
19298c2ecf20Sopenharmony_ci	 */
19308c2ecf20Sopenharmony_ci	stop_this_handle(handle);
19318c2ecf20Sopenharmony_ci
19328c2ecf20Sopenharmony_ci	if (wait_for_commit)
19338c2ecf20Sopenharmony_ci		err = jbd2_log_wait_commit(journal, tid);
19348c2ecf20Sopenharmony_ci
19358c2ecf20Sopenharmony_cifree_and_exit:
19368c2ecf20Sopenharmony_ci	if (handle->h_rsv_handle)
19378c2ecf20Sopenharmony_ci		jbd2_free_handle(handle->h_rsv_handle);
19388c2ecf20Sopenharmony_ci	jbd2_free_handle(handle);
19398c2ecf20Sopenharmony_ci	return err;
19408c2ecf20Sopenharmony_ci}
19418c2ecf20Sopenharmony_ci
19428c2ecf20Sopenharmony_ci/*
19438c2ecf20Sopenharmony_ci *
19448c2ecf20Sopenharmony_ci * List management code snippets: various functions for manipulating the
19458c2ecf20Sopenharmony_ci * transaction buffer lists.
19468c2ecf20Sopenharmony_ci *
19478c2ecf20Sopenharmony_ci */
19488c2ecf20Sopenharmony_ci
19498c2ecf20Sopenharmony_ci/*
19508c2ecf20Sopenharmony_ci * Append a buffer to a transaction list, given the transaction's list head
19518c2ecf20Sopenharmony_ci * pointer.
19528c2ecf20Sopenharmony_ci *
19538c2ecf20Sopenharmony_ci * j_list_lock is held.
19548c2ecf20Sopenharmony_ci *
19558c2ecf20Sopenharmony_ci * jh->b_state_lock is held.
19568c2ecf20Sopenharmony_ci */
19578c2ecf20Sopenharmony_ci
19588c2ecf20Sopenharmony_cistatic inline void
19598c2ecf20Sopenharmony_ci__blist_add_buffer(struct journal_head **list, struct journal_head *jh)
19608c2ecf20Sopenharmony_ci{
19618c2ecf20Sopenharmony_ci	if (!*list) {
19628c2ecf20Sopenharmony_ci		jh->b_tnext = jh->b_tprev = jh;
19638c2ecf20Sopenharmony_ci		*list = jh;
19648c2ecf20Sopenharmony_ci	} else {
19658c2ecf20Sopenharmony_ci		/* Insert at the tail of the list to preserve order */
19668c2ecf20Sopenharmony_ci		struct journal_head *first = *list, *last = first->b_tprev;
19678c2ecf20Sopenharmony_ci		jh->b_tprev = last;
19688c2ecf20Sopenharmony_ci		jh->b_tnext = first;
19698c2ecf20Sopenharmony_ci		last->b_tnext = first->b_tprev = jh;
19708c2ecf20Sopenharmony_ci	}
19718c2ecf20Sopenharmony_ci}
19728c2ecf20Sopenharmony_ci
19738c2ecf20Sopenharmony_ci/*
19748c2ecf20Sopenharmony_ci * Remove a buffer from a transaction list, given the transaction's list
19758c2ecf20Sopenharmony_ci * head pointer.
19768c2ecf20Sopenharmony_ci *
19778c2ecf20Sopenharmony_ci * Called with j_list_lock held, and the journal may not be locked.
19788c2ecf20Sopenharmony_ci *
19798c2ecf20Sopenharmony_ci * jh->b_state_lock is held.
19808c2ecf20Sopenharmony_ci */
19818c2ecf20Sopenharmony_ci
19828c2ecf20Sopenharmony_cistatic inline void
19838c2ecf20Sopenharmony_ci__blist_del_buffer(struct journal_head **list, struct journal_head *jh)
19848c2ecf20Sopenharmony_ci{
19858c2ecf20Sopenharmony_ci	if (*list == jh) {
19868c2ecf20Sopenharmony_ci		*list = jh->b_tnext;
19878c2ecf20Sopenharmony_ci		if (*list == jh)
19888c2ecf20Sopenharmony_ci			*list = NULL;
19898c2ecf20Sopenharmony_ci	}
19908c2ecf20Sopenharmony_ci	jh->b_tprev->b_tnext = jh->b_tnext;
19918c2ecf20Sopenharmony_ci	jh->b_tnext->b_tprev = jh->b_tprev;
19928c2ecf20Sopenharmony_ci}
19938c2ecf20Sopenharmony_ci
19948c2ecf20Sopenharmony_ci/*
19958c2ecf20Sopenharmony_ci * Remove a buffer from the appropriate transaction list.
19968c2ecf20Sopenharmony_ci *
19978c2ecf20Sopenharmony_ci * Note that this function can *change* the value of
19988c2ecf20Sopenharmony_ci * bh->b_transaction->t_buffers, t_forget, t_shadow_list, t_log_list or
19998c2ecf20Sopenharmony_ci * t_reserved_list.  If the caller is holding onto a copy of one of these
20008c2ecf20Sopenharmony_ci * pointers, it could go bad.  Generally the caller needs to re-read the
20018c2ecf20Sopenharmony_ci * pointer from the transaction_t.
20028c2ecf20Sopenharmony_ci *
20038c2ecf20Sopenharmony_ci * Called under j_list_lock.
20048c2ecf20Sopenharmony_ci */
20058c2ecf20Sopenharmony_cistatic void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
20068c2ecf20Sopenharmony_ci{
20078c2ecf20Sopenharmony_ci	struct journal_head **list = NULL;
20088c2ecf20Sopenharmony_ci	transaction_t *transaction;
20098c2ecf20Sopenharmony_ci	struct buffer_head *bh = jh2bh(jh);
20108c2ecf20Sopenharmony_ci
20118c2ecf20Sopenharmony_ci	lockdep_assert_held(&jh->b_state_lock);
20128c2ecf20Sopenharmony_ci	transaction = jh->b_transaction;
20138c2ecf20Sopenharmony_ci	if (transaction)
20148c2ecf20Sopenharmony_ci		assert_spin_locked(&transaction->t_journal->j_list_lock);
20158c2ecf20Sopenharmony_ci
20168c2ecf20Sopenharmony_ci	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
20178c2ecf20Sopenharmony_ci	if (jh->b_jlist != BJ_None)
20188c2ecf20Sopenharmony_ci		J_ASSERT_JH(jh, transaction != NULL);
20198c2ecf20Sopenharmony_ci
20208c2ecf20Sopenharmony_ci	switch (jh->b_jlist) {
20218c2ecf20Sopenharmony_ci	case BJ_None:
20228c2ecf20Sopenharmony_ci		return;
20238c2ecf20Sopenharmony_ci	case BJ_Metadata:
20248c2ecf20Sopenharmony_ci		transaction->t_nr_buffers--;
20258c2ecf20Sopenharmony_ci		J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
20268c2ecf20Sopenharmony_ci		list = &transaction->t_buffers;
20278c2ecf20Sopenharmony_ci		break;
20288c2ecf20Sopenharmony_ci	case BJ_Forget:
20298c2ecf20Sopenharmony_ci		list = &transaction->t_forget;
20308c2ecf20Sopenharmony_ci		break;
20318c2ecf20Sopenharmony_ci	case BJ_Shadow:
20328c2ecf20Sopenharmony_ci		list = &transaction->t_shadow_list;
20338c2ecf20Sopenharmony_ci		break;
20348c2ecf20Sopenharmony_ci	case BJ_Reserved:
20358c2ecf20Sopenharmony_ci		list = &transaction->t_reserved_list;
20368c2ecf20Sopenharmony_ci		break;
20378c2ecf20Sopenharmony_ci	}
20388c2ecf20Sopenharmony_ci
20398c2ecf20Sopenharmony_ci	__blist_del_buffer(list, jh);
20408c2ecf20Sopenharmony_ci	jh->b_jlist = BJ_None;
20418c2ecf20Sopenharmony_ci	if (transaction && is_journal_aborted(transaction->t_journal))
20428c2ecf20Sopenharmony_ci		clear_buffer_jbddirty(bh);
20438c2ecf20Sopenharmony_ci	else if (test_clear_buffer_jbddirty(bh))
20448c2ecf20Sopenharmony_ci		mark_buffer_dirty(bh);	/* Expose it to the VM */
20458c2ecf20Sopenharmony_ci}
20468c2ecf20Sopenharmony_ci
20478c2ecf20Sopenharmony_ci/*
20488c2ecf20Sopenharmony_ci * Remove buffer from all transactions. The caller is responsible for dropping
20498c2ecf20Sopenharmony_ci * the jh reference that belonged to the transaction.
20508c2ecf20Sopenharmony_ci *
20518c2ecf20Sopenharmony_ci * Called with bh_state lock and j_list_lock
20528c2ecf20Sopenharmony_ci */
20538c2ecf20Sopenharmony_cistatic void __jbd2_journal_unfile_buffer(struct journal_head *jh)
20548c2ecf20Sopenharmony_ci{
20558c2ecf20Sopenharmony_ci	J_ASSERT_JH(jh, jh->b_transaction != NULL);
20568c2ecf20Sopenharmony_ci	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
20578c2ecf20Sopenharmony_ci
20588c2ecf20Sopenharmony_ci	__jbd2_journal_temp_unlink_buffer(jh);
20598c2ecf20Sopenharmony_ci	jh->b_transaction = NULL;
20608c2ecf20Sopenharmony_ci}
20618c2ecf20Sopenharmony_ci
20628c2ecf20Sopenharmony_civoid jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
20638c2ecf20Sopenharmony_ci{
20648c2ecf20Sopenharmony_ci	struct buffer_head *bh = jh2bh(jh);
20658c2ecf20Sopenharmony_ci
20668c2ecf20Sopenharmony_ci	/* Get reference so that buffer cannot be freed before we unlock it */
20678c2ecf20Sopenharmony_ci	get_bh(bh);
20688c2ecf20Sopenharmony_ci	spin_lock(&jh->b_state_lock);
20698c2ecf20Sopenharmony_ci	spin_lock(&journal->j_list_lock);
20708c2ecf20Sopenharmony_ci	__jbd2_journal_unfile_buffer(jh);
20718c2ecf20Sopenharmony_ci	spin_unlock(&journal->j_list_lock);
20728c2ecf20Sopenharmony_ci	spin_unlock(&jh->b_state_lock);
20738c2ecf20Sopenharmony_ci	jbd2_journal_put_journal_head(jh);
20748c2ecf20Sopenharmony_ci	__brelse(bh);
20758c2ecf20Sopenharmony_ci}
20768c2ecf20Sopenharmony_ci
20778c2ecf20Sopenharmony_ci/*
20788c2ecf20Sopenharmony_ci * Called from jbd2_journal_try_to_free_buffers().
20798c2ecf20Sopenharmony_ci *
20808c2ecf20Sopenharmony_ci * Called under jh->b_state_lock
20818c2ecf20Sopenharmony_ci */
20828c2ecf20Sopenharmony_cistatic void
20838c2ecf20Sopenharmony_ci__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
20848c2ecf20Sopenharmony_ci{
20858c2ecf20Sopenharmony_ci	struct journal_head *jh;
20868c2ecf20Sopenharmony_ci
20878c2ecf20Sopenharmony_ci	jh = bh2jh(bh);
20888c2ecf20Sopenharmony_ci
20898c2ecf20Sopenharmony_ci	if (buffer_locked(bh) || buffer_dirty(bh))
20908c2ecf20Sopenharmony_ci		goto out;
20918c2ecf20Sopenharmony_ci
20928c2ecf20Sopenharmony_ci	if (jh->b_next_transaction != NULL || jh->b_transaction != NULL)
20938c2ecf20Sopenharmony_ci		goto out;
20948c2ecf20Sopenharmony_ci
20958c2ecf20Sopenharmony_ci	spin_lock(&journal->j_list_lock);
20968c2ecf20Sopenharmony_ci	if (jh->b_cp_transaction != NULL) {
20978c2ecf20Sopenharmony_ci		/* written-back checkpointed metadata buffer */
20988c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "remove from checkpoint list");
20998c2ecf20Sopenharmony_ci		__jbd2_journal_remove_checkpoint(jh);
21008c2ecf20Sopenharmony_ci	}
21018c2ecf20Sopenharmony_ci	spin_unlock(&journal->j_list_lock);
21028c2ecf20Sopenharmony_ciout:
21038c2ecf20Sopenharmony_ci	return;
21048c2ecf20Sopenharmony_ci}
21058c2ecf20Sopenharmony_ci
21068c2ecf20Sopenharmony_ci/**
21078c2ecf20Sopenharmony_ci * jbd2_journal_try_to_free_buffers() - try to free page buffers.
21088c2ecf20Sopenharmony_ci * @journal: journal for operation
21098c2ecf20Sopenharmony_ci * @page: to try and free
21108c2ecf20Sopenharmony_ci *
21118c2ecf20Sopenharmony_ci * For all the buffers on this page,
21128c2ecf20Sopenharmony_ci * if they are fully written out ordered data, move them onto BUF_CLEAN
21138c2ecf20Sopenharmony_ci * so try_to_free_buffers() can reap them.
21148c2ecf20Sopenharmony_ci *
21158c2ecf20Sopenharmony_ci * This function returns non-zero if we wish try_to_free_buffers()
21168c2ecf20Sopenharmony_ci * to be called. We do this if the page is releasable by try_to_free_buffers().
21178c2ecf20Sopenharmony_ci * We also do it if the page has locked or dirty buffers and the caller wants
21188c2ecf20Sopenharmony_ci * us to perform sync or async writeout.
21198c2ecf20Sopenharmony_ci *
21208c2ecf20Sopenharmony_ci * This complicates JBD locking somewhat.  We aren't protected by the
21218c2ecf20Sopenharmony_ci * BKL here.  We wish to remove the buffer from its committing or
21228c2ecf20Sopenharmony_ci * running transaction's ->t_datalist via __jbd2_journal_unfile_buffer.
21238c2ecf20Sopenharmony_ci *
21248c2ecf20Sopenharmony_ci * This may *change* the value of transaction_t->t_datalist, so anyone
21258c2ecf20Sopenharmony_ci * who looks at t_datalist needs to lock against this function.
21268c2ecf20Sopenharmony_ci *
21278c2ecf20Sopenharmony_ci * Even worse, someone may be doing a jbd2_journal_dirty_data on this
21288c2ecf20Sopenharmony_ci * buffer.  So we need to lock against that.  jbd2_journal_dirty_data()
21298c2ecf20Sopenharmony_ci * will come out of the lock with the buffer dirty, which makes it
21308c2ecf20Sopenharmony_ci * ineligible for release here.
21318c2ecf20Sopenharmony_ci *
21328c2ecf20Sopenharmony_ci * Who else is affected by this?  hmm...  Really the only contender
21338c2ecf20Sopenharmony_ci * is do_get_write_access() - it could be looking at the buffer while
21348c2ecf20Sopenharmony_ci * journal_try_to_free_buffer() is changing its state.  But that
21358c2ecf20Sopenharmony_ci * cannot happen because we never reallocate freed data as metadata
21368c2ecf20Sopenharmony_ci * while the data is part of a transaction.  Yes?
21378c2ecf20Sopenharmony_ci *
21388c2ecf20Sopenharmony_ci * Return 0 on failure, 1 on success
21398c2ecf20Sopenharmony_ci */
21408c2ecf20Sopenharmony_ciint jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page)
21418c2ecf20Sopenharmony_ci{
21428c2ecf20Sopenharmony_ci	struct buffer_head *head;
21438c2ecf20Sopenharmony_ci	struct buffer_head *bh;
21448c2ecf20Sopenharmony_ci	int ret = 0;
21458c2ecf20Sopenharmony_ci
21468c2ecf20Sopenharmony_ci	J_ASSERT(PageLocked(page));
21478c2ecf20Sopenharmony_ci
21488c2ecf20Sopenharmony_ci	head = page_buffers(page);
21498c2ecf20Sopenharmony_ci	bh = head;
21508c2ecf20Sopenharmony_ci	do {
21518c2ecf20Sopenharmony_ci		struct journal_head *jh;
21528c2ecf20Sopenharmony_ci
21538c2ecf20Sopenharmony_ci		/*
21548c2ecf20Sopenharmony_ci		 * We take our own ref against the journal_head here to avoid
21558c2ecf20Sopenharmony_ci		 * having to add tons of locking around each instance of
21568c2ecf20Sopenharmony_ci		 * jbd2_journal_put_journal_head().
21578c2ecf20Sopenharmony_ci		 */
21588c2ecf20Sopenharmony_ci		jh = jbd2_journal_grab_journal_head(bh);
21598c2ecf20Sopenharmony_ci		if (!jh)
21608c2ecf20Sopenharmony_ci			continue;
21618c2ecf20Sopenharmony_ci
21628c2ecf20Sopenharmony_ci		spin_lock(&jh->b_state_lock);
21638c2ecf20Sopenharmony_ci		__journal_try_to_free_buffer(journal, bh);
21648c2ecf20Sopenharmony_ci		spin_unlock(&jh->b_state_lock);
21658c2ecf20Sopenharmony_ci		jbd2_journal_put_journal_head(jh);
21668c2ecf20Sopenharmony_ci		if (buffer_jbd(bh))
21678c2ecf20Sopenharmony_ci			goto busy;
21688c2ecf20Sopenharmony_ci	} while ((bh = bh->b_this_page) != head);
21698c2ecf20Sopenharmony_ci
21708c2ecf20Sopenharmony_ci	ret = try_to_free_buffers(page);
21718c2ecf20Sopenharmony_cibusy:
21728c2ecf20Sopenharmony_ci	return ret;
21738c2ecf20Sopenharmony_ci}
21748c2ecf20Sopenharmony_ci
21758c2ecf20Sopenharmony_ci/*
21768c2ecf20Sopenharmony_ci * This buffer is no longer needed.  If it is on an older transaction's
21778c2ecf20Sopenharmony_ci * checkpoint list we need to record it on this transaction's forget list
21788c2ecf20Sopenharmony_ci * to pin this buffer (and hence its checkpointing transaction) down until
21798c2ecf20Sopenharmony_ci * this transaction commits.  If the buffer isn't on a checkpoint list, we
21808c2ecf20Sopenharmony_ci * release it.
21818c2ecf20Sopenharmony_ci * Returns non-zero if JBD no longer has an interest in the buffer.
21828c2ecf20Sopenharmony_ci *
21838c2ecf20Sopenharmony_ci * Called under j_list_lock.
21848c2ecf20Sopenharmony_ci *
21858c2ecf20Sopenharmony_ci * Called under jh->b_state_lock.
21868c2ecf20Sopenharmony_ci */
21878c2ecf20Sopenharmony_cistatic int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
21888c2ecf20Sopenharmony_ci{
21898c2ecf20Sopenharmony_ci	int may_free = 1;
21908c2ecf20Sopenharmony_ci	struct buffer_head *bh = jh2bh(jh);
21918c2ecf20Sopenharmony_ci
21928c2ecf20Sopenharmony_ci	if (jh->b_cp_transaction) {
21938c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "on running+cp transaction");
21948c2ecf20Sopenharmony_ci		__jbd2_journal_temp_unlink_buffer(jh);
21958c2ecf20Sopenharmony_ci		/*
21968c2ecf20Sopenharmony_ci		 * We don't want to write the buffer anymore, clear the
21978c2ecf20Sopenharmony_ci		 * bit so that we don't confuse checks in
21988c2ecf20Sopenharmony_ci		 * __journal_file_buffer
21998c2ecf20Sopenharmony_ci		 */
22008c2ecf20Sopenharmony_ci		clear_buffer_dirty(bh);
22018c2ecf20Sopenharmony_ci		__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
22028c2ecf20Sopenharmony_ci		may_free = 0;
22038c2ecf20Sopenharmony_ci	} else {
22048c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "on running transaction");
22058c2ecf20Sopenharmony_ci		__jbd2_journal_unfile_buffer(jh);
22068c2ecf20Sopenharmony_ci		jbd2_journal_put_journal_head(jh);
22078c2ecf20Sopenharmony_ci	}
22088c2ecf20Sopenharmony_ci	return may_free;
22098c2ecf20Sopenharmony_ci}
22108c2ecf20Sopenharmony_ci
22118c2ecf20Sopenharmony_ci/*
22128c2ecf20Sopenharmony_ci * jbd2_journal_invalidatepage
22138c2ecf20Sopenharmony_ci *
22148c2ecf20Sopenharmony_ci * This code is tricky.  It has a number of cases to deal with.
22158c2ecf20Sopenharmony_ci *
22168c2ecf20Sopenharmony_ci * There are two invariants which this code relies on:
22178c2ecf20Sopenharmony_ci *
22188c2ecf20Sopenharmony_ci * i_size must be updated on disk before we start calling invalidatepage on the
22198c2ecf20Sopenharmony_ci * data.
22208c2ecf20Sopenharmony_ci *
22218c2ecf20Sopenharmony_ci *  This is done in ext3 by defining an ext3_setattr method which
22228c2ecf20Sopenharmony_ci *  updates i_size before truncate gets going.  By maintaining this
22238c2ecf20Sopenharmony_ci *  invariant, we can be sure that it is safe to throw away any buffers
22248c2ecf20Sopenharmony_ci *  attached to the current transaction: once the transaction commits,
22258c2ecf20Sopenharmony_ci *  we know that the data will not be needed.
22268c2ecf20Sopenharmony_ci *
22278c2ecf20Sopenharmony_ci *  Note however that we can *not* throw away data belonging to the
22288c2ecf20Sopenharmony_ci *  previous, committing transaction!
22298c2ecf20Sopenharmony_ci *
22308c2ecf20Sopenharmony_ci * Any disk blocks which *are* part of the previous, committing
22318c2ecf20Sopenharmony_ci * transaction (and which therefore cannot be discarded immediately) are
22328c2ecf20Sopenharmony_ci * not going to be reused in the new running transaction
22338c2ecf20Sopenharmony_ci *
22348c2ecf20Sopenharmony_ci *  The bitmap committed_data images guarantee this: any block which is
22358c2ecf20Sopenharmony_ci *  allocated in one transaction and removed in the next will be marked
22368c2ecf20Sopenharmony_ci *  as in-use in the committed_data bitmap, so cannot be reused until
22378c2ecf20Sopenharmony_ci *  the next transaction to delete the block commits.  This means that
22388c2ecf20Sopenharmony_ci *  leaving committing buffers dirty is quite safe: the disk blocks
22398c2ecf20Sopenharmony_ci *  cannot be reallocated to a different file and so buffer aliasing is
22408c2ecf20Sopenharmony_ci *  not possible.
22418c2ecf20Sopenharmony_ci *
22428c2ecf20Sopenharmony_ci *
22438c2ecf20Sopenharmony_ci * The above applies mainly to ordered data mode.  In writeback mode we
22448c2ecf20Sopenharmony_ci * don't make guarantees about the order in which data hits disk --- in
22458c2ecf20Sopenharmony_ci * particular we don't guarantee that new dirty data is flushed before
22468c2ecf20Sopenharmony_ci * transaction commit --- so it is always safe just to discard data
22478c2ecf20Sopenharmony_ci * immediately in that mode.  --sct
22488c2ecf20Sopenharmony_ci */
22498c2ecf20Sopenharmony_ci
22508c2ecf20Sopenharmony_ci/*
22518c2ecf20Sopenharmony_ci * The journal_unmap_buffer helper function returns zero if the buffer
22528c2ecf20Sopenharmony_ci * concerned remains pinned as an anonymous buffer belonging to an older
22538c2ecf20Sopenharmony_ci * transaction.
22548c2ecf20Sopenharmony_ci *
22558c2ecf20Sopenharmony_ci * We're outside-transaction here.  Either or both of j_running_transaction
22568c2ecf20Sopenharmony_ci * and j_committing_transaction may be NULL.
22578c2ecf20Sopenharmony_ci */
22588c2ecf20Sopenharmony_cistatic int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
22598c2ecf20Sopenharmony_ci				int partial_page)
22608c2ecf20Sopenharmony_ci{
22618c2ecf20Sopenharmony_ci	transaction_t *transaction;
22628c2ecf20Sopenharmony_ci	struct journal_head *jh;
22638c2ecf20Sopenharmony_ci	int may_free = 1;
22648c2ecf20Sopenharmony_ci
22658c2ecf20Sopenharmony_ci	BUFFER_TRACE(bh, "entry");
22668c2ecf20Sopenharmony_ci
22678c2ecf20Sopenharmony_ci	/*
22688c2ecf20Sopenharmony_ci	 * It is safe to proceed here without the j_list_lock because the
22698c2ecf20Sopenharmony_ci	 * buffers cannot be stolen by try_to_free_buffers as long as we are
22708c2ecf20Sopenharmony_ci	 * holding the page lock. --sct
22718c2ecf20Sopenharmony_ci	 */
22728c2ecf20Sopenharmony_ci
22738c2ecf20Sopenharmony_ci	jh = jbd2_journal_grab_journal_head(bh);
22748c2ecf20Sopenharmony_ci	if (!jh)
22758c2ecf20Sopenharmony_ci		goto zap_buffer_unlocked;
22768c2ecf20Sopenharmony_ci
22778c2ecf20Sopenharmony_ci	/* OK, we have data buffer in journaled mode */
22788c2ecf20Sopenharmony_ci	write_lock(&journal->j_state_lock);
22798c2ecf20Sopenharmony_ci	spin_lock(&jh->b_state_lock);
22808c2ecf20Sopenharmony_ci	spin_lock(&journal->j_list_lock);
22818c2ecf20Sopenharmony_ci
22828c2ecf20Sopenharmony_ci	/*
22838c2ecf20Sopenharmony_ci	 * We cannot remove the buffer from checkpoint lists until the
22848c2ecf20Sopenharmony_ci	 * transaction adding inode to orphan list (let's call it T)
22858c2ecf20Sopenharmony_ci	 * is committed.  Otherwise if the transaction changing the
22868c2ecf20Sopenharmony_ci	 * buffer would be cleaned from the journal before T is
22878c2ecf20Sopenharmony_ci	 * committed, a crash will cause that the correct contents of
22888c2ecf20Sopenharmony_ci	 * the buffer will be lost.  On the other hand we have to
22898c2ecf20Sopenharmony_ci	 * clear the buffer dirty bit at latest at the moment when the
22908c2ecf20Sopenharmony_ci	 * transaction marking the buffer as freed in the filesystem
22918c2ecf20Sopenharmony_ci	 * structures is committed because from that moment on the
22928c2ecf20Sopenharmony_ci	 * block can be reallocated and used by a different page.
22938c2ecf20Sopenharmony_ci	 * Since the block hasn't been freed yet but the inode has
22948c2ecf20Sopenharmony_ci	 * already been added to orphan list, it is safe for us to add
22958c2ecf20Sopenharmony_ci	 * the buffer to BJ_Forget list of the newest transaction.
22968c2ecf20Sopenharmony_ci	 *
22978c2ecf20Sopenharmony_ci	 * Also we have to clear buffer_mapped flag of a truncated buffer
22988c2ecf20Sopenharmony_ci	 * because the buffer_head may be attached to the page straddling
22998c2ecf20Sopenharmony_ci	 * i_size (can happen only when blocksize < pagesize) and thus the
23008c2ecf20Sopenharmony_ci	 * buffer_head can be reused when the file is extended again. So we end
23018c2ecf20Sopenharmony_ci	 * up keeping around invalidated buffers attached to transactions'
23028c2ecf20Sopenharmony_ci	 * BJ_Forget list just to stop checkpointing code from cleaning up
23038c2ecf20Sopenharmony_ci	 * the transaction this buffer was modified in.
23048c2ecf20Sopenharmony_ci	 */
23058c2ecf20Sopenharmony_ci	transaction = jh->b_transaction;
23068c2ecf20Sopenharmony_ci	if (transaction == NULL) {
23078c2ecf20Sopenharmony_ci		/* First case: not on any transaction.  If it
23088c2ecf20Sopenharmony_ci		 * has no checkpoint link, then we can zap it:
23098c2ecf20Sopenharmony_ci		 * it's a writeback-mode buffer so we don't care
23108c2ecf20Sopenharmony_ci		 * if it hits disk safely. */
23118c2ecf20Sopenharmony_ci		if (!jh->b_cp_transaction) {
23128c2ecf20Sopenharmony_ci			JBUFFER_TRACE(jh, "not on any transaction: zap");
23138c2ecf20Sopenharmony_ci			goto zap_buffer;
23148c2ecf20Sopenharmony_ci		}
23158c2ecf20Sopenharmony_ci
23168c2ecf20Sopenharmony_ci		if (!buffer_dirty(bh)) {
23178c2ecf20Sopenharmony_ci			/* bdflush has written it.  We can drop it now */
23188c2ecf20Sopenharmony_ci			__jbd2_journal_remove_checkpoint(jh);
23198c2ecf20Sopenharmony_ci			goto zap_buffer;
23208c2ecf20Sopenharmony_ci		}
23218c2ecf20Sopenharmony_ci
23228c2ecf20Sopenharmony_ci		/* OK, it must be in the journal but still not
23238c2ecf20Sopenharmony_ci		 * written fully to disk: it's metadata or
23248c2ecf20Sopenharmony_ci		 * journaled data... */
23258c2ecf20Sopenharmony_ci
23268c2ecf20Sopenharmony_ci		if (journal->j_running_transaction) {
23278c2ecf20Sopenharmony_ci			/* ... and once the current transaction has
23288c2ecf20Sopenharmony_ci			 * committed, the buffer won't be needed any
23298c2ecf20Sopenharmony_ci			 * longer. */
23308c2ecf20Sopenharmony_ci			JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
23318c2ecf20Sopenharmony_ci			may_free = __dispose_buffer(jh,
23328c2ecf20Sopenharmony_ci					journal->j_running_transaction);
23338c2ecf20Sopenharmony_ci			goto zap_buffer;
23348c2ecf20Sopenharmony_ci		} else {
23358c2ecf20Sopenharmony_ci			/* There is no currently-running transaction. So the
23368c2ecf20Sopenharmony_ci			 * orphan record which we wrote for this file must have
23378c2ecf20Sopenharmony_ci			 * passed into commit.  We must attach this buffer to
23388c2ecf20Sopenharmony_ci			 * the committing transaction, if it exists. */
23398c2ecf20Sopenharmony_ci			if (journal->j_committing_transaction) {
23408c2ecf20Sopenharmony_ci				JBUFFER_TRACE(jh, "give to committing trans");
23418c2ecf20Sopenharmony_ci				may_free = __dispose_buffer(jh,
23428c2ecf20Sopenharmony_ci					journal->j_committing_transaction);
23438c2ecf20Sopenharmony_ci				goto zap_buffer;
23448c2ecf20Sopenharmony_ci			} else {
23458c2ecf20Sopenharmony_ci				/* The orphan record's transaction has
23468c2ecf20Sopenharmony_ci				 * committed.  We can cleanse this buffer */
23478c2ecf20Sopenharmony_ci				clear_buffer_jbddirty(bh);
23488c2ecf20Sopenharmony_ci				__jbd2_journal_remove_checkpoint(jh);
23498c2ecf20Sopenharmony_ci				goto zap_buffer;
23508c2ecf20Sopenharmony_ci			}
23518c2ecf20Sopenharmony_ci		}
23528c2ecf20Sopenharmony_ci	} else if (transaction == journal->j_committing_transaction) {
23538c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "on committing transaction");
23548c2ecf20Sopenharmony_ci		/*
23558c2ecf20Sopenharmony_ci		 * The buffer is committing, we simply cannot touch
23568c2ecf20Sopenharmony_ci		 * it. If the page is straddling i_size we have to wait
23578c2ecf20Sopenharmony_ci		 * for commit and try again.
23588c2ecf20Sopenharmony_ci		 */
23598c2ecf20Sopenharmony_ci		if (partial_page) {
23608c2ecf20Sopenharmony_ci			spin_unlock(&journal->j_list_lock);
23618c2ecf20Sopenharmony_ci			spin_unlock(&jh->b_state_lock);
23628c2ecf20Sopenharmony_ci			write_unlock(&journal->j_state_lock);
23638c2ecf20Sopenharmony_ci			jbd2_journal_put_journal_head(jh);
23648c2ecf20Sopenharmony_ci			/* Already zapped buffer? Nothing to do... */
23658c2ecf20Sopenharmony_ci			if (!bh->b_bdev)
23668c2ecf20Sopenharmony_ci				return 0;
23678c2ecf20Sopenharmony_ci			return -EBUSY;
23688c2ecf20Sopenharmony_ci		}
23698c2ecf20Sopenharmony_ci		/*
23708c2ecf20Sopenharmony_ci		 * OK, buffer won't be reachable after truncate. We just clear
23718c2ecf20Sopenharmony_ci		 * b_modified to not confuse transaction credit accounting, and
23728c2ecf20Sopenharmony_ci		 * set j_next_transaction to the running transaction (if there
23738c2ecf20Sopenharmony_ci		 * is one) and mark buffer as freed so that commit code knows
23748c2ecf20Sopenharmony_ci		 * it should clear dirty bits when it is done with the buffer.
23758c2ecf20Sopenharmony_ci		 */
23768c2ecf20Sopenharmony_ci		set_buffer_freed(bh);
23778c2ecf20Sopenharmony_ci		if (journal->j_running_transaction && buffer_jbddirty(bh))
23788c2ecf20Sopenharmony_ci			jh->b_next_transaction = journal->j_running_transaction;
23798c2ecf20Sopenharmony_ci		jh->b_modified = 0;
23808c2ecf20Sopenharmony_ci		spin_unlock(&journal->j_list_lock);
23818c2ecf20Sopenharmony_ci		spin_unlock(&jh->b_state_lock);
23828c2ecf20Sopenharmony_ci		write_unlock(&journal->j_state_lock);
23838c2ecf20Sopenharmony_ci		jbd2_journal_put_journal_head(jh);
23848c2ecf20Sopenharmony_ci		return 0;
23858c2ecf20Sopenharmony_ci	} else {
23868c2ecf20Sopenharmony_ci		/* Good, the buffer belongs to the running transaction.
23878c2ecf20Sopenharmony_ci		 * We are writing our own transaction's data, not any
23888c2ecf20Sopenharmony_ci		 * previous one's, so it is safe to throw it away
23898c2ecf20Sopenharmony_ci		 * (remember that we expect the filesystem to have set
23908c2ecf20Sopenharmony_ci		 * i_size already for this truncate so recovery will not
23918c2ecf20Sopenharmony_ci		 * expose the disk blocks we are discarding here.) */
23928c2ecf20Sopenharmony_ci		J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
23938c2ecf20Sopenharmony_ci		JBUFFER_TRACE(jh, "on running transaction");
23948c2ecf20Sopenharmony_ci		may_free = __dispose_buffer(jh, transaction);
23958c2ecf20Sopenharmony_ci	}
23968c2ecf20Sopenharmony_ci
23978c2ecf20Sopenharmony_cizap_buffer:
23988c2ecf20Sopenharmony_ci	/*
23998c2ecf20Sopenharmony_ci	 * This is tricky. Although the buffer is truncated, it may be reused
24008c2ecf20Sopenharmony_ci	 * if blocksize < pagesize and it is attached to the page straddling
24018c2ecf20Sopenharmony_ci	 * EOF. Since the buffer might have been added to BJ_Forget list of the
24028c2ecf20Sopenharmony_ci	 * running transaction, journal_get_write_access() won't clear
24038c2ecf20Sopenharmony_ci	 * b_modified and credit accounting gets confused. So clear b_modified
24048c2ecf20Sopenharmony_ci	 * here.
24058c2ecf20Sopenharmony_ci	 */
24068c2ecf20Sopenharmony_ci	jh->b_modified = 0;
24078c2ecf20Sopenharmony_ci	spin_unlock(&journal->j_list_lock);
24088c2ecf20Sopenharmony_ci	spin_unlock(&jh->b_state_lock);
24098c2ecf20Sopenharmony_ci	write_unlock(&journal->j_state_lock);
24108c2ecf20Sopenharmony_ci	jbd2_journal_put_journal_head(jh);
24118c2ecf20Sopenharmony_cizap_buffer_unlocked:
24128c2ecf20Sopenharmony_ci	clear_buffer_dirty(bh);
24138c2ecf20Sopenharmony_ci	J_ASSERT_BH(bh, !buffer_jbddirty(bh));
24148c2ecf20Sopenharmony_ci	clear_buffer_mapped(bh);
24158c2ecf20Sopenharmony_ci	clear_buffer_req(bh);
24168c2ecf20Sopenharmony_ci	clear_buffer_new(bh);
24178c2ecf20Sopenharmony_ci	clear_buffer_delay(bh);
24188c2ecf20Sopenharmony_ci	clear_buffer_unwritten(bh);
24198c2ecf20Sopenharmony_ci	bh->b_bdev = NULL;
24208c2ecf20Sopenharmony_ci	return may_free;
24218c2ecf20Sopenharmony_ci}
24228c2ecf20Sopenharmony_ci
24238c2ecf20Sopenharmony_ci/**
24248c2ecf20Sopenharmony_ci * jbd2_journal_invalidatepage()
24258c2ecf20Sopenharmony_ci * @journal: journal to use for flush...
24268c2ecf20Sopenharmony_ci * @page:    page to flush
24278c2ecf20Sopenharmony_ci * @offset:  start of the range to invalidate
24288c2ecf20Sopenharmony_ci * @length:  length of the range to invalidate
24298c2ecf20Sopenharmony_ci *
24308c2ecf20Sopenharmony_ci * Reap page buffers containing data after in the specified range in page.
24318c2ecf20Sopenharmony_ci * Can return -EBUSY if buffers are part of the committing transaction and
24328c2ecf20Sopenharmony_ci * the page is straddling i_size. Caller then has to wait for current commit
24338c2ecf20Sopenharmony_ci * and try again.
24348c2ecf20Sopenharmony_ci */
24358c2ecf20Sopenharmony_ciint jbd2_journal_invalidatepage(journal_t *journal,
24368c2ecf20Sopenharmony_ci				struct page *page,
24378c2ecf20Sopenharmony_ci				unsigned int offset,
24388c2ecf20Sopenharmony_ci				unsigned int length)
24398c2ecf20Sopenharmony_ci{
24408c2ecf20Sopenharmony_ci	struct buffer_head *head, *bh, *next;
24418c2ecf20Sopenharmony_ci	unsigned int stop = offset + length;
24428c2ecf20Sopenharmony_ci	unsigned int curr_off = 0;
24438c2ecf20Sopenharmony_ci	int partial_page = (offset || length < PAGE_SIZE);
24448c2ecf20Sopenharmony_ci	int may_free = 1;
24458c2ecf20Sopenharmony_ci	int ret = 0;
24468c2ecf20Sopenharmony_ci
24478c2ecf20Sopenharmony_ci	if (!PageLocked(page))
24488c2ecf20Sopenharmony_ci		BUG();
24498c2ecf20Sopenharmony_ci	if (!page_has_buffers(page))
24508c2ecf20Sopenharmony_ci		return 0;
24518c2ecf20Sopenharmony_ci
24528c2ecf20Sopenharmony_ci	BUG_ON(stop > PAGE_SIZE || stop < length);
24538c2ecf20Sopenharmony_ci
24548c2ecf20Sopenharmony_ci	/* We will potentially be playing with lists other than just the
24558c2ecf20Sopenharmony_ci	 * data lists (especially for journaled data mode), so be
24568c2ecf20Sopenharmony_ci	 * cautious in our locking. */
24578c2ecf20Sopenharmony_ci
24588c2ecf20Sopenharmony_ci	head = bh = page_buffers(page);
24598c2ecf20Sopenharmony_ci	do {
24608c2ecf20Sopenharmony_ci		unsigned int next_off = curr_off + bh->b_size;
24618c2ecf20Sopenharmony_ci		next = bh->b_this_page;
24628c2ecf20Sopenharmony_ci
24638c2ecf20Sopenharmony_ci		if (next_off > stop)
24648c2ecf20Sopenharmony_ci			return 0;
24658c2ecf20Sopenharmony_ci
24668c2ecf20Sopenharmony_ci		if (offset <= curr_off) {
24678c2ecf20Sopenharmony_ci			/* This block is wholly outside the truncation point */
24688c2ecf20Sopenharmony_ci			lock_buffer(bh);
24698c2ecf20Sopenharmony_ci			ret = journal_unmap_buffer(journal, bh, partial_page);
24708c2ecf20Sopenharmony_ci			unlock_buffer(bh);
24718c2ecf20Sopenharmony_ci			if (ret < 0)
24728c2ecf20Sopenharmony_ci				return ret;
24738c2ecf20Sopenharmony_ci			may_free &= ret;
24748c2ecf20Sopenharmony_ci		}
24758c2ecf20Sopenharmony_ci		curr_off = next_off;
24768c2ecf20Sopenharmony_ci		bh = next;
24778c2ecf20Sopenharmony_ci
24788c2ecf20Sopenharmony_ci	} while (bh != head);
24798c2ecf20Sopenharmony_ci
24808c2ecf20Sopenharmony_ci	if (!partial_page) {
24818c2ecf20Sopenharmony_ci		if (may_free && try_to_free_buffers(page))
24828c2ecf20Sopenharmony_ci			J_ASSERT(!page_has_buffers(page));
24838c2ecf20Sopenharmony_ci	}
24848c2ecf20Sopenharmony_ci	return 0;
24858c2ecf20Sopenharmony_ci}
24868c2ecf20Sopenharmony_ci
24878c2ecf20Sopenharmony_ci/*
24888c2ecf20Sopenharmony_ci * File a buffer on the given transaction list.
24898c2ecf20Sopenharmony_ci */
24908c2ecf20Sopenharmony_civoid __jbd2_journal_file_buffer(struct journal_head *jh,
24918c2ecf20Sopenharmony_ci			transaction_t *transaction, int jlist)
24928c2ecf20Sopenharmony_ci{
24938c2ecf20Sopenharmony_ci	struct journal_head **list = NULL;
24948c2ecf20Sopenharmony_ci	int was_dirty = 0;
24958c2ecf20Sopenharmony_ci	struct buffer_head *bh = jh2bh(jh);
24968c2ecf20Sopenharmony_ci
24978c2ecf20Sopenharmony_ci	lockdep_assert_held(&jh->b_state_lock);
24988c2ecf20Sopenharmony_ci	assert_spin_locked(&transaction->t_journal->j_list_lock);
24998c2ecf20Sopenharmony_ci
25008c2ecf20Sopenharmony_ci	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
25018c2ecf20Sopenharmony_ci	J_ASSERT_JH(jh, jh->b_transaction == transaction ||
25028c2ecf20Sopenharmony_ci				jh->b_transaction == NULL);
25038c2ecf20Sopenharmony_ci
25048c2ecf20Sopenharmony_ci	if (jh->b_transaction && jh->b_jlist == jlist)
25058c2ecf20Sopenharmony_ci		return;
25068c2ecf20Sopenharmony_ci
25078c2ecf20Sopenharmony_ci	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
25088c2ecf20Sopenharmony_ci	    jlist == BJ_Shadow || jlist == BJ_Forget) {
25098c2ecf20Sopenharmony_ci		/*
25108c2ecf20Sopenharmony_ci		 * For metadata buffers, we track dirty bit in buffer_jbddirty
25118c2ecf20Sopenharmony_ci		 * instead of buffer_dirty. We should not see a dirty bit set
25128c2ecf20Sopenharmony_ci		 * here because we clear it in do_get_write_access but e.g.
25138c2ecf20Sopenharmony_ci		 * tune2fs can modify the sb and set the dirty bit at any time
25148c2ecf20Sopenharmony_ci		 * so we try to gracefully handle that.
25158c2ecf20Sopenharmony_ci		 */
25168c2ecf20Sopenharmony_ci		if (buffer_dirty(bh))
25178c2ecf20Sopenharmony_ci			warn_dirty_buffer(bh);
25188c2ecf20Sopenharmony_ci		if (test_clear_buffer_dirty(bh) ||
25198c2ecf20Sopenharmony_ci		    test_clear_buffer_jbddirty(bh))
25208c2ecf20Sopenharmony_ci			was_dirty = 1;
25218c2ecf20Sopenharmony_ci	}
25228c2ecf20Sopenharmony_ci
25238c2ecf20Sopenharmony_ci	if (jh->b_transaction)
25248c2ecf20Sopenharmony_ci		__jbd2_journal_temp_unlink_buffer(jh);
25258c2ecf20Sopenharmony_ci	else
25268c2ecf20Sopenharmony_ci		jbd2_journal_grab_journal_head(bh);
25278c2ecf20Sopenharmony_ci	jh->b_transaction = transaction;
25288c2ecf20Sopenharmony_ci
25298c2ecf20Sopenharmony_ci	switch (jlist) {
25308c2ecf20Sopenharmony_ci	case BJ_None:
25318c2ecf20Sopenharmony_ci		J_ASSERT_JH(jh, !jh->b_committed_data);
25328c2ecf20Sopenharmony_ci		J_ASSERT_JH(jh, !jh->b_frozen_data);
25338c2ecf20Sopenharmony_ci		return;
25348c2ecf20Sopenharmony_ci	case BJ_Metadata:
25358c2ecf20Sopenharmony_ci		transaction->t_nr_buffers++;
25368c2ecf20Sopenharmony_ci		list = &transaction->t_buffers;
25378c2ecf20Sopenharmony_ci		break;
25388c2ecf20Sopenharmony_ci	case BJ_Forget:
25398c2ecf20Sopenharmony_ci		list = &transaction->t_forget;
25408c2ecf20Sopenharmony_ci		break;
25418c2ecf20Sopenharmony_ci	case BJ_Shadow:
25428c2ecf20Sopenharmony_ci		list = &transaction->t_shadow_list;
25438c2ecf20Sopenharmony_ci		break;
25448c2ecf20Sopenharmony_ci	case BJ_Reserved:
25458c2ecf20Sopenharmony_ci		list = &transaction->t_reserved_list;
25468c2ecf20Sopenharmony_ci		break;
25478c2ecf20Sopenharmony_ci	}
25488c2ecf20Sopenharmony_ci
25498c2ecf20Sopenharmony_ci	__blist_add_buffer(list, jh);
25508c2ecf20Sopenharmony_ci	jh->b_jlist = jlist;
25518c2ecf20Sopenharmony_ci
25528c2ecf20Sopenharmony_ci	if (was_dirty)
25538c2ecf20Sopenharmony_ci		set_buffer_jbddirty(bh);
25548c2ecf20Sopenharmony_ci}
25558c2ecf20Sopenharmony_ci
25568c2ecf20Sopenharmony_civoid jbd2_journal_file_buffer(struct journal_head *jh,
25578c2ecf20Sopenharmony_ci				transaction_t *transaction, int jlist)
25588c2ecf20Sopenharmony_ci{
25598c2ecf20Sopenharmony_ci	spin_lock(&jh->b_state_lock);
25608c2ecf20Sopenharmony_ci	spin_lock(&transaction->t_journal->j_list_lock);
25618c2ecf20Sopenharmony_ci	__jbd2_journal_file_buffer(jh, transaction, jlist);
25628c2ecf20Sopenharmony_ci	spin_unlock(&transaction->t_journal->j_list_lock);
25638c2ecf20Sopenharmony_ci	spin_unlock(&jh->b_state_lock);
25648c2ecf20Sopenharmony_ci}
25658c2ecf20Sopenharmony_ci
25668c2ecf20Sopenharmony_ci/*
25678c2ecf20Sopenharmony_ci * Remove a buffer from its current buffer list in preparation for
25688c2ecf20Sopenharmony_ci * dropping it from its current transaction entirely.  If the buffer has
25698c2ecf20Sopenharmony_ci * already started to be used by a subsequent transaction, refile the
25708c2ecf20Sopenharmony_ci * buffer on that transaction's metadata list.
25718c2ecf20Sopenharmony_ci *
25728c2ecf20Sopenharmony_ci * Called under j_list_lock
25738c2ecf20Sopenharmony_ci * Called under jh->b_state_lock
25748c2ecf20Sopenharmony_ci *
25758c2ecf20Sopenharmony_ci * When this function returns true, there's no next transaction to refile to
25768c2ecf20Sopenharmony_ci * and the caller has to drop jh reference through
25778c2ecf20Sopenharmony_ci * jbd2_journal_put_journal_head().
25788c2ecf20Sopenharmony_ci */
25798c2ecf20Sopenharmony_cibool __jbd2_journal_refile_buffer(struct journal_head *jh)
25808c2ecf20Sopenharmony_ci{
25818c2ecf20Sopenharmony_ci	int was_dirty, jlist;
25828c2ecf20Sopenharmony_ci	struct buffer_head *bh = jh2bh(jh);
25838c2ecf20Sopenharmony_ci
25848c2ecf20Sopenharmony_ci	lockdep_assert_held(&jh->b_state_lock);
25858c2ecf20Sopenharmony_ci	if (jh->b_transaction)
25868c2ecf20Sopenharmony_ci		assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);
25878c2ecf20Sopenharmony_ci
25888c2ecf20Sopenharmony_ci	/* If the buffer is now unused, just drop it. */
25898c2ecf20Sopenharmony_ci	if (jh->b_next_transaction == NULL) {
25908c2ecf20Sopenharmony_ci		__jbd2_journal_unfile_buffer(jh);
25918c2ecf20Sopenharmony_ci		return true;
25928c2ecf20Sopenharmony_ci	}
25938c2ecf20Sopenharmony_ci
25948c2ecf20Sopenharmony_ci	/*
25958c2ecf20Sopenharmony_ci	 * It has been modified by a later transaction: add it to the new
25968c2ecf20Sopenharmony_ci	 * transaction's metadata list.
25978c2ecf20Sopenharmony_ci	 */
25988c2ecf20Sopenharmony_ci
25998c2ecf20Sopenharmony_ci	was_dirty = test_clear_buffer_jbddirty(bh);
26008c2ecf20Sopenharmony_ci	__jbd2_journal_temp_unlink_buffer(jh);
26018c2ecf20Sopenharmony_ci
26028c2ecf20Sopenharmony_ci	/*
26038c2ecf20Sopenharmony_ci	 * b_transaction must be set, otherwise the new b_transaction won't
26048c2ecf20Sopenharmony_ci	 * be holding jh reference
26058c2ecf20Sopenharmony_ci	 */
26068c2ecf20Sopenharmony_ci	J_ASSERT_JH(jh, jh->b_transaction != NULL);
26078c2ecf20Sopenharmony_ci
26088c2ecf20Sopenharmony_ci	/*
26098c2ecf20Sopenharmony_ci	 * We set b_transaction here because b_next_transaction will inherit
26108c2ecf20Sopenharmony_ci	 * our jh reference and thus __jbd2_journal_file_buffer() must not
26118c2ecf20Sopenharmony_ci	 * take a new one.
26128c2ecf20Sopenharmony_ci	 */
26138c2ecf20Sopenharmony_ci	WRITE_ONCE(jh->b_transaction, jh->b_next_transaction);
26148c2ecf20Sopenharmony_ci	WRITE_ONCE(jh->b_next_transaction, NULL);
26158c2ecf20Sopenharmony_ci	if (buffer_freed(bh))
26168c2ecf20Sopenharmony_ci		jlist = BJ_Forget;
26178c2ecf20Sopenharmony_ci	else if (jh->b_modified)
26188c2ecf20Sopenharmony_ci		jlist = BJ_Metadata;
26198c2ecf20Sopenharmony_ci	else
26208c2ecf20Sopenharmony_ci		jlist = BJ_Reserved;
26218c2ecf20Sopenharmony_ci	__jbd2_journal_file_buffer(jh, jh->b_transaction, jlist);
26228c2ecf20Sopenharmony_ci	J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
26238c2ecf20Sopenharmony_ci
26248c2ecf20Sopenharmony_ci	if (was_dirty)
26258c2ecf20Sopenharmony_ci		set_buffer_jbddirty(bh);
26268c2ecf20Sopenharmony_ci	return false;
26278c2ecf20Sopenharmony_ci}
26288c2ecf20Sopenharmony_ci
26298c2ecf20Sopenharmony_ci/*
26308c2ecf20Sopenharmony_ci * __jbd2_journal_refile_buffer() with necessary locking added. We take our
26318c2ecf20Sopenharmony_ci * bh reference so that we can safely unlock bh.
26328c2ecf20Sopenharmony_ci *
26338c2ecf20Sopenharmony_ci * The jh and bh may be freed by this call.
26348c2ecf20Sopenharmony_ci */
26358c2ecf20Sopenharmony_civoid jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
26368c2ecf20Sopenharmony_ci{
26378c2ecf20Sopenharmony_ci	bool drop;
26388c2ecf20Sopenharmony_ci
26398c2ecf20Sopenharmony_ci	spin_lock(&jh->b_state_lock);
26408c2ecf20Sopenharmony_ci	spin_lock(&journal->j_list_lock);
26418c2ecf20Sopenharmony_ci	drop = __jbd2_journal_refile_buffer(jh);
26428c2ecf20Sopenharmony_ci	spin_unlock(&jh->b_state_lock);
26438c2ecf20Sopenharmony_ci	spin_unlock(&journal->j_list_lock);
26448c2ecf20Sopenharmony_ci	if (drop)
26458c2ecf20Sopenharmony_ci		jbd2_journal_put_journal_head(jh);
26468c2ecf20Sopenharmony_ci}
26478c2ecf20Sopenharmony_ci
26488c2ecf20Sopenharmony_ci/*
26498c2ecf20Sopenharmony_ci * File inode in the inode list of the handle's transaction
26508c2ecf20Sopenharmony_ci */
26518c2ecf20Sopenharmony_cistatic int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
26528c2ecf20Sopenharmony_ci		unsigned long flags, loff_t start_byte, loff_t end_byte)
26538c2ecf20Sopenharmony_ci{
26548c2ecf20Sopenharmony_ci	transaction_t *transaction = handle->h_transaction;
26558c2ecf20Sopenharmony_ci	journal_t *journal;
26568c2ecf20Sopenharmony_ci
26578c2ecf20Sopenharmony_ci	if (is_handle_aborted(handle))
26588c2ecf20Sopenharmony_ci		return -EROFS;
26598c2ecf20Sopenharmony_ci	journal = transaction->t_journal;
26608c2ecf20Sopenharmony_ci
26618c2ecf20Sopenharmony_ci	jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
26628c2ecf20Sopenharmony_ci			transaction->t_tid);
26638c2ecf20Sopenharmony_ci
26648c2ecf20Sopenharmony_ci	spin_lock(&journal->j_list_lock);
26658c2ecf20Sopenharmony_ci	jinode->i_flags |= flags;
26668c2ecf20Sopenharmony_ci
26678c2ecf20Sopenharmony_ci	if (jinode->i_dirty_end) {
26688c2ecf20Sopenharmony_ci		jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
26698c2ecf20Sopenharmony_ci		jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
26708c2ecf20Sopenharmony_ci	} else {
26718c2ecf20Sopenharmony_ci		jinode->i_dirty_start = start_byte;
26728c2ecf20Sopenharmony_ci		jinode->i_dirty_end = end_byte;
26738c2ecf20Sopenharmony_ci	}
26748c2ecf20Sopenharmony_ci
26758c2ecf20Sopenharmony_ci	/* Is inode already attached where we need it? */
26768c2ecf20Sopenharmony_ci	if (jinode->i_transaction == transaction ||
26778c2ecf20Sopenharmony_ci	    jinode->i_next_transaction == transaction)
26788c2ecf20Sopenharmony_ci		goto done;
26798c2ecf20Sopenharmony_ci
26808c2ecf20Sopenharmony_ci	/*
26818c2ecf20Sopenharmony_ci	 * We only ever set this variable to 1 so the test is safe. Since
26828c2ecf20Sopenharmony_ci	 * t_need_data_flush is likely to be set, we do the test to save some
26838c2ecf20Sopenharmony_ci	 * cacheline bouncing
26848c2ecf20Sopenharmony_ci	 */
26858c2ecf20Sopenharmony_ci	if (!transaction->t_need_data_flush)
26868c2ecf20Sopenharmony_ci		transaction->t_need_data_flush = 1;
26878c2ecf20Sopenharmony_ci	/* On some different transaction's list - should be
26888c2ecf20Sopenharmony_ci	 * the committing one */
26898c2ecf20Sopenharmony_ci	if (jinode->i_transaction) {
26908c2ecf20Sopenharmony_ci		J_ASSERT(jinode->i_next_transaction == NULL);
26918c2ecf20Sopenharmony_ci		J_ASSERT(jinode->i_transaction ==
26928c2ecf20Sopenharmony_ci					journal->j_committing_transaction);
26938c2ecf20Sopenharmony_ci		jinode->i_next_transaction = transaction;
26948c2ecf20Sopenharmony_ci		goto done;
26958c2ecf20Sopenharmony_ci	}
26968c2ecf20Sopenharmony_ci	/* Not on any transaction list... */
26978c2ecf20Sopenharmony_ci	J_ASSERT(!jinode->i_next_transaction);
26988c2ecf20Sopenharmony_ci	jinode->i_transaction = transaction;
26998c2ecf20Sopenharmony_ci	list_add(&jinode->i_list, &transaction->t_inode_list);
27008c2ecf20Sopenharmony_cidone:
27018c2ecf20Sopenharmony_ci	spin_unlock(&journal->j_list_lock);
27028c2ecf20Sopenharmony_ci
27038c2ecf20Sopenharmony_ci	return 0;
27048c2ecf20Sopenharmony_ci}
27058c2ecf20Sopenharmony_ci
27068c2ecf20Sopenharmony_ciint jbd2_journal_inode_ranged_write(handle_t *handle,
27078c2ecf20Sopenharmony_ci		struct jbd2_inode *jinode, loff_t start_byte, loff_t length)
27088c2ecf20Sopenharmony_ci{
27098c2ecf20Sopenharmony_ci	return jbd2_journal_file_inode(handle, jinode,
27108c2ecf20Sopenharmony_ci			JI_WRITE_DATA | JI_WAIT_DATA, start_byte,
27118c2ecf20Sopenharmony_ci			start_byte + length - 1);
27128c2ecf20Sopenharmony_ci}
27138c2ecf20Sopenharmony_ci
27148c2ecf20Sopenharmony_ciint jbd2_journal_inode_ranged_wait(handle_t *handle, struct jbd2_inode *jinode,
27158c2ecf20Sopenharmony_ci		loff_t start_byte, loff_t length)
27168c2ecf20Sopenharmony_ci{
27178c2ecf20Sopenharmony_ci	return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA,
27188c2ecf20Sopenharmony_ci			start_byte, start_byte + length - 1);
27198c2ecf20Sopenharmony_ci}
27208c2ecf20Sopenharmony_ci
27218c2ecf20Sopenharmony_ci/*
27228c2ecf20Sopenharmony_ci * File truncate and transaction commit interact with each other in a
27238c2ecf20Sopenharmony_ci * non-trivial way.  If a transaction writing data block A is
27248c2ecf20Sopenharmony_ci * committing, we cannot discard the data by truncate until we have
27258c2ecf20Sopenharmony_ci * written them.  Otherwise if we crashed after the transaction with
27268c2ecf20Sopenharmony_ci * write has committed but before the transaction with truncate has
27278c2ecf20Sopenharmony_ci * committed, we could see stale data in block A.  This function is a
27288c2ecf20Sopenharmony_ci * helper to solve this problem.  It starts writeout of the truncated
27298c2ecf20Sopenharmony_ci * part in case it is in the committing transaction.
27308c2ecf20Sopenharmony_ci *
27318c2ecf20Sopenharmony_ci * Filesystem code must call this function when inode is journaled in
27328c2ecf20Sopenharmony_ci * ordered mode before truncation happens and after the inode has been
27338c2ecf20Sopenharmony_ci * placed on orphan list with the new inode size. The second condition
27348c2ecf20Sopenharmony_ci * avoids the race that someone writes new data and we start
27358c2ecf20Sopenharmony_ci * committing the transaction after this function has been called but
27368c2ecf20Sopenharmony_ci * before a transaction for truncate is started (and furthermore it
27378c2ecf20Sopenharmony_ci * allows us to optimize the case where the addition to orphan list
27388c2ecf20Sopenharmony_ci * happens in the same transaction as write --- we don't have to write
27398c2ecf20Sopenharmony_ci * any data in such case).
27408c2ecf20Sopenharmony_ci */
27418c2ecf20Sopenharmony_ciint jbd2_journal_begin_ordered_truncate(journal_t *journal,
27428c2ecf20Sopenharmony_ci					struct jbd2_inode *jinode,
27438c2ecf20Sopenharmony_ci					loff_t new_size)
27448c2ecf20Sopenharmony_ci{
27458c2ecf20Sopenharmony_ci	transaction_t *inode_trans, *commit_trans;
27468c2ecf20Sopenharmony_ci	int ret = 0;
27478c2ecf20Sopenharmony_ci
27488c2ecf20Sopenharmony_ci	/* This is a quick check to avoid locking if not necessary */
27498c2ecf20Sopenharmony_ci	if (!jinode->i_transaction)
27508c2ecf20Sopenharmony_ci		goto out;
27518c2ecf20Sopenharmony_ci	/* Locks are here just to force reading of recent values, it is
27528c2ecf20Sopenharmony_ci	 * enough that the transaction was not committing before we started
27538c2ecf20Sopenharmony_ci	 * a transaction adding the inode to orphan list */
27548c2ecf20Sopenharmony_ci	read_lock(&journal->j_state_lock);
27558c2ecf20Sopenharmony_ci	commit_trans = journal->j_committing_transaction;
27568c2ecf20Sopenharmony_ci	read_unlock(&journal->j_state_lock);
27578c2ecf20Sopenharmony_ci	spin_lock(&journal->j_list_lock);
27588c2ecf20Sopenharmony_ci	inode_trans = jinode->i_transaction;
27598c2ecf20Sopenharmony_ci	spin_unlock(&journal->j_list_lock);
27608c2ecf20Sopenharmony_ci	if (inode_trans == commit_trans) {
27618c2ecf20Sopenharmony_ci		ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
27628c2ecf20Sopenharmony_ci			new_size, LLONG_MAX);
27638c2ecf20Sopenharmony_ci		if (ret)
27648c2ecf20Sopenharmony_ci			jbd2_journal_abort(journal, ret);
27658c2ecf20Sopenharmony_ci	}
27668c2ecf20Sopenharmony_ciout:
27678c2ecf20Sopenharmony_ci	return ret;
27688c2ecf20Sopenharmony_ci}
2769