162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0+
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * linux/fs/jbd2/commit.c
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * Copyright 1998 Red Hat corp --- All Rights Reserved
862306a36Sopenharmony_ci *
962306a36Sopenharmony_ci * Journal commit routines for the generic filesystem journaling code;
1062306a36Sopenharmony_ci * part of the ext2fs journaling system.
1162306a36Sopenharmony_ci */
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci#include <linux/time.h>
1462306a36Sopenharmony_ci#include <linux/fs.h>
1562306a36Sopenharmony_ci#include <linux/jbd2.h>
1662306a36Sopenharmony_ci#include <linux/errno.h>
1762306a36Sopenharmony_ci#include <linux/slab.h>
1862306a36Sopenharmony_ci#include <linux/mm.h>
1962306a36Sopenharmony_ci#include <linux/pagemap.h>
2062306a36Sopenharmony_ci#include <linux/jiffies.h>
2162306a36Sopenharmony_ci#include <linux/crc32.h>
2262306a36Sopenharmony_ci#include <linux/writeback.h>
2362306a36Sopenharmony_ci#include <linux/backing-dev.h>
2462306a36Sopenharmony_ci#include <linux/bio.h>
2562306a36Sopenharmony_ci#include <linux/blkdev.h>
2662306a36Sopenharmony_ci#include <linux/bitops.h>
2762306a36Sopenharmony_ci#include <trace/events/jbd2.h>
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_ci/*
3062306a36Sopenharmony_ci * IO end handler for temporary buffer_heads handling writes to the journal.
3162306a36Sopenharmony_ci */
3262306a36Sopenharmony_cistatic void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
3362306a36Sopenharmony_ci{
3462306a36Sopenharmony_ci	struct buffer_head *orig_bh = bh->b_private;
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_ci	BUFFER_TRACE(bh, "");
3762306a36Sopenharmony_ci	if (uptodate)
3862306a36Sopenharmony_ci		set_buffer_uptodate(bh);
3962306a36Sopenharmony_ci	else
4062306a36Sopenharmony_ci		clear_buffer_uptodate(bh);
4162306a36Sopenharmony_ci	if (orig_bh) {
4262306a36Sopenharmony_ci		clear_bit_unlock(BH_Shadow, &orig_bh->b_state);
4362306a36Sopenharmony_ci		smp_mb__after_atomic();
4462306a36Sopenharmony_ci		wake_up_bit(&orig_bh->b_state, BH_Shadow);
4562306a36Sopenharmony_ci	}
4662306a36Sopenharmony_ci	unlock_buffer(bh);
4762306a36Sopenharmony_ci}
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_ci/*
5062306a36Sopenharmony_ci * When an ext4 file is truncated, it is possible that some pages are not
5162306a36Sopenharmony_ci * successfully freed, because they are attached to a committing transaction.
5262306a36Sopenharmony_ci * After the transaction commits, these pages are left on the LRU, with no
5362306a36Sopenharmony_ci * ->mapping, and with attached buffers.  These pages are trivially reclaimable
5462306a36Sopenharmony_ci * by the VM, but their apparent absence upsets the VM accounting, and it makes
5562306a36Sopenharmony_ci * the numbers in /proc/meminfo look odd.
5662306a36Sopenharmony_ci *
5762306a36Sopenharmony_ci * So here, we have a buffer which has just come off the forget list.  Look to
5862306a36Sopenharmony_ci * see if we can strip all buffers from the backing page.
5962306a36Sopenharmony_ci *
6062306a36Sopenharmony_ci * Called under lock_journal(), and possibly under journal_datalist_lock.  The
6162306a36Sopenharmony_ci * caller provided us with a ref against the buffer, and we drop that here.
6262306a36Sopenharmony_ci */
6362306a36Sopenharmony_cistatic void release_buffer_page(struct buffer_head *bh)
6462306a36Sopenharmony_ci{
6562306a36Sopenharmony_ci	struct folio *folio;
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci	if (buffer_dirty(bh))
6862306a36Sopenharmony_ci		goto nope;
6962306a36Sopenharmony_ci	if (atomic_read(&bh->b_count) != 1)
7062306a36Sopenharmony_ci		goto nope;
7162306a36Sopenharmony_ci	folio = bh->b_folio;
7262306a36Sopenharmony_ci	if (folio->mapping)
7362306a36Sopenharmony_ci		goto nope;
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci	/* OK, it's a truncated page */
7662306a36Sopenharmony_ci	if (!folio_trylock(folio))
7762306a36Sopenharmony_ci		goto nope;
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci	folio_get(folio);
8062306a36Sopenharmony_ci	__brelse(bh);
8162306a36Sopenharmony_ci	try_to_free_buffers(folio);
8262306a36Sopenharmony_ci	folio_unlock(folio);
8362306a36Sopenharmony_ci	folio_put(folio);
8462306a36Sopenharmony_ci	return;
8562306a36Sopenharmony_ci
8662306a36Sopenharmony_cinope:
8762306a36Sopenharmony_ci	__brelse(bh);
8862306a36Sopenharmony_ci}
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_cistatic void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)
9162306a36Sopenharmony_ci{
9262306a36Sopenharmony_ci	struct commit_header *h;
9362306a36Sopenharmony_ci	__u32 csum;
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci	if (!jbd2_journal_has_csum_v2or3(j))
9662306a36Sopenharmony_ci		return;
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci	h = (struct commit_header *)(bh->b_data);
9962306a36Sopenharmony_ci	h->h_chksum_type = 0;
10062306a36Sopenharmony_ci	h->h_chksum_size = 0;
10162306a36Sopenharmony_ci	h->h_chksum[0] = 0;
10262306a36Sopenharmony_ci	csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
10362306a36Sopenharmony_ci	h->h_chksum[0] = cpu_to_be32(csum);
10462306a36Sopenharmony_ci}
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci/*
10762306a36Sopenharmony_ci * Done it all: now submit the commit record.  We should have
10862306a36Sopenharmony_ci * cleaned up our previous buffers by now, so if we are in abort
10962306a36Sopenharmony_ci * mode we can now just skip the rest of the journal write
11062306a36Sopenharmony_ci * entirely.
11162306a36Sopenharmony_ci *
11262306a36Sopenharmony_ci * Returns 1 if the journal needs to be aborted or 0 on success
11362306a36Sopenharmony_ci */
11462306a36Sopenharmony_cistatic int journal_submit_commit_record(journal_t *journal,
11562306a36Sopenharmony_ci					transaction_t *commit_transaction,
11662306a36Sopenharmony_ci					struct buffer_head **cbh,
11762306a36Sopenharmony_ci					__u32 crc32_sum)
11862306a36Sopenharmony_ci{
11962306a36Sopenharmony_ci	struct commit_header *tmp;
12062306a36Sopenharmony_ci	struct buffer_head *bh;
12162306a36Sopenharmony_ci	struct timespec64 now;
12262306a36Sopenharmony_ci	blk_opf_t write_flags = REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS;
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ci	*cbh = NULL;
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci	if (is_journal_aborted(journal))
12762306a36Sopenharmony_ci		return 0;
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci	bh = jbd2_journal_get_descriptor_buffer(commit_transaction,
13062306a36Sopenharmony_ci						JBD2_COMMIT_BLOCK);
13162306a36Sopenharmony_ci	if (!bh)
13262306a36Sopenharmony_ci		return 1;
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	tmp = (struct commit_header *)bh->b_data;
13562306a36Sopenharmony_ci	ktime_get_coarse_real_ts64(&now);
13662306a36Sopenharmony_ci	tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
13762306a36Sopenharmony_ci	tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci	if (jbd2_has_feature_checksum(journal)) {
14062306a36Sopenharmony_ci		tmp->h_chksum_type 	= JBD2_CRC32_CHKSUM;
14162306a36Sopenharmony_ci		tmp->h_chksum_size 	= JBD2_CRC32_CHKSUM_SIZE;
14262306a36Sopenharmony_ci		tmp->h_chksum[0] 	= cpu_to_be32(crc32_sum);
14362306a36Sopenharmony_ci	}
14462306a36Sopenharmony_ci	jbd2_commit_block_csum_set(journal, bh);
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ci	BUFFER_TRACE(bh, "submit commit block");
14762306a36Sopenharmony_ci	lock_buffer(bh);
14862306a36Sopenharmony_ci	clear_buffer_dirty(bh);
14962306a36Sopenharmony_ci	set_buffer_uptodate(bh);
15062306a36Sopenharmony_ci	bh->b_end_io = journal_end_buffer_io_sync;
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_ci	if (journal->j_flags & JBD2_BARRIER &&
15362306a36Sopenharmony_ci	    !jbd2_has_feature_async_commit(journal))
15462306a36Sopenharmony_ci		write_flags |= REQ_PREFLUSH | REQ_FUA;
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci	submit_bh(write_flags, bh);
15762306a36Sopenharmony_ci	*cbh = bh;
15862306a36Sopenharmony_ci	return 0;
15962306a36Sopenharmony_ci}
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_ci/*
16262306a36Sopenharmony_ci * This function along with journal_submit_commit_record
16362306a36Sopenharmony_ci * allows to write the commit record asynchronously.
16462306a36Sopenharmony_ci */
16562306a36Sopenharmony_cistatic int journal_wait_on_commit_record(journal_t *journal,
16662306a36Sopenharmony_ci					 struct buffer_head *bh)
16762306a36Sopenharmony_ci{
16862306a36Sopenharmony_ci	int ret = 0;
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci	clear_buffer_dirty(bh);
17162306a36Sopenharmony_ci	wait_on_buffer(bh);
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	if (unlikely(!buffer_uptodate(bh)))
17462306a36Sopenharmony_ci		ret = -EIO;
17562306a36Sopenharmony_ci	put_bh(bh);            /* One for getblk() */
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_ci	return ret;
17862306a36Sopenharmony_ci}
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_ci/* Send all the data buffers related to an inode */
18162306a36Sopenharmony_ciint jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode)
18262306a36Sopenharmony_ci{
18362306a36Sopenharmony_ci	if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
18462306a36Sopenharmony_ci		return 0;
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci	trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
18762306a36Sopenharmony_ci	return journal->j_submit_inode_data_buffers(jinode);
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci}
19062306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_submit_inode_data);
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ciint jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
19362306a36Sopenharmony_ci{
19462306a36Sopenharmony_ci	if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) ||
19562306a36Sopenharmony_ci		!jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping)
19662306a36Sopenharmony_ci		return 0;
19762306a36Sopenharmony_ci	return filemap_fdatawait_range_keep_errors(
19862306a36Sopenharmony_ci		jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start,
19962306a36Sopenharmony_ci		jinode->i_dirty_end);
20062306a36Sopenharmony_ci}
20162306a36Sopenharmony_ciEXPORT_SYMBOL(jbd2_wait_inode_data);
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci/*
20462306a36Sopenharmony_ci * Submit all the data buffers of inode associated with the transaction to
20562306a36Sopenharmony_ci * disk.
20662306a36Sopenharmony_ci *
20762306a36Sopenharmony_ci * We are in a committing transaction. Therefore no new inode can be added to
20862306a36Sopenharmony_ci * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently
20962306a36Sopenharmony_ci * operate on from being released while we write out pages.
21062306a36Sopenharmony_ci */
21162306a36Sopenharmony_cistatic int journal_submit_data_buffers(journal_t *journal,
21262306a36Sopenharmony_ci		transaction_t *commit_transaction)
21362306a36Sopenharmony_ci{
21462306a36Sopenharmony_ci	struct jbd2_inode *jinode;
21562306a36Sopenharmony_ci	int err, ret = 0;
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci	spin_lock(&journal->j_list_lock);
21862306a36Sopenharmony_ci	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
21962306a36Sopenharmony_ci		if (!(jinode->i_flags & JI_WRITE_DATA))
22062306a36Sopenharmony_ci			continue;
22162306a36Sopenharmony_ci		jinode->i_flags |= JI_COMMIT_RUNNING;
22262306a36Sopenharmony_ci		spin_unlock(&journal->j_list_lock);
22362306a36Sopenharmony_ci		/* submit the inode data buffers. */
22462306a36Sopenharmony_ci		trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
22562306a36Sopenharmony_ci		if (journal->j_submit_inode_data_buffers) {
22662306a36Sopenharmony_ci			err = journal->j_submit_inode_data_buffers(jinode);
22762306a36Sopenharmony_ci			if (!ret)
22862306a36Sopenharmony_ci				ret = err;
22962306a36Sopenharmony_ci		}
23062306a36Sopenharmony_ci		spin_lock(&journal->j_list_lock);
23162306a36Sopenharmony_ci		J_ASSERT(jinode->i_transaction == commit_transaction);
23262306a36Sopenharmony_ci		jinode->i_flags &= ~JI_COMMIT_RUNNING;
23362306a36Sopenharmony_ci		smp_mb();
23462306a36Sopenharmony_ci		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
23562306a36Sopenharmony_ci	}
23662306a36Sopenharmony_ci	spin_unlock(&journal->j_list_lock);
23762306a36Sopenharmony_ci	return ret;
23862306a36Sopenharmony_ci}
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ciint jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
24162306a36Sopenharmony_ci{
24262306a36Sopenharmony_ci	struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_ci	return filemap_fdatawait_range_keep_errors(mapping,
24562306a36Sopenharmony_ci						   jinode->i_dirty_start,
24662306a36Sopenharmony_ci						   jinode->i_dirty_end);
24762306a36Sopenharmony_ci}
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci/*
25062306a36Sopenharmony_ci * Wait for data submitted for writeout, refile inodes to proper
25162306a36Sopenharmony_ci * transaction if needed.
25262306a36Sopenharmony_ci *
25362306a36Sopenharmony_ci */
25462306a36Sopenharmony_cistatic int journal_finish_inode_data_buffers(journal_t *journal,
25562306a36Sopenharmony_ci		transaction_t *commit_transaction)
25662306a36Sopenharmony_ci{
25762306a36Sopenharmony_ci	struct jbd2_inode *jinode, *next_i;
25862306a36Sopenharmony_ci	int err, ret = 0;
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci	/* For locking, see the comment in journal_submit_data_buffers() */
26162306a36Sopenharmony_ci	spin_lock(&journal->j_list_lock);
26262306a36Sopenharmony_ci	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
26362306a36Sopenharmony_ci		if (!(jinode->i_flags & JI_WAIT_DATA))
26462306a36Sopenharmony_ci			continue;
26562306a36Sopenharmony_ci		jinode->i_flags |= JI_COMMIT_RUNNING;
26662306a36Sopenharmony_ci		spin_unlock(&journal->j_list_lock);
26762306a36Sopenharmony_ci		/* wait for the inode data buffers writeout. */
26862306a36Sopenharmony_ci		if (journal->j_finish_inode_data_buffers) {
26962306a36Sopenharmony_ci			err = journal->j_finish_inode_data_buffers(jinode);
27062306a36Sopenharmony_ci			if (!ret)
27162306a36Sopenharmony_ci				ret = err;
27262306a36Sopenharmony_ci		}
27362306a36Sopenharmony_ci		cond_resched();
27462306a36Sopenharmony_ci		spin_lock(&journal->j_list_lock);
27562306a36Sopenharmony_ci		jinode->i_flags &= ~JI_COMMIT_RUNNING;
27662306a36Sopenharmony_ci		smp_mb();
27762306a36Sopenharmony_ci		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
27862306a36Sopenharmony_ci	}
27962306a36Sopenharmony_ci
28062306a36Sopenharmony_ci	/* Now refile inode to proper lists */
28162306a36Sopenharmony_ci	list_for_each_entry_safe(jinode, next_i,
28262306a36Sopenharmony_ci				 &commit_transaction->t_inode_list, i_list) {
28362306a36Sopenharmony_ci		list_del(&jinode->i_list);
28462306a36Sopenharmony_ci		if (jinode->i_next_transaction) {
28562306a36Sopenharmony_ci			jinode->i_transaction = jinode->i_next_transaction;
28662306a36Sopenharmony_ci			jinode->i_next_transaction = NULL;
28762306a36Sopenharmony_ci			list_add(&jinode->i_list,
28862306a36Sopenharmony_ci				&jinode->i_transaction->t_inode_list);
28962306a36Sopenharmony_ci		} else {
29062306a36Sopenharmony_ci			jinode->i_transaction = NULL;
29162306a36Sopenharmony_ci			jinode->i_dirty_start = 0;
29262306a36Sopenharmony_ci			jinode->i_dirty_end = 0;
29362306a36Sopenharmony_ci		}
29462306a36Sopenharmony_ci	}
29562306a36Sopenharmony_ci	spin_unlock(&journal->j_list_lock);
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci	return ret;
29862306a36Sopenharmony_ci}
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_cistatic __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
30162306a36Sopenharmony_ci{
30262306a36Sopenharmony_ci	char *addr;
30362306a36Sopenharmony_ci	__u32 checksum;
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_ci	addr = kmap_local_folio(bh->b_folio, bh_offset(bh));
30662306a36Sopenharmony_ci	checksum = crc32_be(crc32_sum, addr, bh->b_size);
30762306a36Sopenharmony_ci	kunmap_local(addr);
30862306a36Sopenharmony_ci
30962306a36Sopenharmony_ci	return checksum;
31062306a36Sopenharmony_ci}
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_cistatic void write_tag_block(journal_t *j, journal_block_tag_t *tag,
31362306a36Sopenharmony_ci				   unsigned long long block)
31462306a36Sopenharmony_ci{
31562306a36Sopenharmony_ci	tag->t_blocknr = cpu_to_be32(block & (u32)~0);
31662306a36Sopenharmony_ci	if (jbd2_has_feature_64bit(j))
31762306a36Sopenharmony_ci		tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
31862306a36Sopenharmony_ci}
31962306a36Sopenharmony_ci
32062306a36Sopenharmony_cistatic void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
32162306a36Sopenharmony_ci				    struct buffer_head *bh, __u32 sequence)
32262306a36Sopenharmony_ci{
32362306a36Sopenharmony_ci	journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
32462306a36Sopenharmony_ci	__u8 *addr;
32562306a36Sopenharmony_ci	__u32 csum32;
32662306a36Sopenharmony_ci	__be32 seq;
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_ci	if (!jbd2_journal_has_csum_v2or3(j))
32962306a36Sopenharmony_ci		return;
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_ci	seq = cpu_to_be32(sequence);
33262306a36Sopenharmony_ci	addr = kmap_local_folio(bh->b_folio, bh_offset(bh));
33362306a36Sopenharmony_ci	csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
33462306a36Sopenharmony_ci	csum32 = jbd2_chksum(j, csum32, addr, bh->b_size);
33562306a36Sopenharmony_ci	kunmap_local(addr);
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_ci	if (jbd2_has_feature_csum3(j))
33862306a36Sopenharmony_ci		tag3->t_checksum = cpu_to_be32(csum32);
33962306a36Sopenharmony_ci	else
34062306a36Sopenharmony_ci		tag->t_checksum = cpu_to_be16(csum32);
34162306a36Sopenharmony_ci}
34262306a36Sopenharmony_ci/*
34362306a36Sopenharmony_ci * jbd2_journal_commit_transaction
34462306a36Sopenharmony_ci *
34562306a36Sopenharmony_ci * The primary function for committing a transaction to the log.  This
34662306a36Sopenharmony_ci * function is called by the journal thread to begin a complete commit.
34762306a36Sopenharmony_ci */
34862306a36Sopenharmony_civoid jbd2_journal_commit_transaction(journal_t *journal)
34962306a36Sopenharmony_ci{
35062306a36Sopenharmony_ci	struct transaction_stats_s stats;
35162306a36Sopenharmony_ci	transaction_t *commit_transaction;
35262306a36Sopenharmony_ci	struct journal_head *jh;
35362306a36Sopenharmony_ci	struct buffer_head *descriptor;
35462306a36Sopenharmony_ci	struct buffer_head **wbuf = journal->j_wbuf;
35562306a36Sopenharmony_ci	int bufs;
35662306a36Sopenharmony_ci	int flags;
35762306a36Sopenharmony_ci	int err;
35862306a36Sopenharmony_ci	unsigned long long blocknr;
35962306a36Sopenharmony_ci	ktime_t start_time;
36062306a36Sopenharmony_ci	u64 commit_time;
36162306a36Sopenharmony_ci	char *tagp = NULL;
36262306a36Sopenharmony_ci	journal_block_tag_t *tag = NULL;
36362306a36Sopenharmony_ci	int space_left = 0;
36462306a36Sopenharmony_ci	int first_tag = 0;
36562306a36Sopenharmony_ci	int tag_flag;
36662306a36Sopenharmony_ci	int i;
36762306a36Sopenharmony_ci	int tag_bytes = journal_tag_bytes(journal);
36862306a36Sopenharmony_ci	struct buffer_head *cbh = NULL; /* For transactional checksums */
36962306a36Sopenharmony_ci	__u32 crc32_sum = ~0;
37062306a36Sopenharmony_ci	struct blk_plug plug;
37162306a36Sopenharmony_ci	/* Tail of the journal */
37262306a36Sopenharmony_ci	unsigned long first_block;
37362306a36Sopenharmony_ci	tid_t first_tid;
37462306a36Sopenharmony_ci	int update_tail;
37562306a36Sopenharmony_ci	int csum_size = 0;
37662306a36Sopenharmony_ci	LIST_HEAD(io_bufs);
37762306a36Sopenharmony_ci	LIST_HEAD(log_bufs);
37862306a36Sopenharmony_ci
37962306a36Sopenharmony_ci	if (jbd2_journal_has_csum_v2or3(journal))
38062306a36Sopenharmony_ci		csum_size = sizeof(struct jbd2_journal_block_tail);
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_ci	/*
38362306a36Sopenharmony_ci	 * First job: lock down the current transaction and wait for
38462306a36Sopenharmony_ci	 * all outstanding updates to complete.
38562306a36Sopenharmony_ci	 */
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_ci	/* Do we need to erase the effects of a prior jbd2_journal_flush? */
38862306a36Sopenharmony_ci	if (journal->j_flags & JBD2_FLUSHED) {
38962306a36Sopenharmony_ci		jbd2_debug(3, "super block updated\n");
39062306a36Sopenharmony_ci		mutex_lock_io(&journal->j_checkpoint_mutex);
39162306a36Sopenharmony_ci		/*
39262306a36Sopenharmony_ci		 * We hold j_checkpoint_mutex so tail cannot change under us.
39362306a36Sopenharmony_ci		 * We don't need any special data guarantees for writing sb
39462306a36Sopenharmony_ci		 * since journal is empty and it is ok for write to be
39562306a36Sopenharmony_ci		 * flushed only with transaction commit.
39662306a36Sopenharmony_ci		 */
39762306a36Sopenharmony_ci		jbd2_journal_update_sb_log_tail(journal,
39862306a36Sopenharmony_ci						journal->j_tail_sequence,
39962306a36Sopenharmony_ci						journal->j_tail, 0);
40062306a36Sopenharmony_ci		mutex_unlock(&journal->j_checkpoint_mutex);
40162306a36Sopenharmony_ci	} else {
40262306a36Sopenharmony_ci		jbd2_debug(3, "superblock not updated\n");
40362306a36Sopenharmony_ci	}
40462306a36Sopenharmony_ci
40562306a36Sopenharmony_ci	J_ASSERT(journal->j_running_transaction != NULL);
40662306a36Sopenharmony_ci	J_ASSERT(journal->j_committing_transaction == NULL);
40762306a36Sopenharmony_ci
40862306a36Sopenharmony_ci	write_lock(&journal->j_state_lock);
40962306a36Sopenharmony_ci	journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
41062306a36Sopenharmony_ci	while (journal->j_flags & JBD2_FAST_COMMIT_ONGOING) {
41162306a36Sopenharmony_ci		DEFINE_WAIT(wait);
41262306a36Sopenharmony_ci
41362306a36Sopenharmony_ci		prepare_to_wait(&journal->j_fc_wait, &wait,
41462306a36Sopenharmony_ci				TASK_UNINTERRUPTIBLE);
41562306a36Sopenharmony_ci		write_unlock(&journal->j_state_lock);
41662306a36Sopenharmony_ci		schedule();
41762306a36Sopenharmony_ci		write_lock(&journal->j_state_lock);
41862306a36Sopenharmony_ci		finish_wait(&journal->j_fc_wait, &wait);
41962306a36Sopenharmony_ci		/*
42062306a36Sopenharmony_ci		 * TODO: by blocking fast commits here, we are increasing
42162306a36Sopenharmony_ci		 * fsync() latency slightly. Strictly speaking, we don't need
42262306a36Sopenharmony_ci		 * to block fast commits until the transaction enters T_FLUSH
42362306a36Sopenharmony_ci		 * state. So an optimization is possible where we block new fast
42462306a36Sopenharmony_ci		 * commits here and wait for existing ones to complete
42562306a36Sopenharmony_ci		 * just before we enter T_FLUSH. That way, the existing fast
		 * commits and this full commit can proceed in parallel.
42762306a36Sopenharmony_ci		 */
42862306a36Sopenharmony_ci	}
42962306a36Sopenharmony_ci	write_unlock(&journal->j_state_lock);
43062306a36Sopenharmony_ci
43162306a36Sopenharmony_ci	commit_transaction = journal->j_running_transaction;
43262306a36Sopenharmony_ci
43362306a36Sopenharmony_ci	trace_jbd2_start_commit(journal, commit_transaction);
43462306a36Sopenharmony_ci	jbd2_debug(1, "JBD2: starting commit of transaction %d\n",
43562306a36Sopenharmony_ci			commit_transaction->t_tid);
43662306a36Sopenharmony_ci
43762306a36Sopenharmony_ci	write_lock(&journal->j_state_lock);
43862306a36Sopenharmony_ci	journal->j_fc_off = 0;
43962306a36Sopenharmony_ci	J_ASSERT(commit_transaction->t_state == T_RUNNING);
44062306a36Sopenharmony_ci	commit_transaction->t_state = T_LOCKED;
44162306a36Sopenharmony_ci
44262306a36Sopenharmony_ci	trace_jbd2_commit_locking(journal, commit_transaction);
44362306a36Sopenharmony_ci	stats.run.rs_wait = commit_transaction->t_max_wait;
44462306a36Sopenharmony_ci	stats.run.rs_request_delay = 0;
44562306a36Sopenharmony_ci	stats.run.rs_locked = jiffies;
44662306a36Sopenharmony_ci	if (commit_transaction->t_requested)
44762306a36Sopenharmony_ci		stats.run.rs_request_delay =
44862306a36Sopenharmony_ci			jbd2_time_diff(commit_transaction->t_requested,
44962306a36Sopenharmony_ci				       stats.run.rs_locked);
45062306a36Sopenharmony_ci	stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
45162306a36Sopenharmony_ci					      stats.run.rs_locked);
45262306a36Sopenharmony_ci
45362306a36Sopenharmony_ci	// waits for any t_updates to finish
45462306a36Sopenharmony_ci	jbd2_journal_wait_updates(journal);
45562306a36Sopenharmony_ci
45662306a36Sopenharmony_ci	commit_transaction->t_state = T_SWITCH;
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_ci	J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
45962306a36Sopenharmony_ci			journal->j_max_transaction_buffers);
46062306a36Sopenharmony_ci
46162306a36Sopenharmony_ci	/*
46262306a36Sopenharmony_ci	 * First thing we are allowed to do is to discard any remaining
46362306a36Sopenharmony_ci	 * BJ_Reserved buffers.  Note, it is _not_ permissible to assume
46462306a36Sopenharmony_ci	 * that there are no such buffers: if a large filesystem
46562306a36Sopenharmony_ci	 * operation like a truncate needs to split itself over multiple
46662306a36Sopenharmony_ci	 * transactions, then it may try to do a jbd2_journal_restart() while
46762306a36Sopenharmony_ci	 * there are still BJ_Reserved buffers outstanding.  These must
46862306a36Sopenharmony_ci	 * be released cleanly from the current transaction.
46962306a36Sopenharmony_ci	 *
47062306a36Sopenharmony_ci	 * In this case, the filesystem must still reserve write access
47162306a36Sopenharmony_ci	 * again before modifying the buffer in the new transaction, but
47262306a36Sopenharmony_ci	 * we do not require it to remember exactly which old buffers it
47362306a36Sopenharmony_ci	 * has reserved.  This is consistent with the existing behaviour
47462306a36Sopenharmony_ci	 * that multiple jbd2_journal_get_write_access() calls to the same
47562306a36Sopenharmony_ci	 * buffer are perfectly permissible.
47662306a36Sopenharmony_ci	 * We use journal->j_state_lock here to serialize processing of
47762306a36Sopenharmony_ci	 * t_reserved_list with eviction of buffers from journal_unmap_buffer().
47862306a36Sopenharmony_ci	 */
47962306a36Sopenharmony_ci	while (commit_transaction->t_reserved_list) {
48062306a36Sopenharmony_ci		jh = commit_transaction->t_reserved_list;
48162306a36Sopenharmony_ci		JBUFFER_TRACE(jh, "reserved, unused: refile");
48262306a36Sopenharmony_ci		/*
48362306a36Sopenharmony_ci		 * A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer() may
48462306a36Sopenharmony_ci		 * leave undo-committed data.
48562306a36Sopenharmony_ci		 */
48662306a36Sopenharmony_ci		if (jh->b_committed_data) {
48762306a36Sopenharmony_ci			struct buffer_head *bh = jh2bh(jh);
48862306a36Sopenharmony_ci
48962306a36Sopenharmony_ci			spin_lock(&jh->b_state_lock);
49062306a36Sopenharmony_ci			jbd2_free(jh->b_committed_data, bh->b_size);
49162306a36Sopenharmony_ci			jh->b_committed_data = NULL;
49262306a36Sopenharmony_ci			spin_unlock(&jh->b_state_lock);
49362306a36Sopenharmony_ci		}
49462306a36Sopenharmony_ci		jbd2_journal_refile_buffer(journal, jh);
49562306a36Sopenharmony_ci	}
49662306a36Sopenharmony_ci
49762306a36Sopenharmony_ci	write_unlock(&journal->j_state_lock);
49862306a36Sopenharmony_ci	/*
49962306a36Sopenharmony_ci	 * Now try to drop any written-back buffers from the journal's
50062306a36Sopenharmony_ci	 * checkpoint lists.  We do this *before* commit because it potentially
50162306a36Sopenharmony_ci	 * frees some memory
50262306a36Sopenharmony_ci	 */
50362306a36Sopenharmony_ci	spin_lock(&journal->j_list_lock);
50462306a36Sopenharmony_ci	__jbd2_journal_clean_checkpoint_list(journal, false);
50562306a36Sopenharmony_ci	spin_unlock(&journal->j_list_lock);
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci	jbd2_debug(3, "JBD2: commit phase 1\n");
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_ci	/*
	 * Clear revoked flag to reflect that there are no revoked buffers
	 * in the next transaction which is going to be started.
51262306a36Sopenharmony_ci	 */
51362306a36Sopenharmony_ci	jbd2_clear_buffer_revoked_flags(journal);
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci	/*
51662306a36Sopenharmony_ci	 * Switch to a new revoke table.
51762306a36Sopenharmony_ci	 */
51862306a36Sopenharmony_ci	jbd2_journal_switch_revoke_table(journal);
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_ci	write_lock(&journal->j_state_lock);
52162306a36Sopenharmony_ci	/*
52262306a36Sopenharmony_ci	 * Reserved credits cannot be claimed anymore, free them
52362306a36Sopenharmony_ci	 */
52462306a36Sopenharmony_ci	atomic_sub(atomic_read(&journal->j_reserved_credits),
52562306a36Sopenharmony_ci		   &commit_transaction->t_outstanding_credits);
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci	trace_jbd2_commit_flushing(journal, commit_transaction);
52862306a36Sopenharmony_ci	stats.run.rs_flushing = jiffies;
52962306a36Sopenharmony_ci	stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked,
53062306a36Sopenharmony_ci					     stats.run.rs_flushing);
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_ci	commit_transaction->t_state = T_FLUSH;
53362306a36Sopenharmony_ci	journal->j_committing_transaction = commit_transaction;
53462306a36Sopenharmony_ci	journal->j_running_transaction = NULL;
53562306a36Sopenharmony_ci	start_time = ktime_get();
53662306a36Sopenharmony_ci	commit_transaction->t_log_start = journal->j_head;
53762306a36Sopenharmony_ci	wake_up_all(&journal->j_wait_transaction_locked);
53862306a36Sopenharmony_ci	write_unlock(&journal->j_state_lock);
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci	jbd2_debug(3, "JBD2: commit phase 2a\n");
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_ci	/*
54362306a36Sopenharmony_ci	 * Now start flushing things to disk, in the order they appear
54462306a36Sopenharmony_ci	 * on the transaction lists.  Data blocks go first.
54562306a36Sopenharmony_ci	 */
54662306a36Sopenharmony_ci	err = journal_submit_data_buffers(journal, commit_transaction);
54762306a36Sopenharmony_ci	if (err)
54862306a36Sopenharmony_ci		jbd2_journal_abort(journal, err);
54962306a36Sopenharmony_ci
55062306a36Sopenharmony_ci	blk_start_plug(&plug);
55162306a36Sopenharmony_ci	jbd2_journal_write_revoke_records(commit_transaction, &log_bufs);
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_ci	jbd2_debug(3, "JBD2: commit phase 2b\n");
55462306a36Sopenharmony_ci
55562306a36Sopenharmony_ci	/*
55662306a36Sopenharmony_ci	 * Way to go: we have now written out all of the data for a
55762306a36Sopenharmony_ci	 * transaction!  Now comes the tricky part: we need to write out
55862306a36Sopenharmony_ci	 * metadata.  Loop over the transaction's entire buffer list:
55962306a36Sopenharmony_ci	 */
56062306a36Sopenharmony_ci	write_lock(&journal->j_state_lock);
56162306a36Sopenharmony_ci	commit_transaction->t_state = T_COMMIT;
56262306a36Sopenharmony_ci	write_unlock(&journal->j_state_lock);
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci	trace_jbd2_commit_logging(journal, commit_transaction);
56562306a36Sopenharmony_ci	stats.run.rs_logging = jiffies;
56662306a36Sopenharmony_ci	stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
56762306a36Sopenharmony_ci					       stats.run.rs_logging);
56862306a36Sopenharmony_ci	stats.run.rs_blocks = commit_transaction->t_nr_buffers;
56962306a36Sopenharmony_ci	stats.run.rs_blocks_logged = 0;
57062306a36Sopenharmony_ci
57162306a36Sopenharmony_ci	J_ASSERT(commit_transaction->t_nr_buffers <=
57262306a36Sopenharmony_ci		 atomic_read(&commit_transaction->t_outstanding_credits));
57362306a36Sopenharmony_ci
57462306a36Sopenharmony_ci	err = 0;
57562306a36Sopenharmony_ci	bufs = 0;
57662306a36Sopenharmony_ci	descriptor = NULL;
57762306a36Sopenharmony_ci	while (commit_transaction->t_buffers) {
57862306a36Sopenharmony_ci
57962306a36Sopenharmony_ci		/* Find the next buffer to be journaled... */
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_ci		jh = commit_transaction->t_buffers;
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_ci		/* If we're in abort mode, we just un-journal the buffer and
58462306a36Sopenharmony_ci		   release it. */
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_ci		if (is_journal_aborted(journal)) {
58762306a36Sopenharmony_ci			clear_buffer_jbddirty(jh2bh(jh));
58862306a36Sopenharmony_ci			JBUFFER_TRACE(jh, "journal is aborting: refile");
58962306a36Sopenharmony_ci			jbd2_buffer_abort_trigger(jh,
59062306a36Sopenharmony_ci						  jh->b_frozen_data ?
59162306a36Sopenharmony_ci						  jh->b_frozen_triggers :
59262306a36Sopenharmony_ci						  jh->b_triggers);
59362306a36Sopenharmony_ci			jbd2_journal_refile_buffer(journal, jh);
59462306a36Sopenharmony_ci			/* If that was the last one, we need to clean up
59562306a36Sopenharmony_ci			 * any descriptor buffers which may have been
59662306a36Sopenharmony_ci			 * already allocated, even if we are now
59762306a36Sopenharmony_ci			 * aborting. */
59862306a36Sopenharmony_ci			if (!commit_transaction->t_buffers)
59962306a36Sopenharmony_ci				goto start_journal_io;
60062306a36Sopenharmony_ci			continue;
60162306a36Sopenharmony_ci		}
60262306a36Sopenharmony_ci
60362306a36Sopenharmony_ci		/* Make sure we have a descriptor block in which to
60462306a36Sopenharmony_ci		   record the metadata buffer. */
60562306a36Sopenharmony_ci
60662306a36Sopenharmony_ci		if (!descriptor) {
60762306a36Sopenharmony_ci			J_ASSERT (bufs == 0);
60862306a36Sopenharmony_ci
60962306a36Sopenharmony_ci			jbd2_debug(4, "JBD2: get descriptor\n");
61062306a36Sopenharmony_ci
61162306a36Sopenharmony_ci			descriptor = jbd2_journal_get_descriptor_buffer(
61262306a36Sopenharmony_ci							commit_transaction,
61362306a36Sopenharmony_ci							JBD2_DESCRIPTOR_BLOCK);
61462306a36Sopenharmony_ci			if (!descriptor) {
61562306a36Sopenharmony_ci				jbd2_journal_abort(journal, -EIO);
61662306a36Sopenharmony_ci				continue;
61762306a36Sopenharmony_ci			}
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_ci			jbd2_debug(4, "JBD2: got buffer %llu (%p)\n",
62062306a36Sopenharmony_ci				(unsigned long long)descriptor->b_blocknr,
62162306a36Sopenharmony_ci				descriptor->b_data);
62262306a36Sopenharmony_ci			tagp = &descriptor->b_data[sizeof(journal_header_t)];
62362306a36Sopenharmony_ci			space_left = descriptor->b_size -
62462306a36Sopenharmony_ci						sizeof(journal_header_t);
62562306a36Sopenharmony_ci			first_tag = 1;
62662306a36Sopenharmony_ci			set_buffer_jwrite(descriptor);
62762306a36Sopenharmony_ci			set_buffer_dirty(descriptor);
62862306a36Sopenharmony_ci			wbuf[bufs++] = descriptor;
62962306a36Sopenharmony_ci
63062306a36Sopenharmony_ci			/* Record it so that we can wait for IO
63162306a36Sopenharmony_ci                           completion later */
63262306a36Sopenharmony_ci			BUFFER_TRACE(descriptor, "ph3: file as descriptor");
63362306a36Sopenharmony_ci			jbd2_file_log_bh(&log_bufs, descriptor);
63462306a36Sopenharmony_ci		}
63562306a36Sopenharmony_ci
63662306a36Sopenharmony_ci		/* Where is the buffer to be written? */
63762306a36Sopenharmony_ci
63862306a36Sopenharmony_ci		err = jbd2_journal_next_log_block(journal, &blocknr);
63962306a36Sopenharmony_ci		/* If the block mapping failed, just abandon the buffer
64062306a36Sopenharmony_ci		   and repeat this loop: we'll fall into the
64162306a36Sopenharmony_ci		   refile-on-abort condition above. */
64262306a36Sopenharmony_ci		if (err) {
64362306a36Sopenharmony_ci			jbd2_journal_abort(journal, err);
64462306a36Sopenharmony_ci			continue;
64562306a36Sopenharmony_ci		}
64662306a36Sopenharmony_ci
64762306a36Sopenharmony_ci		/*
64862306a36Sopenharmony_ci		 * start_this_handle() uses t_outstanding_credits to determine
64962306a36Sopenharmony_ci		 * the free space in the log.
65062306a36Sopenharmony_ci		 */
65162306a36Sopenharmony_ci		atomic_dec(&commit_transaction->t_outstanding_credits);
65262306a36Sopenharmony_ci
65362306a36Sopenharmony_ci		/* Bump b_count to prevent truncate from stumbling over
65462306a36Sopenharmony_ci                   the shadowed buffer!  @@@ This can go if we ever get
65562306a36Sopenharmony_ci                   rid of the shadow pairing of buffers. */
65662306a36Sopenharmony_ci		atomic_inc(&jh2bh(jh)->b_count);
65762306a36Sopenharmony_ci
65862306a36Sopenharmony_ci		/*
65962306a36Sopenharmony_ci		 * Make a temporary IO buffer with which to write it out
66062306a36Sopenharmony_ci		 * (this will requeue the metadata buffer to BJ_Shadow).
66162306a36Sopenharmony_ci		 */
66262306a36Sopenharmony_ci		set_bit(BH_JWrite, &jh2bh(jh)->b_state);
66362306a36Sopenharmony_ci		JBUFFER_TRACE(jh, "ph3: write metadata");
66462306a36Sopenharmony_ci		flags = jbd2_journal_write_metadata_buffer(commit_transaction,
66562306a36Sopenharmony_ci						jh, &wbuf[bufs], blocknr);
66662306a36Sopenharmony_ci		if (flags < 0) {
66762306a36Sopenharmony_ci			jbd2_journal_abort(journal, flags);
66862306a36Sopenharmony_ci			continue;
66962306a36Sopenharmony_ci		}
67062306a36Sopenharmony_ci		jbd2_file_log_bh(&io_bufs, wbuf[bufs]);
67162306a36Sopenharmony_ci
67262306a36Sopenharmony_ci		/* Record the new block's tag in the current descriptor
67362306a36Sopenharmony_ci                   buffer */
67462306a36Sopenharmony_ci
67562306a36Sopenharmony_ci		tag_flag = 0;
67662306a36Sopenharmony_ci		if (flags & 1)
67762306a36Sopenharmony_ci			tag_flag |= JBD2_FLAG_ESCAPE;
67862306a36Sopenharmony_ci		if (!first_tag)
67962306a36Sopenharmony_ci			tag_flag |= JBD2_FLAG_SAME_UUID;
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci		tag = (journal_block_tag_t *) tagp;
68262306a36Sopenharmony_ci		write_tag_block(journal, tag, jh2bh(jh)->b_blocknr);
68362306a36Sopenharmony_ci		tag->t_flags = cpu_to_be16(tag_flag);
68462306a36Sopenharmony_ci		jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],
68562306a36Sopenharmony_ci					commit_transaction->t_tid);
68662306a36Sopenharmony_ci		tagp += tag_bytes;
68762306a36Sopenharmony_ci		space_left -= tag_bytes;
68862306a36Sopenharmony_ci		bufs++;
68962306a36Sopenharmony_ci
69062306a36Sopenharmony_ci		if (first_tag) {
69162306a36Sopenharmony_ci			memcpy (tagp, journal->j_uuid, 16);
69262306a36Sopenharmony_ci			tagp += 16;
69362306a36Sopenharmony_ci			space_left -= 16;
69462306a36Sopenharmony_ci			first_tag = 0;
69562306a36Sopenharmony_ci		}
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ci		/* If there's no more to do, or if the descriptor is full,
69862306a36Sopenharmony_ci		   let the IO rip! */
69962306a36Sopenharmony_ci
70062306a36Sopenharmony_ci		if (bufs == journal->j_wbufsize ||
70162306a36Sopenharmony_ci		    commit_transaction->t_buffers == NULL ||
70262306a36Sopenharmony_ci		    space_left < tag_bytes + 16 + csum_size) {
70362306a36Sopenharmony_ci
70462306a36Sopenharmony_ci			jbd2_debug(4, "JBD2: Submit %d IOs\n", bufs);
70562306a36Sopenharmony_ci
70662306a36Sopenharmony_ci			/* Write an end-of-descriptor marker before
70762306a36Sopenharmony_ci                           submitting the IOs.  "tag" still points to
70862306a36Sopenharmony_ci                           the last tag we set up. */
70962306a36Sopenharmony_ci
71062306a36Sopenharmony_ci			tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
71162306a36Sopenharmony_cistart_journal_io:
71262306a36Sopenharmony_ci			if (descriptor)
71362306a36Sopenharmony_ci				jbd2_descriptor_block_csum_set(journal,
71462306a36Sopenharmony_ci							descriptor);
71562306a36Sopenharmony_ci
71662306a36Sopenharmony_ci			for (i = 0; i < bufs; i++) {
71762306a36Sopenharmony_ci				struct buffer_head *bh = wbuf[i];
71862306a36Sopenharmony_ci
71962306a36Sopenharmony_ci				/*
72062306a36Sopenharmony_ci				 * Compute checksum.
72162306a36Sopenharmony_ci				 */
72262306a36Sopenharmony_ci				if (jbd2_has_feature_checksum(journal)) {
72362306a36Sopenharmony_ci					crc32_sum =
72462306a36Sopenharmony_ci					    jbd2_checksum_data(crc32_sum, bh);
72562306a36Sopenharmony_ci				}
72662306a36Sopenharmony_ci
72762306a36Sopenharmony_ci				lock_buffer(bh);
72862306a36Sopenharmony_ci				clear_buffer_dirty(bh);
72962306a36Sopenharmony_ci				set_buffer_uptodate(bh);
73062306a36Sopenharmony_ci				bh->b_end_io = journal_end_buffer_io_sync;
73162306a36Sopenharmony_ci				submit_bh(REQ_OP_WRITE | JBD2_JOURNAL_REQ_FLAGS,
73262306a36Sopenharmony_ci					  bh);
73362306a36Sopenharmony_ci			}
73462306a36Sopenharmony_ci			cond_resched();
73562306a36Sopenharmony_ci
73662306a36Sopenharmony_ci			/* Force a new descriptor to be generated next
73762306a36Sopenharmony_ci                           time round the loop. */
73862306a36Sopenharmony_ci			descriptor = NULL;
73962306a36Sopenharmony_ci			bufs = 0;
74062306a36Sopenharmony_ci		}
74162306a36Sopenharmony_ci	}
74262306a36Sopenharmony_ci
74362306a36Sopenharmony_ci	err = journal_finish_inode_data_buffers(journal, commit_transaction);
74462306a36Sopenharmony_ci	if (err) {
74562306a36Sopenharmony_ci		printk(KERN_WARNING
74662306a36Sopenharmony_ci			"JBD2: Detected IO errors while flushing file data "
74762306a36Sopenharmony_ci		       "on %s\n", journal->j_devname);
74862306a36Sopenharmony_ci		if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR)
74962306a36Sopenharmony_ci			jbd2_journal_abort(journal, err);
75062306a36Sopenharmony_ci		err = 0;
75162306a36Sopenharmony_ci	}
75262306a36Sopenharmony_ci
75362306a36Sopenharmony_ci	/*
75462306a36Sopenharmony_ci	 * Get current oldest transaction in the log before we issue flush
75562306a36Sopenharmony_ci	 * to the filesystem device. After the flush we can be sure that
75662306a36Sopenharmony_ci	 * blocks of all older transactions are checkpointed to persistent
75762306a36Sopenharmony_ci	 * storage and we will be safe to update journal start in the
75862306a36Sopenharmony_ci	 * superblock with the numbers we get here.
75962306a36Sopenharmony_ci	 */
76062306a36Sopenharmony_ci	update_tail =
76162306a36Sopenharmony_ci		jbd2_journal_get_log_tail(journal, &first_tid, &first_block);
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_ci	write_lock(&journal->j_state_lock);
76462306a36Sopenharmony_ci	if (update_tail) {
76562306a36Sopenharmony_ci		long freed = first_block - journal->j_tail;
76662306a36Sopenharmony_ci
76762306a36Sopenharmony_ci		if (first_block < journal->j_tail)
76862306a36Sopenharmony_ci			freed += journal->j_last - journal->j_first;
76962306a36Sopenharmony_ci		/* Update tail only if we free significant amount of space */
77062306a36Sopenharmony_ci		if (freed < jbd2_journal_get_max_txn_bufs(journal))
77162306a36Sopenharmony_ci			update_tail = 0;
77262306a36Sopenharmony_ci	}
77362306a36Sopenharmony_ci	J_ASSERT(commit_transaction->t_state == T_COMMIT);
77462306a36Sopenharmony_ci	commit_transaction->t_state = T_COMMIT_DFLUSH;
77562306a36Sopenharmony_ci	write_unlock(&journal->j_state_lock);
77662306a36Sopenharmony_ci
77762306a36Sopenharmony_ci	/*
77862306a36Sopenharmony_ci	 * If the journal is not located on the file system device,
77962306a36Sopenharmony_ci	 * then we must flush the file system device before we issue
78062306a36Sopenharmony_ci	 * the commit record
78162306a36Sopenharmony_ci	 */
78262306a36Sopenharmony_ci	if (commit_transaction->t_need_data_flush &&
78362306a36Sopenharmony_ci	    (journal->j_fs_dev != journal->j_dev) &&
78462306a36Sopenharmony_ci	    (journal->j_flags & JBD2_BARRIER))
78562306a36Sopenharmony_ci		blkdev_issue_flush(journal->j_fs_dev);
78662306a36Sopenharmony_ci
78762306a36Sopenharmony_ci	/* Done it all: now write the commit record asynchronously. */
78862306a36Sopenharmony_ci	if (jbd2_has_feature_async_commit(journal)) {
78962306a36Sopenharmony_ci		err = journal_submit_commit_record(journal, commit_transaction,
79062306a36Sopenharmony_ci						 &cbh, crc32_sum);
79162306a36Sopenharmony_ci		if (err)
79262306a36Sopenharmony_ci			jbd2_journal_abort(journal, err);
79362306a36Sopenharmony_ci	}
79462306a36Sopenharmony_ci
79562306a36Sopenharmony_ci	blk_finish_plug(&plug);
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci	/* Lo and behold: we have just managed to send a transaction to
79862306a36Sopenharmony_ci           the log.  Before we can commit it, wait for the IO so far to
79962306a36Sopenharmony_ci           complete.  Control buffers being written are on the
80062306a36Sopenharmony_ci           transaction's t_log_list queue, and metadata buffers are on
80162306a36Sopenharmony_ci           the io_bufs list.
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ci	   Wait for the buffers in reverse order.  That way we are
80462306a36Sopenharmony_ci	   less likely to be woken up until all IOs have completed, and
80562306a36Sopenharmony_ci	   so we incur less scheduling load.
80662306a36Sopenharmony_ci	*/
80762306a36Sopenharmony_ci
80862306a36Sopenharmony_ci	jbd2_debug(3, "JBD2: commit phase 3\n");
80962306a36Sopenharmony_ci
81062306a36Sopenharmony_ci	while (!list_empty(&io_bufs)) {
81162306a36Sopenharmony_ci		struct buffer_head *bh = list_entry(io_bufs.prev,
81262306a36Sopenharmony_ci						    struct buffer_head,
81362306a36Sopenharmony_ci						    b_assoc_buffers);
81462306a36Sopenharmony_ci
81562306a36Sopenharmony_ci		wait_on_buffer(bh);
81662306a36Sopenharmony_ci		cond_resched();
81762306a36Sopenharmony_ci
81862306a36Sopenharmony_ci		if (unlikely(!buffer_uptodate(bh)))
81962306a36Sopenharmony_ci			err = -EIO;
82062306a36Sopenharmony_ci		jbd2_unfile_log_bh(bh);
82162306a36Sopenharmony_ci		stats.run.rs_blocks_logged++;
82262306a36Sopenharmony_ci
82362306a36Sopenharmony_ci		/*
82462306a36Sopenharmony_ci		 * The list contains temporary buffer heads created by
82562306a36Sopenharmony_ci		 * jbd2_journal_write_metadata_buffer().
82662306a36Sopenharmony_ci		 */
82762306a36Sopenharmony_ci		BUFFER_TRACE(bh, "dumping temporary bh");
82862306a36Sopenharmony_ci		__brelse(bh);
82962306a36Sopenharmony_ci		J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
83062306a36Sopenharmony_ci		free_buffer_head(bh);
83162306a36Sopenharmony_ci
83262306a36Sopenharmony_ci		/* We also have to refile the corresponding shadowed buffer */
83362306a36Sopenharmony_ci		jh = commit_transaction->t_shadow_list->b_tprev;
83462306a36Sopenharmony_ci		bh = jh2bh(jh);
83562306a36Sopenharmony_ci		clear_buffer_jwrite(bh);
83662306a36Sopenharmony_ci		J_ASSERT_BH(bh, buffer_jbddirty(bh));
83762306a36Sopenharmony_ci		J_ASSERT_BH(bh, !buffer_shadow(bh));
83862306a36Sopenharmony_ci
83962306a36Sopenharmony_ci		/* The metadata is now released for reuse, but we need
84062306a36Sopenharmony_ci                   to remember it against this transaction so that when
84162306a36Sopenharmony_ci                   we finally commit, we can do any checkpointing
84262306a36Sopenharmony_ci                   required. */
84362306a36Sopenharmony_ci		JBUFFER_TRACE(jh, "file as BJ_Forget");
84462306a36Sopenharmony_ci		jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
84562306a36Sopenharmony_ci		JBUFFER_TRACE(jh, "brelse shadowed buffer");
84662306a36Sopenharmony_ci		__brelse(bh);
84762306a36Sopenharmony_ci	}
84862306a36Sopenharmony_ci
84962306a36Sopenharmony_ci	J_ASSERT (commit_transaction->t_shadow_list == NULL);
85062306a36Sopenharmony_ci
85162306a36Sopenharmony_ci	jbd2_debug(3, "JBD2: commit phase 4\n");
85262306a36Sopenharmony_ci
85362306a36Sopenharmony_ci	/* Here we wait for the revoke record and descriptor record buffers */
85462306a36Sopenharmony_ci	while (!list_empty(&log_bufs)) {
85562306a36Sopenharmony_ci		struct buffer_head *bh;
85662306a36Sopenharmony_ci
85762306a36Sopenharmony_ci		bh = list_entry(log_bufs.prev, struct buffer_head, b_assoc_buffers);
85862306a36Sopenharmony_ci		wait_on_buffer(bh);
85962306a36Sopenharmony_ci		cond_resched();
86062306a36Sopenharmony_ci
86162306a36Sopenharmony_ci		if (unlikely(!buffer_uptodate(bh)))
86262306a36Sopenharmony_ci			err = -EIO;
86362306a36Sopenharmony_ci
86462306a36Sopenharmony_ci		BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
86562306a36Sopenharmony_ci		clear_buffer_jwrite(bh);
86662306a36Sopenharmony_ci		jbd2_unfile_log_bh(bh);
86762306a36Sopenharmony_ci		stats.run.rs_blocks_logged++;
86862306a36Sopenharmony_ci		__brelse(bh);		/* One for getblk */
86962306a36Sopenharmony_ci		/* AKPM: bforget here */
87062306a36Sopenharmony_ci	}
87162306a36Sopenharmony_ci
87262306a36Sopenharmony_ci	if (err)
87362306a36Sopenharmony_ci		jbd2_journal_abort(journal, err);
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_ci	jbd2_debug(3, "JBD2: commit phase 5\n");
87662306a36Sopenharmony_ci	write_lock(&journal->j_state_lock);
87762306a36Sopenharmony_ci	J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
87862306a36Sopenharmony_ci	commit_transaction->t_state = T_COMMIT_JFLUSH;
87962306a36Sopenharmony_ci	write_unlock(&journal->j_state_lock);
88062306a36Sopenharmony_ci
88162306a36Sopenharmony_ci	if (!jbd2_has_feature_async_commit(journal)) {
88262306a36Sopenharmony_ci		err = journal_submit_commit_record(journal, commit_transaction,
88362306a36Sopenharmony_ci						&cbh, crc32_sum);
88462306a36Sopenharmony_ci		if (err)
88562306a36Sopenharmony_ci			jbd2_journal_abort(journal, err);
88662306a36Sopenharmony_ci	}
88762306a36Sopenharmony_ci	if (cbh)
88862306a36Sopenharmony_ci		err = journal_wait_on_commit_record(journal, cbh);
88962306a36Sopenharmony_ci	stats.run.rs_blocks_logged++;
89062306a36Sopenharmony_ci	if (jbd2_has_feature_async_commit(journal) &&
89162306a36Sopenharmony_ci	    journal->j_flags & JBD2_BARRIER) {
89262306a36Sopenharmony_ci		blkdev_issue_flush(journal->j_dev);
89362306a36Sopenharmony_ci	}
89462306a36Sopenharmony_ci
89562306a36Sopenharmony_ci	if (err)
89662306a36Sopenharmony_ci		jbd2_journal_abort(journal, err);
89762306a36Sopenharmony_ci
89862306a36Sopenharmony_ci	WARN_ON_ONCE(
89962306a36Sopenharmony_ci		atomic_read(&commit_transaction->t_outstanding_credits) < 0);
90062306a36Sopenharmony_ci
90162306a36Sopenharmony_ci	/*
90262306a36Sopenharmony_ci	 * Now disk caches for filesystem device are flushed so we are safe to
90362306a36Sopenharmony_ci	 * erase checkpointed transactions from the log by updating journal
90462306a36Sopenharmony_ci	 * superblock.
90562306a36Sopenharmony_ci	 */
90662306a36Sopenharmony_ci	if (update_tail)
90762306a36Sopenharmony_ci		jbd2_update_log_tail(journal, first_tid, first_block);
90862306a36Sopenharmony_ci
90962306a36Sopenharmony_ci	/* End of a transaction!  Finally, we can do checkpoint
91062306a36Sopenharmony_ci           processing: any buffers committed as a result of this
91162306a36Sopenharmony_ci           transaction can be removed from any checkpoint list it was on
91262306a36Sopenharmony_ci           before. */
91362306a36Sopenharmony_ci
91462306a36Sopenharmony_ci	jbd2_debug(3, "JBD2: commit phase 6\n");
91562306a36Sopenharmony_ci
91662306a36Sopenharmony_ci	J_ASSERT(list_empty(&commit_transaction->t_inode_list));
91762306a36Sopenharmony_ci	J_ASSERT(commit_transaction->t_buffers == NULL);
91862306a36Sopenharmony_ci	J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
91962306a36Sopenharmony_ci	J_ASSERT(commit_transaction->t_shadow_list == NULL);
92062306a36Sopenharmony_ci
92162306a36Sopenharmony_cirestart_loop:
92262306a36Sopenharmony_ci	/*
92362306a36Sopenharmony_ci	 * As there are other places (journal_unmap_buffer()) adding buffers
92462306a36Sopenharmony_ci	 * to this list we have to be careful and hold the j_list_lock.
92562306a36Sopenharmony_ci	 */
92662306a36Sopenharmony_ci	spin_lock(&journal->j_list_lock);
92762306a36Sopenharmony_ci	while (commit_transaction->t_forget) {
92862306a36Sopenharmony_ci		transaction_t *cp_transaction;
92962306a36Sopenharmony_ci		struct buffer_head *bh;
93062306a36Sopenharmony_ci		int try_to_free = 0;
93162306a36Sopenharmony_ci		bool drop_ref;
93262306a36Sopenharmony_ci
93362306a36Sopenharmony_ci		jh = commit_transaction->t_forget;
93462306a36Sopenharmony_ci		spin_unlock(&journal->j_list_lock);
93562306a36Sopenharmony_ci		bh = jh2bh(jh);
93662306a36Sopenharmony_ci		/*
93762306a36Sopenharmony_ci		 * Get a reference so that bh cannot be freed before we are
93862306a36Sopenharmony_ci		 * done with it.
93962306a36Sopenharmony_ci		 */
94062306a36Sopenharmony_ci		get_bh(bh);
94162306a36Sopenharmony_ci		spin_lock(&jh->b_state_lock);
94262306a36Sopenharmony_ci		J_ASSERT_JH(jh,	jh->b_transaction == commit_transaction);
94362306a36Sopenharmony_ci
94462306a36Sopenharmony_ci		/*
94562306a36Sopenharmony_ci		 * If there is undo-protected committed data against
94662306a36Sopenharmony_ci		 * this buffer, then we can remove it now.  If it is a
94762306a36Sopenharmony_ci		 * buffer needing such protection, the old frozen_data
94862306a36Sopenharmony_ci		 * field now points to a committed version of the
94962306a36Sopenharmony_ci		 * buffer, so rotate that field to the new committed
95062306a36Sopenharmony_ci		 * data.
95162306a36Sopenharmony_ci		 *
95262306a36Sopenharmony_ci		 * Otherwise, we can just throw away the frozen data now.
95362306a36Sopenharmony_ci		 *
95462306a36Sopenharmony_ci		 * We also know that the frozen data has already fired
95562306a36Sopenharmony_ci		 * its triggers if they exist, so we can clear that too.
95662306a36Sopenharmony_ci		 */
95762306a36Sopenharmony_ci		if (jh->b_committed_data) {
95862306a36Sopenharmony_ci			jbd2_free(jh->b_committed_data, bh->b_size);
95962306a36Sopenharmony_ci			jh->b_committed_data = NULL;
96062306a36Sopenharmony_ci			if (jh->b_frozen_data) {
96162306a36Sopenharmony_ci				jh->b_committed_data = jh->b_frozen_data;
96262306a36Sopenharmony_ci				jh->b_frozen_data = NULL;
96362306a36Sopenharmony_ci				jh->b_frozen_triggers = NULL;
96462306a36Sopenharmony_ci			}
96562306a36Sopenharmony_ci		} else if (jh->b_frozen_data) {
96662306a36Sopenharmony_ci			jbd2_free(jh->b_frozen_data, bh->b_size);
96762306a36Sopenharmony_ci			jh->b_frozen_data = NULL;
96862306a36Sopenharmony_ci			jh->b_frozen_triggers = NULL;
96962306a36Sopenharmony_ci		}
97062306a36Sopenharmony_ci
97162306a36Sopenharmony_ci		spin_lock(&journal->j_list_lock);
97262306a36Sopenharmony_ci		cp_transaction = jh->b_cp_transaction;
97362306a36Sopenharmony_ci		if (cp_transaction) {
97462306a36Sopenharmony_ci			JBUFFER_TRACE(jh, "remove from old cp transaction");
97562306a36Sopenharmony_ci			cp_transaction->t_chp_stats.cs_dropped++;
97662306a36Sopenharmony_ci			__jbd2_journal_remove_checkpoint(jh);
97762306a36Sopenharmony_ci		}
97862306a36Sopenharmony_ci
97962306a36Sopenharmony_ci		/* Only re-checkpoint the buffer_head if it is marked
98062306a36Sopenharmony_ci		 * dirty.  If the buffer was added to the BJ_Forget list
98162306a36Sopenharmony_ci		 * by jbd2_journal_forget, it may no longer be dirty and
98262306a36Sopenharmony_ci		 * there's no point in keeping a checkpoint record for
98362306a36Sopenharmony_ci		 * it. */
98462306a36Sopenharmony_ci
98562306a36Sopenharmony_ci		/*
98662306a36Sopenharmony_ci		 * A buffer which has been freed while still being journaled
98762306a36Sopenharmony_ci		 * by a previous transaction, refile the buffer to BJ_Forget of
98862306a36Sopenharmony_ci		 * the running transaction. If the just committed transaction
98962306a36Sopenharmony_ci		 * contains "add to orphan" operation, we can completely
99062306a36Sopenharmony_ci		 * invalidate the buffer now. We are rather thorough in that
99162306a36Sopenharmony_ci		 * since the buffer may be still accessible when blocksize <
99262306a36Sopenharmony_ci		 * pagesize and it is attached to the last partial page.
99362306a36Sopenharmony_ci		 */
99462306a36Sopenharmony_ci		if (buffer_freed(bh) && !jh->b_next_transaction) {
99562306a36Sopenharmony_ci			struct address_space *mapping;
99662306a36Sopenharmony_ci
99762306a36Sopenharmony_ci			clear_buffer_freed(bh);
99862306a36Sopenharmony_ci			clear_buffer_jbddirty(bh);
99962306a36Sopenharmony_ci
100062306a36Sopenharmony_ci			/*
100162306a36Sopenharmony_ci			 * Block device buffers need to stay mapped all the
100262306a36Sopenharmony_ci			 * time, so it is enough to clear buffer_jbddirty and
100362306a36Sopenharmony_ci			 * buffer_freed bits. For the file mapping buffers (i.e.
100462306a36Sopenharmony_ci			 * journalled data) we need to unmap buffer and clear
100562306a36Sopenharmony_ci			 * more bits. We also need to be careful about the check
100662306a36Sopenharmony_ci			 * because the data page mapping can get cleared under
100762306a36Sopenharmony_ci			 * our hands. Note that if mapping == NULL, we don't
100862306a36Sopenharmony_ci			 * need to make buffer unmapped because the page is
100962306a36Sopenharmony_ci			 * already detached from the mapping and buffers cannot
101062306a36Sopenharmony_ci			 * get reused.
101162306a36Sopenharmony_ci			 */
101262306a36Sopenharmony_ci			mapping = READ_ONCE(bh->b_folio->mapping);
101362306a36Sopenharmony_ci			if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) {
101462306a36Sopenharmony_ci				clear_buffer_mapped(bh);
101562306a36Sopenharmony_ci				clear_buffer_new(bh);
101662306a36Sopenharmony_ci				clear_buffer_req(bh);
101762306a36Sopenharmony_ci				bh->b_bdev = NULL;
101862306a36Sopenharmony_ci			}
101962306a36Sopenharmony_ci		}
102062306a36Sopenharmony_ci
102162306a36Sopenharmony_ci		if (buffer_jbddirty(bh)) {
102262306a36Sopenharmony_ci			JBUFFER_TRACE(jh, "add to new checkpointing trans");
102362306a36Sopenharmony_ci			__jbd2_journal_insert_checkpoint(jh, commit_transaction);
102462306a36Sopenharmony_ci			if (is_journal_aborted(journal))
102562306a36Sopenharmony_ci				clear_buffer_jbddirty(bh);
102662306a36Sopenharmony_ci		} else {
102762306a36Sopenharmony_ci			J_ASSERT_BH(bh, !buffer_dirty(bh));
102862306a36Sopenharmony_ci			/*
102962306a36Sopenharmony_ci			 * The buffer on BJ_Forget list and not jbddirty means
103062306a36Sopenharmony_ci			 * it has been freed by this transaction and hence it
103162306a36Sopenharmony_ci			 * could not have been reallocated until this
103262306a36Sopenharmony_ci			 * transaction has committed. *BUT* it could be
103362306a36Sopenharmony_ci			 * reallocated once we have written all the data to
103462306a36Sopenharmony_ci			 * disk and before we process the buffer on BJ_Forget
103562306a36Sopenharmony_ci			 * list.
103662306a36Sopenharmony_ci			 */
103762306a36Sopenharmony_ci			if (!jh->b_next_transaction)
103862306a36Sopenharmony_ci				try_to_free = 1;
103962306a36Sopenharmony_ci		}
104062306a36Sopenharmony_ci		JBUFFER_TRACE(jh, "refile or unfile buffer");
104162306a36Sopenharmony_ci		drop_ref = __jbd2_journal_refile_buffer(jh);
104262306a36Sopenharmony_ci		spin_unlock(&jh->b_state_lock);
104362306a36Sopenharmony_ci		if (drop_ref)
104462306a36Sopenharmony_ci			jbd2_journal_put_journal_head(jh);
104562306a36Sopenharmony_ci		if (try_to_free)
104662306a36Sopenharmony_ci			release_buffer_page(bh);	/* Drops bh reference */
104762306a36Sopenharmony_ci		else
104862306a36Sopenharmony_ci			__brelse(bh);
104962306a36Sopenharmony_ci		cond_resched_lock(&journal->j_list_lock);
105062306a36Sopenharmony_ci	}
105162306a36Sopenharmony_ci	spin_unlock(&journal->j_list_lock);
105262306a36Sopenharmony_ci	/*
105362306a36Sopenharmony_ci	 * This is a bit sleazy.  We use j_list_lock to protect transition
105462306a36Sopenharmony_ci	 * of a transaction into T_FINISHED state and calling
105562306a36Sopenharmony_ci	 * __jbd2_journal_drop_transaction(). Otherwise we could race with
105662306a36Sopenharmony_ci	 * other checkpointing code processing the transaction...
105762306a36Sopenharmony_ci	 */
105862306a36Sopenharmony_ci	write_lock(&journal->j_state_lock);
105962306a36Sopenharmony_ci	spin_lock(&journal->j_list_lock);
106062306a36Sopenharmony_ci	/*
106162306a36Sopenharmony_ci	 * Now recheck if some buffers did not get attached to the transaction
106262306a36Sopenharmony_ci	 * while the lock was dropped...
106362306a36Sopenharmony_ci	 */
106462306a36Sopenharmony_ci	if (commit_transaction->t_forget) {
106562306a36Sopenharmony_ci		spin_unlock(&journal->j_list_lock);
106662306a36Sopenharmony_ci		write_unlock(&journal->j_state_lock);
106762306a36Sopenharmony_ci		goto restart_loop;
106862306a36Sopenharmony_ci	}
106962306a36Sopenharmony_ci
107062306a36Sopenharmony_ci	/* Add the transaction to the checkpoint list
107162306a36Sopenharmony_ci	 * __jbd2_journal_remove_checkpoint() cannot destroy the transaction
107262306a36Sopenharmony_ci	 * under us because it is not marked as T_FINISHED yet */
107362306a36Sopenharmony_ci	if (journal->j_checkpoint_transactions == NULL) {
107462306a36Sopenharmony_ci		journal->j_checkpoint_transactions = commit_transaction;
107562306a36Sopenharmony_ci		commit_transaction->t_cpnext = commit_transaction;
107662306a36Sopenharmony_ci		commit_transaction->t_cpprev = commit_transaction;
107762306a36Sopenharmony_ci	} else {
107862306a36Sopenharmony_ci		commit_transaction->t_cpnext =
107962306a36Sopenharmony_ci			journal->j_checkpoint_transactions;
108062306a36Sopenharmony_ci		commit_transaction->t_cpprev =
108162306a36Sopenharmony_ci			commit_transaction->t_cpnext->t_cpprev;
108262306a36Sopenharmony_ci		commit_transaction->t_cpnext->t_cpprev =
108362306a36Sopenharmony_ci			commit_transaction;
108462306a36Sopenharmony_ci		commit_transaction->t_cpprev->t_cpnext =
108562306a36Sopenharmony_ci				commit_transaction;
108662306a36Sopenharmony_ci	}
108762306a36Sopenharmony_ci	spin_unlock(&journal->j_list_lock);
108862306a36Sopenharmony_ci
108962306a36Sopenharmony_ci	/* Done with this transaction! */
109062306a36Sopenharmony_ci
109162306a36Sopenharmony_ci	jbd2_debug(3, "JBD2: commit phase 7\n");
109262306a36Sopenharmony_ci
109362306a36Sopenharmony_ci	J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
109462306a36Sopenharmony_ci
109562306a36Sopenharmony_ci	commit_transaction->t_start = jiffies;
109662306a36Sopenharmony_ci	stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging,
109762306a36Sopenharmony_ci					      commit_transaction->t_start);
109862306a36Sopenharmony_ci
109962306a36Sopenharmony_ci	/*
110062306a36Sopenharmony_ci	 * File the transaction statistics
110162306a36Sopenharmony_ci	 */
110262306a36Sopenharmony_ci	stats.ts_tid = commit_transaction->t_tid;
110362306a36Sopenharmony_ci	stats.run.rs_handle_count =
110462306a36Sopenharmony_ci		atomic_read(&commit_transaction->t_handle_count);
110562306a36Sopenharmony_ci	trace_jbd2_run_stats(journal->j_fs_dev->bd_dev,
110662306a36Sopenharmony_ci			     commit_transaction->t_tid, &stats.run);
110762306a36Sopenharmony_ci	stats.ts_requested = (commit_transaction->t_requested) ? 1 : 0;
110862306a36Sopenharmony_ci
110962306a36Sopenharmony_ci	commit_transaction->t_state = T_COMMIT_CALLBACK;
111062306a36Sopenharmony_ci	J_ASSERT(commit_transaction == journal->j_committing_transaction);
111162306a36Sopenharmony_ci	journal->j_commit_sequence = commit_transaction->t_tid;
111262306a36Sopenharmony_ci	journal->j_committing_transaction = NULL;
111362306a36Sopenharmony_ci	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
111462306a36Sopenharmony_ci
111562306a36Sopenharmony_ci	/*
111662306a36Sopenharmony_ci	 * weight the commit time higher than the average time so we don't
111762306a36Sopenharmony_ci	 * react too strongly to vast changes in the commit time
111862306a36Sopenharmony_ci	 */
111962306a36Sopenharmony_ci	if (likely(journal->j_average_commit_time))
112062306a36Sopenharmony_ci		journal->j_average_commit_time = (commit_time +
112162306a36Sopenharmony_ci				journal->j_average_commit_time*3) / 4;
112262306a36Sopenharmony_ci	else
112362306a36Sopenharmony_ci		journal->j_average_commit_time = commit_time;
112462306a36Sopenharmony_ci
112562306a36Sopenharmony_ci	write_unlock(&journal->j_state_lock);
112662306a36Sopenharmony_ci
112762306a36Sopenharmony_ci	if (journal->j_commit_callback)
112862306a36Sopenharmony_ci		journal->j_commit_callback(journal, commit_transaction);
112962306a36Sopenharmony_ci	if (journal->j_fc_cleanup_callback)
113062306a36Sopenharmony_ci		journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);
113162306a36Sopenharmony_ci
113262306a36Sopenharmony_ci	trace_jbd2_end_commit(journal, commit_transaction);
113362306a36Sopenharmony_ci	jbd2_debug(1, "JBD2: commit %d complete, head %d\n",
113462306a36Sopenharmony_ci		  journal->j_commit_sequence, journal->j_tail_sequence);
113562306a36Sopenharmony_ci
113662306a36Sopenharmony_ci	write_lock(&journal->j_state_lock);
113762306a36Sopenharmony_ci	journal->j_flags &= ~JBD2_FULL_COMMIT_ONGOING;
113862306a36Sopenharmony_ci	journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
113962306a36Sopenharmony_ci	spin_lock(&journal->j_list_lock);
114062306a36Sopenharmony_ci	commit_transaction->t_state = T_FINISHED;
114162306a36Sopenharmony_ci	/* Check if the transaction can be dropped now that we are finished */
114262306a36Sopenharmony_ci	if (commit_transaction->t_checkpoint_list == NULL) {
114362306a36Sopenharmony_ci		__jbd2_journal_drop_transaction(journal, commit_transaction);
114462306a36Sopenharmony_ci		jbd2_journal_free_transaction(commit_transaction);
114562306a36Sopenharmony_ci	}
114662306a36Sopenharmony_ci	spin_unlock(&journal->j_list_lock);
114762306a36Sopenharmony_ci	write_unlock(&journal->j_state_lock);
114862306a36Sopenharmony_ci	wake_up(&journal->j_wait_done_commit);
114962306a36Sopenharmony_ci	wake_up(&journal->j_fc_wait);
115062306a36Sopenharmony_ci
115162306a36Sopenharmony_ci	/*
115262306a36Sopenharmony_ci	 * Calculate overall stats
115362306a36Sopenharmony_ci	 */
115462306a36Sopenharmony_ci	spin_lock(&journal->j_history_lock);
115562306a36Sopenharmony_ci	journal->j_stats.ts_tid++;
115662306a36Sopenharmony_ci	journal->j_stats.ts_requested += stats.ts_requested;
115762306a36Sopenharmony_ci	journal->j_stats.run.rs_wait += stats.run.rs_wait;
115862306a36Sopenharmony_ci	journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay;
115962306a36Sopenharmony_ci	journal->j_stats.run.rs_running += stats.run.rs_running;
116062306a36Sopenharmony_ci	journal->j_stats.run.rs_locked += stats.run.rs_locked;
116162306a36Sopenharmony_ci	journal->j_stats.run.rs_flushing += stats.run.rs_flushing;
116262306a36Sopenharmony_ci	journal->j_stats.run.rs_logging += stats.run.rs_logging;
116362306a36Sopenharmony_ci	journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count;
116462306a36Sopenharmony_ci	journal->j_stats.run.rs_blocks += stats.run.rs_blocks;
116562306a36Sopenharmony_ci	journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged;
116662306a36Sopenharmony_ci	spin_unlock(&journal->j_history_lock);
116762306a36Sopenharmony_ci}
1168