xref: /kernel/linux/linux-6.6/fs/buffer.c (revision 62306a36)
162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci *  linux/fs/buffer.c
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci/*
962306a36Sopenharmony_ci * Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95
1062306a36Sopenharmony_ci *
1162306a36Sopenharmony_ci * Removed a lot of unnecessary code and simplified things now that
1262306a36Sopenharmony_ci * the buffer cache isn't our primary cache - Andrew Tridgell 12/96
1362306a36Sopenharmony_ci *
1462306a36Sopenharmony_ci * Speed up hash, lru, and free list operations.  Use gfp() for allocating
1562306a36Sopenharmony_ci * hash table, use SLAB cache for buffer heads. SMP threading.  -DaveM
1662306a36Sopenharmony_ci *
1762306a36Sopenharmony_ci * Added 32k buffer block sizes - these are required older ARM systems. - RMK
1862306a36Sopenharmony_ci *
1962306a36Sopenharmony_ci * async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de>
2062306a36Sopenharmony_ci */
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ci#include <linux/kernel.h>
2362306a36Sopenharmony_ci#include <linux/sched/signal.h>
2462306a36Sopenharmony_ci#include <linux/syscalls.h>
2562306a36Sopenharmony_ci#include <linux/fs.h>
2662306a36Sopenharmony_ci#include <linux/iomap.h>
2762306a36Sopenharmony_ci#include <linux/mm.h>
2862306a36Sopenharmony_ci#include <linux/percpu.h>
2962306a36Sopenharmony_ci#include <linux/slab.h>
3062306a36Sopenharmony_ci#include <linux/capability.h>
3162306a36Sopenharmony_ci#include <linux/blkdev.h>
3262306a36Sopenharmony_ci#include <linux/file.h>
3362306a36Sopenharmony_ci#include <linux/quotaops.h>
3462306a36Sopenharmony_ci#include <linux/highmem.h>
3562306a36Sopenharmony_ci#include <linux/export.h>
3662306a36Sopenharmony_ci#include <linux/backing-dev.h>
3762306a36Sopenharmony_ci#include <linux/writeback.h>
3862306a36Sopenharmony_ci#include <linux/hash.h>
3962306a36Sopenharmony_ci#include <linux/suspend.h>
4062306a36Sopenharmony_ci#include <linux/buffer_head.h>
4162306a36Sopenharmony_ci#include <linux/task_io_accounting_ops.h>
4262306a36Sopenharmony_ci#include <linux/bio.h>
4362306a36Sopenharmony_ci#include <linux/cpu.h>
4462306a36Sopenharmony_ci#include <linux/bitops.h>
4562306a36Sopenharmony_ci#include <linux/mpage.h>
4662306a36Sopenharmony_ci#include <linux/bit_spinlock.h>
4762306a36Sopenharmony_ci#include <linux/pagevec.h>
4862306a36Sopenharmony_ci#include <linux/sched/mm.h>
4962306a36Sopenharmony_ci#include <trace/events/block.h>
5062306a36Sopenharmony_ci#include <linux/fscrypt.h>
5162306a36Sopenharmony_ci#include <linux/fsverity.h>
5262306a36Sopenharmony_ci#include <linux/sched/isolation.h>
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_ci#include "internal.h"
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_cistatic int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
5762306a36Sopenharmony_cistatic void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
5862306a36Sopenharmony_ci			  struct writeback_control *wbc);
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ciinline void touch_buffer(struct buffer_head *bh)
6362306a36Sopenharmony_ci{
6462306a36Sopenharmony_ci	trace_block_touch_buffer(bh);
6562306a36Sopenharmony_ci	folio_mark_accessed(bh->b_folio);
6662306a36Sopenharmony_ci}
6762306a36Sopenharmony_ciEXPORT_SYMBOL(touch_buffer);
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_civoid __lock_buffer(struct buffer_head *bh)
7062306a36Sopenharmony_ci{
7162306a36Sopenharmony_ci	wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
7262306a36Sopenharmony_ci}
7362306a36Sopenharmony_ciEXPORT_SYMBOL(__lock_buffer);
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_civoid unlock_buffer(struct buffer_head *bh)
7662306a36Sopenharmony_ci{
7762306a36Sopenharmony_ci	clear_bit_unlock(BH_Lock, &bh->b_state);
7862306a36Sopenharmony_ci	smp_mb__after_atomic();
7962306a36Sopenharmony_ci	wake_up_bit(&bh->b_state, BH_Lock);
8062306a36Sopenharmony_ci}
8162306a36Sopenharmony_ciEXPORT_SYMBOL(unlock_buffer);
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci/*
8462306a36Sopenharmony_ci * Returns if the folio has dirty or writeback buffers. If all the buffers
8562306a36Sopenharmony_ci * are unlocked and clean then the folio_test_dirty information is stale. If
8662306a36Sopenharmony_ci * any of the buffers are locked, it is assumed they are locked for IO.
8762306a36Sopenharmony_ci */
8862306a36Sopenharmony_civoid buffer_check_dirty_writeback(struct folio *folio,
8962306a36Sopenharmony_ci				     bool *dirty, bool *writeback)
9062306a36Sopenharmony_ci{
9162306a36Sopenharmony_ci	struct buffer_head *head, *bh;
9262306a36Sopenharmony_ci	*dirty = false;
9362306a36Sopenharmony_ci	*writeback = false;
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci	BUG_ON(!folio_test_locked(folio));
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci	head = folio_buffers(folio);
9862306a36Sopenharmony_ci	if (!head)
9962306a36Sopenharmony_ci		return;
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci	if (folio_test_writeback(folio))
10262306a36Sopenharmony_ci		*writeback = true;
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ci	bh = head;
10562306a36Sopenharmony_ci	do {
10662306a36Sopenharmony_ci		if (buffer_locked(bh))
10762306a36Sopenharmony_ci			*writeback = true;
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_ci		if (buffer_dirty(bh))
11062306a36Sopenharmony_ci			*dirty = true;
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci		bh = bh->b_this_page;
11362306a36Sopenharmony_ci	} while (bh != head);
11462306a36Sopenharmony_ci}
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci/*
11762306a36Sopenharmony_ci * Block until a buffer comes unlocked.  This doesn't stop it
11862306a36Sopenharmony_ci * from becoming locked again - you have to lock it yourself
11962306a36Sopenharmony_ci * if you want to preserve its state.
12062306a36Sopenharmony_ci */
12162306a36Sopenharmony_civoid __wait_on_buffer(struct buffer_head * bh)
12262306a36Sopenharmony_ci{
12362306a36Sopenharmony_ci	wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
12462306a36Sopenharmony_ci}
12562306a36Sopenharmony_ciEXPORT_SYMBOL(__wait_on_buffer);
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_cistatic void buffer_io_error(struct buffer_head *bh, char *msg)
12862306a36Sopenharmony_ci{
12962306a36Sopenharmony_ci	if (!test_bit(BH_Quiet, &bh->b_state))
13062306a36Sopenharmony_ci		printk_ratelimited(KERN_ERR
13162306a36Sopenharmony_ci			"Buffer I/O error on dev %pg, logical block %llu%s\n",
13262306a36Sopenharmony_ci			bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
13362306a36Sopenharmony_ci}
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci/*
13662306a36Sopenharmony_ci * End-of-IO handler helper function which does not touch the bh after
13762306a36Sopenharmony_ci * unlocking it.
13862306a36Sopenharmony_ci * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
13962306a36Sopenharmony_ci * a race there is benign: unlock_buffer() only use the bh's address for
14062306a36Sopenharmony_ci * hashing after unlocking the buffer, so it doesn't actually touch the bh
14162306a36Sopenharmony_ci * itself.
14262306a36Sopenharmony_ci */
14362306a36Sopenharmony_cistatic void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
14462306a36Sopenharmony_ci{
14562306a36Sopenharmony_ci	if (uptodate) {
14662306a36Sopenharmony_ci		set_buffer_uptodate(bh);
14762306a36Sopenharmony_ci	} else {
14862306a36Sopenharmony_ci		/* This happens, due to failed read-ahead attempts. */
14962306a36Sopenharmony_ci		clear_buffer_uptodate(bh);
15062306a36Sopenharmony_ci	}
15162306a36Sopenharmony_ci	unlock_buffer(bh);
15262306a36Sopenharmony_ci}
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci/*
15562306a36Sopenharmony_ci * Default synchronous end-of-IO handler..  Just mark it up-to-date and
15662306a36Sopenharmony_ci * unlock the buffer.
15762306a36Sopenharmony_ci */
15862306a36Sopenharmony_civoid end_buffer_read_sync(struct buffer_head *bh, int uptodate)
15962306a36Sopenharmony_ci{
16062306a36Sopenharmony_ci	__end_buffer_read_notouch(bh, uptodate);
16162306a36Sopenharmony_ci	put_bh(bh);
16262306a36Sopenharmony_ci}
16362306a36Sopenharmony_ciEXPORT_SYMBOL(end_buffer_read_sync);
16462306a36Sopenharmony_ci
16562306a36Sopenharmony_civoid end_buffer_write_sync(struct buffer_head *bh, int uptodate)
16662306a36Sopenharmony_ci{
16762306a36Sopenharmony_ci	if (uptodate) {
16862306a36Sopenharmony_ci		set_buffer_uptodate(bh);
16962306a36Sopenharmony_ci	} else {
17062306a36Sopenharmony_ci		buffer_io_error(bh, ", lost sync page write");
17162306a36Sopenharmony_ci		mark_buffer_write_io_error(bh);
17262306a36Sopenharmony_ci		clear_buffer_uptodate(bh);
17362306a36Sopenharmony_ci	}
17462306a36Sopenharmony_ci	unlock_buffer(bh);
17562306a36Sopenharmony_ci	put_bh(bh);
17662306a36Sopenharmony_ci}
17762306a36Sopenharmony_ciEXPORT_SYMBOL(end_buffer_write_sync);
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci/*
18062306a36Sopenharmony_ci * Various filesystems appear to want __find_get_block to be non-blocking.
18162306a36Sopenharmony_ci * But it's the page lock which protects the buffers.  To get around this,
18262306a36Sopenharmony_ci * we get exclusion from try_to_free_buffers with the blockdev mapping's
18362306a36Sopenharmony_ci * private_lock.
18462306a36Sopenharmony_ci *
18562306a36Sopenharmony_ci * Hack idea: for the blockdev mapping, private_lock contention
18662306a36Sopenharmony_ci * may be quite high.  This code could TryLock the page, and if that
18762306a36Sopenharmony_ci * succeeds, there is no need to take private_lock.
18862306a36Sopenharmony_ci */
18962306a36Sopenharmony_cistatic struct buffer_head *
19062306a36Sopenharmony_ci__find_get_block_slow(struct block_device *bdev, sector_t block)
19162306a36Sopenharmony_ci{
19262306a36Sopenharmony_ci	struct inode *bd_inode = bdev->bd_inode;
19362306a36Sopenharmony_ci	struct address_space *bd_mapping = bd_inode->i_mapping;
19462306a36Sopenharmony_ci	struct buffer_head *ret = NULL;
19562306a36Sopenharmony_ci	pgoff_t index;
19662306a36Sopenharmony_ci	struct buffer_head *bh;
19762306a36Sopenharmony_ci	struct buffer_head *head;
19862306a36Sopenharmony_ci	struct folio *folio;
19962306a36Sopenharmony_ci	int all_mapped = 1;
20062306a36Sopenharmony_ci	static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci	index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
20362306a36Sopenharmony_ci	folio = __filemap_get_folio(bd_mapping, index, FGP_ACCESSED, 0);
20462306a36Sopenharmony_ci	if (IS_ERR(folio))
20562306a36Sopenharmony_ci		goto out;
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci	spin_lock(&bd_mapping->private_lock);
20862306a36Sopenharmony_ci	head = folio_buffers(folio);
20962306a36Sopenharmony_ci	if (!head)
21062306a36Sopenharmony_ci		goto out_unlock;
21162306a36Sopenharmony_ci	bh = head;
21262306a36Sopenharmony_ci	do {
21362306a36Sopenharmony_ci		if (!buffer_mapped(bh))
21462306a36Sopenharmony_ci			all_mapped = 0;
21562306a36Sopenharmony_ci		else if (bh->b_blocknr == block) {
21662306a36Sopenharmony_ci			ret = bh;
21762306a36Sopenharmony_ci			get_bh(bh);
21862306a36Sopenharmony_ci			goto out_unlock;
21962306a36Sopenharmony_ci		}
22062306a36Sopenharmony_ci		bh = bh->b_this_page;
22162306a36Sopenharmony_ci	} while (bh != head);
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_ci	/* we might be here because some of the buffers on this page are
22462306a36Sopenharmony_ci	 * not mapped.  This is due to various races between
22562306a36Sopenharmony_ci	 * file io on the block device and getblk.  It gets dealt with
22662306a36Sopenharmony_ci	 * elsewhere, don't buffer_error if we had some unmapped buffers
22762306a36Sopenharmony_ci	 */
22862306a36Sopenharmony_ci	ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
22962306a36Sopenharmony_ci	if (all_mapped && __ratelimit(&last_warned)) {
23062306a36Sopenharmony_ci		printk("__find_get_block_slow() failed. block=%llu, "
23162306a36Sopenharmony_ci		       "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
23262306a36Sopenharmony_ci		       "device %pg blocksize: %d\n",
23362306a36Sopenharmony_ci		       (unsigned long long)block,
23462306a36Sopenharmony_ci		       (unsigned long long)bh->b_blocknr,
23562306a36Sopenharmony_ci		       bh->b_state, bh->b_size, bdev,
23662306a36Sopenharmony_ci		       1 << bd_inode->i_blkbits);
23762306a36Sopenharmony_ci	}
23862306a36Sopenharmony_ciout_unlock:
23962306a36Sopenharmony_ci	spin_unlock(&bd_mapping->private_lock);
24062306a36Sopenharmony_ci	folio_put(folio);
24162306a36Sopenharmony_ciout:
24262306a36Sopenharmony_ci	return ret;
24362306a36Sopenharmony_ci}
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_cistatic void end_buffer_async_read(struct buffer_head *bh, int uptodate)
24662306a36Sopenharmony_ci{
24762306a36Sopenharmony_ci	unsigned long flags;
24862306a36Sopenharmony_ci	struct buffer_head *first;
24962306a36Sopenharmony_ci	struct buffer_head *tmp;
25062306a36Sopenharmony_ci	struct folio *folio;
25162306a36Sopenharmony_ci	int folio_uptodate = 1;
25262306a36Sopenharmony_ci
25362306a36Sopenharmony_ci	BUG_ON(!buffer_async_read(bh));
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_ci	folio = bh->b_folio;
25662306a36Sopenharmony_ci	if (uptodate) {
25762306a36Sopenharmony_ci		set_buffer_uptodate(bh);
25862306a36Sopenharmony_ci	} else {
25962306a36Sopenharmony_ci		clear_buffer_uptodate(bh);
26062306a36Sopenharmony_ci		buffer_io_error(bh, ", async page read");
26162306a36Sopenharmony_ci		folio_set_error(folio);
26262306a36Sopenharmony_ci	}
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ci	/*
26562306a36Sopenharmony_ci	 * Be _very_ careful from here on. Bad things can happen if
26662306a36Sopenharmony_ci	 * two buffer heads end IO at almost the same time and both
26762306a36Sopenharmony_ci	 * decide that the page is now completely done.
26862306a36Sopenharmony_ci	 */
26962306a36Sopenharmony_ci	first = folio_buffers(folio);
27062306a36Sopenharmony_ci	spin_lock_irqsave(&first->b_uptodate_lock, flags);
27162306a36Sopenharmony_ci	clear_buffer_async_read(bh);
27262306a36Sopenharmony_ci	unlock_buffer(bh);
27362306a36Sopenharmony_ci	tmp = bh;
27462306a36Sopenharmony_ci	do {
27562306a36Sopenharmony_ci		if (!buffer_uptodate(tmp))
27662306a36Sopenharmony_ci			folio_uptodate = 0;
27762306a36Sopenharmony_ci		if (buffer_async_read(tmp)) {
27862306a36Sopenharmony_ci			BUG_ON(!buffer_locked(tmp));
27962306a36Sopenharmony_ci			goto still_busy;
28062306a36Sopenharmony_ci		}
28162306a36Sopenharmony_ci		tmp = tmp->b_this_page;
28262306a36Sopenharmony_ci	} while (tmp != bh);
28362306a36Sopenharmony_ci	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
28462306a36Sopenharmony_ci
28562306a36Sopenharmony_ci	/*
28662306a36Sopenharmony_ci	 * If all of the buffers are uptodate then we can set the page
28762306a36Sopenharmony_ci	 * uptodate.
28862306a36Sopenharmony_ci	 */
28962306a36Sopenharmony_ci	if (folio_uptodate)
29062306a36Sopenharmony_ci		folio_mark_uptodate(folio);
29162306a36Sopenharmony_ci	folio_unlock(folio);
29262306a36Sopenharmony_ci	return;
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_cistill_busy:
29562306a36Sopenharmony_ci	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
29662306a36Sopenharmony_ci	return;
29762306a36Sopenharmony_ci}
29862306a36Sopenharmony_ci
29962306a36Sopenharmony_cistruct postprocess_bh_ctx {
30062306a36Sopenharmony_ci	struct work_struct work;
30162306a36Sopenharmony_ci	struct buffer_head *bh;
30262306a36Sopenharmony_ci};
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_cistatic void verify_bh(struct work_struct *work)
30562306a36Sopenharmony_ci{
30662306a36Sopenharmony_ci	struct postprocess_bh_ctx *ctx =
30762306a36Sopenharmony_ci		container_of(work, struct postprocess_bh_ctx, work);
30862306a36Sopenharmony_ci	struct buffer_head *bh = ctx->bh;
30962306a36Sopenharmony_ci	bool valid;
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ci	valid = fsverity_verify_blocks(bh->b_folio, bh->b_size, bh_offset(bh));
31262306a36Sopenharmony_ci	end_buffer_async_read(bh, valid);
31362306a36Sopenharmony_ci	kfree(ctx);
31462306a36Sopenharmony_ci}
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_cistatic bool need_fsverity(struct buffer_head *bh)
31762306a36Sopenharmony_ci{
31862306a36Sopenharmony_ci	struct folio *folio = bh->b_folio;
31962306a36Sopenharmony_ci	struct inode *inode = folio->mapping->host;
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci	return fsverity_active(inode) &&
32262306a36Sopenharmony_ci		/* needed by ext4 */
32362306a36Sopenharmony_ci		folio->index < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
32462306a36Sopenharmony_ci}
32562306a36Sopenharmony_ci
32662306a36Sopenharmony_cistatic void decrypt_bh(struct work_struct *work)
32762306a36Sopenharmony_ci{
32862306a36Sopenharmony_ci	struct postprocess_bh_ctx *ctx =
32962306a36Sopenharmony_ci		container_of(work, struct postprocess_bh_ctx, work);
33062306a36Sopenharmony_ci	struct buffer_head *bh = ctx->bh;
33162306a36Sopenharmony_ci	int err;
33262306a36Sopenharmony_ci
33362306a36Sopenharmony_ci	err = fscrypt_decrypt_pagecache_blocks(bh->b_folio, bh->b_size,
33462306a36Sopenharmony_ci					       bh_offset(bh));
33562306a36Sopenharmony_ci	if (err == 0 && need_fsverity(bh)) {
33662306a36Sopenharmony_ci		/*
33762306a36Sopenharmony_ci		 * We use different work queues for decryption and for verity
33862306a36Sopenharmony_ci		 * because verity may require reading metadata pages that need
33962306a36Sopenharmony_ci		 * decryption, and we shouldn't recurse to the same workqueue.
34062306a36Sopenharmony_ci		 */
34162306a36Sopenharmony_ci		INIT_WORK(&ctx->work, verify_bh);
34262306a36Sopenharmony_ci		fsverity_enqueue_verify_work(&ctx->work);
34362306a36Sopenharmony_ci		return;
34462306a36Sopenharmony_ci	}
34562306a36Sopenharmony_ci	end_buffer_async_read(bh, err == 0);
34662306a36Sopenharmony_ci	kfree(ctx);
34762306a36Sopenharmony_ci}
34862306a36Sopenharmony_ci
34962306a36Sopenharmony_ci/*
35062306a36Sopenharmony_ci * I/O completion handler for block_read_full_folio() - pages
35162306a36Sopenharmony_ci * which come unlocked at the end of I/O.
35262306a36Sopenharmony_ci */
35362306a36Sopenharmony_cistatic void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
35462306a36Sopenharmony_ci{
35562306a36Sopenharmony_ci	struct inode *inode = bh->b_folio->mapping->host;
35662306a36Sopenharmony_ci	bool decrypt = fscrypt_inode_uses_fs_layer_crypto(inode);
35762306a36Sopenharmony_ci	bool verify = need_fsverity(bh);
35862306a36Sopenharmony_ci
35962306a36Sopenharmony_ci	/* Decrypt (with fscrypt) and/or verify (with fsverity) if needed. */
36062306a36Sopenharmony_ci	if (uptodate && (decrypt || verify)) {
36162306a36Sopenharmony_ci		struct postprocess_bh_ctx *ctx =
36262306a36Sopenharmony_ci			kmalloc(sizeof(*ctx), GFP_ATOMIC);
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci		if (ctx) {
36562306a36Sopenharmony_ci			ctx->bh = bh;
36662306a36Sopenharmony_ci			if (decrypt) {
36762306a36Sopenharmony_ci				INIT_WORK(&ctx->work, decrypt_bh);
36862306a36Sopenharmony_ci				fscrypt_enqueue_decrypt_work(&ctx->work);
36962306a36Sopenharmony_ci			} else {
37062306a36Sopenharmony_ci				INIT_WORK(&ctx->work, verify_bh);
37162306a36Sopenharmony_ci				fsverity_enqueue_verify_work(&ctx->work);
37262306a36Sopenharmony_ci			}
37362306a36Sopenharmony_ci			return;
37462306a36Sopenharmony_ci		}
37562306a36Sopenharmony_ci		uptodate = 0;
37662306a36Sopenharmony_ci	}
37762306a36Sopenharmony_ci	end_buffer_async_read(bh, uptodate);
37862306a36Sopenharmony_ci}
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_ci/*
38162306a36Sopenharmony_ci * Completion handler for block_write_full_page() - pages which are unlocked
38262306a36Sopenharmony_ci * during I/O, and which have PageWriteback cleared upon I/O completion.
38362306a36Sopenharmony_ci */
38462306a36Sopenharmony_civoid end_buffer_async_write(struct buffer_head *bh, int uptodate)
38562306a36Sopenharmony_ci{
38662306a36Sopenharmony_ci	unsigned long flags;
38762306a36Sopenharmony_ci	struct buffer_head *first;
38862306a36Sopenharmony_ci	struct buffer_head *tmp;
38962306a36Sopenharmony_ci	struct folio *folio;
39062306a36Sopenharmony_ci
39162306a36Sopenharmony_ci	BUG_ON(!buffer_async_write(bh));
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_ci	folio = bh->b_folio;
39462306a36Sopenharmony_ci	if (uptodate) {
39562306a36Sopenharmony_ci		set_buffer_uptodate(bh);
39662306a36Sopenharmony_ci	} else {
39762306a36Sopenharmony_ci		buffer_io_error(bh, ", lost async page write");
39862306a36Sopenharmony_ci		mark_buffer_write_io_error(bh);
39962306a36Sopenharmony_ci		clear_buffer_uptodate(bh);
40062306a36Sopenharmony_ci		folio_set_error(folio);
40162306a36Sopenharmony_ci	}
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci	first = folio_buffers(folio);
40462306a36Sopenharmony_ci	spin_lock_irqsave(&first->b_uptodate_lock, flags);
40562306a36Sopenharmony_ci
40662306a36Sopenharmony_ci	clear_buffer_async_write(bh);
40762306a36Sopenharmony_ci	unlock_buffer(bh);
40862306a36Sopenharmony_ci	tmp = bh->b_this_page;
40962306a36Sopenharmony_ci	while (tmp != bh) {
41062306a36Sopenharmony_ci		if (buffer_async_write(tmp)) {
41162306a36Sopenharmony_ci			BUG_ON(!buffer_locked(tmp));
41262306a36Sopenharmony_ci			goto still_busy;
41362306a36Sopenharmony_ci		}
41462306a36Sopenharmony_ci		tmp = tmp->b_this_page;
41562306a36Sopenharmony_ci	}
41662306a36Sopenharmony_ci	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
41762306a36Sopenharmony_ci	folio_end_writeback(folio);
41862306a36Sopenharmony_ci	return;
41962306a36Sopenharmony_ci
42062306a36Sopenharmony_cistill_busy:
42162306a36Sopenharmony_ci	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
42262306a36Sopenharmony_ci	return;
42362306a36Sopenharmony_ci}
42462306a36Sopenharmony_ciEXPORT_SYMBOL(end_buffer_async_write);
42562306a36Sopenharmony_ci
42662306a36Sopenharmony_ci/*
42762306a36Sopenharmony_ci * If a page's buffers are under async readin (end_buffer_async_read
42862306a36Sopenharmony_ci * completion) then there is a possibility that another thread of
42962306a36Sopenharmony_ci * control could lock one of the buffers after it has completed
43062306a36Sopenharmony_ci * but while some of the other buffers have not completed.  This
43162306a36Sopenharmony_ci * locked buffer would confuse end_buffer_async_read() into not unlocking
43262306a36Sopenharmony_ci * the page.  So the absence of BH_Async_Read tells end_buffer_async_read()
43362306a36Sopenharmony_ci * that this buffer is not under async I/O.
43462306a36Sopenharmony_ci *
43562306a36Sopenharmony_ci * The page comes unlocked when it has no locked buffer_async buffers
43662306a36Sopenharmony_ci * left.
43762306a36Sopenharmony_ci *
43862306a36Sopenharmony_ci * PageLocked prevents anyone starting new async I/O reads any of
43962306a36Sopenharmony_ci * the buffers.
44062306a36Sopenharmony_ci *
44162306a36Sopenharmony_ci * PageWriteback is used to prevent simultaneous writeout of the same
44262306a36Sopenharmony_ci * page.
44362306a36Sopenharmony_ci *
44462306a36Sopenharmony_ci * PageLocked prevents anyone from starting writeback of a page which is
44562306a36Sopenharmony_ci * under read I/O (PageWriteback is only ever set against a locked page).
44662306a36Sopenharmony_ci */
44762306a36Sopenharmony_cistatic void mark_buffer_async_read(struct buffer_head *bh)
44862306a36Sopenharmony_ci{
44962306a36Sopenharmony_ci	bh->b_end_io = end_buffer_async_read_io;
45062306a36Sopenharmony_ci	set_buffer_async_read(bh);
45162306a36Sopenharmony_ci}
45262306a36Sopenharmony_ci
45362306a36Sopenharmony_cistatic void mark_buffer_async_write_endio(struct buffer_head *bh,
45462306a36Sopenharmony_ci					  bh_end_io_t *handler)
45562306a36Sopenharmony_ci{
45662306a36Sopenharmony_ci	bh->b_end_io = handler;
45762306a36Sopenharmony_ci	set_buffer_async_write(bh);
45862306a36Sopenharmony_ci}
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_civoid mark_buffer_async_write(struct buffer_head *bh)
46162306a36Sopenharmony_ci{
46262306a36Sopenharmony_ci	mark_buffer_async_write_endio(bh, end_buffer_async_write);
46362306a36Sopenharmony_ci}
46462306a36Sopenharmony_ciEXPORT_SYMBOL(mark_buffer_async_write);
46562306a36Sopenharmony_ci
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_ci/*
46862306a36Sopenharmony_ci * fs/buffer.c contains helper functions for buffer-backed address space's
46962306a36Sopenharmony_ci * fsync functions.  A common requirement for buffer-based filesystems is
47062306a36Sopenharmony_ci * that certain data from the backing blockdev needs to be written out for
47162306a36Sopenharmony_ci * a successful fsync().  For example, ext2 indirect blocks need to be
47262306a36Sopenharmony_ci * written back and waited upon before fsync() returns.
47362306a36Sopenharmony_ci *
47462306a36Sopenharmony_ci * The functions mark_buffer_inode_dirty(), fsync_inode_buffers(),
47562306a36Sopenharmony_ci * inode_has_buffers() and invalidate_inode_buffers() are provided for the
47662306a36Sopenharmony_ci * management of a list of dependent buffers at ->i_mapping->private_list.
47762306a36Sopenharmony_ci *
47862306a36Sopenharmony_ci * Locking is a little subtle: try_to_free_buffers() will remove buffers
47962306a36Sopenharmony_ci * from their controlling inode's queue when they are being freed.  But
48062306a36Sopenharmony_ci * try_to_free_buffers() will be operating against the *blockdev* mapping
48162306a36Sopenharmony_ci * at the time, not against the S_ISREG file which depends on those buffers.
48262306a36Sopenharmony_ci * So the locking for private_list is via the private_lock in the address_space
48362306a36Sopenharmony_ci * which backs the buffers.  Which is different from the address_space
48462306a36Sopenharmony_ci * against which the buffers are listed.  So for a particular address_space,
48562306a36Sopenharmony_ci * mapping->private_lock does *not* protect mapping->private_list!  In fact,
48662306a36Sopenharmony_ci * mapping->private_list will always be protected by the backing blockdev's
48762306a36Sopenharmony_ci * ->private_lock.
48862306a36Sopenharmony_ci *
48962306a36Sopenharmony_ci * Which introduces a requirement: all buffers on an address_space's
49062306a36Sopenharmony_ci * ->private_list must be from the same address_space: the blockdev's.
49162306a36Sopenharmony_ci *
49262306a36Sopenharmony_ci * address_spaces which do not place buffers at ->private_list via these
49362306a36Sopenharmony_ci * utility functions are free to use private_lock and private_list for
49462306a36Sopenharmony_ci * whatever they want.  The only requirement is that list_empty(private_list)
49562306a36Sopenharmony_ci * be true at clear_inode() time.
49662306a36Sopenharmony_ci *
49762306a36Sopenharmony_ci * FIXME: clear_inode should not call invalidate_inode_buffers().  The
49862306a36Sopenharmony_ci * filesystems should do that.  invalidate_inode_buffers() should just go
49962306a36Sopenharmony_ci * BUG_ON(!list_empty).
50062306a36Sopenharmony_ci *
50162306a36Sopenharmony_ci * FIXME: mark_buffer_dirty_inode() is a data-plane operation.  It should
50262306a36Sopenharmony_ci * take an address_space, not an inode.  And it should be called
50362306a36Sopenharmony_ci * mark_buffer_dirty_fsync() to clearly define why those buffers are being
50462306a36Sopenharmony_ci * queued up.
50562306a36Sopenharmony_ci *
50662306a36Sopenharmony_ci * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the
50762306a36Sopenharmony_ci * list if it is already on a list.  Because if the buffer is on a list,
50862306a36Sopenharmony_ci * it *must* already be on the right one.  If not, the filesystem is being
50962306a36Sopenharmony_ci * silly.  This will save a ton of locking.  But first we have to ensure
51062306a36Sopenharmony_ci * that buffers are taken *off* the old inode's list when they are freed
51162306a36Sopenharmony_ci * (presumably in truncate).  That requires careful auditing of all
51262306a36Sopenharmony_ci * filesystems (do it inside bforget()).  It could also be done by bringing
51362306a36Sopenharmony_ci * b_inode back.
51462306a36Sopenharmony_ci */
51562306a36Sopenharmony_ci
51662306a36Sopenharmony_ci/*
51762306a36Sopenharmony_ci * The buffer's backing address_space's private_lock must be held
51862306a36Sopenharmony_ci */
51962306a36Sopenharmony_cistatic void __remove_assoc_queue(struct buffer_head *bh)
52062306a36Sopenharmony_ci{
52162306a36Sopenharmony_ci	list_del_init(&bh->b_assoc_buffers);
52262306a36Sopenharmony_ci	WARN_ON(!bh->b_assoc_map);
52362306a36Sopenharmony_ci	bh->b_assoc_map = NULL;
52462306a36Sopenharmony_ci}
52562306a36Sopenharmony_ci
52662306a36Sopenharmony_ciint inode_has_buffers(struct inode *inode)
52762306a36Sopenharmony_ci{
52862306a36Sopenharmony_ci	return !list_empty(&inode->i_data.private_list);
52962306a36Sopenharmony_ci}
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_ci/*
53262306a36Sopenharmony_ci * osync is designed to support O_SYNC io.  It waits synchronously for
53362306a36Sopenharmony_ci * all already-submitted IO to complete, but does not queue any new
53462306a36Sopenharmony_ci * writes to the disk.
53562306a36Sopenharmony_ci *
53662306a36Sopenharmony_ci * To do O_SYNC writes, just queue the buffer writes with write_dirty_buffer
53762306a36Sopenharmony_ci * as you dirty the buffers, and then use osync_inode_buffers to wait for
53862306a36Sopenharmony_ci * completion.  Any other dirty buffers which are not yet queued for
53962306a36Sopenharmony_ci * write will not be flushed to disk by the osync.
54062306a36Sopenharmony_ci */
54162306a36Sopenharmony_cistatic int osync_buffers_list(spinlock_t *lock, struct list_head *list)
54262306a36Sopenharmony_ci{
54362306a36Sopenharmony_ci	struct buffer_head *bh;
54462306a36Sopenharmony_ci	struct list_head *p;
54562306a36Sopenharmony_ci	int err = 0;
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_ci	spin_lock(lock);
54862306a36Sopenharmony_cirepeat:
54962306a36Sopenharmony_ci	list_for_each_prev(p, list) {
55062306a36Sopenharmony_ci		bh = BH_ENTRY(p);
55162306a36Sopenharmony_ci		if (buffer_locked(bh)) {
55262306a36Sopenharmony_ci			get_bh(bh);
55362306a36Sopenharmony_ci			spin_unlock(lock);
55462306a36Sopenharmony_ci			wait_on_buffer(bh);
55562306a36Sopenharmony_ci			if (!buffer_uptodate(bh))
55662306a36Sopenharmony_ci				err = -EIO;
55762306a36Sopenharmony_ci			brelse(bh);
55862306a36Sopenharmony_ci			spin_lock(lock);
55962306a36Sopenharmony_ci			goto repeat;
56062306a36Sopenharmony_ci		}
56162306a36Sopenharmony_ci	}
56262306a36Sopenharmony_ci	spin_unlock(lock);
56362306a36Sopenharmony_ci	return err;
56462306a36Sopenharmony_ci}
56562306a36Sopenharmony_ci
56662306a36Sopenharmony_ci/**
56762306a36Sopenharmony_ci * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
56862306a36Sopenharmony_ci * @mapping: the mapping which wants those buffers written
56962306a36Sopenharmony_ci *
57062306a36Sopenharmony_ci * Starts I/O against the buffers at mapping->private_list, and waits upon
57162306a36Sopenharmony_ci * that I/O.
57262306a36Sopenharmony_ci *
57362306a36Sopenharmony_ci * Basically, this is a convenience function for fsync().
57462306a36Sopenharmony_ci * @mapping is a file or directory which needs those buffers to be written for
57562306a36Sopenharmony_ci * a successful fsync().
57662306a36Sopenharmony_ci */
57762306a36Sopenharmony_ciint sync_mapping_buffers(struct address_space *mapping)
57862306a36Sopenharmony_ci{
57962306a36Sopenharmony_ci	struct address_space *buffer_mapping = mapping->private_data;
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_ci	if (buffer_mapping == NULL || list_empty(&mapping->private_list))
58262306a36Sopenharmony_ci		return 0;
58362306a36Sopenharmony_ci
58462306a36Sopenharmony_ci	return fsync_buffers_list(&buffer_mapping->private_lock,
58562306a36Sopenharmony_ci					&mapping->private_list);
58662306a36Sopenharmony_ci}
58762306a36Sopenharmony_ciEXPORT_SYMBOL(sync_mapping_buffers);
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ci/**
59062306a36Sopenharmony_ci * generic_buffers_fsync_noflush - generic buffer fsync implementation
59162306a36Sopenharmony_ci * for simple filesystems with no inode lock
59262306a36Sopenharmony_ci *
59362306a36Sopenharmony_ci * @file:	file to synchronize
59462306a36Sopenharmony_ci * @start:	start offset in bytes
59562306a36Sopenharmony_ci * @end:	end offset in bytes (inclusive)
59662306a36Sopenharmony_ci * @datasync:	only synchronize essential metadata if true
59762306a36Sopenharmony_ci *
59862306a36Sopenharmony_ci * This is a generic implementation of the fsync method for simple
59962306a36Sopenharmony_ci * filesystems which track all non-inode metadata in the buffers list
60062306a36Sopenharmony_ci * hanging off the address_space structure.
60162306a36Sopenharmony_ci */
60262306a36Sopenharmony_ciint generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end,
60362306a36Sopenharmony_ci				  bool datasync)
60462306a36Sopenharmony_ci{
60562306a36Sopenharmony_ci	struct inode *inode = file->f_mapping->host;
60662306a36Sopenharmony_ci	int err;
60762306a36Sopenharmony_ci	int ret;
60862306a36Sopenharmony_ci
60962306a36Sopenharmony_ci	err = file_write_and_wait_range(file, start, end);
61062306a36Sopenharmony_ci	if (err)
61162306a36Sopenharmony_ci		return err;
61262306a36Sopenharmony_ci
61362306a36Sopenharmony_ci	ret = sync_mapping_buffers(inode->i_mapping);
61462306a36Sopenharmony_ci	if (!(inode->i_state & I_DIRTY_ALL))
61562306a36Sopenharmony_ci		goto out;
61662306a36Sopenharmony_ci	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
61762306a36Sopenharmony_ci		goto out;
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_ci	err = sync_inode_metadata(inode, 1);
62062306a36Sopenharmony_ci	if (ret == 0)
62162306a36Sopenharmony_ci		ret = err;
62262306a36Sopenharmony_ci
62362306a36Sopenharmony_ciout:
62462306a36Sopenharmony_ci	/* check and advance again to catch errors after syncing out buffers */
62562306a36Sopenharmony_ci	err = file_check_and_advance_wb_err(file);
62662306a36Sopenharmony_ci	if (ret == 0)
62762306a36Sopenharmony_ci		ret = err;
62862306a36Sopenharmony_ci	return ret;
62962306a36Sopenharmony_ci}
63062306a36Sopenharmony_ciEXPORT_SYMBOL(generic_buffers_fsync_noflush);
63162306a36Sopenharmony_ci
63262306a36Sopenharmony_ci/**
63362306a36Sopenharmony_ci * generic_buffers_fsync - generic buffer fsync implementation
63462306a36Sopenharmony_ci * for simple filesystems with no inode lock
63562306a36Sopenharmony_ci *
63662306a36Sopenharmony_ci * @file:	file to synchronize
63762306a36Sopenharmony_ci * @start:	start offset in bytes
63862306a36Sopenharmony_ci * @end:	end offset in bytes (inclusive)
63962306a36Sopenharmony_ci * @datasync:	only synchronize essential metadata if true
64062306a36Sopenharmony_ci *
64162306a36Sopenharmony_ci * This is a generic implementation of the fsync method for simple
64262306a36Sopenharmony_ci * filesystems which track all non-inode metadata in the buffers list
64362306a36Sopenharmony_ci * hanging off the address_space structure. This also makes sure that
64462306a36Sopenharmony_ci * a device cache flush operation is called at the end.
64562306a36Sopenharmony_ci */
64662306a36Sopenharmony_ciint generic_buffers_fsync(struct file *file, loff_t start, loff_t end,
64762306a36Sopenharmony_ci			  bool datasync)
64862306a36Sopenharmony_ci{
64962306a36Sopenharmony_ci	struct inode *inode = file->f_mapping->host;
65062306a36Sopenharmony_ci	int ret;
65162306a36Sopenharmony_ci
65262306a36Sopenharmony_ci	ret = generic_buffers_fsync_noflush(file, start, end, datasync);
65362306a36Sopenharmony_ci	if (!ret)
65462306a36Sopenharmony_ci		ret = blkdev_issue_flush(inode->i_sb->s_bdev);
65562306a36Sopenharmony_ci	return ret;
65662306a36Sopenharmony_ci}
65762306a36Sopenharmony_ciEXPORT_SYMBOL(generic_buffers_fsync);
65862306a36Sopenharmony_ci
65962306a36Sopenharmony_ci/*
66062306a36Sopenharmony_ci * Called when we've recently written block `bblock', and it is known that
66162306a36Sopenharmony_ci * `bblock' was for a buffer_boundary() buffer.  This means that the block at
66262306a36Sopenharmony_ci * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
66362306a36Sopenharmony_ci * dirty, schedule it for IO.  So that indirects merge nicely with their data.
66462306a36Sopenharmony_ci */
66562306a36Sopenharmony_civoid write_boundary_block(struct block_device *bdev,
66662306a36Sopenharmony_ci			sector_t bblock, unsigned blocksize)
66762306a36Sopenharmony_ci{
66862306a36Sopenharmony_ci	struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
66962306a36Sopenharmony_ci	if (bh) {
67062306a36Sopenharmony_ci		if (buffer_dirty(bh))
67162306a36Sopenharmony_ci			write_dirty_buffer(bh, 0);
67262306a36Sopenharmony_ci		put_bh(bh);
67362306a36Sopenharmony_ci	}
67462306a36Sopenharmony_ci}
67562306a36Sopenharmony_ci
67662306a36Sopenharmony_civoid mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
67762306a36Sopenharmony_ci{
67862306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
67962306a36Sopenharmony_ci	struct address_space *buffer_mapping = bh->b_folio->mapping;
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci	mark_buffer_dirty(bh);
68262306a36Sopenharmony_ci	if (!mapping->private_data) {
68362306a36Sopenharmony_ci		mapping->private_data = buffer_mapping;
68462306a36Sopenharmony_ci	} else {
68562306a36Sopenharmony_ci		BUG_ON(mapping->private_data != buffer_mapping);
68662306a36Sopenharmony_ci	}
68762306a36Sopenharmony_ci	if (!bh->b_assoc_map) {
68862306a36Sopenharmony_ci		spin_lock(&buffer_mapping->private_lock);
68962306a36Sopenharmony_ci		list_move_tail(&bh->b_assoc_buffers,
69062306a36Sopenharmony_ci				&mapping->private_list);
69162306a36Sopenharmony_ci		bh->b_assoc_map = mapping;
69262306a36Sopenharmony_ci		spin_unlock(&buffer_mapping->private_lock);
69362306a36Sopenharmony_ci	}
69462306a36Sopenharmony_ci}
69562306a36Sopenharmony_ciEXPORT_SYMBOL(mark_buffer_dirty_inode);
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ci/*
69862306a36Sopenharmony_ci * Add a page to the dirty page list.
69962306a36Sopenharmony_ci *
70062306a36Sopenharmony_ci * It is a sad fact of life that this function is called from several places
70162306a36Sopenharmony_ci * deeply under spinlocking.  It may not sleep.
70262306a36Sopenharmony_ci *
70362306a36Sopenharmony_ci * If the page has buffers, the uptodate buffers are set dirty, to preserve
70462306a36Sopenharmony_ci * dirty-state coherency between the page and the buffers.  It the page does
70562306a36Sopenharmony_ci * not have buffers then when they are later attached they will all be set
70662306a36Sopenharmony_ci * dirty.
70762306a36Sopenharmony_ci *
70862306a36Sopenharmony_ci * The buffers are dirtied before the page is dirtied.  There's a small race
70962306a36Sopenharmony_ci * window in which a writepage caller may see the page cleanness but not the
71062306a36Sopenharmony_ci * buffer dirtiness.  That's fine.  If this code were to set the page dirty
71162306a36Sopenharmony_ci * before the buffers, a concurrent writepage caller could clear the page dirty
71262306a36Sopenharmony_ci * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean
71362306a36Sopenharmony_ci * page on the dirty page list.
71462306a36Sopenharmony_ci *
71562306a36Sopenharmony_ci * We use private_lock to lock against try_to_free_buffers while using the
71662306a36Sopenharmony_ci * page's buffer list.  Also use this to protect against clean buffers being
71762306a36Sopenharmony_ci * added to the page after it was set dirty.
71862306a36Sopenharmony_ci *
71962306a36Sopenharmony_ci * FIXME: may need to call ->reservepage here as well.  That's rather up to the
72062306a36Sopenharmony_ci * address_space though.
72162306a36Sopenharmony_ci */
72262306a36Sopenharmony_cibool block_dirty_folio(struct address_space *mapping, struct folio *folio)
72362306a36Sopenharmony_ci{
72462306a36Sopenharmony_ci	struct buffer_head *head;
72562306a36Sopenharmony_ci	bool newly_dirty;
72662306a36Sopenharmony_ci
72762306a36Sopenharmony_ci	spin_lock(&mapping->private_lock);
72862306a36Sopenharmony_ci	head = folio_buffers(folio);
72962306a36Sopenharmony_ci	if (head) {
73062306a36Sopenharmony_ci		struct buffer_head *bh = head;
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci		do {
73362306a36Sopenharmony_ci			set_buffer_dirty(bh);
73462306a36Sopenharmony_ci			bh = bh->b_this_page;
73562306a36Sopenharmony_ci		} while (bh != head);
73662306a36Sopenharmony_ci	}
73762306a36Sopenharmony_ci	/*
73862306a36Sopenharmony_ci	 * Lock out page's memcg migration to keep PageDirty
73962306a36Sopenharmony_ci	 * synchronized with per-memcg dirty page counters.
74062306a36Sopenharmony_ci	 */
74162306a36Sopenharmony_ci	folio_memcg_lock(folio);
74262306a36Sopenharmony_ci	newly_dirty = !folio_test_set_dirty(folio);
74362306a36Sopenharmony_ci	spin_unlock(&mapping->private_lock);
74462306a36Sopenharmony_ci
74562306a36Sopenharmony_ci	if (newly_dirty)
74662306a36Sopenharmony_ci		__folio_mark_dirty(folio, mapping, 1);
74762306a36Sopenharmony_ci
74862306a36Sopenharmony_ci	folio_memcg_unlock(folio);
74962306a36Sopenharmony_ci
75062306a36Sopenharmony_ci	if (newly_dirty)
75162306a36Sopenharmony_ci		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
75262306a36Sopenharmony_ci
75362306a36Sopenharmony_ci	return newly_dirty;
75462306a36Sopenharmony_ci}
75562306a36Sopenharmony_ciEXPORT_SYMBOL(block_dirty_folio);
75662306a36Sopenharmony_ci
75762306a36Sopenharmony_ci/*
75862306a36Sopenharmony_ci * Write out and wait upon a list of buffers.
75962306a36Sopenharmony_ci *
76062306a36Sopenharmony_ci * We have conflicting pressures: we want to make sure that all
76162306a36Sopenharmony_ci * initially dirty buffers get waited on, but that any subsequently
76262306a36Sopenharmony_ci * dirtied buffers don't.  After all, we don't want fsync to last
76362306a36Sopenharmony_ci * forever if somebody is actively writing to the file.
76462306a36Sopenharmony_ci *
76562306a36Sopenharmony_ci * Do this in two main stages: first we copy dirty buffers to a
76662306a36Sopenharmony_ci * temporary inode list, queueing the writes as we go.  Then we clean
76762306a36Sopenharmony_ci * up, waiting for those writes to complete.
76862306a36Sopenharmony_ci *
76962306a36Sopenharmony_ci * During this second stage, any subsequent updates to the file may end
77062306a36Sopenharmony_ci * up refiling the buffer on the original inode's dirty list again, so
77162306a36Sopenharmony_ci * there is a chance we will end up with a buffer queued for write but
77262306a36Sopenharmony_ci * not yet completed on that list.  So, as a final cleanup we go through
77362306a36Sopenharmony_ci * the osync code to catch these locked, dirty buffers without requeuing
77462306a36Sopenharmony_ci * any newly dirty buffers for write.
77562306a36Sopenharmony_ci */
77662306a36Sopenharmony_cistatic int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
77762306a36Sopenharmony_ci{
77862306a36Sopenharmony_ci	struct buffer_head *bh;
77962306a36Sopenharmony_ci	struct list_head tmp;
78062306a36Sopenharmony_ci	struct address_space *mapping;
78162306a36Sopenharmony_ci	int err = 0, err2;
78262306a36Sopenharmony_ci	struct blk_plug plug;
78362306a36Sopenharmony_ci
78462306a36Sopenharmony_ci	INIT_LIST_HEAD(&tmp);
78562306a36Sopenharmony_ci	blk_start_plug(&plug);
78662306a36Sopenharmony_ci
78762306a36Sopenharmony_ci	spin_lock(lock);
78862306a36Sopenharmony_ci	while (!list_empty(list)) {
78962306a36Sopenharmony_ci		bh = BH_ENTRY(list->next);
79062306a36Sopenharmony_ci		mapping = bh->b_assoc_map;
79162306a36Sopenharmony_ci		__remove_assoc_queue(bh);
79262306a36Sopenharmony_ci		/* Avoid race with mark_buffer_dirty_inode() which does
79362306a36Sopenharmony_ci		 * a lockless check and we rely on seeing the dirty bit */
79462306a36Sopenharmony_ci		smp_mb();
79562306a36Sopenharmony_ci		if (buffer_dirty(bh) || buffer_locked(bh)) {
79662306a36Sopenharmony_ci			list_add(&bh->b_assoc_buffers, &tmp);
79762306a36Sopenharmony_ci			bh->b_assoc_map = mapping;
79862306a36Sopenharmony_ci			if (buffer_dirty(bh)) {
79962306a36Sopenharmony_ci				get_bh(bh);
80062306a36Sopenharmony_ci				spin_unlock(lock);
80162306a36Sopenharmony_ci				/*
80262306a36Sopenharmony_ci				 * Ensure any pending I/O completes so that
80362306a36Sopenharmony_ci				 * write_dirty_buffer() actually writes the
80462306a36Sopenharmony_ci				 * current contents - it is a noop if I/O is
80562306a36Sopenharmony_ci				 * still in flight on potentially older
80662306a36Sopenharmony_ci				 * contents.
80762306a36Sopenharmony_ci				 */
80862306a36Sopenharmony_ci				write_dirty_buffer(bh, REQ_SYNC);
80962306a36Sopenharmony_ci
81062306a36Sopenharmony_ci				/*
81162306a36Sopenharmony_ci				 * Kick off IO for the previous mapping. Note
81262306a36Sopenharmony_ci				 * that we will not run the very last mapping,
81362306a36Sopenharmony_ci				 * wait_on_buffer() will do that for us
81462306a36Sopenharmony_ci				 * through sync_buffer().
81562306a36Sopenharmony_ci				 */
81662306a36Sopenharmony_ci				brelse(bh);
81762306a36Sopenharmony_ci				spin_lock(lock);
81862306a36Sopenharmony_ci			}
81962306a36Sopenharmony_ci		}
82062306a36Sopenharmony_ci	}
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_ci	spin_unlock(lock);
82362306a36Sopenharmony_ci	blk_finish_plug(&plug);
82462306a36Sopenharmony_ci	spin_lock(lock);
82562306a36Sopenharmony_ci
82662306a36Sopenharmony_ci	while (!list_empty(&tmp)) {
82762306a36Sopenharmony_ci		bh = BH_ENTRY(tmp.prev);
82862306a36Sopenharmony_ci		get_bh(bh);
82962306a36Sopenharmony_ci		mapping = bh->b_assoc_map;
83062306a36Sopenharmony_ci		__remove_assoc_queue(bh);
83162306a36Sopenharmony_ci		/* Avoid race with mark_buffer_dirty_inode() which does
83262306a36Sopenharmony_ci		 * a lockless check and we rely on seeing the dirty bit */
83362306a36Sopenharmony_ci		smp_mb();
83462306a36Sopenharmony_ci		if (buffer_dirty(bh)) {
83562306a36Sopenharmony_ci			list_add(&bh->b_assoc_buffers,
83662306a36Sopenharmony_ci				 &mapping->private_list);
83762306a36Sopenharmony_ci			bh->b_assoc_map = mapping;
83862306a36Sopenharmony_ci		}
83962306a36Sopenharmony_ci		spin_unlock(lock);
84062306a36Sopenharmony_ci		wait_on_buffer(bh);
84162306a36Sopenharmony_ci		if (!buffer_uptodate(bh))
84262306a36Sopenharmony_ci			err = -EIO;
84362306a36Sopenharmony_ci		brelse(bh);
84462306a36Sopenharmony_ci		spin_lock(lock);
84562306a36Sopenharmony_ci	}
84662306a36Sopenharmony_ci
84762306a36Sopenharmony_ci	spin_unlock(lock);
84862306a36Sopenharmony_ci	err2 = osync_buffers_list(lock, list);
84962306a36Sopenharmony_ci	if (err)
85062306a36Sopenharmony_ci		return err;
85162306a36Sopenharmony_ci	else
85262306a36Sopenharmony_ci		return err2;
85362306a36Sopenharmony_ci}
85462306a36Sopenharmony_ci
85562306a36Sopenharmony_ci/*
85662306a36Sopenharmony_ci * Invalidate any and all dirty buffers on a given inode.  We are
85762306a36Sopenharmony_ci * probably unmounting the fs, but that doesn't mean we have already
85862306a36Sopenharmony_ci * done a sync().  Just drop the buffers from the inode list.
85962306a36Sopenharmony_ci *
86062306a36Sopenharmony_ci * NOTE: we take the inode's blockdev's mapping's private_lock.  Which
86162306a36Sopenharmony_ci * assumes that all the buffers are against the blockdev.  Not true
86262306a36Sopenharmony_ci * for reiserfs.
86362306a36Sopenharmony_ci */
86462306a36Sopenharmony_civoid invalidate_inode_buffers(struct inode *inode)
86562306a36Sopenharmony_ci{
86662306a36Sopenharmony_ci	if (inode_has_buffers(inode)) {
86762306a36Sopenharmony_ci		struct address_space *mapping = &inode->i_data;
86862306a36Sopenharmony_ci		struct list_head *list = &mapping->private_list;
86962306a36Sopenharmony_ci		struct address_space *buffer_mapping = mapping->private_data;
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci		spin_lock(&buffer_mapping->private_lock);
87262306a36Sopenharmony_ci		while (!list_empty(list))
87362306a36Sopenharmony_ci			__remove_assoc_queue(BH_ENTRY(list->next));
87462306a36Sopenharmony_ci		spin_unlock(&buffer_mapping->private_lock);
87562306a36Sopenharmony_ci	}
87662306a36Sopenharmony_ci}
87762306a36Sopenharmony_ciEXPORT_SYMBOL(invalidate_inode_buffers);
87862306a36Sopenharmony_ci
87962306a36Sopenharmony_ci/*
88062306a36Sopenharmony_ci * Remove any clean buffers from the inode's buffer list.  This is called
88162306a36Sopenharmony_ci * when we're trying to free the inode itself.  Those buffers can pin it.
88262306a36Sopenharmony_ci *
88362306a36Sopenharmony_ci * Returns true if all buffers were removed.
88462306a36Sopenharmony_ci */
88562306a36Sopenharmony_ciint remove_inode_buffers(struct inode *inode)
88662306a36Sopenharmony_ci{
88762306a36Sopenharmony_ci	int ret = 1;
88862306a36Sopenharmony_ci
88962306a36Sopenharmony_ci	if (inode_has_buffers(inode)) {
89062306a36Sopenharmony_ci		struct address_space *mapping = &inode->i_data;
89162306a36Sopenharmony_ci		struct list_head *list = &mapping->private_list;
89262306a36Sopenharmony_ci		struct address_space *buffer_mapping = mapping->private_data;
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_ci		spin_lock(&buffer_mapping->private_lock);
89562306a36Sopenharmony_ci		while (!list_empty(list)) {
89662306a36Sopenharmony_ci			struct buffer_head *bh = BH_ENTRY(list->next);
89762306a36Sopenharmony_ci			if (buffer_dirty(bh)) {
89862306a36Sopenharmony_ci				ret = 0;
89962306a36Sopenharmony_ci				break;
90062306a36Sopenharmony_ci			}
90162306a36Sopenharmony_ci			__remove_assoc_queue(bh);
90262306a36Sopenharmony_ci		}
90362306a36Sopenharmony_ci		spin_unlock(&buffer_mapping->private_lock);
90462306a36Sopenharmony_ci	}
90562306a36Sopenharmony_ci	return ret;
90662306a36Sopenharmony_ci}
90762306a36Sopenharmony_ci
90862306a36Sopenharmony_ci/*
90962306a36Sopenharmony_ci * Create the appropriate buffers when given a folio for data area and
91062306a36Sopenharmony_ci * the size of each buffer.. Use the bh->b_this_page linked list to
91162306a36Sopenharmony_ci * follow the buffers created.  Return NULL if unable to create more
91262306a36Sopenharmony_ci * buffers.
91362306a36Sopenharmony_ci *
91462306a36Sopenharmony_ci * The retry flag is used to differentiate async IO (paging, swapping)
91562306a36Sopenharmony_ci * which may not fail from ordinary buffer allocations.
91662306a36Sopenharmony_ci */
91762306a36Sopenharmony_cistruct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size,
91862306a36Sopenharmony_ci					bool retry)
91962306a36Sopenharmony_ci{
92062306a36Sopenharmony_ci	struct buffer_head *bh, *head;
92162306a36Sopenharmony_ci	gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
92262306a36Sopenharmony_ci	long offset;
92362306a36Sopenharmony_ci	struct mem_cgroup *memcg, *old_memcg;
92462306a36Sopenharmony_ci
92562306a36Sopenharmony_ci	if (retry)
92662306a36Sopenharmony_ci		gfp |= __GFP_NOFAIL;
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_ci	/* The folio lock pins the memcg */
92962306a36Sopenharmony_ci	memcg = folio_memcg(folio);
93062306a36Sopenharmony_ci	old_memcg = set_active_memcg(memcg);
93162306a36Sopenharmony_ci
93262306a36Sopenharmony_ci	head = NULL;
93362306a36Sopenharmony_ci	offset = folio_size(folio);
93462306a36Sopenharmony_ci	while ((offset -= size) >= 0) {
93562306a36Sopenharmony_ci		bh = alloc_buffer_head(gfp);
93662306a36Sopenharmony_ci		if (!bh)
93762306a36Sopenharmony_ci			goto no_grow;
93862306a36Sopenharmony_ci
93962306a36Sopenharmony_ci		bh->b_this_page = head;
94062306a36Sopenharmony_ci		bh->b_blocknr = -1;
94162306a36Sopenharmony_ci		head = bh;
94262306a36Sopenharmony_ci
94362306a36Sopenharmony_ci		bh->b_size = size;
94462306a36Sopenharmony_ci
94562306a36Sopenharmony_ci		/* Link the buffer to its folio */
94662306a36Sopenharmony_ci		folio_set_bh(bh, folio, offset);
94762306a36Sopenharmony_ci	}
94862306a36Sopenharmony_ciout:
94962306a36Sopenharmony_ci	set_active_memcg(old_memcg);
95062306a36Sopenharmony_ci	return head;
95162306a36Sopenharmony_ci/*
95262306a36Sopenharmony_ci * In case anything failed, we just free everything we got.
95362306a36Sopenharmony_ci */
95462306a36Sopenharmony_cino_grow:
95562306a36Sopenharmony_ci	if (head) {
95662306a36Sopenharmony_ci		do {
95762306a36Sopenharmony_ci			bh = head;
95862306a36Sopenharmony_ci			head = head->b_this_page;
95962306a36Sopenharmony_ci			free_buffer_head(bh);
96062306a36Sopenharmony_ci		} while (head);
96162306a36Sopenharmony_ci	}
96262306a36Sopenharmony_ci
96362306a36Sopenharmony_ci	goto out;
96462306a36Sopenharmony_ci}
96562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(folio_alloc_buffers);
96662306a36Sopenharmony_ci
96762306a36Sopenharmony_cistruct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
96862306a36Sopenharmony_ci				       bool retry)
96962306a36Sopenharmony_ci{
97062306a36Sopenharmony_ci	return folio_alloc_buffers(page_folio(page), size, retry);
97162306a36Sopenharmony_ci}
97262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(alloc_page_buffers);
97362306a36Sopenharmony_ci
97462306a36Sopenharmony_cistatic inline void link_dev_buffers(struct folio *folio,
97562306a36Sopenharmony_ci		struct buffer_head *head)
97662306a36Sopenharmony_ci{
97762306a36Sopenharmony_ci	struct buffer_head *bh, *tail;
97862306a36Sopenharmony_ci
97962306a36Sopenharmony_ci	bh = head;
98062306a36Sopenharmony_ci	do {
98162306a36Sopenharmony_ci		tail = bh;
98262306a36Sopenharmony_ci		bh = bh->b_this_page;
98362306a36Sopenharmony_ci	} while (bh);
98462306a36Sopenharmony_ci	tail->b_this_page = head;
98562306a36Sopenharmony_ci	folio_attach_private(folio, head);
98662306a36Sopenharmony_ci}
98762306a36Sopenharmony_ci
98862306a36Sopenharmony_cistatic sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
98962306a36Sopenharmony_ci{
99062306a36Sopenharmony_ci	sector_t retval = ~((sector_t)0);
99162306a36Sopenharmony_ci	loff_t sz = bdev_nr_bytes(bdev);
99262306a36Sopenharmony_ci
99362306a36Sopenharmony_ci	if (sz) {
99462306a36Sopenharmony_ci		unsigned int sizebits = blksize_bits(size);
99562306a36Sopenharmony_ci		retval = (sz >> sizebits);
99662306a36Sopenharmony_ci	}
99762306a36Sopenharmony_ci	return retval;
99862306a36Sopenharmony_ci}
99962306a36Sopenharmony_ci
100062306a36Sopenharmony_ci/*
100162306a36Sopenharmony_ci * Initialise the state of a blockdev folio's buffers.
100262306a36Sopenharmony_ci */
100362306a36Sopenharmony_cistatic sector_t folio_init_buffers(struct folio *folio,
100462306a36Sopenharmony_ci		struct block_device *bdev, sector_t block, int size)
100562306a36Sopenharmony_ci{
100662306a36Sopenharmony_ci	struct buffer_head *head = folio_buffers(folio);
100762306a36Sopenharmony_ci	struct buffer_head *bh = head;
100862306a36Sopenharmony_ci	bool uptodate = folio_test_uptodate(folio);
100962306a36Sopenharmony_ci	sector_t end_block = blkdev_max_block(bdev, size);
101062306a36Sopenharmony_ci
101162306a36Sopenharmony_ci	do {
101262306a36Sopenharmony_ci		if (!buffer_mapped(bh)) {
101362306a36Sopenharmony_ci			bh->b_end_io = NULL;
101462306a36Sopenharmony_ci			bh->b_private = NULL;
101562306a36Sopenharmony_ci			bh->b_bdev = bdev;
101662306a36Sopenharmony_ci			bh->b_blocknr = block;
101762306a36Sopenharmony_ci			if (uptodate)
101862306a36Sopenharmony_ci				set_buffer_uptodate(bh);
101962306a36Sopenharmony_ci			if (block < end_block)
102062306a36Sopenharmony_ci				set_buffer_mapped(bh);
102162306a36Sopenharmony_ci		}
102262306a36Sopenharmony_ci		block++;
102362306a36Sopenharmony_ci		bh = bh->b_this_page;
102462306a36Sopenharmony_ci	} while (bh != head);
102562306a36Sopenharmony_ci
102662306a36Sopenharmony_ci	/*
102762306a36Sopenharmony_ci	 * Caller needs to validate requested block against end of device.
102862306a36Sopenharmony_ci	 */
102962306a36Sopenharmony_ci	return end_block;
103062306a36Sopenharmony_ci}
103162306a36Sopenharmony_ci
103262306a36Sopenharmony_ci/*
103362306a36Sopenharmony_ci * Create the page-cache page that contains the requested block.
103462306a36Sopenharmony_ci *
103562306a36Sopenharmony_ci * This is used purely for blockdev mappings.
103662306a36Sopenharmony_ci */
103762306a36Sopenharmony_cistatic int
103862306a36Sopenharmony_cigrow_dev_page(struct block_device *bdev, sector_t block,
103962306a36Sopenharmony_ci	      pgoff_t index, int size, int sizebits, gfp_t gfp)
104062306a36Sopenharmony_ci{
104162306a36Sopenharmony_ci	struct inode *inode = bdev->bd_inode;
104262306a36Sopenharmony_ci	struct folio *folio;
104362306a36Sopenharmony_ci	struct buffer_head *bh;
104462306a36Sopenharmony_ci	sector_t end_block;
104562306a36Sopenharmony_ci	int ret = 0;
104662306a36Sopenharmony_ci	gfp_t gfp_mask;
104762306a36Sopenharmony_ci
104862306a36Sopenharmony_ci	gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
104962306a36Sopenharmony_ci
105062306a36Sopenharmony_ci	/*
105162306a36Sopenharmony_ci	 * XXX: __getblk_slow() can not really deal with failure and
105262306a36Sopenharmony_ci	 * will endlessly loop on improvised global reclaim.  Prefer
105362306a36Sopenharmony_ci	 * looping in the allocator rather than here, at least that
105462306a36Sopenharmony_ci	 * code knows what it's doing.
105562306a36Sopenharmony_ci	 */
105662306a36Sopenharmony_ci	gfp_mask |= __GFP_NOFAIL;
105762306a36Sopenharmony_ci
105862306a36Sopenharmony_ci	folio = __filemap_get_folio(inode->i_mapping, index,
105962306a36Sopenharmony_ci			FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp_mask);
106062306a36Sopenharmony_ci
106162306a36Sopenharmony_ci	bh = folio_buffers(folio);
106262306a36Sopenharmony_ci	if (bh) {
106362306a36Sopenharmony_ci		if (bh->b_size == size) {
106462306a36Sopenharmony_ci			end_block = folio_init_buffers(folio, bdev,
106562306a36Sopenharmony_ci					(sector_t)index << sizebits, size);
106662306a36Sopenharmony_ci			goto done;
106762306a36Sopenharmony_ci		}
106862306a36Sopenharmony_ci		if (!try_to_free_buffers(folio))
106962306a36Sopenharmony_ci			goto failed;
107062306a36Sopenharmony_ci	}
107162306a36Sopenharmony_ci
107262306a36Sopenharmony_ci	bh = folio_alloc_buffers(folio, size, true);
107362306a36Sopenharmony_ci
107462306a36Sopenharmony_ci	/*
107562306a36Sopenharmony_ci	 * Link the folio to the buffers and initialise them.  Take the
107662306a36Sopenharmony_ci	 * lock to be atomic wrt __find_get_block(), which does not
107762306a36Sopenharmony_ci	 * run under the folio lock.
107862306a36Sopenharmony_ci	 */
107962306a36Sopenharmony_ci	spin_lock(&inode->i_mapping->private_lock);
108062306a36Sopenharmony_ci	link_dev_buffers(folio, bh);
108162306a36Sopenharmony_ci	end_block = folio_init_buffers(folio, bdev,
108262306a36Sopenharmony_ci			(sector_t)index << sizebits, size);
108362306a36Sopenharmony_ci	spin_unlock(&inode->i_mapping->private_lock);
108462306a36Sopenharmony_cidone:
108562306a36Sopenharmony_ci	ret = (block < end_block) ? 1 : -ENXIO;
108662306a36Sopenharmony_cifailed:
108762306a36Sopenharmony_ci	folio_unlock(folio);
108862306a36Sopenharmony_ci	folio_put(folio);
108962306a36Sopenharmony_ci	return ret;
109062306a36Sopenharmony_ci}
109162306a36Sopenharmony_ci
109262306a36Sopenharmony_ci/*
109362306a36Sopenharmony_ci * Create buffers for the specified block device block's page.  If
109462306a36Sopenharmony_ci * that page was dirty, the buffers are set dirty also.
109562306a36Sopenharmony_ci */
109662306a36Sopenharmony_cistatic int
109762306a36Sopenharmony_cigrow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
109862306a36Sopenharmony_ci{
109962306a36Sopenharmony_ci	pgoff_t index;
110062306a36Sopenharmony_ci	int sizebits;
110162306a36Sopenharmony_ci
110262306a36Sopenharmony_ci	sizebits = PAGE_SHIFT - __ffs(size);
110362306a36Sopenharmony_ci	index = block >> sizebits;
110462306a36Sopenharmony_ci
110562306a36Sopenharmony_ci	/*
110662306a36Sopenharmony_ci	 * Check for a block which wants to lie outside our maximum possible
110762306a36Sopenharmony_ci	 * pagecache index.  (this comparison is done using sector_t types).
110862306a36Sopenharmony_ci	 */
110962306a36Sopenharmony_ci	if (unlikely(index != block >> sizebits)) {
111062306a36Sopenharmony_ci		printk(KERN_ERR "%s: requested out-of-range block %llu for "
111162306a36Sopenharmony_ci			"device %pg\n",
111262306a36Sopenharmony_ci			__func__, (unsigned long long)block,
111362306a36Sopenharmony_ci			bdev);
111462306a36Sopenharmony_ci		return -EIO;
111562306a36Sopenharmony_ci	}
111662306a36Sopenharmony_ci
111762306a36Sopenharmony_ci	/* Create a page with the proper size buffers.. */
111862306a36Sopenharmony_ci	return grow_dev_page(bdev, block, index, size, sizebits, gfp);
111962306a36Sopenharmony_ci}
112062306a36Sopenharmony_ci
112162306a36Sopenharmony_cistatic struct buffer_head *
112262306a36Sopenharmony_ci__getblk_slow(struct block_device *bdev, sector_t block,
112362306a36Sopenharmony_ci	     unsigned size, gfp_t gfp)
112462306a36Sopenharmony_ci{
112562306a36Sopenharmony_ci	/* Size must be multiple of hard sectorsize */
112662306a36Sopenharmony_ci	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
112762306a36Sopenharmony_ci			(size < 512 || size > PAGE_SIZE))) {
112862306a36Sopenharmony_ci		printk(KERN_ERR "getblk(): invalid block size %d requested\n",
112962306a36Sopenharmony_ci					size);
113062306a36Sopenharmony_ci		printk(KERN_ERR "logical block size: %d\n",
113162306a36Sopenharmony_ci					bdev_logical_block_size(bdev));
113262306a36Sopenharmony_ci
113362306a36Sopenharmony_ci		dump_stack();
113462306a36Sopenharmony_ci		return NULL;
113562306a36Sopenharmony_ci	}
113662306a36Sopenharmony_ci
113762306a36Sopenharmony_ci	for (;;) {
113862306a36Sopenharmony_ci		struct buffer_head *bh;
113962306a36Sopenharmony_ci		int ret;
114062306a36Sopenharmony_ci
114162306a36Sopenharmony_ci		bh = __find_get_block(bdev, block, size);
114262306a36Sopenharmony_ci		if (bh)
114362306a36Sopenharmony_ci			return bh;
114462306a36Sopenharmony_ci
114562306a36Sopenharmony_ci		ret = grow_buffers(bdev, block, size, gfp);
114662306a36Sopenharmony_ci		if (ret < 0)
114762306a36Sopenharmony_ci			return NULL;
114862306a36Sopenharmony_ci	}
114962306a36Sopenharmony_ci}
115062306a36Sopenharmony_ci
115162306a36Sopenharmony_ci/*
115262306a36Sopenharmony_ci * The relationship between dirty buffers and dirty pages:
115362306a36Sopenharmony_ci *
115462306a36Sopenharmony_ci * Whenever a page has any dirty buffers, the page's dirty bit is set, and
115562306a36Sopenharmony_ci * the page is tagged dirty in the page cache.
115662306a36Sopenharmony_ci *
115762306a36Sopenharmony_ci * At all times, the dirtiness of the buffers represents the dirtiness of
115862306a36Sopenharmony_ci * subsections of the page.  If the page has buffers, the page dirty bit is
115962306a36Sopenharmony_ci * merely a hint about the true dirty state.
116062306a36Sopenharmony_ci *
116162306a36Sopenharmony_ci * When a page is set dirty in its entirety, all its buffers are marked dirty
116262306a36Sopenharmony_ci * (if the page has buffers).
116362306a36Sopenharmony_ci *
116462306a36Sopenharmony_ci * When a buffer is marked dirty, its page is dirtied, but the page's other
116562306a36Sopenharmony_ci * buffers are not.
116662306a36Sopenharmony_ci *
116762306a36Sopenharmony_ci * Also.  When blockdev buffers are explicitly read with bread(), they
116862306a36Sopenharmony_ci * individually become uptodate.  But their backing page remains not
116962306a36Sopenharmony_ci * uptodate - even if all of its buffers are uptodate.  A subsequent
117062306a36Sopenharmony_ci * block_read_full_folio() against that folio will discover all the uptodate
117162306a36Sopenharmony_ci * buffers, will set the folio uptodate and will perform no I/O.
117262306a36Sopenharmony_ci */
117362306a36Sopenharmony_ci
117462306a36Sopenharmony_ci/**
117562306a36Sopenharmony_ci * mark_buffer_dirty - mark a buffer_head as needing writeout
117662306a36Sopenharmony_ci * @bh: the buffer_head to mark dirty
117762306a36Sopenharmony_ci *
117862306a36Sopenharmony_ci * mark_buffer_dirty() will set the dirty bit against the buffer, then set
117962306a36Sopenharmony_ci * its backing page dirty, then tag the page as dirty in the page cache
118062306a36Sopenharmony_ci * and then attach the address_space's inode to its superblock's dirty
118162306a36Sopenharmony_ci * inode list.
118262306a36Sopenharmony_ci *
118362306a36Sopenharmony_ci * mark_buffer_dirty() is atomic.  It takes bh->b_folio->mapping->private_lock,
118462306a36Sopenharmony_ci * i_pages lock and mapping->host->i_lock.
118562306a36Sopenharmony_ci */
118662306a36Sopenharmony_civoid mark_buffer_dirty(struct buffer_head *bh)
118762306a36Sopenharmony_ci{
118862306a36Sopenharmony_ci	WARN_ON_ONCE(!buffer_uptodate(bh));
118962306a36Sopenharmony_ci
119062306a36Sopenharmony_ci	trace_block_dirty_buffer(bh);
119162306a36Sopenharmony_ci
119262306a36Sopenharmony_ci	/*
119362306a36Sopenharmony_ci	 * Very *carefully* optimize the it-is-already-dirty case.
119462306a36Sopenharmony_ci	 *
119562306a36Sopenharmony_ci	 * Don't let the final "is it dirty" escape to before we
119662306a36Sopenharmony_ci	 * perhaps modified the buffer.
119762306a36Sopenharmony_ci	 */
119862306a36Sopenharmony_ci	if (buffer_dirty(bh)) {
119962306a36Sopenharmony_ci		smp_mb();
120062306a36Sopenharmony_ci		if (buffer_dirty(bh))
120162306a36Sopenharmony_ci			return;
120262306a36Sopenharmony_ci	}
120362306a36Sopenharmony_ci
120462306a36Sopenharmony_ci	if (!test_set_buffer_dirty(bh)) {
120562306a36Sopenharmony_ci		struct folio *folio = bh->b_folio;
120662306a36Sopenharmony_ci		struct address_space *mapping = NULL;
120762306a36Sopenharmony_ci
120862306a36Sopenharmony_ci		folio_memcg_lock(folio);
120962306a36Sopenharmony_ci		if (!folio_test_set_dirty(folio)) {
121062306a36Sopenharmony_ci			mapping = folio->mapping;
121162306a36Sopenharmony_ci			if (mapping)
121262306a36Sopenharmony_ci				__folio_mark_dirty(folio, mapping, 0);
121362306a36Sopenharmony_ci		}
121462306a36Sopenharmony_ci		folio_memcg_unlock(folio);
121562306a36Sopenharmony_ci		if (mapping)
121662306a36Sopenharmony_ci			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
121762306a36Sopenharmony_ci	}
121862306a36Sopenharmony_ci}
121962306a36Sopenharmony_ciEXPORT_SYMBOL(mark_buffer_dirty);
122062306a36Sopenharmony_ci
122162306a36Sopenharmony_civoid mark_buffer_write_io_error(struct buffer_head *bh)
122262306a36Sopenharmony_ci{
122362306a36Sopenharmony_ci	set_buffer_write_io_error(bh);
122462306a36Sopenharmony_ci	/* FIXME: do we need to set this in both places? */
122562306a36Sopenharmony_ci	if (bh->b_folio && bh->b_folio->mapping)
122662306a36Sopenharmony_ci		mapping_set_error(bh->b_folio->mapping, -EIO);
122762306a36Sopenharmony_ci	if (bh->b_assoc_map) {
122862306a36Sopenharmony_ci		mapping_set_error(bh->b_assoc_map, -EIO);
122962306a36Sopenharmony_ci		errseq_set(&bh->b_assoc_map->host->i_sb->s_wb_err, -EIO);
123062306a36Sopenharmony_ci	}
123162306a36Sopenharmony_ci}
123262306a36Sopenharmony_ciEXPORT_SYMBOL(mark_buffer_write_io_error);
123362306a36Sopenharmony_ci
123462306a36Sopenharmony_ci/*
123562306a36Sopenharmony_ci * Decrement a buffer_head's reference count.  If all buffers against a page
123662306a36Sopenharmony_ci * have zero reference count, are clean and unlocked, and if the page is clean
123762306a36Sopenharmony_ci * and unlocked then try_to_free_buffers() may strip the buffers from the page
123862306a36Sopenharmony_ci * in preparation for freeing it (sometimes, rarely, buffers are removed from
123962306a36Sopenharmony_ci * a page but it ends up not being freed, and buffers may later be reattached).
124062306a36Sopenharmony_ci */
124162306a36Sopenharmony_civoid __brelse(struct buffer_head * buf)
124262306a36Sopenharmony_ci{
124362306a36Sopenharmony_ci	if (atomic_read(&buf->b_count)) {
124462306a36Sopenharmony_ci		put_bh(buf);
124562306a36Sopenharmony_ci		return;
124662306a36Sopenharmony_ci	}
124762306a36Sopenharmony_ci	WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
124862306a36Sopenharmony_ci}
124962306a36Sopenharmony_ciEXPORT_SYMBOL(__brelse);
125062306a36Sopenharmony_ci
125162306a36Sopenharmony_ci/*
125262306a36Sopenharmony_ci * bforget() is like brelse(), except it discards any
125362306a36Sopenharmony_ci * potentially dirty data.
125462306a36Sopenharmony_ci */
125562306a36Sopenharmony_civoid __bforget(struct buffer_head *bh)
125662306a36Sopenharmony_ci{
125762306a36Sopenharmony_ci	clear_buffer_dirty(bh);
125862306a36Sopenharmony_ci	if (bh->b_assoc_map) {
125962306a36Sopenharmony_ci		struct address_space *buffer_mapping = bh->b_folio->mapping;
126062306a36Sopenharmony_ci
126162306a36Sopenharmony_ci		spin_lock(&buffer_mapping->private_lock);
126262306a36Sopenharmony_ci		list_del_init(&bh->b_assoc_buffers);
126362306a36Sopenharmony_ci		bh->b_assoc_map = NULL;
126462306a36Sopenharmony_ci		spin_unlock(&buffer_mapping->private_lock);
126562306a36Sopenharmony_ci	}
126662306a36Sopenharmony_ci	__brelse(bh);
126762306a36Sopenharmony_ci}
126862306a36Sopenharmony_ciEXPORT_SYMBOL(__bforget);
126962306a36Sopenharmony_ci
127062306a36Sopenharmony_cistatic struct buffer_head *__bread_slow(struct buffer_head *bh)
127162306a36Sopenharmony_ci{
127262306a36Sopenharmony_ci	lock_buffer(bh);
127362306a36Sopenharmony_ci	if (buffer_uptodate(bh)) {
127462306a36Sopenharmony_ci		unlock_buffer(bh);
127562306a36Sopenharmony_ci		return bh;
127662306a36Sopenharmony_ci	} else {
127762306a36Sopenharmony_ci		get_bh(bh);
127862306a36Sopenharmony_ci		bh->b_end_io = end_buffer_read_sync;
127962306a36Sopenharmony_ci		submit_bh(REQ_OP_READ, bh);
128062306a36Sopenharmony_ci		wait_on_buffer(bh);
128162306a36Sopenharmony_ci		if (buffer_uptodate(bh))
128262306a36Sopenharmony_ci			return bh;
128362306a36Sopenharmony_ci	}
128462306a36Sopenharmony_ci	brelse(bh);
128562306a36Sopenharmony_ci	return NULL;
128662306a36Sopenharmony_ci}
128762306a36Sopenharmony_ci
128862306a36Sopenharmony_ci/*
128962306a36Sopenharmony_ci * Per-cpu buffer LRU implementation.  To reduce the cost of __find_get_block().
129062306a36Sopenharmony_ci * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
129162306a36Sopenharmony_ci * refcount elevated by one when they're in an LRU.  A buffer can only appear
129262306a36Sopenharmony_ci * once in a particular CPU's LRU.  A single buffer can be present in multiple
129362306a36Sopenharmony_ci * CPU's LRUs at the same time.
129462306a36Sopenharmony_ci *
129562306a36Sopenharmony_ci * This is a transparent caching front-end to sb_bread(), sb_getblk() and
129662306a36Sopenharmony_ci * sb_find_get_block().
129762306a36Sopenharmony_ci *
129862306a36Sopenharmony_ci * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
129962306a36Sopenharmony_ci * a local interrupt disable for that.
130062306a36Sopenharmony_ci */
130162306a36Sopenharmony_ci
130262306a36Sopenharmony_ci#define BH_LRU_SIZE	16
130362306a36Sopenharmony_ci
130462306a36Sopenharmony_cistruct bh_lru {
130562306a36Sopenharmony_ci	struct buffer_head *bhs[BH_LRU_SIZE];
130662306a36Sopenharmony_ci};
130762306a36Sopenharmony_ci
130862306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
130962306a36Sopenharmony_ci
131062306a36Sopenharmony_ci#ifdef CONFIG_SMP
131162306a36Sopenharmony_ci#define bh_lru_lock()	local_irq_disable()
131262306a36Sopenharmony_ci#define bh_lru_unlock()	local_irq_enable()
131362306a36Sopenharmony_ci#else
131462306a36Sopenharmony_ci#define bh_lru_lock()	preempt_disable()
131562306a36Sopenharmony_ci#define bh_lru_unlock()	preempt_enable()
131662306a36Sopenharmony_ci#endif
131762306a36Sopenharmony_ci
131862306a36Sopenharmony_cistatic inline void check_irqs_on(void)
131962306a36Sopenharmony_ci{
132062306a36Sopenharmony_ci#ifdef irqs_disabled
132162306a36Sopenharmony_ci	BUG_ON(irqs_disabled());
132262306a36Sopenharmony_ci#endif
132362306a36Sopenharmony_ci}
132462306a36Sopenharmony_ci
132562306a36Sopenharmony_ci/*
132662306a36Sopenharmony_ci * Install a buffer_head into this cpu's LRU.  If not already in the LRU, it is
132762306a36Sopenharmony_ci * inserted at the front, and the buffer_head at the back if any is evicted.
132862306a36Sopenharmony_ci * Or, if already in the LRU it is moved to the front.
132962306a36Sopenharmony_ci */
133062306a36Sopenharmony_cistatic void bh_lru_install(struct buffer_head *bh)
133162306a36Sopenharmony_ci{
133262306a36Sopenharmony_ci	struct buffer_head *evictee = bh;
133362306a36Sopenharmony_ci	struct bh_lru *b;
133462306a36Sopenharmony_ci	int i;
133562306a36Sopenharmony_ci
133662306a36Sopenharmony_ci	check_irqs_on();
133762306a36Sopenharmony_ci	bh_lru_lock();
133862306a36Sopenharmony_ci
133962306a36Sopenharmony_ci	/*
134062306a36Sopenharmony_ci	 * the refcount of buffer_head in bh_lru prevents dropping the
134162306a36Sopenharmony_ci	 * attached page(i.e., try_to_free_buffers) so it could cause
134262306a36Sopenharmony_ci	 * failing page migration.
134362306a36Sopenharmony_ci	 * Skip putting upcoming bh into bh_lru until migration is done.
134462306a36Sopenharmony_ci	 */
134562306a36Sopenharmony_ci	if (lru_cache_disabled() || cpu_is_isolated(smp_processor_id())) {
134662306a36Sopenharmony_ci		bh_lru_unlock();
134762306a36Sopenharmony_ci		return;
134862306a36Sopenharmony_ci	}
134962306a36Sopenharmony_ci
135062306a36Sopenharmony_ci	b = this_cpu_ptr(&bh_lrus);
135162306a36Sopenharmony_ci	for (i = 0; i < BH_LRU_SIZE; i++) {
135262306a36Sopenharmony_ci		swap(evictee, b->bhs[i]);
135362306a36Sopenharmony_ci		if (evictee == bh) {
135462306a36Sopenharmony_ci			bh_lru_unlock();
135562306a36Sopenharmony_ci			return;
135662306a36Sopenharmony_ci		}
135762306a36Sopenharmony_ci	}
135862306a36Sopenharmony_ci
135962306a36Sopenharmony_ci	get_bh(bh);
136062306a36Sopenharmony_ci	bh_lru_unlock();
136162306a36Sopenharmony_ci	brelse(evictee);
136262306a36Sopenharmony_ci}
136362306a36Sopenharmony_ci
136462306a36Sopenharmony_ci/*
136562306a36Sopenharmony_ci * Look up the bh in this cpu's LRU.  If it's there, move it to the head.
136662306a36Sopenharmony_ci */
136762306a36Sopenharmony_cistatic struct buffer_head *
136862306a36Sopenharmony_cilookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
136962306a36Sopenharmony_ci{
137062306a36Sopenharmony_ci	struct buffer_head *ret = NULL;
137162306a36Sopenharmony_ci	unsigned int i;
137262306a36Sopenharmony_ci
137362306a36Sopenharmony_ci	check_irqs_on();
137462306a36Sopenharmony_ci	bh_lru_lock();
137562306a36Sopenharmony_ci	if (cpu_is_isolated(smp_processor_id())) {
137662306a36Sopenharmony_ci		bh_lru_unlock();
137762306a36Sopenharmony_ci		return NULL;
137862306a36Sopenharmony_ci	}
137962306a36Sopenharmony_ci	for (i = 0; i < BH_LRU_SIZE; i++) {
138062306a36Sopenharmony_ci		struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
138162306a36Sopenharmony_ci
138262306a36Sopenharmony_ci		if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
138362306a36Sopenharmony_ci		    bh->b_size == size) {
138462306a36Sopenharmony_ci			if (i) {
138562306a36Sopenharmony_ci				while (i) {
138662306a36Sopenharmony_ci					__this_cpu_write(bh_lrus.bhs[i],
138762306a36Sopenharmony_ci						__this_cpu_read(bh_lrus.bhs[i - 1]));
138862306a36Sopenharmony_ci					i--;
138962306a36Sopenharmony_ci				}
139062306a36Sopenharmony_ci				__this_cpu_write(bh_lrus.bhs[0], bh);
139162306a36Sopenharmony_ci			}
139262306a36Sopenharmony_ci			get_bh(bh);
139362306a36Sopenharmony_ci			ret = bh;
139462306a36Sopenharmony_ci			break;
139562306a36Sopenharmony_ci		}
139662306a36Sopenharmony_ci	}
139762306a36Sopenharmony_ci	bh_lru_unlock();
139862306a36Sopenharmony_ci	return ret;
139962306a36Sopenharmony_ci}
140062306a36Sopenharmony_ci
140162306a36Sopenharmony_ci/*
140262306a36Sopenharmony_ci * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
140362306a36Sopenharmony_ci * it in the LRU and mark it as accessed.  If it is not present then return
140462306a36Sopenharmony_ci * NULL
140562306a36Sopenharmony_ci */
140662306a36Sopenharmony_cistruct buffer_head *
140762306a36Sopenharmony_ci__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
140862306a36Sopenharmony_ci{
140962306a36Sopenharmony_ci	struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
141062306a36Sopenharmony_ci
141162306a36Sopenharmony_ci	if (bh == NULL) {
141262306a36Sopenharmony_ci		/* __find_get_block_slow will mark the page accessed */
141362306a36Sopenharmony_ci		bh = __find_get_block_slow(bdev, block);
141462306a36Sopenharmony_ci		if (bh)
141562306a36Sopenharmony_ci			bh_lru_install(bh);
141662306a36Sopenharmony_ci	} else
141762306a36Sopenharmony_ci		touch_buffer(bh);
141862306a36Sopenharmony_ci
141962306a36Sopenharmony_ci	return bh;
142062306a36Sopenharmony_ci}
142162306a36Sopenharmony_ciEXPORT_SYMBOL(__find_get_block);
142262306a36Sopenharmony_ci
142362306a36Sopenharmony_ci/*
142462306a36Sopenharmony_ci * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
142562306a36Sopenharmony_ci * which corresponds to the passed block_device, block and size. The
142662306a36Sopenharmony_ci * returned buffer has its reference count incremented.
142762306a36Sopenharmony_ci *
142862306a36Sopenharmony_ci * __getblk_gfp() will lock up the machine if grow_dev_page's
142962306a36Sopenharmony_ci * try_to_free_buffers() attempt is failing.  FIXME, perhaps?
143062306a36Sopenharmony_ci */
143162306a36Sopenharmony_cistruct buffer_head *
143262306a36Sopenharmony_ci__getblk_gfp(struct block_device *bdev, sector_t block,
143362306a36Sopenharmony_ci	     unsigned size, gfp_t gfp)
143462306a36Sopenharmony_ci{
143562306a36Sopenharmony_ci	struct buffer_head *bh = __find_get_block(bdev, block, size);
143662306a36Sopenharmony_ci
143762306a36Sopenharmony_ci	might_sleep();
143862306a36Sopenharmony_ci	if (bh == NULL)
143962306a36Sopenharmony_ci		bh = __getblk_slow(bdev, block, size, gfp);
144062306a36Sopenharmony_ci	return bh;
144162306a36Sopenharmony_ci}
144262306a36Sopenharmony_ciEXPORT_SYMBOL(__getblk_gfp);
144362306a36Sopenharmony_ci
144462306a36Sopenharmony_ci/*
144562306a36Sopenharmony_ci * Do async read-ahead on a buffer..
144662306a36Sopenharmony_ci */
144762306a36Sopenharmony_civoid __breadahead(struct block_device *bdev, sector_t block, unsigned size)
144862306a36Sopenharmony_ci{
144962306a36Sopenharmony_ci	struct buffer_head *bh = __getblk(bdev, block, size);
145062306a36Sopenharmony_ci	if (likely(bh)) {
145162306a36Sopenharmony_ci		bh_readahead(bh, REQ_RAHEAD);
145262306a36Sopenharmony_ci		brelse(bh);
145362306a36Sopenharmony_ci	}
145462306a36Sopenharmony_ci}
145562306a36Sopenharmony_ciEXPORT_SYMBOL(__breadahead);
145662306a36Sopenharmony_ci
145762306a36Sopenharmony_ci/**
145862306a36Sopenharmony_ci *  __bread_gfp() - reads a specified block and returns the bh
145962306a36Sopenharmony_ci *  @bdev: the block_device to read from
146062306a36Sopenharmony_ci *  @block: number of block
146162306a36Sopenharmony_ci *  @size: size (in bytes) to read
146262306a36Sopenharmony_ci *  @gfp: page allocation flag
146362306a36Sopenharmony_ci *
146462306a36Sopenharmony_ci *  Reads a specified block, and returns buffer head that contains it.
146562306a36Sopenharmony_ci *  The page cache can be allocated from non-movable area
146662306a36Sopenharmony_ci *  not to prevent page migration if you set gfp to zero.
146762306a36Sopenharmony_ci *  It returns NULL if the block was unreadable.
146862306a36Sopenharmony_ci */
146962306a36Sopenharmony_cistruct buffer_head *
147062306a36Sopenharmony_ci__bread_gfp(struct block_device *bdev, sector_t block,
147162306a36Sopenharmony_ci		   unsigned size, gfp_t gfp)
147262306a36Sopenharmony_ci{
147362306a36Sopenharmony_ci	struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
147462306a36Sopenharmony_ci
147562306a36Sopenharmony_ci	if (likely(bh) && !buffer_uptodate(bh))
147662306a36Sopenharmony_ci		bh = __bread_slow(bh);
147762306a36Sopenharmony_ci	return bh;
147862306a36Sopenharmony_ci}
147962306a36Sopenharmony_ciEXPORT_SYMBOL(__bread_gfp);
148062306a36Sopenharmony_ci
148162306a36Sopenharmony_cistatic void __invalidate_bh_lrus(struct bh_lru *b)
148262306a36Sopenharmony_ci{
148362306a36Sopenharmony_ci	int i;
148462306a36Sopenharmony_ci
148562306a36Sopenharmony_ci	for (i = 0; i < BH_LRU_SIZE; i++) {
148662306a36Sopenharmony_ci		brelse(b->bhs[i]);
148762306a36Sopenharmony_ci		b->bhs[i] = NULL;
148862306a36Sopenharmony_ci	}
148962306a36Sopenharmony_ci}
149062306a36Sopenharmony_ci/*
149162306a36Sopenharmony_ci * invalidate_bh_lrus() is called rarely - but not only at unmount.
149262306a36Sopenharmony_ci * This doesn't race because it runs in each cpu either in irq
149362306a36Sopenharmony_ci * or with preempt disabled.
149462306a36Sopenharmony_ci */
149562306a36Sopenharmony_cistatic void invalidate_bh_lru(void *arg)
149662306a36Sopenharmony_ci{
149762306a36Sopenharmony_ci	struct bh_lru *b = &get_cpu_var(bh_lrus);
149862306a36Sopenharmony_ci
149962306a36Sopenharmony_ci	__invalidate_bh_lrus(b);
150062306a36Sopenharmony_ci	put_cpu_var(bh_lrus);
150162306a36Sopenharmony_ci}
150262306a36Sopenharmony_ci
150362306a36Sopenharmony_cibool has_bh_in_lru(int cpu, void *dummy)
150462306a36Sopenharmony_ci{
150562306a36Sopenharmony_ci	struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
150662306a36Sopenharmony_ci	int i;
150762306a36Sopenharmony_ci
150862306a36Sopenharmony_ci	for (i = 0; i < BH_LRU_SIZE; i++) {
150962306a36Sopenharmony_ci		if (b->bhs[i])
151062306a36Sopenharmony_ci			return true;
151162306a36Sopenharmony_ci	}
151262306a36Sopenharmony_ci
151362306a36Sopenharmony_ci	return false;
151462306a36Sopenharmony_ci}
151562306a36Sopenharmony_ci
151662306a36Sopenharmony_civoid invalidate_bh_lrus(void)
151762306a36Sopenharmony_ci{
151862306a36Sopenharmony_ci	on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1);
151962306a36Sopenharmony_ci}
152062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(invalidate_bh_lrus);
152162306a36Sopenharmony_ci
152262306a36Sopenharmony_ci/*
152362306a36Sopenharmony_ci * It's called from workqueue context so we need a bh_lru_lock to close
152462306a36Sopenharmony_ci * the race with preemption/irq.
152562306a36Sopenharmony_ci */
152662306a36Sopenharmony_civoid invalidate_bh_lrus_cpu(void)
152762306a36Sopenharmony_ci{
152862306a36Sopenharmony_ci	struct bh_lru *b;
152962306a36Sopenharmony_ci
153062306a36Sopenharmony_ci	bh_lru_lock();
153162306a36Sopenharmony_ci	b = this_cpu_ptr(&bh_lrus);
153262306a36Sopenharmony_ci	__invalidate_bh_lrus(b);
153362306a36Sopenharmony_ci	bh_lru_unlock();
153462306a36Sopenharmony_ci}
153562306a36Sopenharmony_ci
153662306a36Sopenharmony_civoid folio_set_bh(struct buffer_head *bh, struct folio *folio,
153762306a36Sopenharmony_ci		  unsigned long offset)
153862306a36Sopenharmony_ci{
153962306a36Sopenharmony_ci	bh->b_folio = folio;
154062306a36Sopenharmony_ci	BUG_ON(offset >= folio_size(folio));
154162306a36Sopenharmony_ci	if (folio_test_highmem(folio))
154262306a36Sopenharmony_ci		/*
154362306a36Sopenharmony_ci		 * This catches illegal uses and preserves the offset:
154462306a36Sopenharmony_ci		 */
154562306a36Sopenharmony_ci		bh->b_data = (char *)(0 + offset);
154662306a36Sopenharmony_ci	else
154762306a36Sopenharmony_ci		bh->b_data = folio_address(folio) + offset;
154862306a36Sopenharmony_ci}
154962306a36Sopenharmony_ciEXPORT_SYMBOL(folio_set_bh);
155062306a36Sopenharmony_ci
155162306a36Sopenharmony_ci/*
155262306a36Sopenharmony_ci * Called when truncating a buffer on a page completely.
155362306a36Sopenharmony_ci */
155462306a36Sopenharmony_ci
155562306a36Sopenharmony_ci/* Bits that are cleared during an invalidate */
155662306a36Sopenharmony_ci#define BUFFER_FLAGS_DISCARD \
155762306a36Sopenharmony_ci	(1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
155862306a36Sopenharmony_ci	 1 << BH_Delay | 1 << BH_Unwritten)
155962306a36Sopenharmony_ci
156062306a36Sopenharmony_cistatic void discard_buffer(struct buffer_head * bh)
156162306a36Sopenharmony_ci{
156262306a36Sopenharmony_ci	unsigned long b_state;
156362306a36Sopenharmony_ci
156462306a36Sopenharmony_ci	lock_buffer(bh);
156562306a36Sopenharmony_ci	clear_buffer_dirty(bh);
156662306a36Sopenharmony_ci	bh->b_bdev = NULL;
156762306a36Sopenharmony_ci	b_state = READ_ONCE(bh->b_state);
156862306a36Sopenharmony_ci	do {
156962306a36Sopenharmony_ci	} while (!try_cmpxchg(&bh->b_state, &b_state,
157062306a36Sopenharmony_ci			      b_state & ~BUFFER_FLAGS_DISCARD));
157162306a36Sopenharmony_ci	unlock_buffer(bh);
157262306a36Sopenharmony_ci}
157362306a36Sopenharmony_ci
157462306a36Sopenharmony_ci/**
157562306a36Sopenharmony_ci * block_invalidate_folio - Invalidate part or all of a buffer-backed folio.
157662306a36Sopenharmony_ci * @folio: The folio which is affected.
157762306a36Sopenharmony_ci * @offset: start of the range to invalidate
157862306a36Sopenharmony_ci * @length: length of the range to invalidate
157962306a36Sopenharmony_ci *
158062306a36Sopenharmony_ci * block_invalidate_folio() is called when all or part of the folio has been
158162306a36Sopenharmony_ci * invalidated by a truncate operation.
158262306a36Sopenharmony_ci *
158362306a36Sopenharmony_ci * block_invalidate_folio() does not have to release all buffers, but it must
158462306a36Sopenharmony_ci * ensure that no dirty buffer is left outside @offset and that no I/O
158562306a36Sopenharmony_ci * is underway against any of the blocks which are outside the truncation
158662306a36Sopenharmony_ci * point.  Because the caller is about to free (and possibly reuse) those
158762306a36Sopenharmony_ci * blocks on-disk.
158862306a36Sopenharmony_ci */
158962306a36Sopenharmony_civoid block_invalidate_folio(struct folio *folio, size_t offset, size_t length)
159062306a36Sopenharmony_ci{
159162306a36Sopenharmony_ci	struct buffer_head *head, *bh, *next;
159262306a36Sopenharmony_ci	size_t curr_off = 0;
159362306a36Sopenharmony_ci	size_t stop = length + offset;
159462306a36Sopenharmony_ci
159562306a36Sopenharmony_ci	BUG_ON(!folio_test_locked(folio));
159662306a36Sopenharmony_ci
159762306a36Sopenharmony_ci	/*
159862306a36Sopenharmony_ci	 * Check for overflow
159962306a36Sopenharmony_ci	 */
160062306a36Sopenharmony_ci	BUG_ON(stop > folio_size(folio) || stop < length);
160162306a36Sopenharmony_ci
160262306a36Sopenharmony_ci	head = folio_buffers(folio);
160362306a36Sopenharmony_ci	if (!head)
160462306a36Sopenharmony_ci		return;
160562306a36Sopenharmony_ci
160662306a36Sopenharmony_ci	bh = head;
160762306a36Sopenharmony_ci	do {
160862306a36Sopenharmony_ci		size_t next_off = curr_off + bh->b_size;
160962306a36Sopenharmony_ci		next = bh->b_this_page;
161062306a36Sopenharmony_ci
161162306a36Sopenharmony_ci		/*
161262306a36Sopenharmony_ci		 * Are we still fully in range ?
161362306a36Sopenharmony_ci		 */
161462306a36Sopenharmony_ci		if (next_off > stop)
161562306a36Sopenharmony_ci			goto out;
161662306a36Sopenharmony_ci
161762306a36Sopenharmony_ci		/*
161862306a36Sopenharmony_ci		 * is this block fully invalidated?
161962306a36Sopenharmony_ci		 */
162062306a36Sopenharmony_ci		if (offset <= curr_off)
162162306a36Sopenharmony_ci			discard_buffer(bh);
162262306a36Sopenharmony_ci		curr_off = next_off;
162362306a36Sopenharmony_ci		bh = next;
162462306a36Sopenharmony_ci	} while (bh != head);
162562306a36Sopenharmony_ci
162662306a36Sopenharmony_ci	/*
162762306a36Sopenharmony_ci	 * We release buffers only if the entire folio is being invalidated.
162862306a36Sopenharmony_ci	 * The get_block cached value has been unconditionally invalidated,
162962306a36Sopenharmony_ci	 * so real IO is not possible anymore.
163062306a36Sopenharmony_ci	 */
163162306a36Sopenharmony_ci	if (length == folio_size(folio))
163262306a36Sopenharmony_ci		filemap_release_folio(folio, 0);
163362306a36Sopenharmony_ciout:
163462306a36Sopenharmony_ci	return;
163562306a36Sopenharmony_ci}
163662306a36Sopenharmony_ciEXPORT_SYMBOL(block_invalidate_folio);
163762306a36Sopenharmony_ci
163862306a36Sopenharmony_ci/*
163962306a36Sopenharmony_ci * We attach and possibly dirty the buffers atomically wrt
164062306a36Sopenharmony_ci * block_dirty_folio() via private_lock.  try_to_free_buffers
164162306a36Sopenharmony_ci * is already excluded via the folio lock.
164262306a36Sopenharmony_ci */
164362306a36Sopenharmony_civoid folio_create_empty_buffers(struct folio *folio, unsigned long blocksize,
164462306a36Sopenharmony_ci				unsigned long b_state)
164562306a36Sopenharmony_ci{
164662306a36Sopenharmony_ci	struct buffer_head *bh, *head, *tail;
164762306a36Sopenharmony_ci
164862306a36Sopenharmony_ci	head = folio_alloc_buffers(folio, blocksize, true);
164962306a36Sopenharmony_ci	bh = head;
165062306a36Sopenharmony_ci	do {
165162306a36Sopenharmony_ci		bh->b_state |= b_state;
165262306a36Sopenharmony_ci		tail = bh;
165362306a36Sopenharmony_ci		bh = bh->b_this_page;
165462306a36Sopenharmony_ci	} while (bh);
165562306a36Sopenharmony_ci	tail->b_this_page = head;
165662306a36Sopenharmony_ci
165762306a36Sopenharmony_ci	spin_lock(&folio->mapping->private_lock);
165862306a36Sopenharmony_ci	if (folio_test_uptodate(folio) || folio_test_dirty(folio)) {
165962306a36Sopenharmony_ci		bh = head;
166062306a36Sopenharmony_ci		do {
166162306a36Sopenharmony_ci			if (folio_test_dirty(folio))
166262306a36Sopenharmony_ci				set_buffer_dirty(bh);
166362306a36Sopenharmony_ci			if (folio_test_uptodate(folio))
166462306a36Sopenharmony_ci				set_buffer_uptodate(bh);
166562306a36Sopenharmony_ci			bh = bh->b_this_page;
166662306a36Sopenharmony_ci		} while (bh != head);
166762306a36Sopenharmony_ci	}
166862306a36Sopenharmony_ci	folio_attach_private(folio, head);
166962306a36Sopenharmony_ci	spin_unlock(&folio->mapping->private_lock);
167062306a36Sopenharmony_ci}
167162306a36Sopenharmony_ciEXPORT_SYMBOL(folio_create_empty_buffers);
167262306a36Sopenharmony_ci
167362306a36Sopenharmony_civoid create_empty_buffers(struct page *page,
167462306a36Sopenharmony_ci			unsigned long blocksize, unsigned long b_state)
167562306a36Sopenharmony_ci{
167662306a36Sopenharmony_ci	folio_create_empty_buffers(page_folio(page), blocksize, b_state);
167762306a36Sopenharmony_ci}
167862306a36Sopenharmony_ciEXPORT_SYMBOL(create_empty_buffers);
167962306a36Sopenharmony_ci
168062306a36Sopenharmony_ci/**
168162306a36Sopenharmony_ci * clean_bdev_aliases: clean a range of buffers in block device
168262306a36Sopenharmony_ci * @bdev: Block device to clean buffers in
168362306a36Sopenharmony_ci * @block: Start of a range of blocks to clean
168462306a36Sopenharmony_ci * @len: Number of blocks to clean
168562306a36Sopenharmony_ci *
168662306a36Sopenharmony_ci * We are taking a range of blocks for data and we don't want writeback of any
168762306a36Sopenharmony_ci * buffer-cache aliases starting from return from this function and until the
168862306a36Sopenharmony_ci * moment when something will explicitly mark the buffer dirty (hopefully that
168962306a36Sopenharmony_ci * will not happen until we will free that block ;-) We don't even need to mark
169062306a36Sopenharmony_ci * it not-uptodate - nobody can expect anything from a newly allocated buffer
169162306a36Sopenharmony_ci * anyway. We used to use unmap_buffer() for such invalidation, but that was
169262306a36Sopenharmony_ci * wrong. We definitely don't want to mark the alias unmapped, for example - it
169362306a36Sopenharmony_ci * would confuse anyone who might pick it with bread() afterwards...
169462306a36Sopenharmony_ci *
169562306a36Sopenharmony_ci * Also..  Note that bforget() doesn't lock the buffer.  So there can be
169662306a36Sopenharmony_ci * writeout I/O going on against recently-freed buffers.  We don't wait on that
169762306a36Sopenharmony_ci * I/O in bforget() - it's more efficient to wait on the I/O only if we really
169862306a36Sopenharmony_ci * need to.  That happens here.
169962306a36Sopenharmony_ci */
170062306a36Sopenharmony_civoid clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
170162306a36Sopenharmony_ci{
170262306a36Sopenharmony_ci	struct inode *bd_inode = bdev->bd_inode;
170362306a36Sopenharmony_ci	struct address_space *bd_mapping = bd_inode->i_mapping;
170462306a36Sopenharmony_ci	struct folio_batch fbatch;
170562306a36Sopenharmony_ci	pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
170662306a36Sopenharmony_ci	pgoff_t end;
170762306a36Sopenharmony_ci	int i, count;
170862306a36Sopenharmony_ci	struct buffer_head *bh;
170962306a36Sopenharmony_ci	struct buffer_head *head;
171062306a36Sopenharmony_ci
171162306a36Sopenharmony_ci	end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
171262306a36Sopenharmony_ci	folio_batch_init(&fbatch);
171362306a36Sopenharmony_ci	while (filemap_get_folios(bd_mapping, &index, end, &fbatch)) {
171462306a36Sopenharmony_ci		count = folio_batch_count(&fbatch);
171562306a36Sopenharmony_ci		for (i = 0; i < count; i++) {
171662306a36Sopenharmony_ci			struct folio *folio = fbatch.folios[i];
171762306a36Sopenharmony_ci
171862306a36Sopenharmony_ci			if (!folio_buffers(folio))
171962306a36Sopenharmony_ci				continue;
172062306a36Sopenharmony_ci			/*
172162306a36Sopenharmony_ci			 * We use folio lock instead of bd_mapping->private_lock
172262306a36Sopenharmony_ci			 * to pin buffers here since we can afford to sleep and
172362306a36Sopenharmony_ci			 * it scales better than a global spinlock lock.
172462306a36Sopenharmony_ci			 */
172562306a36Sopenharmony_ci			folio_lock(folio);
172662306a36Sopenharmony_ci			/* Recheck when the folio is locked which pins bhs */
172762306a36Sopenharmony_ci			head = folio_buffers(folio);
172862306a36Sopenharmony_ci			if (!head)
172962306a36Sopenharmony_ci				goto unlock_page;
173062306a36Sopenharmony_ci			bh = head;
173162306a36Sopenharmony_ci			do {
173262306a36Sopenharmony_ci				if (!buffer_mapped(bh) || (bh->b_blocknr < block))
173362306a36Sopenharmony_ci					goto next;
173462306a36Sopenharmony_ci				if (bh->b_blocknr >= block + len)
173562306a36Sopenharmony_ci					break;
173662306a36Sopenharmony_ci				clear_buffer_dirty(bh);
173762306a36Sopenharmony_ci				wait_on_buffer(bh);
173862306a36Sopenharmony_ci				clear_buffer_req(bh);
173962306a36Sopenharmony_cinext:
174062306a36Sopenharmony_ci				bh = bh->b_this_page;
174162306a36Sopenharmony_ci			} while (bh != head);
174262306a36Sopenharmony_ciunlock_page:
174362306a36Sopenharmony_ci			folio_unlock(folio);
174462306a36Sopenharmony_ci		}
174562306a36Sopenharmony_ci		folio_batch_release(&fbatch);
174662306a36Sopenharmony_ci		cond_resched();
174762306a36Sopenharmony_ci		/* End of range already reached? */
174862306a36Sopenharmony_ci		if (index > end || !index)
174962306a36Sopenharmony_ci			break;
175062306a36Sopenharmony_ci	}
175162306a36Sopenharmony_ci}
175262306a36Sopenharmony_ciEXPORT_SYMBOL(clean_bdev_aliases);
175362306a36Sopenharmony_ci
175462306a36Sopenharmony_ci/*
175562306a36Sopenharmony_ci * Size is a power-of-two in the range 512..PAGE_SIZE,
175662306a36Sopenharmony_ci * and the case we care about most is PAGE_SIZE.
175762306a36Sopenharmony_ci *
175862306a36Sopenharmony_ci * So this *could* possibly be written with those
175962306a36Sopenharmony_ci * constraints in mind (relevant mostly if some
176062306a36Sopenharmony_ci * architecture has a slow bit-scan instruction)
176162306a36Sopenharmony_ci */
176262306a36Sopenharmony_cistatic inline int block_size_bits(unsigned int blocksize)
176362306a36Sopenharmony_ci{
176462306a36Sopenharmony_ci	return ilog2(blocksize);
176562306a36Sopenharmony_ci}
176662306a36Sopenharmony_ci
176762306a36Sopenharmony_cistatic struct buffer_head *folio_create_buffers(struct folio *folio,
176862306a36Sopenharmony_ci						struct inode *inode,
176962306a36Sopenharmony_ci						unsigned int b_state)
177062306a36Sopenharmony_ci{
177162306a36Sopenharmony_ci	BUG_ON(!folio_test_locked(folio));
177262306a36Sopenharmony_ci
177362306a36Sopenharmony_ci	if (!folio_buffers(folio))
177462306a36Sopenharmony_ci		folio_create_empty_buffers(folio,
177562306a36Sopenharmony_ci					   1 << READ_ONCE(inode->i_blkbits),
177662306a36Sopenharmony_ci					   b_state);
177762306a36Sopenharmony_ci	return folio_buffers(folio);
177862306a36Sopenharmony_ci}
177962306a36Sopenharmony_ci
178062306a36Sopenharmony_ci/*
178162306a36Sopenharmony_ci * NOTE! All mapped/uptodate combinations are valid:
178262306a36Sopenharmony_ci *
178362306a36Sopenharmony_ci *	Mapped	Uptodate	Meaning
178462306a36Sopenharmony_ci *
178562306a36Sopenharmony_ci *	No	No		"unknown" - must do get_block()
178662306a36Sopenharmony_ci *	No	Yes		"hole" - zero-filled
178762306a36Sopenharmony_ci *	Yes	No		"allocated" - allocated on disk, not read in
178862306a36Sopenharmony_ci *	Yes	Yes		"valid" - allocated and up-to-date in memory.
178962306a36Sopenharmony_ci *
179062306a36Sopenharmony_ci * "Dirty" is valid only with the last case (mapped+uptodate).
179162306a36Sopenharmony_ci */
179262306a36Sopenharmony_ci
179362306a36Sopenharmony_ci/*
179462306a36Sopenharmony_ci * While block_write_full_page is writing back the dirty buffers under
179562306a36Sopenharmony_ci * the page lock, whoever dirtied the buffers may decide to clean them
179662306a36Sopenharmony_ci * again at any time.  We handle that by only looking at the buffer
179762306a36Sopenharmony_ci * state inside lock_buffer().
179862306a36Sopenharmony_ci *
179962306a36Sopenharmony_ci * If block_write_full_page() is called for regular writeback
180062306a36Sopenharmony_ci * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
180162306a36Sopenharmony_ci * locked buffer.   This only can happen if someone has written the buffer
180262306a36Sopenharmony_ci * directly, with submit_bh().  At the address_space level PageWriteback
180362306a36Sopenharmony_ci * prevents this contention from occurring.
180462306a36Sopenharmony_ci *
180562306a36Sopenharmony_ci * If block_write_full_page() is called with wbc->sync_mode ==
180662306a36Sopenharmony_ci * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this
180762306a36Sopenharmony_ci * causes the writes to be flagged as synchronous writes.
180862306a36Sopenharmony_ci */
180962306a36Sopenharmony_ciint __block_write_full_folio(struct inode *inode, struct folio *folio,
181062306a36Sopenharmony_ci			get_block_t *get_block, struct writeback_control *wbc,
181162306a36Sopenharmony_ci			bh_end_io_t *handler)
181262306a36Sopenharmony_ci{
181362306a36Sopenharmony_ci	int err;
181462306a36Sopenharmony_ci	sector_t block;
181562306a36Sopenharmony_ci	sector_t last_block;
181662306a36Sopenharmony_ci	struct buffer_head *bh, *head;
181762306a36Sopenharmony_ci	unsigned int blocksize, bbits;
181862306a36Sopenharmony_ci	int nr_underway = 0;
181962306a36Sopenharmony_ci	blk_opf_t write_flags = wbc_to_write_flags(wbc);
182062306a36Sopenharmony_ci
182162306a36Sopenharmony_ci	head = folio_create_buffers(folio, inode,
182262306a36Sopenharmony_ci				    (1 << BH_Dirty) | (1 << BH_Uptodate));
182362306a36Sopenharmony_ci
182462306a36Sopenharmony_ci	/*
182562306a36Sopenharmony_ci	 * Be very careful.  We have no exclusion from block_dirty_folio
182662306a36Sopenharmony_ci	 * here, and the (potentially unmapped) buffers may become dirty at
182762306a36Sopenharmony_ci	 * any time.  If a buffer becomes dirty here after we've inspected it
182862306a36Sopenharmony_ci	 * then we just miss that fact, and the folio stays dirty.
182962306a36Sopenharmony_ci	 *
183062306a36Sopenharmony_ci	 * Buffers outside i_size may be dirtied by block_dirty_folio;
183162306a36Sopenharmony_ci	 * handle that here by just cleaning them.
183262306a36Sopenharmony_ci	 */
183362306a36Sopenharmony_ci
183462306a36Sopenharmony_ci	bh = head;
183562306a36Sopenharmony_ci	blocksize = bh->b_size;
183662306a36Sopenharmony_ci	bbits = block_size_bits(blocksize);
183762306a36Sopenharmony_ci
183862306a36Sopenharmony_ci	block = (sector_t)folio->index << (PAGE_SHIFT - bbits);
183962306a36Sopenharmony_ci	last_block = (i_size_read(inode) - 1) >> bbits;
184062306a36Sopenharmony_ci
184162306a36Sopenharmony_ci	/*
184262306a36Sopenharmony_ci	 * Get all the dirty buffers mapped to disk addresses and
184362306a36Sopenharmony_ci	 * handle any aliases from the underlying blockdev's mapping.
184462306a36Sopenharmony_ci	 */
184562306a36Sopenharmony_ci	do {
184662306a36Sopenharmony_ci		if (block > last_block) {
184762306a36Sopenharmony_ci			/*
184862306a36Sopenharmony_ci			 * mapped buffers outside i_size will occur, because
184962306a36Sopenharmony_ci			 * this folio can be outside i_size when there is a
185062306a36Sopenharmony_ci			 * truncate in progress.
185162306a36Sopenharmony_ci			 */
185262306a36Sopenharmony_ci			/*
185362306a36Sopenharmony_ci			 * The buffer was zeroed by block_write_full_page()
185462306a36Sopenharmony_ci			 */
185562306a36Sopenharmony_ci			clear_buffer_dirty(bh);
185662306a36Sopenharmony_ci			set_buffer_uptodate(bh);
185762306a36Sopenharmony_ci		} else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
185862306a36Sopenharmony_ci			   buffer_dirty(bh)) {
185962306a36Sopenharmony_ci			WARN_ON(bh->b_size != blocksize);
186062306a36Sopenharmony_ci			err = get_block(inode, block, bh, 1);
186162306a36Sopenharmony_ci			if (err)
186262306a36Sopenharmony_ci				goto recover;
186362306a36Sopenharmony_ci			clear_buffer_delay(bh);
186462306a36Sopenharmony_ci			if (buffer_new(bh)) {
186562306a36Sopenharmony_ci				/* blockdev mappings never come here */
186662306a36Sopenharmony_ci				clear_buffer_new(bh);
186762306a36Sopenharmony_ci				clean_bdev_bh_alias(bh);
186862306a36Sopenharmony_ci			}
186962306a36Sopenharmony_ci		}
187062306a36Sopenharmony_ci		bh = bh->b_this_page;
187162306a36Sopenharmony_ci		block++;
187262306a36Sopenharmony_ci	} while (bh != head);
187362306a36Sopenharmony_ci
187462306a36Sopenharmony_ci	do {
187562306a36Sopenharmony_ci		if (!buffer_mapped(bh))
187662306a36Sopenharmony_ci			continue;
187762306a36Sopenharmony_ci		/*
187862306a36Sopenharmony_ci		 * If it's a fully non-blocking write attempt and we cannot
187962306a36Sopenharmony_ci		 * lock the buffer then redirty the folio.  Note that this can
188062306a36Sopenharmony_ci		 * potentially cause a busy-wait loop from writeback threads
188162306a36Sopenharmony_ci		 * and kswapd activity, but those code paths have their own
188262306a36Sopenharmony_ci		 * higher-level throttling.
188362306a36Sopenharmony_ci		 */
188462306a36Sopenharmony_ci		if (wbc->sync_mode != WB_SYNC_NONE) {
188562306a36Sopenharmony_ci			lock_buffer(bh);
188662306a36Sopenharmony_ci		} else if (!trylock_buffer(bh)) {
188762306a36Sopenharmony_ci			folio_redirty_for_writepage(wbc, folio);
188862306a36Sopenharmony_ci			continue;
188962306a36Sopenharmony_ci		}
189062306a36Sopenharmony_ci		if (test_clear_buffer_dirty(bh)) {
189162306a36Sopenharmony_ci			mark_buffer_async_write_endio(bh, handler);
189262306a36Sopenharmony_ci		} else {
189362306a36Sopenharmony_ci			unlock_buffer(bh);
189462306a36Sopenharmony_ci		}
189562306a36Sopenharmony_ci	} while ((bh = bh->b_this_page) != head);
189662306a36Sopenharmony_ci
189762306a36Sopenharmony_ci	/*
189862306a36Sopenharmony_ci	 * The folio and its buffers are protected by the writeback flag,
189962306a36Sopenharmony_ci	 * so we can drop the bh refcounts early.
190062306a36Sopenharmony_ci	 */
190162306a36Sopenharmony_ci	BUG_ON(folio_test_writeback(folio));
190262306a36Sopenharmony_ci	folio_start_writeback(folio);
190362306a36Sopenharmony_ci
190462306a36Sopenharmony_ci	do {
190562306a36Sopenharmony_ci		struct buffer_head *next = bh->b_this_page;
190662306a36Sopenharmony_ci		if (buffer_async_write(bh)) {
190762306a36Sopenharmony_ci			submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
190862306a36Sopenharmony_ci			nr_underway++;
190962306a36Sopenharmony_ci		}
191062306a36Sopenharmony_ci		bh = next;
191162306a36Sopenharmony_ci	} while (bh != head);
191262306a36Sopenharmony_ci	folio_unlock(folio);
191362306a36Sopenharmony_ci
191462306a36Sopenharmony_ci	err = 0;
191562306a36Sopenharmony_cidone:
191662306a36Sopenharmony_ci	if (nr_underway == 0) {
191762306a36Sopenharmony_ci		/*
191862306a36Sopenharmony_ci		 * The folio was marked dirty, but the buffers were
191962306a36Sopenharmony_ci		 * clean.  Someone wrote them back by hand with
192062306a36Sopenharmony_ci		 * write_dirty_buffer/submit_bh.  A rare case.
192162306a36Sopenharmony_ci		 */
192262306a36Sopenharmony_ci		folio_end_writeback(folio);
192362306a36Sopenharmony_ci
192462306a36Sopenharmony_ci		/*
192562306a36Sopenharmony_ci		 * The folio and buffer_heads can be released at any time from
192662306a36Sopenharmony_ci		 * here on.
192762306a36Sopenharmony_ci		 */
192862306a36Sopenharmony_ci	}
192962306a36Sopenharmony_ci	return err;
193062306a36Sopenharmony_ci
193162306a36Sopenharmony_cirecover:
193262306a36Sopenharmony_ci	/*
193362306a36Sopenharmony_ci	 * ENOSPC, or some other error.  We may already have added some
193462306a36Sopenharmony_ci	 * blocks to the file, so we need to write these out to avoid
193562306a36Sopenharmony_ci	 * exposing stale data.
193662306a36Sopenharmony_ci	 * The folio is currently locked and not marked for writeback
193762306a36Sopenharmony_ci	 */
193862306a36Sopenharmony_ci	bh = head;
193962306a36Sopenharmony_ci	/* Recovery: lock and submit the mapped buffers */
194062306a36Sopenharmony_ci	do {
194162306a36Sopenharmony_ci		if (buffer_mapped(bh) && buffer_dirty(bh) &&
194262306a36Sopenharmony_ci		    !buffer_delay(bh)) {
194362306a36Sopenharmony_ci			lock_buffer(bh);
194462306a36Sopenharmony_ci			mark_buffer_async_write_endio(bh, handler);
194562306a36Sopenharmony_ci		} else {
194662306a36Sopenharmony_ci			/*
194762306a36Sopenharmony_ci			 * The buffer may have been set dirty during
194862306a36Sopenharmony_ci			 * attachment to a dirty folio.
194962306a36Sopenharmony_ci			 */
195062306a36Sopenharmony_ci			clear_buffer_dirty(bh);
195162306a36Sopenharmony_ci		}
195262306a36Sopenharmony_ci	} while ((bh = bh->b_this_page) != head);
195362306a36Sopenharmony_ci	folio_set_error(folio);
195462306a36Sopenharmony_ci	BUG_ON(folio_test_writeback(folio));
195562306a36Sopenharmony_ci	mapping_set_error(folio->mapping, err);
195662306a36Sopenharmony_ci	folio_start_writeback(folio);
195762306a36Sopenharmony_ci	do {
195862306a36Sopenharmony_ci		struct buffer_head *next = bh->b_this_page;
195962306a36Sopenharmony_ci		if (buffer_async_write(bh)) {
196062306a36Sopenharmony_ci			clear_buffer_dirty(bh);
196162306a36Sopenharmony_ci			submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
196262306a36Sopenharmony_ci			nr_underway++;
196362306a36Sopenharmony_ci		}
196462306a36Sopenharmony_ci		bh = next;
196562306a36Sopenharmony_ci	} while (bh != head);
196662306a36Sopenharmony_ci	folio_unlock(folio);
196762306a36Sopenharmony_ci	goto done;
196862306a36Sopenharmony_ci}
196962306a36Sopenharmony_ciEXPORT_SYMBOL(__block_write_full_folio);
197062306a36Sopenharmony_ci
197162306a36Sopenharmony_ci/*
197262306a36Sopenharmony_ci * If a folio has any new buffers, zero them out here, and mark them uptodate
197362306a36Sopenharmony_ci * and dirty so they'll be written out (in order to prevent uninitialised
197462306a36Sopenharmony_ci * block data from leaking). And clear the new bit.
197562306a36Sopenharmony_ci */
197662306a36Sopenharmony_civoid folio_zero_new_buffers(struct folio *folio, size_t from, size_t to)
197762306a36Sopenharmony_ci{
197862306a36Sopenharmony_ci	size_t block_start, block_end;
197962306a36Sopenharmony_ci	struct buffer_head *head, *bh;
198062306a36Sopenharmony_ci
198162306a36Sopenharmony_ci	BUG_ON(!folio_test_locked(folio));
198262306a36Sopenharmony_ci	head = folio_buffers(folio);
198362306a36Sopenharmony_ci	if (!head)
198462306a36Sopenharmony_ci		return;
198562306a36Sopenharmony_ci
198662306a36Sopenharmony_ci	bh = head;
198762306a36Sopenharmony_ci	block_start = 0;
198862306a36Sopenharmony_ci	do {
198962306a36Sopenharmony_ci		block_end = block_start + bh->b_size;
199062306a36Sopenharmony_ci
199162306a36Sopenharmony_ci		if (buffer_new(bh)) {
199262306a36Sopenharmony_ci			if (block_end > from && block_start < to) {
199362306a36Sopenharmony_ci				if (!folio_test_uptodate(folio)) {
199462306a36Sopenharmony_ci					size_t start, xend;
199562306a36Sopenharmony_ci
199662306a36Sopenharmony_ci					start = max(from, block_start);
199762306a36Sopenharmony_ci					xend = min(to, block_end);
199862306a36Sopenharmony_ci
199962306a36Sopenharmony_ci					folio_zero_segment(folio, start, xend);
200062306a36Sopenharmony_ci					set_buffer_uptodate(bh);
200162306a36Sopenharmony_ci				}
200262306a36Sopenharmony_ci
200362306a36Sopenharmony_ci				clear_buffer_new(bh);
200462306a36Sopenharmony_ci				mark_buffer_dirty(bh);
200562306a36Sopenharmony_ci			}
200662306a36Sopenharmony_ci		}
200762306a36Sopenharmony_ci
200862306a36Sopenharmony_ci		block_start = block_end;
200962306a36Sopenharmony_ci		bh = bh->b_this_page;
201062306a36Sopenharmony_ci	} while (bh != head);
201162306a36Sopenharmony_ci}
201262306a36Sopenharmony_ciEXPORT_SYMBOL(folio_zero_new_buffers);
201362306a36Sopenharmony_ci
201462306a36Sopenharmony_cistatic int
201562306a36Sopenharmony_ciiomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
201662306a36Sopenharmony_ci		const struct iomap *iomap)
201762306a36Sopenharmony_ci{
201862306a36Sopenharmony_ci	loff_t offset = block << inode->i_blkbits;
201962306a36Sopenharmony_ci
202062306a36Sopenharmony_ci	bh->b_bdev = iomap->bdev;
202162306a36Sopenharmony_ci
202262306a36Sopenharmony_ci	/*
202362306a36Sopenharmony_ci	 * Block points to offset in file we need to map, iomap contains
202462306a36Sopenharmony_ci	 * the offset at which the map starts. If the map ends before the
202562306a36Sopenharmony_ci	 * current block, then do not map the buffer and let the caller
202662306a36Sopenharmony_ci	 * handle it.
202762306a36Sopenharmony_ci	 */
202862306a36Sopenharmony_ci	if (offset >= iomap->offset + iomap->length)
202962306a36Sopenharmony_ci		return -EIO;
203062306a36Sopenharmony_ci
203162306a36Sopenharmony_ci	switch (iomap->type) {
203262306a36Sopenharmony_ci	case IOMAP_HOLE:
203362306a36Sopenharmony_ci		/*
203462306a36Sopenharmony_ci		 * If the buffer is not up to date or beyond the current EOF,
203562306a36Sopenharmony_ci		 * we need to mark it as new to ensure sub-block zeroing is
203662306a36Sopenharmony_ci		 * executed if necessary.
203762306a36Sopenharmony_ci		 */
203862306a36Sopenharmony_ci		if (!buffer_uptodate(bh) ||
203962306a36Sopenharmony_ci		    (offset >= i_size_read(inode)))
204062306a36Sopenharmony_ci			set_buffer_new(bh);
204162306a36Sopenharmony_ci		return 0;
204262306a36Sopenharmony_ci	case IOMAP_DELALLOC:
204362306a36Sopenharmony_ci		if (!buffer_uptodate(bh) ||
204462306a36Sopenharmony_ci		    (offset >= i_size_read(inode)))
204562306a36Sopenharmony_ci			set_buffer_new(bh);
204662306a36Sopenharmony_ci		set_buffer_uptodate(bh);
204762306a36Sopenharmony_ci		set_buffer_mapped(bh);
204862306a36Sopenharmony_ci		set_buffer_delay(bh);
204962306a36Sopenharmony_ci		return 0;
205062306a36Sopenharmony_ci	case IOMAP_UNWRITTEN:
205162306a36Sopenharmony_ci		/*
205262306a36Sopenharmony_ci		 * For unwritten regions, we always need to ensure that regions
205362306a36Sopenharmony_ci		 * in the block we are not writing to are zeroed. Mark the
205462306a36Sopenharmony_ci		 * buffer as new to ensure this.
205562306a36Sopenharmony_ci		 */
205662306a36Sopenharmony_ci		set_buffer_new(bh);
205762306a36Sopenharmony_ci		set_buffer_unwritten(bh);
205862306a36Sopenharmony_ci		fallthrough;
205962306a36Sopenharmony_ci	case IOMAP_MAPPED:
206062306a36Sopenharmony_ci		if ((iomap->flags & IOMAP_F_NEW) ||
206162306a36Sopenharmony_ci		    offset >= i_size_read(inode)) {
206262306a36Sopenharmony_ci			/*
206362306a36Sopenharmony_ci			 * This can happen if truncating the block device races
206462306a36Sopenharmony_ci			 * with the check in the caller as i_size updates on
206562306a36Sopenharmony_ci			 * block devices aren't synchronized by i_rwsem for
206662306a36Sopenharmony_ci			 * block devices.
206762306a36Sopenharmony_ci			 */
206862306a36Sopenharmony_ci			if (S_ISBLK(inode->i_mode))
206962306a36Sopenharmony_ci				return -EIO;
207062306a36Sopenharmony_ci			set_buffer_new(bh);
207162306a36Sopenharmony_ci		}
207262306a36Sopenharmony_ci		bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
207362306a36Sopenharmony_ci				inode->i_blkbits;
207462306a36Sopenharmony_ci		set_buffer_mapped(bh);
207562306a36Sopenharmony_ci		return 0;
207662306a36Sopenharmony_ci	default:
207762306a36Sopenharmony_ci		WARN_ON_ONCE(1);
207862306a36Sopenharmony_ci		return -EIO;
207962306a36Sopenharmony_ci	}
208062306a36Sopenharmony_ci}
208162306a36Sopenharmony_ci
208262306a36Sopenharmony_ciint __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
208362306a36Sopenharmony_ci		get_block_t *get_block, const struct iomap *iomap)
208462306a36Sopenharmony_ci{
208562306a36Sopenharmony_ci	unsigned from = pos & (PAGE_SIZE - 1);
208662306a36Sopenharmony_ci	unsigned to = from + len;
208762306a36Sopenharmony_ci	struct inode *inode = folio->mapping->host;
208862306a36Sopenharmony_ci	unsigned block_start, block_end;
208962306a36Sopenharmony_ci	sector_t block;
209062306a36Sopenharmony_ci	int err = 0;
209162306a36Sopenharmony_ci	unsigned blocksize, bbits;
209262306a36Sopenharmony_ci	struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
209362306a36Sopenharmony_ci
209462306a36Sopenharmony_ci	BUG_ON(!folio_test_locked(folio));
209562306a36Sopenharmony_ci	BUG_ON(from > PAGE_SIZE);
209662306a36Sopenharmony_ci	BUG_ON(to > PAGE_SIZE);
209762306a36Sopenharmony_ci	BUG_ON(from > to);
209862306a36Sopenharmony_ci
209962306a36Sopenharmony_ci	head = folio_create_buffers(folio, inode, 0);
210062306a36Sopenharmony_ci	blocksize = head->b_size;
210162306a36Sopenharmony_ci	bbits = block_size_bits(blocksize);
210262306a36Sopenharmony_ci
210362306a36Sopenharmony_ci	block = (sector_t)folio->index << (PAGE_SHIFT - bbits);
210462306a36Sopenharmony_ci
210562306a36Sopenharmony_ci	for(bh = head, block_start = 0; bh != head || !block_start;
210662306a36Sopenharmony_ci	    block++, block_start=block_end, bh = bh->b_this_page) {
210762306a36Sopenharmony_ci		block_end = block_start + blocksize;
210862306a36Sopenharmony_ci		if (block_end <= from || block_start >= to) {
210962306a36Sopenharmony_ci			if (folio_test_uptodate(folio)) {
211062306a36Sopenharmony_ci				if (!buffer_uptodate(bh))
211162306a36Sopenharmony_ci					set_buffer_uptodate(bh);
211262306a36Sopenharmony_ci			}
211362306a36Sopenharmony_ci			continue;
211462306a36Sopenharmony_ci		}
211562306a36Sopenharmony_ci		if (buffer_new(bh))
211662306a36Sopenharmony_ci			clear_buffer_new(bh);
211762306a36Sopenharmony_ci		if (!buffer_mapped(bh)) {
211862306a36Sopenharmony_ci			WARN_ON(bh->b_size != blocksize);
211962306a36Sopenharmony_ci			if (get_block)
212062306a36Sopenharmony_ci				err = get_block(inode, block, bh, 1);
212162306a36Sopenharmony_ci			else
212262306a36Sopenharmony_ci				err = iomap_to_bh(inode, block, bh, iomap);
212362306a36Sopenharmony_ci			if (err)
212462306a36Sopenharmony_ci				break;
212562306a36Sopenharmony_ci
212662306a36Sopenharmony_ci			if (buffer_new(bh)) {
212762306a36Sopenharmony_ci				clean_bdev_bh_alias(bh);
212862306a36Sopenharmony_ci				if (folio_test_uptodate(folio)) {
212962306a36Sopenharmony_ci					clear_buffer_new(bh);
213062306a36Sopenharmony_ci					set_buffer_uptodate(bh);
213162306a36Sopenharmony_ci					mark_buffer_dirty(bh);
213262306a36Sopenharmony_ci					continue;
213362306a36Sopenharmony_ci				}
213462306a36Sopenharmony_ci				if (block_end > to || block_start < from)
213562306a36Sopenharmony_ci					folio_zero_segments(folio,
213662306a36Sopenharmony_ci						to, block_end,
213762306a36Sopenharmony_ci						block_start, from);
213862306a36Sopenharmony_ci				continue;
213962306a36Sopenharmony_ci			}
214062306a36Sopenharmony_ci		}
214162306a36Sopenharmony_ci		if (folio_test_uptodate(folio)) {
214262306a36Sopenharmony_ci			if (!buffer_uptodate(bh))
214362306a36Sopenharmony_ci				set_buffer_uptodate(bh);
214462306a36Sopenharmony_ci			continue;
214562306a36Sopenharmony_ci		}
214662306a36Sopenharmony_ci		if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
214762306a36Sopenharmony_ci		    !buffer_unwritten(bh) &&
214862306a36Sopenharmony_ci		     (block_start < from || block_end > to)) {
214962306a36Sopenharmony_ci			bh_read_nowait(bh, 0);
215062306a36Sopenharmony_ci			*wait_bh++=bh;
215162306a36Sopenharmony_ci		}
215262306a36Sopenharmony_ci	}
215362306a36Sopenharmony_ci	/*
215462306a36Sopenharmony_ci	 * If we issued read requests - let them complete.
215562306a36Sopenharmony_ci	 */
215662306a36Sopenharmony_ci	while(wait_bh > wait) {
215762306a36Sopenharmony_ci		wait_on_buffer(*--wait_bh);
215862306a36Sopenharmony_ci		if (!buffer_uptodate(*wait_bh))
215962306a36Sopenharmony_ci			err = -EIO;
216062306a36Sopenharmony_ci	}
216162306a36Sopenharmony_ci	if (unlikely(err))
216262306a36Sopenharmony_ci		folio_zero_new_buffers(folio, from, to);
216362306a36Sopenharmony_ci	return err;
216462306a36Sopenharmony_ci}
216562306a36Sopenharmony_ci
216662306a36Sopenharmony_ciint __block_write_begin(struct page *page, loff_t pos, unsigned len,
216762306a36Sopenharmony_ci		get_block_t *get_block)
216862306a36Sopenharmony_ci{
216962306a36Sopenharmony_ci	return __block_write_begin_int(page_folio(page), pos, len, get_block,
217062306a36Sopenharmony_ci				       NULL);
217162306a36Sopenharmony_ci}
217262306a36Sopenharmony_ciEXPORT_SYMBOL(__block_write_begin);
217362306a36Sopenharmony_ci
217462306a36Sopenharmony_cistatic void __block_commit_write(struct folio *folio, size_t from, size_t to)
217562306a36Sopenharmony_ci{
217662306a36Sopenharmony_ci	size_t block_start, block_end;
217762306a36Sopenharmony_ci	bool partial = false;
217862306a36Sopenharmony_ci	unsigned blocksize;
217962306a36Sopenharmony_ci	struct buffer_head *bh, *head;
218062306a36Sopenharmony_ci
218162306a36Sopenharmony_ci	bh = head = folio_buffers(folio);
218262306a36Sopenharmony_ci	blocksize = bh->b_size;
218362306a36Sopenharmony_ci
218462306a36Sopenharmony_ci	block_start = 0;
218562306a36Sopenharmony_ci	do {
218662306a36Sopenharmony_ci		block_end = block_start + blocksize;
218762306a36Sopenharmony_ci		if (block_end <= from || block_start >= to) {
218862306a36Sopenharmony_ci			if (!buffer_uptodate(bh))
218962306a36Sopenharmony_ci				partial = true;
219062306a36Sopenharmony_ci		} else {
219162306a36Sopenharmony_ci			set_buffer_uptodate(bh);
219262306a36Sopenharmony_ci			mark_buffer_dirty(bh);
219362306a36Sopenharmony_ci		}
219462306a36Sopenharmony_ci		if (buffer_new(bh))
219562306a36Sopenharmony_ci			clear_buffer_new(bh);
219662306a36Sopenharmony_ci
219762306a36Sopenharmony_ci		block_start = block_end;
219862306a36Sopenharmony_ci		bh = bh->b_this_page;
219962306a36Sopenharmony_ci	} while (bh != head);
220062306a36Sopenharmony_ci
220162306a36Sopenharmony_ci	/*
220262306a36Sopenharmony_ci	 * If this is a partial write which happened to make all buffers
220362306a36Sopenharmony_ci	 * uptodate then we can optimize away a bogus read_folio() for
220462306a36Sopenharmony_ci	 * the next read(). Here we 'discover' whether the folio went
220562306a36Sopenharmony_ci	 * uptodate as a result of this (potentially partial) write.
220662306a36Sopenharmony_ci	 */
220762306a36Sopenharmony_ci	if (!partial)
220862306a36Sopenharmony_ci		folio_mark_uptodate(folio);
220962306a36Sopenharmony_ci}
221062306a36Sopenharmony_ci
221162306a36Sopenharmony_ci/*
221262306a36Sopenharmony_ci * block_write_begin takes care of the basic task of block allocation and
221362306a36Sopenharmony_ci * bringing partial write blocks uptodate first.
221462306a36Sopenharmony_ci *
221562306a36Sopenharmony_ci * The filesystem needs to handle block truncation upon failure.
221662306a36Sopenharmony_ci */
221762306a36Sopenharmony_ciint block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
221862306a36Sopenharmony_ci		struct page **pagep, get_block_t *get_block)
221962306a36Sopenharmony_ci{
222062306a36Sopenharmony_ci	pgoff_t index = pos >> PAGE_SHIFT;
222162306a36Sopenharmony_ci	struct page *page;
222262306a36Sopenharmony_ci	int status;
222362306a36Sopenharmony_ci
222462306a36Sopenharmony_ci	page = grab_cache_page_write_begin(mapping, index);
222562306a36Sopenharmony_ci	if (!page)
222662306a36Sopenharmony_ci		return -ENOMEM;
222762306a36Sopenharmony_ci
222862306a36Sopenharmony_ci	status = __block_write_begin(page, pos, len, get_block);
222962306a36Sopenharmony_ci	if (unlikely(status)) {
223062306a36Sopenharmony_ci		unlock_page(page);
223162306a36Sopenharmony_ci		put_page(page);
223262306a36Sopenharmony_ci		page = NULL;
223362306a36Sopenharmony_ci	}
223462306a36Sopenharmony_ci
223562306a36Sopenharmony_ci	*pagep = page;
223662306a36Sopenharmony_ci	return status;
223762306a36Sopenharmony_ci}
223862306a36Sopenharmony_ciEXPORT_SYMBOL(block_write_begin);
223962306a36Sopenharmony_ci
224062306a36Sopenharmony_ciint block_write_end(struct file *file, struct address_space *mapping,
224162306a36Sopenharmony_ci			loff_t pos, unsigned len, unsigned copied,
224262306a36Sopenharmony_ci			struct page *page, void *fsdata)
224362306a36Sopenharmony_ci{
224462306a36Sopenharmony_ci	struct folio *folio = page_folio(page);
224562306a36Sopenharmony_ci	size_t start = pos - folio_pos(folio);
224662306a36Sopenharmony_ci
224762306a36Sopenharmony_ci	if (unlikely(copied < len)) {
224862306a36Sopenharmony_ci		/*
224962306a36Sopenharmony_ci		 * The buffers that were written will now be uptodate, so
225062306a36Sopenharmony_ci		 * we don't have to worry about a read_folio reading them
225162306a36Sopenharmony_ci		 * and overwriting a partial write. However if we have
225262306a36Sopenharmony_ci		 * encountered a short write and only partially written
225362306a36Sopenharmony_ci		 * into a buffer, it will not be marked uptodate, so a
225462306a36Sopenharmony_ci		 * read_folio might come in and destroy our partial write.
225562306a36Sopenharmony_ci		 *
225662306a36Sopenharmony_ci		 * Do the simplest thing, and just treat any short write to a
225762306a36Sopenharmony_ci		 * non uptodate folio as a zero-length write, and force the
225862306a36Sopenharmony_ci		 * caller to redo the whole thing.
225962306a36Sopenharmony_ci		 */
226062306a36Sopenharmony_ci		if (!folio_test_uptodate(folio))
226162306a36Sopenharmony_ci			copied = 0;
226262306a36Sopenharmony_ci
226362306a36Sopenharmony_ci		folio_zero_new_buffers(folio, start+copied, start+len);
226462306a36Sopenharmony_ci	}
226562306a36Sopenharmony_ci	flush_dcache_folio(folio);
226662306a36Sopenharmony_ci
226762306a36Sopenharmony_ci	/* This could be a short (even 0-length) commit */
226862306a36Sopenharmony_ci	__block_commit_write(folio, start, start + copied);
226962306a36Sopenharmony_ci
227062306a36Sopenharmony_ci	return copied;
227162306a36Sopenharmony_ci}
227262306a36Sopenharmony_ciEXPORT_SYMBOL(block_write_end);
227362306a36Sopenharmony_ci
227462306a36Sopenharmony_ciint generic_write_end(struct file *file, struct address_space *mapping,
227562306a36Sopenharmony_ci			loff_t pos, unsigned len, unsigned copied,
227662306a36Sopenharmony_ci			struct page *page, void *fsdata)
227762306a36Sopenharmony_ci{
227862306a36Sopenharmony_ci	struct inode *inode = mapping->host;
227962306a36Sopenharmony_ci	loff_t old_size = inode->i_size;
228062306a36Sopenharmony_ci	bool i_size_changed = false;
228162306a36Sopenharmony_ci
228262306a36Sopenharmony_ci	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
228362306a36Sopenharmony_ci
228462306a36Sopenharmony_ci	/*
228562306a36Sopenharmony_ci	 * No need to use i_size_read() here, the i_size cannot change under us
228662306a36Sopenharmony_ci	 * because we hold i_rwsem.
228762306a36Sopenharmony_ci	 *
228862306a36Sopenharmony_ci	 * But it's important to update i_size while still holding page lock:
228962306a36Sopenharmony_ci	 * page writeout could otherwise come in and zero beyond i_size.
229062306a36Sopenharmony_ci	 */
229162306a36Sopenharmony_ci	if (pos + copied > inode->i_size) {
229262306a36Sopenharmony_ci		i_size_write(inode, pos + copied);
229362306a36Sopenharmony_ci		i_size_changed = true;
229462306a36Sopenharmony_ci	}
229562306a36Sopenharmony_ci
229662306a36Sopenharmony_ci	unlock_page(page);
229762306a36Sopenharmony_ci	put_page(page);
229862306a36Sopenharmony_ci
229962306a36Sopenharmony_ci	if (old_size < pos)
230062306a36Sopenharmony_ci		pagecache_isize_extended(inode, old_size, pos);
230162306a36Sopenharmony_ci	/*
230262306a36Sopenharmony_ci	 * Don't mark the inode dirty under page lock. First, it unnecessarily
230362306a36Sopenharmony_ci	 * makes the holding time of page lock longer. Second, it forces lock
230462306a36Sopenharmony_ci	 * ordering of page lock and transaction start for journaling
230562306a36Sopenharmony_ci	 * filesystems.
230662306a36Sopenharmony_ci	 */
230762306a36Sopenharmony_ci	if (i_size_changed)
230862306a36Sopenharmony_ci		mark_inode_dirty(inode);
230962306a36Sopenharmony_ci	return copied;
231062306a36Sopenharmony_ci}
231162306a36Sopenharmony_ciEXPORT_SYMBOL(generic_write_end);
231262306a36Sopenharmony_ci
231362306a36Sopenharmony_ci/*
231462306a36Sopenharmony_ci * block_is_partially_uptodate checks whether buffers within a folio are
231562306a36Sopenharmony_ci * uptodate or not.
231662306a36Sopenharmony_ci *
231762306a36Sopenharmony_ci * Returns true if all buffers which correspond to the specified part
231862306a36Sopenharmony_ci * of the folio are uptodate.
231962306a36Sopenharmony_ci */
232062306a36Sopenharmony_cibool block_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
232162306a36Sopenharmony_ci{
232262306a36Sopenharmony_ci	unsigned block_start, block_end, blocksize;
232362306a36Sopenharmony_ci	unsigned to;
232462306a36Sopenharmony_ci	struct buffer_head *bh, *head;
232562306a36Sopenharmony_ci	bool ret = true;
232662306a36Sopenharmony_ci
232762306a36Sopenharmony_ci	head = folio_buffers(folio);
232862306a36Sopenharmony_ci	if (!head)
232962306a36Sopenharmony_ci		return false;
233062306a36Sopenharmony_ci	blocksize = head->b_size;
233162306a36Sopenharmony_ci	to = min_t(unsigned, folio_size(folio) - from, count);
233262306a36Sopenharmony_ci	to = from + to;
233362306a36Sopenharmony_ci	if (from < blocksize && to > folio_size(folio) - blocksize)
233462306a36Sopenharmony_ci		return false;
233562306a36Sopenharmony_ci
233662306a36Sopenharmony_ci	bh = head;
233762306a36Sopenharmony_ci	block_start = 0;
233862306a36Sopenharmony_ci	do {
233962306a36Sopenharmony_ci		block_end = block_start + blocksize;
234062306a36Sopenharmony_ci		if (block_end > from && block_start < to) {
234162306a36Sopenharmony_ci			if (!buffer_uptodate(bh)) {
234262306a36Sopenharmony_ci				ret = false;
234362306a36Sopenharmony_ci				break;
234462306a36Sopenharmony_ci			}
234562306a36Sopenharmony_ci			if (block_end >= to)
234662306a36Sopenharmony_ci				break;
234762306a36Sopenharmony_ci		}
234862306a36Sopenharmony_ci		block_start = block_end;
234962306a36Sopenharmony_ci		bh = bh->b_this_page;
235062306a36Sopenharmony_ci	} while (bh != head);
235162306a36Sopenharmony_ci
235262306a36Sopenharmony_ci	return ret;
235362306a36Sopenharmony_ci}
235462306a36Sopenharmony_ciEXPORT_SYMBOL(block_is_partially_uptodate);
235562306a36Sopenharmony_ci
235662306a36Sopenharmony_ci/*
235762306a36Sopenharmony_ci * Generic "read_folio" function for block devices that have the normal
235862306a36Sopenharmony_ci * get_block functionality. This is most of the block device filesystems.
235962306a36Sopenharmony_ci * Reads the folio asynchronously --- the unlock_buffer() and
236062306a36Sopenharmony_ci * set/clear_buffer_uptodate() functions propagate buffer state into the
236162306a36Sopenharmony_ci * folio once IO has completed.
236262306a36Sopenharmony_ci */
236362306a36Sopenharmony_ciint block_read_full_folio(struct folio *folio, get_block_t *get_block)
236462306a36Sopenharmony_ci{
236562306a36Sopenharmony_ci	struct inode *inode = folio->mapping->host;
236662306a36Sopenharmony_ci	sector_t iblock, lblock;
236762306a36Sopenharmony_ci	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
236862306a36Sopenharmony_ci	unsigned int blocksize, bbits;
236962306a36Sopenharmony_ci	int nr, i;
237062306a36Sopenharmony_ci	int fully_mapped = 1;
237162306a36Sopenharmony_ci	bool page_error = false;
237262306a36Sopenharmony_ci	loff_t limit = i_size_read(inode);
237362306a36Sopenharmony_ci
237462306a36Sopenharmony_ci	/* This is needed for ext4. */
237562306a36Sopenharmony_ci	if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode))
237662306a36Sopenharmony_ci		limit = inode->i_sb->s_maxbytes;
237762306a36Sopenharmony_ci
237862306a36Sopenharmony_ci	VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
237962306a36Sopenharmony_ci
238062306a36Sopenharmony_ci	head = folio_create_buffers(folio, inode, 0);
238162306a36Sopenharmony_ci	blocksize = head->b_size;
238262306a36Sopenharmony_ci	bbits = block_size_bits(blocksize);
238362306a36Sopenharmony_ci
238462306a36Sopenharmony_ci	iblock = (sector_t)folio->index << (PAGE_SHIFT - bbits);
238562306a36Sopenharmony_ci	lblock = (limit+blocksize-1) >> bbits;
238662306a36Sopenharmony_ci	bh = head;
238762306a36Sopenharmony_ci	nr = 0;
238862306a36Sopenharmony_ci	i = 0;
238962306a36Sopenharmony_ci
239062306a36Sopenharmony_ci	do {
239162306a36Sopenharmony_ci		if (buffer_uptodate(bh))
239262306a36Sopenharmony_ci			continue;
239362306a36Sopenharmony_ci
239462306a36Sopenharmony_ci		if (!buffer_mapped(bh)) {
239562306a36Sopenharmony_ci			int err = 0;
239662306a36Sopenharmony_ci
239762306a36Sopenharmony_ci			fully_mapped = 0;
239862306a36Sopenharmony_ci			if (iblock < lblock) {
239962306a36Sopenharmony_ci				WARN_ON(bh->b_size != blocksize);
240062306a36Sopenharmony_ci				err = get_block(inode, iblock, bh, 0);
240162306a36Sopenharmony_ci				if (err) {
240262306a36Sopenharmony_ci					folio_set_error(folio);
240362306a36Sopenharmony_ci					page_error = true;
240462306a36Sopenharmony_ci				}
240562306a36Sopenharmony_ci			}
240662306a36Sopenharmony_ci			if (!buffer_mapped(bh)) {
240762306a36Sopenharmony_ci				folio_zero_range(folio, i * blocksize,
240862306a36Sopenharmony_ci						blocksize);
240962306a36Sopenharmony_ci				if (!err)
241062306a36Sopenharmony_ci					set_buffer_uptodate(bh);
241162306a36Sopenharmony_ci				continue;
241262306a36Sopenharmony_ci			}
241362306a36Sopenharmony_ci			/*
241462306a36Sopenharmony_ci			 * get_block() might have updated the buffer
241562306a36Sopenharmony_ci			 * synchronously
241662306a36Sopenharmony_ci			 */
241762306a36Sopenharmony_ci			if (buffer_uptodate(bh))
241862306a36Sopenharmony_ci				continue;
241962306a36Sopenharmony_ci		}
242062306a36Sopenharmony_ci		arr[nr++] = bh;
242162306a36Sopenharmony_ci	} while (i++, iblock++, (bh = bh->b_this_page) != head);
242262306a36Sopenharmony_ci
242362306a36Sopenharmony_ci	if (fully_mapped)
242462306a36Sopenharmony_ci		folio_set_mappedtodisk(folio);
242562306a36Sopenharmony_ci
242662306a36Sopenharmony_ci	if (!nr) {
242762306a36Sopenharmony_ci		/*
242862306a36Sopenharmony_ci		 * All buffers are uptodate - we can set the folio uptodate
242962306a36Sopenharmony_ci		 * as well. But not if get_block() returned an error.
243062306a36Sopenharmony_ci		 */
243162306a36Sopenharmony_ci		if (!page_error)
243262306a36Sopenharmony_ci			folio_mark_uptodate(folio);
243362306a36Sopenharmony_ci		folio_unlock(folio);
243462306a36Sopenharmony_ci		return 0;
243562306a36Sopenharmony_ci	}
243662306a36Sopenharmony_ci
243762306a36Sopenharmony_ci	/* Stage two: lock the buffers */
243862306a36Sopenharmony_ci	for (i = 0; i < nr; i++) {
243962306a36Sopenharmony_ci		bh = arr[i];
244062306a36Sopenharmony_ci		lock_buffer(bh);
244162306a36Sopenharmony_ci		mark_buffer_async_read(bh);
244262306a36Sopenharmony_ci	}
244362306a36Sopenharmony_ci
244462306a36Sopenharmony_ci	/*
244562306a36Sopenharmony_ci	 * Stage 3: start the IO.  Check for uptodateness
244662306a36Sopenharmony_ci	 * inside the buffer lock in case another process reading
244762306a36Sopenharmony_ci	 * the underlying blockdev brought it uptodate (the sct fix).
244862306a36Sopenharmony_ci	 */
244962306a36Sopenharmony_ci	for (i = 0; i < nr; i++) {
245062306a36Sopenharmony_ci		bh = arr[i];
245162306a36Sopenharmony_ci		if (buffer_uptodate(bh))
245262306a36Sopenharmony_ci			end_buffer_async_read(bh, 1);
245362306a36Sopenharmony_ci		else
245462306a36Sopenharmony_ci			submit_bh(REQ_OP_READ, bh);
245562306a36Sopenharmony_ci	}
245662306a36Sopenharmony_ci	return 0;
245762306a36Sopenharmony_ci}
245862306a36Sopenharmony_ciEXPORT_SYMBOL(block_read_full_folio);
245962306a36Sopenharmony_ci
246062306a36Sopenharmony_ci/* utility function for filesystems that need to do work on expanding
246162306a36Sopenharmony_ci * truncates.  Uses filesystem pagecache writes to allow the filesystem to
246262306a36Sopenharmony_ci * deal with the hole.
246362306a36Sopenharmony_ci */
246462306a36Sopenharmony_ciint generic_cont_expand_simple(struct inode *inode, loff_t size)
246562306a36Sopenharmony_ci{
246662306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
246762306a36Sopenharmony_ci	const struct address_space_operations *aops = mapping->a_ops;
246862306a36Sopenharmony_ci	struct page *page;
246962306a36Sopenharmony_ci	void *fsdata = NULL;
247062306a36Sopenharmony_ci	int err;
247162306a36Sopenharmony_ci
247262306a36Sopenharmony_ci	err = inode_newsize_ok(inode, size);
247362306a36Sopenharmony_ci	if (err)
247462306a36Sopenharmony_ci		goto out;
247562306a36Sopenharmony_ci
247662306a36Sopenharmony_ci	err = aops->write_begin(NULL, mapping, size, 0, &page, &fsdata);
247762306a36Sopenharmony_ci	if (err)
247862306a36Sopenharmony_ci		goto out;
247962306a36Sopenharmony_ci
248062306a36Sopenharmony_ci	err = aops->write_end(NULL, mapping, size, 0, 0, page, fsdata);
248162306a36Sopenharmony_ci	BUG_ON(err > 0);
248262306a36Sopenharmony_ci
248362306a36Sopenharmony_ciout:
248462306a36Sopenharmony_ci	return err;
248562306a36Sopenharmony_ci}
248662306a36Sopenharmony_ciEXPORT_SYMBOL(generic_cont_expand_simple);
248762306a36Sopenharmony_ci
248862306a36Sopenharmony_cistatic int cont_expand_zero(struct file *file, struct address_space *mapping,
248962306a36Sopenharmony_ci			    loff_t pos, loff_t *bytes)
249062306a36Sopenharmony_ci{
249162306a36Sopenharmony_ci	struct inode *inode = mapping->host;
249262306a36Sopenharmony_ci	const struct address_space_operations *aops = mapping->a_ops;
249362306a36Sopenharmony_ci	unsigned int blocksize = i_blocksize(inode);
249462306a36Sopenharmony_ci	struct page *page;
249562306a36Sopenharmony_ci	void *fsdata = NULL;
249662306a36Sopenharmony_ci	pgoff_t index, curidx;
249762306a36Sopenharmony_ci	loff_t curpos;
249862306a36Sopenharmony_ci	unsigned zerofrom, offset, len;
249962306a36Sopenharmony_ci	int err = 0;
250062306a36Sopenharmony_ci
250162306a36Sopenharmony_ci	index = pos >> PAGE_SHIFT;
250262306a36Sopenharmony_ci	offset = pos & ~PAGE_MASK;
250362306a36Sopenharmony_ci
250462306a36Sopenharmony_ci	while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) {
250562306a36Sopenharmony_ci		zerofrom = curpos & ~PAGE_MASK;
250662306a36Sopenharmony_ci		if (zerofrom & (blocksize-1)) {
250762306a36Sopenharmony_ci			*bytes |= (blocksize-1);
250862306a36Sopenharmony_ci			(*bytes)++;
250962306a36Sopenharmony_ci		}
251062306a36Sopenharmony_ci		len = PAGE_SIZE - zerofrom;
251162306a36Sopenharmony_ci
251262306a36Sopenharmony_ci		err = aops->write_begin(file, mapping, curpos, len,
251362306a36Sopenharmony_ci					    &page, &fsdata);
251462306a36Sopenharmony_ci		if (err)
251562306a36Sopenharmony_ci			goto out;
251662306a36Sopenharmony_ci		zero_user(page, zerofrom, len);
251762306a36Sopenharmony_ci		err = aops->write_end(file, mapping, curpos, len, len,
251862306a36Sopenharmony_ci						page, fsdata);
251962306a36Sopenharmony_ci		if (err < 0)
252062306a36Sopenharmony_ci			goto out;
252162306a36Sopenharmony_ci		BUG_ON(err != len);
252262306a36Sopenharmony_ci		err = 0;
252362306a36Sopenharmony_ci
252462306a36Sopenharmony_ci		balance_dirty_pages_ratelimited(mapping);
252562306a36Sopenharmony_ci
252662306a36Sopenharmony_ci		if (fatal_signal_pending(current)) {
252762306a36Sopenharmony_ci			err = -EINTR;
252862306a36Sopenharmony_ci			goto out;
252962306a36Sopenharmony_ci		}
253062306a36Sopenharmony_ci	}
253162306a36Sopenharmony_ci
253262306a36Sopenharmony_ci	/* page covers the boundary, find the boundary offset */
253362306a36Sopenharmony_ci	if (index == curidx) {
253462306a36Sopenharmony_ci		zerofrom = curpos & ~PAGE_MASK;
253562306a36Sopenharmony_ci		/* if we will expand the thing last block will be filled */
253662306a36Sopenharmony_ci		if (offset <= zerofrom) {
253762306a36Sopenharmony_ci			goto out;
253862306a36Sopenharmony_ci		}
253962306a36Sopenharmony_ci		if (zerofrom & (blocksize-1)) {
254062306a36Sopenharmony_ci			*bytes |= (blocksize-1);
254162306a36Sopenharmony_ci			(*bytes)++;
254262306a36Sopenharmony_ci		}
254362306a36Sopenharmony_ci		len = offset - zerofrom;
254462306a36Sopenharmony_ci
254562306a36Sopenharmony_ci		err = aops->write_begin(file, mapping, curpos, len,
254662306a36Sopenharmony_ci					    &page, &fsdata);
254762306a36Sopenharmony_ci		if (err)
254862306a36Sopenharmony_ci			goto out;
254962306a36Sopenharmony_ci		zero_user(page, zerofrom, len);
255062306a36Sopenharmony_ci		err = aops->write_end(file, mapping, curpos, len, len,
255162306a36Sopenharmony_ci						page, fsdata);
255262306a36Sopenharmony_ci		if (err < 0)
255362306a36Sopenharmony_ci			goto out;
255462306a36Sopenharmony_ci		BUG_ON(err != len);
255562306a36Sopenharmony_ci		err = 0;
255662306a36Sopenharmony_ci	}
255762306a36Sopenharmony_ciout:
255862306a36Sopenharmony_ci	return err;
255962306a36Sopenharmony_ci}
256062306a36Sopenharmony_ci
256162306a36Sopenharmony_ci/*
256262306a36Sopenharmony_ci * For moronic filesystems that do not allow holes in file.
256362306a36Sopenharmony_ci * We may have to extend the file.
256462306a36Sopenharmony_ci */
256562306a36Sopenharmony_ciint cont_write_begin(struct file *file, struct address_space *mapping,
256662306a36Sopenharmony_ci			loff_t pos, unsigned len,
256762306a36Sopenharmony_ci			struct page **pagep, void **fsdata,
256862306a36Sopenharmony_ci			get_block_t *get_block, loff_t *bytes)
256962306a36Sopenharmony_ci{
257062306a36Sopenharmony_ci	struct inode *inode = mapping->host;
257162306a36Sopenharmony_ci	unsigned int blocksize = i_blocksize(inode);
257262306a36Sopenharmony_ci	unsigned int zerofrom;
257362306a36Sopenharmony_ci	int err;
257462306a36Sopenharmony_ci
257562306a36Sopenharmony_ci	err = cont_expand_zero(file, mapping, pos, bytes);
257662306a36Sopenharmony_ci	if (err)
257762306a36Sopenharmony_ci		return err;
257862306a36Sopenharmony_ci
257962306a36Sopenharmony_ci	zerofrom = *bytes & ~PAGE_MASK;
258062306a36Sopenharmony_ci	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
258162306a36Sopenharmony_ci		*bytes |= (blocksize-1);
258262306a36Sopenharmony_ci		(*bytes)++;
258362306a36Sopenharmony_ci	}
258462306a36Sopenharmony_ci
258562306a36Sopenharmony_ci	return block_write_begin(mapping, pos, len, pagep, get_block);
258662306a36Sopenharmony_ci}
258762306a36Sopenharmony_ciEXPORT_SYMBOL(cont_write_begin);
258862306a36Sopenharmony_ci
258962306a36Sopenharmony_civoid block_commit_write(struct page *page, unsigned from, unsigned to)
259062306a36Sopenharmony_ci{
259162306a36Sopenharmony_ci	struct folio *folio = page_folio(page);
259262306a36Sopenharmony_ci	__block_commit_write(folio, from, to);
259362306a36Sopenharmony_ci}
259462306a36Sopenharmony_ciEXPORT_SYMBOL(block_commit_write);
259562306a36Sopenharmony_ci
259662306a36Sopenharmony_ci/*
259762306a36Sopenharmony_ci * block_page_mkwrite() is not allowed to change the file size as it gets
259862306a36Sopenharmony_ci * called from a page fault handler when a page is first dirtied. Hence we must
259962306a36Sopenharmony_ci * be careful to check for EOF conditions here. We set the page up correctly
260062306a36Sopenharmony_ci * for a written page which means we get ENOSPC checking when writing into
260162306a36Sopenharmony_ci * holes and correct delalloc and unwritten extent mapping on filesystems that
260262306a36Sopenharmony_ci * support these features.
260362306a36Sopenharmony_ci *
260462306a36Sopenharmony_ci * We are not allowed to take the i_mutex here so we have to play games to
260562306a36Sopenharmony_ci * protect against truncate races as the page could now be beyond EOF.  Because
260662306a36Sopenharmony_ci * truncate writes the inode size before removing pages, once we have the
260762306a36Sopenharmony_ci * page lock we can determine safely if the page is beyond EOF. If it is not
260862306a36Sopenharmony_ci * beyond EOF, then the page is guaranteed safe against truncation until we
260962306a36Sopenharmony_ci * unlock the page.
261062306a36Sopenharmony_ci *
261162306a36Sopenharmony_ci * Direct callers of this function should protect against filesystem freezing
261262306a36Sopenharmony_ci * using sb_start_pagefault() - sb_end_pagefault() functions.
261362306a36Sopenharmony_ci */
261462306a36Sopenharmony_ciint block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
261562306a36Sopenharmony_ci			 get_block_t get_block)
261662306a36Sopenharmony_ci{
261762306a36Sopenharmony_ci	struct folio *folio = page_folio(vmf->page);
261862306a36Sopenharmony_ci	struct inode *inode = file_inode(vma->vm_file);
261962306a36Sopenharmony_ci	unsigned long end;
262062306a36Sopenharmony_ci	loff_t size;
262162306a36Sopenharmony_ci	int ret;
262262306a36Sopenharmony_ci
262362306a36Sopenharmony_ci	folio_lock(folio);
262462306a36Sopenharmony_ci	size = i_size_read(inode);
262562306a36Sopenharmony_ci	if ((folio->mapping != inode->i_mapping) ||
262662306a36Sopenharmony_ci	    (folio_pos(folio) >= size)) {
262762306a36Sopenharmony_ci		/* We overload EFAULT to mean page got truncated */
262862306a36Sopenharmony_ci		ret = -EFAULT;
262962306a36Sopenharmony_ci		goto out_unlock;
263062306a36Sopenharmony_ci	}
263162306a36Sopenharmony_ci
263262306a36Sopenharmony_ci	end = folio_size(folio);
263362306a36Sopenharmony_ci	/* folio is wholly or partially inside EOF */
263462306a36Sopenharmony_ci	if (folio_pos(folio) + end > size)
263562306a36Sopenharmony_ci		end = size - folio_pos(folio);
263662306a36Sopenharmony_ci
263762306a36Sopenharmony_ci	ret = __block_write_begin_int(folio, 0, end, get_block, NULL);
263862306a36Sopenharmony_ci	if (unlikely(ret))
263962306a36Sopenharmony_ci		goto out_unlock;
264062306a36Sopenharmony_ci
264162306a36Sopenharmony_ci	__block_commit_write(folio, 0, end);
264262306a36Sopenharmony_ci
264362306a36Sopenharmony_ci	folio_mark_dirty(folio);
264462306a36Sopenharmony_ci	folio_wait_stable(folio);
264562306a36Sopenharmony_ci	return 0;
264662306a36Sopenharmony_ciout_unlock:
264762306a36Sopenharmony_ci	folio_unlock(folio);
264862306a36Sopenharmony_ci	return ret;
264962306a36Sopenharmony_ci}
265062306a36Sopenharmony_ciEXPORT_SYMBOL(block_page_mkwrite);
265162306a36Sopenharmony_ci
265262306a36Sopenharmony_ciint block_truncate_page(struct address_space *mapping,
265362306a36Sopenharmony_ci			loff_t from, get_block_t *get_block)
265462306a36Sopenharmony_ci{
265562306a36Sopenharmony_ci	pgoff_t index = from >> PAGE_SHIFT;
265662306a36Sopenharmony_ci	unsigned blocksize;
265762306a36Sopenharmony_ci	sector_t iblock;
265862306a36Sopenharmony_ci	size_t offset, length, pos;
265962306a36Sopenharmony_ci	struct inode *inode = mapping->host;
266062306a36Sopenharmony_ci	struct folio *folio;
266162306a36Sopenharmony_ci	struct buffer_head *bh;
266262306a36Sopenharmony_ci	int err = 0;
266362306a36Sopenharmony_ci
266462306a36Sopenharmony_ci	blocksize = i_blocksize(inode);
266562306a36Sopenharmony_ci	length = from & (blocksize - 1);
266662306a36Sopenharmony_ci
266762306a36Sopenharmony_ci	/* Block boundary? Nothing to do */
266862306a36Sopenharmony_ci	if (!length)
266962306a36Sopenharmony_ci		return 0;
267062306a36Sopenharmony_ci
267162306a36Sopenharmony_ci	length = blocksize - length;
267262306a36Sopenharmony_ci	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
267362306a36Sopenharmony_ci
267462306a36Sopenharmony_ci	folio = filemap_grab_folio(mapping, index);
267562306a36Sopenharmony_ci	if (IS_ERR(folio))
267662306a36Sopenharmony_ci		return PTR_ERR(folio);
267762306a36Sopenharmony_ci
267862306a36Sopenharmony_ci	bh = folio_buffers(folio);
267962306a36Sopenharmony_ci	if (!bh) {
268062306a36Sopenharmony_ci		folio_create_empty_buffers(folio, blocksize, 0);
268162306a36Sopenharmony_ci		bh = folio_buffers(folio);
268262306a36Sopenharmony_ci	}
268362306a36Sopenharmony_ci
268462306a36Sopenharmony_ci	/* Find the buffer that contains "offset" */
268562306a36Sopenharmony_ci	offset = offset_in_folio(folio, from);
268662306a36Sopenharmony_ci	pos = blocksize;
268762306a36Sopenharmony_ci	while (offset >= pos) {
268862306a36Sopenharmony_ci		bh = bh->b_this_page;
268962306a36Sopenharmony_ci		iblock++;
269062306a36Sopenharmony_ci		pos += blocksize;
269162306a36Sopenharmony_ci	}
269262306a36Sopenharmony_ci
269362306a36Sopenharmony_ci	if (!buffer_mapped(bh)) {
269462306a36Sopenharmony_ci		WARN_ON(bh->b_size != blocksize);
269562306a36Sopenharmony_ci		err = get_block(inode, iblock, bh, 0);
269662306a36Sopenharmony_ci		if (err)
269762306a36Sopenharmony_ci			goto unlock;
269862306a36Sopenharmony_ci		/* unmapped? It's a hole - nothing to do */
269962306a36Sopenharmony_ci		if (!buffer_mapped(bh))
270062306a36Sopenharmony_ci			goto unlock;
270162306a36Sopenharmony_ci	}
270262306a36Sopenharmony_ci
270362306a36Sopenharmony_ci	/* Ok, it's mapped. Make sure it's up-to-date */
270462306a36Sopenharmony_ci	if (folio_test_uptodate(folio))
270562306a36Sopenharmony_ci		set_buffer_uptodate(bh);
270662306a36Sopenharmony_ci
270762306a36Sopenharmony_ci	if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
270862306a36Sopenharmony_ci		err = bh_read(bh, 0);
270962306a36Sopenharmony_ci		/* Uhhuh. Read error. Complain and punt. */
271062306a36Sopenharmony_ci		if (err < 0)
271162306a36Sopenharmony_ci			goto unlock;
271262306a36Sopenharmony_ci	}
271362306a36Sopenharmony_ci
271462306a36Sopenharmony_ci	folio_zero_range(folio, offset, length);
271562306a36Sopenharmony_ci	mark_buffer_dirty(bh);
271662306a36Sopenharmony_ci
271762306a36Sopenharmony_ciunlock:
271862306a36Sopenharmony_ci	folio_unlock(folio);
271962306a36Sopenharmony_ci	folio_put(folio);
272062306a36Sopenharmony_ci
272162306a36Sopenharmony_ci	return err;
272262306a36Sopenharmony_ci}
272362306a36Sopenharmony_ciEXPORT_SYMBOL(block_truncate_page);
272462306a36Sopenharmony_ci
272562306a36Sopenharmony_ci/*
272662306a36Sopenharmony_ci * The generic ->writepage function for buffer-backed address_spaces
272762306a36Sopenharmony_ci */
272862306a36Sopenharmony_ciint block_write_full_page(struct page *page, get_block_t *get_block,
272962306a36Sopenharmony_ci			struct writeback_control *wbc)
273062306a36Sopenharmony_ci{
273162306a36Sopenharmony_ci	struct folio *folio = page_folio(page);
273262306a36Sopenharmony_ci	struct inode * const inode = folio->mapping->host;
273362306a36Sopenharmony_ci	loff_t i_size = i_size_read(inode);
273462306a36Sopenharmony_ci
273562306a36Sopenharmony_ci	/* Is the folio fully inside i_size? */
273662306a36Sopenharmony_ci	if (folio_pos(folio) + folio_size(folio) <= i_size)
273762306a36Sopenharmony_ci		return __block_write_full_folio(inode, folio, get_block, wbc,
273862306a36Sopenharmony_ci					       end_buffer_async_write);
273962306a36Sopenharmony_ci
274062306a36Sopenharmony_ci	/* Is the folio fully outside i_size? (truncate in progress) */
274162306a36Sopenharmony_ci	if (folio_pos(folio) >= i_size) {
274262306a36Sopenharmony_ci		folio_unlock(folio);
274362306a36Sopenharmony_ci		return 0; /* don't care */
274462306a36Sopenharmony_ci	}
274562306a36Sopenharmony_ci
274662306a36Sopenharmony_ci	/*
274762306a36Sopenharmony_ci	 * The folio straddles i_size.  It must be zeroed out on each and every
274862306a36Sopenharmony_ci	 * writepage invocation because it may be mmapped.  "A file is mapped
274962306a36Sopenharmony_ci	 * in multiples of the page size.  For a file that is not a multiple of
275062306a36Sopenharmony_ci	 * the page size, the remaining memory is zeroed when mapped, and
275162306a36Sopenharmony_ci	 * writes to that region are not written out to the file."
275262306a36Sopenharmony_ci	 */
275362306a36Sopenharmony_ci	folio_zero_segment(folio, offset_in_folio(folio, i_size),
275462306a36Sopenharmony_ci			folio_size(folio));
275562306a36Sopenharmony_ci	return __block_write_full_folio(inode, folio, get_block, wbc,
275662306a36Sopenharmony_ci			end_buffer_async_write);
275762306a36Sopenharmony_ci}
275862306a36Sopenharmony_ciEXPORT_SYMBOL(block_write_full_page);
275962306a36Sopenharmony_ci
276062306a36Sopenharmony_cisector_t generic_block_bmap(struct address_space *mapping, sector_t block,
276162306a36Sopenharmony_ci			    get_block_t *get_block)
276262306a36Sopenharmony_ci{
276362306a36Sopenharmony_ci	struct inode *inode = mapping->host;
276462306a36Sopenharmony_ci	struct buffer_head tmp = {
276562306a36Sopenharmony_ci		.b_size = i_blocksize(inode),
276662306a36Sopenharmony_ci	};
276762306a36Sopenharmony_ci
276862306a36Sopenharmony_ci	get_block(inode, block, &tmp, 0);
276962306a36Sopenharmony_ci	return tmp.b_blocknr;
277062306a36Sopenharmony_ci}
277162306a36Sopenharmony_ciEXPORT_SYMBOL(generic_block_bmap);
277262306a36Sopenharmony_ci
277362306a36Sopenharmony_cistatic void end_bio_bh_io_sync(struct bio *bio)
277462306a36Sopenharmony_ci{
277562306a36Sopenharmony_ci	struct buffer_head *bh = bio->bi_private;
277662306a36Sopenharmony_ci
277762306a36Sopenharmony_ci	if (unlikely(bio_flagged(bio, BIO_QUIET)))
277862306a36Sopenharmony_ci		set_bit(BH_Quiet, &bh->b_state);
277962306a36Sopenharmony_ci
278062306a36Sopenharmony_ci	bh->b_end_io(bh, !bio->bi_status);
278162306a36Sopenharmony_ci	bio_put(bio);
278262306a36Sopenharmony_ci}
278362306a36Sopenharmony_ci
278462306a36Sopenharmony_cistatic void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
278562306a36Sopenharmony_ci			  struct writeback_control *wbc)
278662306a36Sopenharmony_ci{
278762306a36Sopenharmony_ci	const enum req_op op = opf & REQ_OP_MASK;
278862306a36Sopenharmony_ci	struct bio *bio;
278962306a36Sopenharmony_ci
279062306a36Sopenharmony_ci	BUG_ON(!buffer_locked(bh));
279162306a36Sopenharmony_ci	BUG_ON(!buffer_mapped(bh));
279262306a36Sopenharmony_ci	BUG_ON(!bh->b_end_io);
279362306a36Sopenharmony_ci	BUG_ON(buffer_delay(bh));
279462306a36Sopenharmony_ci	BUG_ON(buffer_unwritten(bh));
279562306a36Sopenharmony_ci
279662306a36Sopenharmony_ci	/*
279762306a36Sopenharmony_ci	 * Only clear out a write error when rewriting
279862306a36Sopenharmony_ci	 */
279962306a36Sopenharmony_ci	if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
280062306a36Sopenharmony_ci		clear_buffer_write_io_error(bh);
280162306a36Sopenharmony_ci
280262306a36Sopenharmony_ci	if (buffer_meta(bh))
280362306a36Sopenharmony_ci		opf |= REQ_META;
280462306a36Sopenharmony_ci	if (buffer_prio(bh))
280562306a36Sopenharmony_ci		opf |= REQ_PRIO;
280662306a36Sopenharmony_ci
280762306a36Sopenharmony_ci	bio = bio_alloc(bh->b_bdev, 1, opf, GFP_NOIO);
280862306a36Sopenharmony_ci
280962306a36Sopenharmony_ci	fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
281062306a36Sopenharmony_ci
281162306a36Sopenharmony_ci	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
281262306a36Sopenharmony_ci
281362306a36Sopenharmony_ci	__bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
281462306a36Sopenharmony_ci
281562306a36Sopenharmony_ci	bio->bi_end_io = end_bio_bh_io_sync;
281662306a36Sopenharmony_ci	bio->bi_private = bh;
281762306a36Sopenharmony_ci
281862306a36Sopenharmony_ci	/* Take care of bh's that straddle the end of the device */
281962306a36Sopenharmony_ci	guard_bio_eod(bio);
282062306a36Sopenharmony_ci
282162306a36Sopenharmony_ci	if (wbc) {
282262306a36Sopenharmony_ci		wbc_init_bio(wbc, bio);
282362306a36Sopenharmony_ci		wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size);
282462306a36Sopenharmony_ci	}
282562306a36Sopenharmony_ci
282662306a36Sopenharmony_ci	submit_bio(bio);
282762306a36Sopenharmony_ci}
282862306a36Sopenharmony_ci
282962306a36Sopenharmony_civoid submit_bh(blk_opf_t opf, struct buffer_head *bh)
283062306a36Sopenharmony_ci{
283162306a36Sopenharmony_ci	submit_bh_wbc(opf, bh, NULL);
283262306a36Sopenharmony_ci}
283362306a36Sopenharmony_ciEXPORT_SYMBOL(submit_bh);
283462306a36Sopenharmony_ci
283562306a36Sopenharmony_civoid write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags)
283662306a36Sopenharmony_ci{
283762306a36Sopenharmony_ci	lock_buffer(bh);
283862306a36Sopenharmony_ci	if (!test_clear_buffer_dirty(bh)) {
283962306a36Sopenharmony_ci		unlock_buffer(bh);
284062306a36Sopenharmony_ci		return;
284162306a36Sopenharmony_ci	}
284262306a36Sopenharmony_ci	bh->b_end_io = end_buffer_write_sync;
284362306a36Sopenharmony_ci	get_bh(bh);
284462306a36Sopenharmony_ci	submit_bh(REQ_OP_WRITE | op_flags, bh);
284562306a36Sopenharmony_ci}
284662306a36Sopenharmony_ciEXPORT_SYMBOL(write_dirty_buffer);
284762306a36Sopenharmony_ci
284862306a36Sopenharmony_ci/*
284962306a36Sopenharmony_ci * For a data-integrity writeout, we need to wait upon any in-progress I/O
285062306a36Sopenharmony_ci * and then start new I/O and then wait upon it.  The caller must have a ref on
285162306a36Sopenharmony_ci * the buffer_head.
285262306a36Sopenharmony_ci */
285362306a36Sopenharmony_ciint __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags)
285462306a36Sopenharmony_ci{
285562306a36Sopenharmony_ci	WARN_ON(atomic_read(&bh->b_count) < 1);
285662306a36Sopenharmony_ci	lock_buffer(bh);
285762306a36Sopenharmony_ci	if (test_clear_buffer_dirty(bh)) {
285862306a36Sopenharmony_ci		/*
285962306a36Sopenharmony_ci		 * The bh should be mapped, but it might not be if the
286062306a36Sopenharmony_ci		 * device was hot-removed. Not much we can do but fail the I/O.
286162306a36Sopenharmony_ci		 */
286262306a36Sopenharmony_ci		if (!buffer_mapped(bh)) {
286362306a36Sopenharmony_ci			unlock_buffer(bh);
286462306a36Sopenharmony_ci			return -EIO;
286562306a36Sopenharmony_ci		}
286662306a36Sopenharmony_ci
286762306a36Sopenharmony_ci		get_bh(bh);
286862306a36Sopenharmony_ci		bh->b_end_io = end_buffer_write_sync;
286962306a36Sopenharmony_ci		submit_bh(REQ_OP_WRITE | op_flags, bh);
287062306a36Sopenharmony_ci		wait_on_buffer(bh);
287162306a36Sopenharmony_ci		if (!buffer_uptodate(bh))
287262306a36Sopenharmony_ci			return -EIO;
287362306a36Sopenharmony_ci	} else {
287462306a36Sopenharmony_ci		unlock_buffer(bh);
287562306a36Sopenharmony_ci	}
287662306a36Sopenharmony_ci	return 0;
287762306a36Sopenharmony_ci}
287862306a36Sopenharmony_ciEXPORT_SYMBOL(__sync_dirty_buffer);
287962306a36Sopenharmony_ci
288062306a36Sopenharmony_ciint sync_dirty_buffer(struct buffer_head *bh)
288162306a36Sopenharmony_ci{
288262306a36Sopenharmony_ci	return __sync_dirty_buffer(bh, REQ_SYNC);
288362306a36Sopenharmony_ci}
288462306a36Sopenharmony_ciEXPORT_SYMBOL(sync_dirty_buffer);
288562306a36Sopenharmony_ci
288662306a36Sopenharmony_ci/*
288762306a36Sopenharmony_ci * try_to_free_buffers() checks if all the buffers on this particular folio
288862306a36Sopenharmony_ci * are unused, and releases them if so.
288962306a36Sopenharmony_ci *
289062306a36Sopenharmony_ci * Exclusion against try_to_free_buffers may be obtained by either
289162306a36Sopenharmony_ci * locking the folio or by holding its mapping's private_lock.
289262306a36Sopenharmony_ci *
289362306a36Sopenharmony_ci * If the folio is dirty but all the buffers are clean then we need to
289462306a36Sopenharmony_ci * be sure to mark the folio clean as well.  This is because the folio
289562306a36Sopenharmony_ci * may be against a block device, and a later reattachment of buffers
289662306a36Sopenharmony_ci * to a dirty folio will set *all* buffers dirty.  Which would corrupt
289762306a36Sopenharmony_ci * filesystem data on the same device.
289862306a36Sopenharmony_ci *
289962306a36Sopenharmony_ci * The same applies to regular filesystem folios: if all the buffers are
290062306a36Sopenharmony_ci * clean then we set the folio clean and proceed.  To do that, we require
290162306a36Sopenharmony_ci * total exclusion from block_dirty_folio().  That is obtained with
290262306a36Sopenharmony_ci * private_lock.
290362306a36Sopenharmony_ci *
290462306a36Sopenharmony_ci * try_to_free_buffers() is non-blocking.
290562306a36Sopenharmony_ci */
290662306a36Sopenharmony_cistatic inline int buffer_busy(struct buffer_head *bh)
290762306a36Sopenharmony_ci{
290862306a36Sopenharmony_ci	return atomic_read(&bh->b_count) |
290962306a36Sopenharmony_ci		(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
291062306a36Sopenharmony_ci}
291162306a36Sopenharmony_ci
291262306a36Sopenharmony_cistatic bool
291362306a36Sopenharmony_cidrop_buffers(struct folio *folio, struct buffer_head **buffers_to_free)
291462306a36Sopenharmony_ci{
291562306a36Sopenharmony_ci	struct buffer_head *head = folio_buffers(folio);
291662306a36Sopenharmony_ci	struct buffer_head *bh;
291762306a36Sopenharmony_ci
291862306a36Sopenharmony_ci	bh = head;
291962306a36Sopenharmony_ci	do {
292062306a36Sopenharmony_ci		if (buffer_busy(bh))
292162306a36Sopenharmony_ci			goto failed;
292262306a36Sopenharmony_ci		bh = bh->b_this_page;
292362306a36Sopenharmony_ci	} while (bh != head);
292462306a36Sopenharmony_ci
292562306a36Sopenharmony_ci	do {
292662306a36Sopenharmony_ci		struct buffer_head *next = bh->b_this_page;
292762306a36Sopenharmony_ci
292862306a36Sopenharmony_ci		if (bh->b_assoc_map)
292962306a36Sopenharmony_ci			__remove_assoc_queue(bh);
293062306a36Sopenharmony_ci		bh = next;
293162306a36Sopenharmony_ci	} while (bh != head);
293262306a36Sopenharmony_ci	*buffers_to_free = head;
293362306a36Sopenharmony_ci	folio_detach_private(folio);
293462306a36Sopenharmony_ci	return true;
293562306a36Sopenharmony_cifailed:
293662306a36Sopenharmony_ci	return false;
293762306a36Sopenharmony_ci}
293862306a36Sopenharmony_ci
293962306a36Sopenharmony_cibool try_to_free_buffers(struct folio *folio)
294062306a36Sopenharmony_ci{
294162306a36Sopenharmony_ci	struct address_space * const mapping = folio->mapping;
294262306a36Sopenharmony_ci	struct buffer_head *buffers_to_free = NULL;
294362306a36Sopenharmony_ci	bool ret = 0;
294462306a36Sopenharmony_ci
294562306a36Sopenharmony_ci	BUG_ON(!folio_test_locked(folio));
294662306a36Sopenharmony_ci	if (folio_test_writeback(folio))
294762306a36Sopenharmony_ci		return false;
294862306a36Sopenharmony_ci
294962306a36Sopenharmony_ci	if (mapping == NULL) {		/* can this still happen? */
295062306a36Sopenharmony_ci		ret = drop_buffers(folio, &buffers_to_free);
295162306a36Sopenharmony_ci		goto out;
295262306a36Sopenharmony_ci	}
295362306a36Sopenharmony_ci
295462306a36Sopenharmony_ci	spin_lock(&mapping->private_lock);
295562306a36Sopenharmony_ci	ret = drop_buffers(folio, &buffers_to_free);
295662306a36Sopenharmony_ci
295762306a36Sopenharmony_ci	/*
295862306a36Sopenharmony_ci	 * If the filesystem writes its buffers by hand (eg ext3)
295962306a36Sopenharmony_ci	 * then we can have clean buffers against a dirty folio.  We
296062306a36Sopenharmony_ci	 * clean the folio here; otherwise the VM will never notice
296162306a36Sopenharmony_ci	 * that the filesystem did any IO at all.
296262306a36Sopenharmony_ci	 *
296362306a36Sopenharmony_ci	 * Also, during truncate, discard_buffer will have marked all
296462306a36Sopenharmony_ci	 * the folio's buffers clean.  We discover that here and clean
296562306a36Sopenharmony_ci	 * the folio also.
296662306a36Sopenharmony_ci	 *
296762306a36Sopenharmony_ci	 * private_lock must be held over this entire operation in order
296862306a36Sopenharmony_ci	 * to synchronise against block_dirty_folio and prevent the
296962306a36Sopenharmony_ci	 * dirty bit from being lost.
297062306a36Sopenharmony_ci	 */
297162306a36Sopenharmony_ci	if (ret)
297262306a36Sopenharmony_ci		folio_cancel_dirty(folio);
297362306a36Sopenharmony_ci	spin_unlock(&mapping->private_lock);
297462306a36Sopenharmony_ciout:
297562306a36Sopenharmony_ci	if (buffers_to_free) {
297662306a36Sopenharmony_ci		struct buffer_head *bh = buffers_to_free;
297762306a36Sopenharmony_ci
297862306a36Sopenharmony_ci		do {
297962306a36Sopenharmony_ci			struct buffer_head *next = bh->b_this_page;
298062306a36Sopenharmony_ci			free_buffer_head(bh);
298162306a36Sopenharmony_ci			bh = next;
298262306a36Sopenharmony_ci		} while (bh != buffers_to_free);
298362306a36Sopenharmony_ci	}
298462306a36Sopenharmony_ci	return ret;
298562306a36Sopenharmony_ci}
298662306a36Sopenharmony_ciEXPORT_SYMBOL(try_to_free_buffers);
298762306a36Sopenharmony_ci
298862306a36Sopenharmony_ci/*
298962306a36Sopenharmony_ci * Buffer-head allocation
299062306a36Sopenharmony_ci */
299162306a36Sopenharmony_cistatic struct kmem_cache *bh_cachep __read_mostly;
299262306a36Sopenharmony_ci
299362306a36Sopenharmony_ci/*
299462306a36Sopenharmony_ci * Once the number of bh's in the machine exceeds this level, we start
299562306a36Sopenharmony_ci * stripping them in writeback.
299662306a36Sopenharmony_ci */
299762306a36Sopenharmony_cistatic unsigned long max_buffer_heads;
299862306a36Sopenharmony_ci
299962306a36Sopenharmony_ciint buffer_heads_over_limit;
300062306a36Sopenharmony_ci
300162306a36Sopenharmony_cistruct bh_accounting {
300262306a36Sopenharmony_ci	int nr;			/* Number of live bh's */
300362306a36Sopenharmony_ci	int ratelimit;		/* Limit cacheline bouncing */
300462306a36Sopenharmony_ci};
300562306a36Sopenharmony_ci
300662306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
300762306a36Sopenharmony_ci
300862306a36Sopenharmony_cistatic void recalc_bh_state(void)
300962306a36Sopenharmony_ci{
301062306a36Sopenharmony_ci	int i;
301162306a36Sopenharmony_ci	int tot = 0;
301262306a36Sopenharmony_ci
301362306a36Sopenharmony_ci	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
301462306a36Sopenharmony_ci		return;
301562306a36Sopenharmony_ci	__this_cpu_write(bh_accounting.ratelimit, 0);
301662306a36Sopenharmony_ci	for_each_online_cpu(i)
301762306a36Sopenharmony_ci		tot += per_cpu(bh_accounting, i).nr;
301862306a36Sopenharmony_ci	buffer_heads_over_limit = (tot > max_buffer_heads);
301962306a36Sopenharmony_ci}
302062306a36Sopenharmony_ci
302162306a36Sopenharmony_cistruct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
302262306a36Sopenharmony_ci{
302362306a36Sopenharmony_ci	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
302462306a36Sopenharmony_ci	if (ret) {
302562306a36Sopenharmony_ci		INIT_LIST_HEAD(&ret->b_assoc_buffers);
302662306a36Sopenharmony_ci		spin_lock_init(&ret->b_uptodate_lock);
302762306a36Sopenharmony_ci		preempt_disable();
302862306a36Sopenharmony_ci		__this_cpu_inc(bh_accounting.nr);
302962306a36Sopenharmony_ci		recalc_bh_state();
303062306a36Sopenharmony_ci		preempt_enable();
303162306a36Sopenharmony_ci	}
303262306a36Sopenharmony_ci	return ret;
303362306a36Sopenharmony_ci}
303462306a36Sopenharmony_ciEXPORT_SYMBOL(alloc_buffer_head);
303562306a36Sopenharmony_ci
303662306a36Sopenharmony_civoid free_buffer_head(struct buffer_head *bh)
303762306a36Sopenharmony_ci{
303862306a36Sopenharmony_ci	BUG_ON(!list_empty(&bh->b_assoc_buffers));
303962306a36Sopenharmony_ci	kmem_cache_free(bh_cachep, bh);
304062306a36Sopenharmony_ci	preempt_disable();
304162306a36Sopenharmony_ci	__this_cpu_dec(bh_accounting.nr);
304262306a36Sopenharmony_ci	recalc_bh_state();
304362306a36Sopenharmony_ci	preempt_enable();
304462306a36Sopenharmony_ci}
304562306a36Sopenharmony_ciEXPORT_SYMBOL(free_buffer_head);
304662306a36Sopenharmony_ci
304762306a36Sopenharmony_cistatic int buffer_exit_cpu_dead(unsigned int cpu)
304862306a36Sopenharmony_ci{
304962306a36Sopenharmony_ci	int i;
305062306a36Sopenharmony_ci	struct bh_lru *b = &per_cpu(bh_lrus, cpu);
305162306a36Sopenharmony_ci
305262306a36Sopenharmony_ci	for (i = 0; i < BH_LRU_SIZE; i++) {
305362306a36Sopenharmony_ci		brelse(b->bhs[i]);
305462306a36Sopenharmony_ci		b->bhs[i] = NULL;
305562306a36Sopenharmony_ci	}
305662306a36Sopenharmony_ci	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
305762306a36Sopenharmony_ci	per_cpu(bh_accounting, cpu).nr = 0;
305862306a36Sopenharmony_ci	return 0;
305962306a36Sopenharmony_ci}
306062306a36Sopenharmony_ci
306162306a36Sopenharmony_ci/**
306262306a36Sopenharmony_ci * bh_uptodate_or_lock - Test whether the buffer is uptodate
306362306a36Sopenharmony_ci * @bh: struct buffer_head
306462306a36Sopenharmony_ci *
306562306a36Sopenharmony_ci * Return true if the buffer is up-to-date and false,
306662306a36Sopenharmony_ci * with the buffer locked, if not.
306762306a36Sopenharmony_ci */
306862306a36Sopenharmony_ciint bh_uptodate_or_lock(struct buffer_head *bh)
306962306a36Sopenharmony_ci{
307062306a36Sopenharmony_ci	if (!buffer_uptodate(bh)) {
307162306a36Sopenharmony_ci		lock_buffer(bh);
307262306a36Sopenharmony_ci		if (!buffer_uptodate(bh))
307362306a36Sopenharmony_ci			return 0;
307462306a36Sopenharmony_ci		unlock_buffer(bh);
307562306a36Sopenharmony_ci	}
307662306a36Sopenharmony_ci	return 1;
307762306a36Sopenharmony_ci}
307862306a36Sopenharmony_ciEXPORT_SYMBOL(bh_uptodate_or_lock);
307962306a36Sopenharmony_ci
308062306a36Sopenharmony_ci/**
308162306a36Sopenharmony_ci * __bh_read - Submit read for a locked buffer
308262306a36Sopenharmony_ci * @bh: struct buffer_head
308362306a36Sopenharmony_ci * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ
308462306a36Sopenharmony_ci * @wait: wait until reading finish
308562306a36Sopenharmony_ci *
308662306a36Sopenharmony_ci * Returns zero on success or don't wait, and -EIO on error.
308762306a36Sopenharmony_ci */
308862306a36Sopenharmony_ciint __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
308962306a36Sopenharmony_ci{
309062306a36Sopenharmony_ci	int ret = 0;
309162306a36Sopenharmony_ci
309262306a36Sopenharmony_ci	BUG_ON(!buffer_locked(bh));
309362306a36Sopenharmony_ci
309462306a36Sopenharmony_ci	get_bh(bh);
309562306a36Sopenharmony_ci	bh->b_end_io = end_buffer_read_sync;
309662306a36Sopenharmony_ci	submit_bh(REQ_OP_READ | op_flags, bh);
309762306a36Sopenharmony_ci	if (wait) {
309862306a36Sopenharmony_ci		wait_on_buffer(bh);
309962306a36Sopenharmony_ci		if (!buffer_uptodate(bh))
310062306a36Sopenharmony_ci			ret = -EIO;
310162306a36Sopenharmony_ci	}
310262306a36Sopenharmony_ci	return ret;
310362306a36Sopenharmony_ci}
310462306a36Sopenharmony_ciEXPORT_SYMBOL(__bh_read);
310562306a36Sopenharmony_ci
310662306a36Sopenharmony_ci/**
310762306a36Sopenharmony_ci * __bh_read_batch - Submit read for a batch of unlocked buffers
310862306a36Sopenharmony_ci * @nr: entry number of the buffer batch
310962306a36Sopenharmony_ci * @bhs: a batch of struct buffer_head
311062306a36Sopenharmony_ci * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ
311162306a36Sopenharmony_ci * @force_lock: force to get a lock on the buffer if set, otherwise drops any
311262306a36Sopenharmony_ci *              buffer that cannot lock.
311362306a36Sopenharmony_ci *
311462306a36Sopenharmony_ci * Returns zero on success or don't wait, and -EIO on error.
311562306a36Sopenharmony_ci */
311662306a36Sopenharmony_civoid __bh_read_batch(int nr, struct buffer_head *bhs[],
311762306a36Sopenharmony_ci		     blk_opf_t op_flags, bool force_lock)
311862306a36Sopenharmony_ci{
311962306a36Sopenharmony_ci	int i;
312062306a36Sopenharmony_ci
312162306a36Sopenharmony_ci	for (i = 0; i < nr; i++) {
312262306a36Sopenharmony_ci		struct buffer_head *bh = bhs[i];
312362306a36Sopenharmony_ci
312462306a36Sopenharmony_ci		if (buffer_uptodate(bh))
312562306a36Sopenharmony_ci			continue;
312662306a36Sopenharmony_ci
312762306a36Sopenharmony_ci		if (force_lock)
312862306a36Sopenharmony_ci			lock_buffer(bh);
312962306a36Sopenharmony_ci		else
313062306a36Sopenharmony_ci			if (!trylock_buffer(bh))
313162306a36Sopenharmony_ci				continue;
313262306a36Sopenharmony_ci
313362306a36Sopenharmony_ci		if (buffer_uptodate(bh)) {
313462306a36Sopenharmony_ci			unlock_buffer(bh);
313562306a36Sopenharmony_ci			continue;
313662306a36Sopenharmony_ci		}
313762306a36Sopenharmony_ci
313862306a36Sopenharmony_ci		bh->b_end_io = end_buffer_read_sync;
313962306a36Sopenharmony_ci		get_bh(bh);
314062306a36Sopenharmony_ci		submit_bh(REQ_OP_READ | op_flags, bh);
314162306a36Sopenharmony_ci	}
314262306a36Sopenharmony_ci}
314362306a36Sopenharmony_ciEXPORT_SYMBOL(__bh_read_batch);
314462306a36Sopenharmony_ci
314562306a36Sopenharmony_civoid __init buffer_init(void)
314662306a36Sopenharmony_ci{
314762306a36Sopenharmony_ci	unsigned long nrpages;
314862306a36Sopenharmony_ci	int ret;
314962306a36Sopenharmony_ci
315062306a36Sopenharmony_ci	bh_cachep = kmem_cache_create("buffer_head",
315162306a36Sopenharmony_ci			sizeof(struct buffer_head), 0,
315262306a36Sopenharmony_ci				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
315362306a36Sopenharmony_ci				SLAB_MEM_SPREAD),
315462306a36Sopenharmony_ci				NULL);
315562306a36Sopenharmony_ci
315662306a36Sopenharmony_ci	/*
315762306a36Sopenharmony_ci	 * Limit the bh occupancy to 10% of ZONE_NORMAL
315862306a36Sopenharmony_ci	 */
315962306a36Sopenharmony_ci	nrpages = (nr_free_buffer_pages() * 10) / 100;
316062306a36Sopenharmony_ci	max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
316162306a36Sopenharmony_ci	ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
316262306a36Sopenharmony_ci					NULL, buffer_exit_cpu_dead);
316362306a36Sopenharmony_ci	WARN_ON(ret < 0);
316462306a36Sopenharmony_ci}
3165