18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci *  linux/fs/buffer.c
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci *  Copyright (C) 1991, 1992, 2002  Linus Torvalds
68c2ecf20Sopenharmony_ci */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci/*
98c2ecf20Sopenharmony_ci * Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95
108c2ecf20Sopenharmony_ci *
118c2ecf20Sopenharmony_ci * Removed a lot of unnecessary code and simplified things now that
128c2ecf20Sopenharmony_ci * the buffer cache isn't our primary cache - Andrew Tridgell 12/96
138c2ecf20Sopenharmony_ci *
148c2ecf20Sopenharmony_ci * Speed up hash, lru, and free list operations.  Use gfp() for allocating
158c2ecf20Sopenharmony_ci * hash table, use SLAB cache for buffer heads. SMP threading.  -DaveM
168c2ecf20Sopenharmony_ci *
 * Added 32k buffer block sizes - these are required for older ARM systems. - RMK
188c2ecf20Sopenharmony_ci *
198c2ecf20Sopenharmony_ci * async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de>
208c2ecf20Sopenharmony_ci */
218c2ecf20Sopenharmony_ci
228c2ecf20Sopenharmony_ci#include <linux/kernel.h>
238c2ecf20Sopenharmony_ci#include <linux/sched/signal.h>
248c2ecf20Sopenharmony_ci#include <linux/syscalls.h>
258c2ecf20Sopenharmony_ci#include <linux/fs.h>
268c2ecf20Sopenharmony_ci#include <linux/iomap.h>
278c2ecf20Sopenharmony_ci#include <linux/mm.h>
288c2ecf20Sopenharmony_ci#include <linux/percpu.h>
298c2ecf20Sopenharmony_ci#include <linux/slab.h>
308c2ecf20Sopenharmony_ci#include <linux/capability.h>
318c2ecf20Sopenharmony_ci#include <linux/blkdev.h>
328c2ecf20Sopenharmony_ci#include <linux/file.h>
338c2ecf20Sopenharmony_ci#include <linux/quotaops.h>
348c2ecf20Sopenharmony_ci#include <linux/highmem.h>
358c2ecf20Sopenharmony_ci#include <linux/export.h>
368c2ecf20Sopenharmony_ci#include <linux/backing-dev.h>
378c2ecf20Sopenharmony_ci#include <linux/writeback.h>
388c2ecf20Sopenharmony_ci#include <linux/hash.h>
398c2ecf20Sopenharmony_ci#include <linux/suspend.h>
408c2ecf20Sopenharmony_ci#include <linux/buffer_head.h>
418c2ecf20Sopenharmony_ci#include <linux/task_io_accounting_ops.h>
428c2ecf20Sopenharmony_ci#include <linux/bio.h>
438c2ecf20Sopenharmony_ci#include <linux/cpu.h>
448c2ecf20Sopenharmony_ci#include <linux/bitops.h>
458c2ecf20Sopenharmony_ci#include <linux/mpage.h>
468c2ecf20Sopenharmony_ci#include <linux/bit_spinlock.h>
478c2ecf20Sopenharmony_ci#include <linux/pagevec.h>
488c2ecf20Sopenharmony_ci#include <linux/sched/mm.h>
498c2ecf20Sopenharmony_ci#include <trace/events/block.h>
508c2ecf20Sopenharmony_ci#include <linux/fscrypt.h>
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci#include "internal.h"
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_cistatic int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
558c2ecf20Sopenharmony_cistatic int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
568c2ecf20Sopenharmony_ci			 enum rw_hint hint, struct writeback_control *wbc);
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_ci#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
598c2ecf20Sopenharmony_ci
/*
 * Mark the page backing @bh as recently accessed so page reclaim keeps
 * it around, and emit the corresponding block-layer trace event.
 */
inline void touch_buffer(struct buffer_head *bh)
{
	trace_block_touch_buffer(bh);
	mark_page_accessed(bh->b_page);
}
EXPORT_SYMBOL(touch_buffer);
668c2ecf20Sopenharmony_ci
/*
 * Slow path of lock_buffer(): sleep uninterruptibly (accounted as I/O
 * wait) until the BH_Lock bit can be acquired.
 */
void __lock_buffer(struct buffer_head *bh)
{
	wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__lock_buffer);
728c2ecf20Sopenharmony_ci
/*
 * Release BH_Lock and wake anyone sleeping in __wait_on_buffer() /
 * __lock_buffer().
 */
void unlock_buffer(struct buffer_head *bh)
{
	clear_bit_unlock(BH_Lock, &bh->b_state);
	/* Order the bit clear before the waitqueue check in wake_up_bit(). */
	smp_mb__after_atomic();
	wake_up_bit(&bh->b_state, BH_Lock);
}
EXPORT_SYMBOL(unlock_buffer);
808c2ecf20Sopenharmony_ci
818c2ecf20Sopenharmony_ci/*
828c2ecf20Sopenharmony_ci * Returns if the page has dirty or writeback buffers. If all the buffers
838c2ecf20Sopenharmony_ci * are unlocked and clean then the PageDirty information is stale. If
 * any of the buffers are locked, it is assumed they are locked for IO.
858c2ecf20Sopenharmony_ci */
868c2ecf20Sopenharmony_civoid buffer_check_dirty_writeback(struct page *page,
878c2ecf20Sopenharmony_ci				     bool *dirty, bool *writeback)
888c2ecf20Sopenharmony_ci{
898c2ecf20Sopenharmony_ci	struct buffer_head *head, *bh;
908c2ecf20Sopenharmony_ci	*dirty = false;
918c2ecf20Sopenharmony_ci	*writeback = false;
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_ci	BUG_ON(!PageLocked(page));
948c2ecf20Sopenharmony_ci
958c2ecf20Sopenharmony_ci	if (!page_has_buffers(page))
968c2ecf20Sopenharmony_ci		return;
978c2ecf20Sopenharmony_ci
988c2ecf20Sopenharmony_ci	if (PageWriteback(page))
998c2ecf20Sopenharmony_ci		*writeback = true;
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_ci	head = page_buffers(page);
1028c2ecf20Sopenharmony_ci	bh = head;
1038c2ecf20Sopenharmony_ci	do {
1048c2ecf20Sopenharmony_ci		if (buffer_locked(bh))
1058c2ecf20Sopenharmony_ci			*writeback = true;
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci		if (buffer_dirty(bh))
1088c2ecf20Sopenharmony_ci			*dirty = true;
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_ci		bh = bh->b_this_page;
1118c2ecf20Sopenharmony_ci	} while (bh != head);
1128c2ecf20Sopenharmony_ci}
1138c2ecf20Sopenharmony_ciEXPORT_SYMBOL(buffer_check_dirty_writeback);
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci/*
1168c2ecf20Sopenharmony_ci * Block until a buffer comes unlocked.  This doesn't stop it
1178c2ecf20Sopenharmony_ci * from becoming locked again - you have to lock it yourself
1188c2ecf20Sopenharmony_ci * if you want to preserve its state.
1198c2ecf20Sopenharmony_ci */
/*
 * Sleep (uninterruptibly, accounted as I/O wait) until BH_Lock is
 * clear on @bh.  The buffer may be re-locked by someone else before
 * this returns; callers must re-lock it themselves to pin its state.
 */
void __wait_on_buffer(struct buffer_head * bh)
{
	wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL(__wait_on_buffer);
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_cistatic void buffer_io_error(struct buffer_head *bh, char *msg)
1278c2ecf20Sopenharmony_ci{
1288c2ecf20Sopenharmony_ci	if (!test_bit(BH_Quiet, &bh->b_state))
1298c2ecf20Sopenharmony_ci		printk_ratelimited(KERN_ERR
1308c2ecf20Sopenharmony_ci			"Buffer I/O error on dev %pg, logical block %llu%s\n",
1318c2ecf20Sopenharmony_ci			bh->b_bdev, (unsigned long long)bh->b_blocknr, msg);
1328c2ecf20Sopenharmony_ci}
1338c2ecf20Sopenharmony_ci
1348c2ecf20Sopenharmony_ci/*
1358c2ecf20Sopenharmony_ci * End-of-IO handler helper function which does not touch the bh after
1368c2ecf20Sopenharmony_ci * unlocking it.
1378c2ecf20Sopenharmony_ci * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but
 * a race there is benign: unlock_buffer() only uses the bh's address for
1398c2ecf20Sopenharmony_ci * hashing after unlocking the buffer, so it doesn't actually touch the bh
1408c2ecf20Sopenharmony_ci * itself.
1418c2ecf20Sopenharmony_ci */
/*
 * Core of the synchronous read completion: record whether the read
 * succeeded and drop BH_Lock.  The bh is not dereferenced after the
 * unlock (modulo the benign address-only use inside unlock_buffer()).
 */
static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate)
{
	if (!uptodate)
		/* Read-ahead can legitimately fail; leave bh !uptodate. */
		clear_buffer_uptodate(bh);
	else
		set_buffer_uptodate(bh);
	unlock_buffer(bh);
}
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci/*
1548c2ecf20Sopenharmony_ci * Default synchronous end-of-IO handler..  Just mark it up-to-date and
1558c2ecf20Sopenharmony_ci * unlock the buffer. This is what ll_rw_block uses too.
1568c2ecf20Sopenharmony_ci */
/*
 * Default synchronous read completion: set/clear uptodate, unlock the
 * buffer and drop the reference taken when the I/O was submitted.
 */
void end_buffer_read_sync(struct buffer_head *bh, int uptodate)
{
	__end_buffer_read_notouch(bh, uptodate);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_read_sync);
1638c2ecf20Sopenharmony_ci
/*
 * Synchronous write completion: on failure, log (rate-limited), mark
 * the write error on the buffer and clear uptodate.  Either way the
 * buffer is unlocked and the submission reference dropped.
 */
void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
	if (!uptodate) {
		buffer_io_error(bh, ", lost sync page write");
		mark_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
	} else {
		set_buffer_uptodate(bh);
	}
	unlock_buffer(bh);
	put_bh(bh);
}
EXPORT_SYMBOL(end_buffer_write_sync);
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_ci/*
1798c2ecf20Sopenharmony_ci * Various filesystems appear to want __find_get_block to be non-blocking.
1808c2ecf20Sopenharmony_ci * But it's the page lock which protects the buffers.  To get around this,
1818c2ecf20Sopenharmony_ci * we get exclusion from try_to_free_buffers with the blockdev mapping's
1828c2ecf20Sopenharmony_ci * private_lock.
1838c2ecf20Sopenharmony_ci *
1848c2ecf20Sopenharmony_ci * Hack idea: for the blockdev mapping, private_lock contention
1858c2ecf20Sopenharmony_ci * may be quite high.  This code could TryLock the page, and if that
1868c2ecf20Sopenharmony_ci * succeeds, there is no need to take private_lock.
1878c2ecf20Sopenharmony_ci */
/*
 * Look up the buffer for @block in @bdev's pagecache.  Exclusion
 * against try_to_free_buffers() comes from the blockdev mapping's
 * private_lock rather than the page lock (see comment above).
 * Returns the bh with an extra reference held, or NULL.
 */
static struct buffer_head *
__find_get_block_slow(struct block_device *bdev, sector_t block)
{
	struct inode *bd_inode = bdev->bd_inode;
	struct address_space *bd_mapping = bd_inode->i_mapping;
	struct buffer_head *ret = NULL;
	pgoff_t index;
	struct buffer_head *bh;
	struct buffer_head *head;
	struct page *page;
	int all_mapped = 1;
	static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);

	/* Which pagecache page covers this block? */
	index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
	page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
	if (!page)
		goto out;

	spin_lock(&bd_mapping->private_lock);
	if (!page_has_buffers(page))
		goto out_unlock;
	head = page_buffers(page);
	bh = head;
	/* Walk the page's circular buffer ring looking for a mapped match. */
	do {
		if (!buffer_mapped(bh))
			all_mapped = 0;
		else if (bh->b_blocknr == block) {
			ret = bh;
			get_bh(bh);
			goto out_unlock;
		}
		bh = bh->b_this_page;
	} while (bh != head);

	/* we might be here because some of the buffers on this page are
	 * not mapped.  This is due to various races between
	 * file io on the block device and getblk.  It gets dealt with
	 * elsewhere, don't buffer_error if we had some unmapped buffers
	 */
	ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE);
	if (all_mapped && __ratelimit(&last_warned)) {
		/* All buffers mapped yet none matched: warn (rate-limited). */
		printk("__find_get_block_slow() failed. block=%llu, "
		       "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, "
		       "device %pg blocksize: %d\n",
		       (unsigned long long)block,
		       (unsigned long long)bh->b_blocknr,
		       bh->b_state, bh->b_size, bdev,
		       1 << bd_inode->i_blkbits);
	}
out_unlock:
	spin_unlock(&bd_mapping->private_lock);
	put_page(page);
out:
	return ret;
}
2438c2ecf20Sopenharmony_ci
/*
 * Async read completion for one buffer of a page.  Records the result
 * on the bh and, if this was the last buffer of the page still under
 * async read, marks the page uptodate (when appropriate) and unlocks it.
 * Runs in completion context; the ring scan is serialized by the first
 * buffer's b_uptodate_lock.
 */
static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;
	int page_uptodate = 1;

	BUG_ON(!buffer_async_read(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		clear_buffer_uptodate(bh);
		buffer_io_error(bh, ", async page read");
		SetPageError(page);
	}

	/*
	 * Be _very_ careful from here on. Bad things can happen if
	 * two buffer heads end IO at almost the same time and both
	 * decide that the page is now completely done.
	 */
	first = page_buffers(page);
	spin_lock_irqsave(&first->b_uptodate_lock, flags);
	clear_buffer_async_read(bh);
	unlock_buffer(bh);
	tmp = bh;
	/* Scan the ring: any buffer still async_read means I/O pending. */
	do {
		if (!buffer_uptodate(tmp))
			page_uptodate = 0;
		if (buffer_async_read(tmp)) {
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	} while (tmp != bh);
	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);

	/*
	 * If none of the buffers had errors and they are all
	 * uptodate then we can set the page uptodate.
	 */
	if (page_uptodate && !PageError(page))
		SetPageUptodate(page);
	unlock_page(page);
	return;

still_busy:
	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
	return;
}
2978c2ecf20Sopenharmony_ci
/* Deferred-decryption context: ties a buffer_head to a workqueue item. */
struct decrypt_bh_ctx {
	struct work_struct work;	/* queued on the fscrypt workqueue */
	struct buffer_head *bh;		/* buffer whose data needs decrypting */
};
3028c2ecf20Sopenharmony_ci
3038c2ecf20Sopenharmony_cistatic void decrypt_bh(struct work_struct *work)
3048c2ecf20Sopenharmony_ci{
3058c2ecf20Sopenharmony_ci	struct decrypt_bh_ctx *ctx =
3068c2ecf20Sopenharmony_ci		container_of(work, struct decrypt_bh_ctx, work);
3078c2ecf20Sopenharmony_ci	struct buffer_head *bh = ctx->bh;
3088c2ecf20Sopenharmony_ci	int err;
3098c2ecf20Sopenharmony_ci
3108c2ecf20Sopenharmony_ci	err = fscrypt_decrypt_pagecache_blocks(bh->b_page, bh->b_size,
3118c2ecf20Sopenharmony_ci					       bh_offset(bh));
3128c2ecf20Sopenharmony_ci	end_buffer_async_read(bh, err == 0);
3138c2ecf20Sopenharmony_ci	kfree(ctx);
3148c2ecf20Sopenharmony_ci}
3158c2ecf20Sopenharmony_ci
3168c2ecf20Sopenharmony_ci/*
3178c2ecf20Sopenharmony_ci * I/O completion handler for block_read_full_page() - pages
3188c2ecf20Sopenharmony_ci * which come unlocked at the end of I/O.
3198c2ecf20Sopenharmony_ci */
/*
 * I/O completion handler for block_read_full_page() - pages
 * which come unlocked at the end of I/O.
 */
static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
{
	/* Decrypt if needed */
	if (uptodate &&
	    fscrypt_inode_uses_fs_layer_crypto(bh->b_page->mapping->host)) {
		/* GFP_ATOMIC: completion context, no sleeping allowed. */
		struct decrypt_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);

		if (ctx) {
			/* Defer decryption to process context. */
			INIT_WORK(&ctx->work, decrypt_bh);
			ctx->bh = bh;
			fscrypt_enqueue_decrypt_work(&ctx->work);
			return;
		}
		/* Allocation failed: treat the read as failed. */
		uptodate = 0;
	}
	end_buffer_async_read(bh, uptodate);
}
3378c2ecf20Sopenharmony_ci
3388c2ecf20Sopenharmony_ci/*
3398c2ecf20Sopenharmony_ci * Completion handler for block_write_full_page() - pages which are unlocked
3408c2ecf20Sopenharmony_ci * during I/O, and which have PageWriteback cleared upon I/O completion.
3418c2ecf20Sopenharmony_ci */
/*
 * Completion handler for block_write_full_page() - pages which are unlocked
 * during I/O, and which have PageWriteback cleared upon I/O completion.
 * When the last async-write buffer of the page completes, writeback is
 * ended on the page.  The ring scan is serialized by the first buffer's
 * b_uptodate_lock, as in end_buffer_async_read().
 */
void end_buffer_async_write(struct buffer_head *bh, int uptodate)
{
	unsigned long flags;
	struct buffer_head *first;
	struct buffer_head *tmp;
	struct page *page;

	BUG_ON(!buffer_async_write(bh));

	page = bh->b_page;
	if (uptodate) {
		set_buffer_uptodate(bh);
	} else {
		buffer_io_error(bh, ", lost async page write");
		mark_buffer_write_io_error(bh);
		clear_buffer_uptodate(bh);
		SetPageError(page);
	}

	first = page_buffers(page);
	spin_lock_irqsave(&first->b_uptodate_lock, flags);

	clear_buffer_async_write(bh);
	unlock_buffer(bh);
	tmp = bh->b_this_page;
	/* Any remaining async_write buffer means the page is still busy. */
	while (tmp != bh) {
		if (buffer_async_write(tmp)) {
			BUG_ON(!buffer_locked(tmp));
			goto still_busy;
		}
		tmp = tmp->b_this_page;
	}
	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
	end_page_writeback(page);
	return;

still_busy:
	spin_unlock_irqrestore(&first->b_uptodate_lock, flags);
	return;
}
EXPORT_SYMBOL(end_buffer_async_write);
3838c2ecf20Sopenharmony_ci
3848c2ecf20Sopenharmony_ci/*
3858c2ecf20Sopenharmony_ci * If a page's buffers are under async readin (end_buffer_async_read
3868c2ecf20Sopenharmony_ci * completion) then there is a possibility that another thread of
3878c2ecf20Sopenharmony_ci * control could lock one of the buffers after it has completed
3888c2ecf20Sopenharmony_ci * but while some of the other buffers have not completed.  This
3898c2ecf20Sopenharmony_ci * locked buffer would confuse end_buffer_async_read() into not unlocking
3908c2ecf20Sopenharmony_ci * the page.  So the absence of BH_Async_Read tells end_buffer_async_read()
3918c2ecf20Sopenharmony_ci * that this buffer is not under async I/O.
3928c2ecf20Sopenharmony_ci *
3938c2ecf20Sopenharmony_ci * The page comes unlocked when it has no locked buffer_async buffers
3948c2ecf20Sopenharmony_ci * left.
3958c2ecf20Sopenharmony_ci *
3968c2ecf20Sopenharmony_ci * PageLocked prevents anyone starting new async I/O reads any of
3978c2ecf20Sopenharmony_ci * the buffers.
3988c2ecf20Sopenharmony_ci *
3998c2ecf20Sopenharmony_ci * PageWriteback is used to prevent simultaneous writeout of the same
4008c2ecf20Sopenharmony_ci * page.
4018c2ecf20Sopenharmony_ci *
4028c2ecf20Sopenharmony_ci * PageLocked prevents anyone from starting writeback of a page which is
4038c2ecf20Sopenharmony_ci * under read I/O (PageWriteback is only ever set against a locked page).
4048c2ecf20Sopenharmony_ci */
/*
 * Arrange for end_buffer_async_read_io() to run when I/O on @bh
 * completes; BH_Async_Read marks the buffer as under async read
 * (see the discussion above).
 */
static void mark_buffer_async_read(struct buffer_head *bh)
{
	bh->b_end_io = end_buffer_async_read_io;
	set_buffer_async_read(bh);
}
4108c2ecf20Sopenharmony_ci
/*
 * Mark @bh as under async write with a caller-supplied completion
 * handler (filesystems may wrap end_buffer_async_write).
 */
static void mark_buffer_async_write_endio(struct buffer_head *bh,
					  bh_end_io_t *handler)
{
	bh->b_end_io = handler;
	set_buffer_async_write(bh);
}
4178c2ecf20Sopenharmony_ci
/*
 * Mark @bh as under async write with the default completion handler.
 */
void mark_buffer_async_write(struct buffer_head *bh)
{
	mark_buffer_async_write_endio(bh, end_buffer_async_write);
}
EXPORT_SYMBOL(mark_buffer_async_write);
4238c2ecf20Sopenharmony_ci
4248c2ecf20Sopenharmony_ci
4258c2ecf20Sopenharmony_ci/*
4268c2ecf20Sopenharmony_ci * fs/buffer.c contains helper functions for buffer-backed address space's
4278c2ecf20Sopenharmony_ci * fsync functions.  A common requirement for buffer-based filesystems is
4288c2ecf20Sopenharmony_ci * that certain data from the backing blockdev needs to be written out for
4298c2ecf20Sopenharmony_ci * a successful fsync().  For example, ext2 indirect blocks need to be
4308c2ecf20Sopenharmony_ci * written back and waited upon before fsync() returns.
4318c2ecf20Sopenharmony_ci *
4328c2ecf20Sopenharmony_ci * The functions mark_buffer_inode_dirty(), fsync_inode_buffers(),
4338c2ecf20Sopenharmony_ci * inode_has_buffers() and invalidate_inode_buffers() are provided for the
4348c2ecf20Sopenharmony_ci * management of a list of dependent buffers at ->i_mapping->private_list.
4358c2ecf20Sopenharmony_ci *
4368c2ecf20Sopenharmony_ci * Locking is a little subtle: try_to_free_buffers() will remove buffers
4378c2ecf20Sopenharmony_ci * from their controlling inode's queue when they are being freed.  But
4388c2ecf20Sopenharmony_ci * try_to_free_buffers() will be operating against the *blockdev* mapping
4398c2ecf20Sopenharmony_ci * at the time, not against the S_ISREG file which depends on those buffers.
4408c2ecf20Sopenharmony_ci * So the locking for private_list is via the private_lock in the address_space
4418c2ecf20Sopenharmony_ci * which backs the buffers.  Which is different from the address_space
4428c2ecf20Sopenharmony_ci * against which the buffers are listed.  So for a particular address_space,
4438c2ecf20Sopenharmony_ci * mapping->private_lock does *not* protect mapping->private_list!  In fact,
4448c2ecf20Sopenharmony_ci * mapping->private_list will always be protected by the backing blockdev's
4458c2ecf20Sopenharmony_ci * ->private_lock.
4468c2ecf20Sopenharmony_ci *
4478c2ecf20Sopenharmony_ci * Which introduces a requirement: all buffers on an address_space's
4488c2ecf20Sopenharmony_ci * ->private_list must be from the same address_space: the blockdev's.
4498c2ecf20Sopenharmony_ci *
4508c2ecf20Sopenharmony_ci * address_spaces which do not place buffers at ->private_list via these
4518c2ecf20Sopenharmony_ci * utility functions are free to use private_lock and private_list for
4528c2ecf20Sopenharmony_ci * whatever they want.  The only requirement is that list_empty(private_list)
4538c2ecf20Sopenharmony_ci * be true at clear_inode() time.
4548c2ecf20Sopenharmony_ci *
4558c2ecf20Sopenharmony_ci * FIXME: clear_inode should not call invalidate_inode_buffers().  The
4568c2ecf20Sopenharmony_ci * filesystems should do that.  invalidate_inode_buffers() should just go
4578c2ecf20Sopenharmony_ci * BUG_ON(!list_empty).
4588c2ecf20Sopenharmony_ci *
4598c2ecf20Sopenharmony_ci * FIXME: mark_buffer_dirty_inode() is a data-plane operation.  It should
4608c2ecf20Sopenharmony_ci * take an address_space, not an inode.  And it should be called
4618c2ecf20Sopenharmony_ci * mark_buffer_dirty_fsync() to clearly define why those buffers are being
4628c2ecf20Sopenharmony_ci * queued up.
4638c2ecf20Sopenharmony_ci *
4648c2ecf20Sopenharmony_ci * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the
4658c2ecf20Sopenharmony_ci * list if it is already on a list.  Because if the buffer is on a list,
4668c2ecf20Sopenharmony_ci * it *must* already be on the right one.  If not, the filesystem is being
4678c2ecf20Sopenharmony_ci * silly.  This will save a ton of locking.  But first we have to ensure
4688c2ecf20Sopenharmony_ci * that buffers are taken *off* the old inode's list when they are freed
4698c2ecf20Sopenharmony_ci * (presumably in truncate).  That requires careful auditing of all
4708c2ecf20Sopenharmony_ci * filesystems (do it inside bforget()).  It could also be done by bringing
4718c2ecf20Sopenharmony_ci * b_inode back.
4728c2ecf20Sopenharmony_ci */
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci/*
4758c2ecf20Sopenharmony_ci * The buffer's backing address_space's private_lock must be held
4768c2ecf20Sopenharmony_ci */
4778c2ecf20Sopenharmony_cistatic void __remove_assoc_queue(struct buffer_head *bh)
4788c2ecf20Sopenharmony_ci{
4798c2ecf20Sopenharmony_ci	list_del_init(&bh->b_assoc_buffers);
4808c2ecf20Sopenharmony_ci	WARN_ON(!bh->b_assoc_map);
4818c2ecf20Sopenharmony_ci	bh->b_assoc_map = NULL;
4828c2ecf20Sopenharmony_ci}
4838c2ecf20Sopenharmony_ci
/* Does @inode have any buffers queued on its fsync dependency list? */
int inode_has_buffers(struct inode *inode)
{
	return !list_empty(&inode->i_data.private_list);
}
4888c2ecf20Sopenharmony_ci
4898c2ecf20Sopenharmony_ci/*
4908c2ecf20Sopenharmony_ci * osync is designed to support O_SYNC io.  It waits synchronously for
4918c2ecf20Sopenharmony_ci * all already-submitted IO to complete, but does not queue any new
4928c2ecf20Sopenharmony_ci * writes to the disk.
4938c2ecf20Sopenharmony_ci *
4948c2ecf20Sopenharmony_ci * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
4958c2ecf20Sopenharmony_ci * you dirty the buffers, and then use osync_inode_buffers to wait for
4968c2ecf20Sopenharmony_ci * completion.  Any other dirty buffers which are not yet queued for
4978c2ecf20Sopenharmony_ci * write will not be flushed to disk by the osync.
4988c2ecf20Sopenharmony_ci */
/*
 * Wait, under @lock, for every locked buffer on @list to complete its
 * already-submitted I/O.  Returns -EIO if any buffer came back not
 * uptodate, 0 otherwise.  No new writes are started.
 */
static int osync_buffers_list(spinlock_t *lock, struct list_head *list)
{
	struct buffer_head *bh;
	struct list_head *p;
	int err = 0;

	spin_lock(lock);
repeat:
	list_for_each_prev(p, list) {
		bh = BH_ENTRY(p);
		if (buffer_locked(bh)) {
			/* Pin the bh, drop the lock, and sleep on the I/O. */
			get_bh(bh);
			spin_unlock(lock);
			wait_on_buffer(bh);
			if (!buffer_uptodate(bh))
				err = -EIO;
			brelse(bh);
			/*
			 * The list may have changed while the lock was
			 * dropped, so restart the scan from the tail.
			 */
			spin_lock(lock);
			goto repeat;
		}
	}
	spin_unlock(lock);
	return err;
}
5238c2ecf20Sopenharmony_ci
/*
 * Emergency-unfreeze @sb's backing block device, repeating until
 * thaw_bdev() reports it is no longer frozen; each iteration that
 * actually thaws a level logs a warning.
 */
void emergency_thaw_bdev(struct super_block *sb)
{
	while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
		printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev);
}
5298c2ecf20Sopenharmony_ci
5308c2ecf20Sopenharmony_ci/**
5318c2ecf20Sopenharmony_ci * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
5328c2ecf20Sopenharmony_ci * @mapping: the mapping which wants those buffers written
5338c2ecf20Sopenharmony_ci *
5348c2ecf20Sopenharmony_ci * Starts I/O against the buffers at mapping->private_list, and waits upon
5358c2ecf20Sopenharmony_ci * that I/O.
5368c2ecf20Sopenharmony_ci *
5378c2ecf20Sopenharmony_ci * Basically, this is a convenience function for fsync().
5388c2ecf20Sopenharmony_ci * @mapping is a file or directory which needs those buffers to be written for
5398c2ecf20Sopenharmony_ci * a successful fsync().
5408c2ecf20Sopenharmony_ci */
5418c2ecf20Sopenharmony_ciint sync_mapping_buffers(struct address_space *mapping)
5428c2ecf20Sopenharmony_ci{
5438c2ecf20Sopenharmony_ci	struct address_space *buffer_mapping = mapping->private_data;
5448c2ecf20Sopenharmony_ci
5458c2ecf20Sopenharmony_ci	if (buffer_mapping == NULL || list_empty(&mapping->private_list))
5468c2ecf20Sopenharmony_ci		return 0;
5478c2ecf20Sopenharmony_ci
5488c2ecf20Sopenharmony_ci	return fsync_buffers_list(&buffer_mapping->private_lock,
5498c2ecf20Sopenharmony_ci					&mapping->private_list);
5508c2ecf20Sopenharmony_ci}
5518c2ecf20Sopenharmony_ciEXPORT_SYMBOL(sync_mapping_buffers);
5528c2ecf20Sopenharmony_ci
5538c2ecf20Sopenharmony_ci/*
5548c2ecf20Sopenharmony_ci * Called when we've recently written block `bblock', and it is known that
5558c2ecf20Sopenharmony_ci * `bblock' was for a buffer_boundary() buffer.  This means that the block at
5568c2ecf20Sopenharmony_ci * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
5578c2ecf20Sopenharmony_ci * dirty, schedule it for IO.  So that indirects merge nicely with their data.
5588c2ecf20Sopenharmony_ci */
5598c2ecf20Sopenharmony_civoid write_boundary_block(struct block_device *bdev,
5608c2ecf20Sopenharmony_ci			sector_t bblock, unsigned blocksize)
5618c2ecf20Sopenharmony_ci{
5628c2ecf20Sopenharmony_ci	struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
5638c2ecf20Sopenharmony_ci	if (bh) {
5648c2ecf20Sopenharmony_ci		if (buffer_dirty(bh))
5658c2ecf20Sopenharmony_ci			ll_rw_block(REQ_OP_WRITE, 0, 1, &bh);
5668c2ecf20Sopenharmony_ci		put_bh(bh);
5678c2ecf20Sopenharmony_ci	}
5688c2ecf20Sopenharmony_ci}
5698c2ecf20Sopenharmony_ci
/*
 * Dirty @bh and queue it on @inode's fsync dependency list
 * (->i_mapping->private_list), recording the backing blockdev mapping
 * in ->private_data the first time through.  See the long comment
 * above for the private_list/private_lock locking scheme.
 */
void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
{
	struct address_space *mapping = inode->i_mapping;
	struct address_space *buffer_mapping = bh->b_page->mapping;

	mark_buffer_dirty(bh);
	if (!mapping->private_data) {
		mapping->private_data = buffer_mapping;
	} else {
		/* All listed buffers must share one backing mapping. */
		BUG_ON(mapping->private_data != buffer_mapping);
	}
	/*
	 * NOTE(review): b_assoc_map is tested outside private_lock here;
	 * confirm this cannot race with concurrent removal from the list.
	 */
	if (!bh->b_assoc_map) {
		spin_lock(&buffer_mapping->private_lock);
		list_move_tail(&bh->b_assoc_buffers,
				&mapping->private_list);
		bh->b_assoc_map = mapping;
		spin_unlock(&buffer_mapping->private_lock);
	}
}
EXPORT_SYMBOL(mark_buffer_dirty_inode);
5908c2ecf20Sopenharmony_ci
5918c2ecf20Sopenharmony_ci/*
5928c2ecf20Sopenharmony_ci * Mark the page dirty, and set it dirty in the page cache, and mark the inode
5938c2ecf20Sopenharmony_ci * dirty.
5948c2ecf20Sopenharmony_ci *
5958c2ecf20Sopenharmony_ci * If warn is true, then emit a warning if the page is not uptodate and has
5968c2ecf20Sopenharmony_ci * not been truncated.
5978c2ecf20Sopenharmony_ci *
5988c2ecf20Sopenharmony_ci * The caller must hold lock_page_memcg().
5998c2ecf20Sopenharmony_ci */
6008c2ecf20Sopenharmony_civoid __set_page_dirty(struct page *page, struct address_space *mapping,
6018c2ecf20Sopenharmony_ci			     int warn)
6028c2ecf20Sopenharmony_ci{
6038c2ecf20Sopenharmony_ci	unsigned long flags;
6048c2ecf20Sopenharmony_ci
6058c2ecf20Sopenharmony_ci	xa_lock_irqsave(&mapping->i_pages, flags);
6068c2ecf20Sopenharmony_ci	if (page->mapping) {	/* Race with truncate? */
6078c2ecf20Sopenharmony_ci		WARN_ON_ONCE(warn && !PageUptodate(page));
6088c2ecf20Sopenharmony_ci		account_page_dirtied(page, mapping);
6098c2ecf20Sopenharmony_ci		__xa_set_mark(&mapping->i_pages, page_index(page),
6108c2ecf20Sopenharmony_ci				PAGECACHE_TAG_DIRTY);
6118c2ecf20Sopenharmony_ci	}
6128c2ecf20Sopenharmony_ci	xa_unlock_irqrestore(&mapping->i_pages, flags);
6138c2ecf20Sopenharmony_ci}
6148c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(__set_page_dirty);
6158c2ecf20Sopenharmony_ci
6168c2ecf20Sopenharmony_ci/*
6178c2ecf20Sopenharmony_ci * Add a page to the dirty page list.
6188c2ecf20Sopenharmony_ci *
6198c2ecf20Sopenharmony_ci * It is a sad fact of life that this function is called from several places
6208c2ecf20Sopenharmony_ci * deeply under spinlocking.  It may not sleep.
6218c2ecf20Sopenharmony_ci *
6228c2ecf20Sopenharmony_ci * If the page has buffers, the uptodate buffers are set dirty, to preserve
6238c2ecf20Sopenharmony_ci * dirty-state coherency between the page and the buffers.  It the page does
6248c2ecf20Sopenharmony_ci * not have buffers then when they are later attached they will all be set
6258c2ecf20Sopenharmony_ci * dirty.
6268c2ecf20Sopenharmony_ci *
6278c2ecf20Sopenharmony_ci * The buffers are dirtied before the page is dirtied.  There's a small race
6288c2ecf20Sopenharmony_ci * window in which a writepage caller may see the page cleanness but not the
6298c2ecf20Sopenharmony_ci * buffer dirtiness.  That's fine.  If this code were to set the page dirty
6308c2ecf20Sopenharmony_ci * before the buffers, a concurrent writepage caller could clear the page dirty
6318c2ecf20Sopenharmony_ci * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean
6328c2ecf20Sopenharmony_ci * page on the dirty page list.
6338c2ecf20Sopenharmony_ci *
6348c2ecf20Sopenharmony_ci * We use private_lock to lock against try_to_free_buffers while using the
6358c2ecf20Sopenharmony_ci * page's buffer list.  Also use this to protect against clean buffers being
6368c2ecf20Sopenharmony_ci * added to the page after it was set dirty.
6378c2ecf20Sopenharmony_ci *
6388c2ecf20Sopenharmony_ci * FIXME: may need to call ->reservepage here as well.  That's rather up to the
6398c2ecf20Sopenharmony_ci * address_space though.
6408c2ecf20Sopenharmony_ci */
6418c2ecf20Sopenharmony_ciint __set_page_dirty_buffers(struct page *page)
6428c2ecf20Sopenharmony_ci{
6438c2ecf20Sopenharmony_ci	int newly_dirty;
6448c2ecf20Sopenharmony_ci	struct address_space *mapping = page_mapping(page);
6458c2ecf20Sopenharmony_ci
6468c2ecf20Sopenharmony_ci	if (unlikely(!mapping))
6478c2ecf20Sopenharmony_ci		return !TestSetPageDirty(page);
6488c2ecf20Sopenharmony_ci
6498c2ecf20Sopenharmony_ci	spin_lock(&mapping->private_lock);
6508c2ecf20Sopenharmony_ci	if (page_has_buffers(page)) {
6518c2ecf20Sopenharmony_ci		struct buffer_head *head = page_buffers(page);
6528c2ecf20Sopenharmony_ci		struct buffer_head *bh = head;
6538c2ecf20Sopenharmony_ci
6548c2ecf20Sopenharmony_ci		do {
6558c2ecf20Sopenharmony_ci			set_buffer_dirty(bh);
6568c2ecf20Sopenharmony_ci			bh = bh->b_this_page;
6578c2ecf20Sopenharmony_ci		} while (bh != head);
6588c2ecf20Sopenharmony_ci	}
6598c2ecf20Sopenharmony_ci	/*
6608c2ecf20Sopenharmony_ci	 * Lock out page->mem_cgroup migration to keep PageDirty
6618c2ecf20Sopenharmony_ci	 * synchronized with per-memcg dirty page counters.
6628c2ecf20Sopenharmony_ci	 */
6638c2ecf20Sopenharmony_ci	lock_page_memcg(page);
6648c2ecf20Sopenharmony_ci	newly_dirty = !TestSetPageDirty(page);
6658c2ecf20Sopenharmony_ci	spin_unlock(&mapping->private_lock);
6668c2ecf20Sopenharmony_ci
6678c2ecf20Sopenharmony_ci	if (newly_dirty)
6688c2ecf20Sopenharmony_ci		__set_page_dirty(page, mapping, 1);
6698c2ecf20Sopenharmony_ci
6708c2ecf20Sopenharmony_ci	unlock_page_memcg(page);
6718c2ecf20Sopenharmony_ci
6728c2ecf20Sopenharmony_ci	if (newly_dirty)
6738c2ecf20Sopenharmony_ci		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
6748c2ecf20Sopenharmony_ci
6758c2ecf20Sopenharmony_ci	return newly_dirty;
6768c2ecf20Sopenharmony_ci}
6778c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__set_page_dirty_buffers);
6788c2ecf20Sopenharmony_ci
6798c2ecf20Sopenharmony_ci/*
6808c2ecf20Sopenharmony_ci * Write out and wait upon a list of buffers.
6818c2ecf20Sopenharmony_ci *
6828c2ecf20Sopenharmony_ci * We have conflicting pressures: we want to make sure that all
6838c2ecf20Sopenharmony_ci * initially dirty buffers get waited on, but that any subsequently
6848c2ecf20Sopenharmony_ci * dirtied buffers don't.  After all, we don't want fsync to last
6858c2ecf20Sopenharmony_ci * forever if somebody is actively writing to the file.
6868c2ecf20Sopenharmony_ci *
6878c2ecf20Sopenharmony_ci * Do this in two main stages: first we copy dirty buffers to a
6888c2ecf20Sopenharmony_ci * temporary inode list, queueing the writes as we go.  Then we clean
6898c2ecf20Sopenharmony_ci * up, waiting for those writes to complete.
6908c2ecf20Sopenharmony_ci *
6918c2ecf20Sopenharmony_ci * During this second stage, any subsequent updates to the file may end
6928c2ecf20Sopenharmony_ci * up refiling the buffer on the original inode's dirty list again, so
6938c2ecf20Sopenharmony_ci * there is a chance we will end up with a buffer queued for write but
6948c2ecf20Sopenharmony_ci * not yet completed on that list.  So, as a final cleanup we go through
6958c2ecf20Sopenharmony_ci * the osync code to catch these locked, dirty buffers without requeuing
6968c2ecf20Sopenharmony_ci * any newly dirty buffers for write.
6978c2ecf20Sopenharmony_ci */
6988c2ecf20Sopenharmony_cistatic int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
6998c2ecf20Sopenharmony_ci{
7008c2ecf20Sopenharmony_ci	struct buffer_head *bh;
7018c2ecf20Sopenharmony_ci	struct list_head tmp;
7028c2ecf20Sopenharmony_ci	struct address_space *mapping;
7038c2ecf20Sopenharmony_ci	int err = 0, err2;
7048c2ecf20Sopenharmony_ci	struct blk_plug plug;
7058c2ecf20Sopenharmony_ci
7068c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&tmp);
7078c2ecf20Sopenharmony_ci	blk_start_plug(&plug);
7088c2ecf20Sopenharmony_ci
7098c2ecf20Sopenharmony_ci	spin_lock(lock);
7108c2ecf20Sopenharmony_ci	while (!list_empty(list)) {
7118c2ecf20Sopenharmony_ci		bh = BH_ENTRY(list->next);
7128c2ecf20Sopenharmony_ci		mapping = bh->b_assoc_map;
7138c2ecf20Sopenharmony_ci		__remove_assoc_queue(bh);
7148c2ecf20Sopenharmony_ci		/* Avoid race with mark_buffer_dirty_inode() which does
7158c2ecf20Sopenharmony_ci		 * a lockless check and we rely on seeing the dirty bit */
7168c2ecf20Sopenharmony_ci		smp_mb();
7178c2ecf20Sopenharmony_ci		if (buffer_dirty(bh) || buffer_locked(bh)) {
7188c2ecf20Sopenharmony_ci			list_add(&bh->b_assoc_buffers, &tmp);
7198c2ecf20Sopenharmony_ci			bh->b_assoc_map = mapping;
7208c2ecf20Sopenharmony_ci			if (buffer_dirty(bh)) {
7218c2ecf20Sopenharmony_ci				get_bh(bh);
7228c2ecf20Sopenharmony_ci				spin_unlock(lock);
7238c2ecf20Sopenharmony_ci				/*
7248c2ecf20Sopenharmony_ci				 * Ensure any pending I/O completes so that
7258c2ecf20Sopenharmony_ci				 * write_dirty_buffer() actually writes the
7268c2ecf20Sopenharmony_ci				 * current contents - it is a noop if I/O is
7278c2ecf20Sopenharmony_ci				 * still in flight on potentially older
7288c2ecf20Sopenharmony_ci				 * contents.
7298c2ecf20Sopenharmony_ci				 */
7308c2ecf20Sopenharmony_ci				write_dirty_buffer(bh, REQ_SYNC);
7318c2ecf20Sopenharmony_ci
7328c2ecf20Sopenharmony_ci				/*
7338c2ecf20Sopenharmony_ci				 * Kick off IO for the previous mapping. Note
7348c2ecf20Sopenharmony_ci				 * that we will not run the very last mapping,
7358c2ecf20Sopenharmony_ci				 * wait_on_buffer() will do that for us
7368c2ecf20Sopenharmony_ci				 * through sync_buffer().
7378c2ecf20Sopenharmony_ci				 */
7388c2ecf20Sopenharmony_ci				brelse(bh);
7398c2ecf20Sopenharmony_ci				spin_lock(lock);
7408c2ecf20Sopenharmony_ci			}
7418c2ecf20Sopenharmony_ci		}
7428c2ecf20Sopenharmony_ci	}
7438c2ecf20Sopenharmony_ci
7448c2ecf20Sopenharmony_ci	spin_unlock(lock);
7458c2ecf20Sopenharmony_ci	blk_finish_plug(&plug);
7468c2ecf20Sopenharmony_ci	spin_lock(lock);
7478c2ecf20Sopenharmony_ci
7488c2ecf20Sopenharmony_ci	while (!list_empty(&tmp)) {
7498c2ecf20Sopenharmony_ci		bh = BH_ENTRY(tmp.prev);
7508c2ecf20Sopenharmony_ci		get_bh(bh);
7518c2ecf20Sopenharmony_ci		mapping = bh->b_assoc_map;
7528c2ecf20Sopenharmony_ci		__remove_assoc_queue(bh);
7538c2ecf20Sopenharmony_ci		/* Avoid race with mark_buffer_dirty_inode() which does
7548c2ecf20Sopenharmony_ci		 * a lockless check and we rely on seeing the dirty bit */
7558c2ecf20Sopenharmony_ci		smp_mb();
7568c2ecf20Sopenharmony_ci		if (buffer_dirty(bh)) {
7578c2ecf20Sopenharmony_ci			list_add(&bh->b_assoc_buffers,
7588c2ecf20Sopenharmony_ci				 &mapping->private_list);
7598c2ecf20Sopenharmony_ci			bh->b_assoc_map = mapping;
7608c2ecf20Sopenharmony_ci		}
7618c2ecf20Sopenharmony_ci		spin_unlock(lock);
7628c2ecf20Sopenharmony_ci		wait_on_buffer(bh);
7638c2ecf20Sopenharmony_ci		if (!buffer_uptodate(bh))
7648c2ecf20Sopenharmony_ci			err = -EIO;
7658c2ecf20Sopenharmony_ci		brelse(bh);
7668c2ecf20Sopenharmony_ci		spin_lock(lock);
7678c2ecf20Sopenharmony_ci	}
7688c2ecf20Sopenharmony_ci
7698c2ecf20Sopenharmony_ci	spin_unlock(lock);
7708c2ecf20Sopenharmony_ci	err2 = osync_buffers_list(lock, list);
7718c2ecf20Sopenharmony_ci	if (err)
7728c2ecf20Sopenharmony_ci		return err;
7738c2ecf20Sopenharmony_ci	else
7748c2ecf20Sopenharmony_ci		return err2;
7758c2ecf20Sopenharmony_ci}
7768c2ecf20Sopenharmony_ci
7778c2ecf20Sopenharmony_ci/*
7788c2ecf20Sopenharmony_ci * Invalidate any and all dirty buffers on a given inode.  We are
7798c2ecf20Sopenharmony_ci * probably unmounting the fs, but that doesn't mean we have already
7808c2ecf20Sopenharmony_ci * done a sync().  Just drop the buffers from the inode list.
7818c2ecf20Sopenharmony_ci *
7828c2ecf20Sopenharmony_ci * NOTE: we take the inode's blockdev's mapping's private_lock.  Which
7838c2ecf20Sopenharmony_ci * assumes that all the buffers are against the blockdev.  Not true
7848c2ecf20Sopenharmony_ci * for reiserfs.
7858c2ecf20Sopenharmony_ci */
7868c2ecf20Sopenharmony_civoid invalidate_inode_buffers(struct inode *inode)
7878c2ecf20Sopenharmony_ci{
7888c2ecf20Sopenharmony_ci	if (inode_has_buffers(inode)) {
7898c2ecf20Sopenharmony_ci		struct address_space *mapping = &inode->i_data;
7908c2ecf20Sopenharmony_ci		struct list_head *list = &mapping->private_list;
7918c2ecf20Sopenharmony_ci		struct address_space *buffer_mapping = mapping->private_data;
7928c2ecf20Sopenharmony_ci
7938c2ecf20Sopenharmony_ci		spin_lock(&buffer_mapping->private_lock);
7948c2ecf20Sopenharmony_ci		while (!list_empty(list))
7958c2ecf20Sopenharmony_ci			__remove_assoc_queue(BH_ENTRY(list->next));
7968c2ecf20Sopenharmony_ci		spin_unlock(&buffer_mapping->private_lock);
7978c2ecf20Sopenharmony_ci	}
7988c2ecf20Sopenharmony_ci}
7998c2ecf20Sopenharmony_ciEXPORT_SYMBOL(invalidate_inode_buffers);
8008c2ecf20Sopenharmony_ci
8018c2ecf20Sopenharmony_ci/*
8028c2ecf20Sopenharmony_ci * Remove any clean buffers from the inode's buffer list.  This is called
8038c2ecf20Sopenharmony_ci * when we're trying to free the inode itself.  Those buffers can pin it.
8048c2ecf20Sopenharmony_ci *
8058c2ecf20Sopenharmony_ci * Returns true if all buffers were removed.
8068c2ecf20Sopenharmony_ci */
8078c2ecf20Sopenharmony_ciint remove_inode_buffers(struct inode *inode)
8088c2ecf20Sopenharmony_ci{
8098c2ecf20Sopenharmony_ci	int ret = 1;
8108c2ecf20Sopenharmony_ci
8118c2ecf20Sopenharmony_ci	if (inode_has_buffers(inode)) {
8128c2ecf20Sopenharmony_ci		struct address_space *mapping = &inode->i_data;
8138c2ecf20Sopenharmony_ci		struct list_head *list = &mapping->private_list;
8148c2ecf20Sopenharmony_ci		struct address_space *buffer_mapping = mapping->private_data;
8158c2ecf20Sopenharmony_ci
8168c2ecf20Sopenharmony_ci		spin_lock(&buffer_mapping->private_lock);
8178c2ecf20Sopenharmony_ci		while (!list_empty(list)) {
8188c2ecf20Sopenharmony_ci			struct buffer_head *bh = BH_ENTRY(list->next);
8198c2ecf20Sopenharmony_ci			if (buffer_dirty(bh)) {
8208c2ecf20Sopenharmony_ci				ret = 0;
8218c2ecf20Sopenharmony_ci				break;
8228c2ecf20Sopenharmony_ci			}
8238c2ecf20Sopenharmony_ci			__remove_assoc_queue(bh);
8248c2ecf20Sopenharmony_ci		}
8258c2ecf20Sopenharmony_ci		spin_unlock(&buffer_mapping->private_lock);
8268c2ecf20Sopenharmony_ci	}
8278c2ecf20Sopenharmony_ci	return ret;
8288c2ecf20Sopenharmony_ci}
8298c2ecf20Sopenharmony_ci
8308c2ecf20Sopenharmony_ci/*
8318c2ecf20Sopenharmony_ci * Create the appropriate buffers when given a page for data area and
8328c2ecf20Sopenharmony_ci * the size of each buffer.. Use the bh->b_this_page linked list to
8338c2ecf20Sopenharmony_ci * follow the buffers created.  Return NULL if unable to create more
8348c2ecf20Sopenharmony_ci * buffers.
8358c2ecf20Sopenharmony_ci *
8368c2ecf20Sopenharmony_ci * The retry flag is used to differentiate async IO (paging, swapping)
8378c2ecf20Sopenharmony_ci * which may not fail from ordinary buffer allocations.
8388c2ecf20Sopenharmony_ci */
8398c2ecf20Sopenharmony_cistruct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
8408c2ecf20Sopenharmony_ci		bool retry)
8418c2ecf20Sopenharmony_ci{
8428c2ecf20Sopenharmony_ci	struct buffer_head *bh, *head;
8438c2ecf20Sopenharmony_ci	gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
8448c2ecf20Sopenharmony_ci	long offset;
8458c2ecf20Sopenharmony_ci	struct mem_cgroup *memcg, *old_memcg;
8468c2ecf20Sopenharmony_ci
8478c2ecf20Sopenharmony_ci	if (retry)
8488c2ecf20Sopenharmony_ci		gfp |= __GFP_NOFAIL;
8498c2ecf20Sopenharmony_ci
8508c2ecf20Sopenharmony_ci	memcg = get_mem_cgroup_from_page(page);
8518c2ecf20Sopenharmony_ci	old_memcg = set_active_memcg(memcg);
8528c2ecf20Sopenharmony_ci
8538c2ecf20Sopenharmony_ci	head = NULL;
8548c2ecf20Sopenharmony_ci	offset = PAGE_SIZE;
8558c2ecf20Sopenharmony_ci	while ((offset -= size) >= 0) {
8568c2ecf20Sopenharmony_ci		bh = alloc_buffer_head(gfp);
8578c2ecf20Sopenharmony_ci		if (!bh)
8588c2ecf20Sopenharmony_ci			goto no_grow;
8598c2ecf20Sopenharmony_ci
8608c2ecf20Sopenharmony_ci		bh->b_this_page = head;
8618c2ecf20Sopenharmony_ci		bh->b_blocknr = -1;
8628c2ecf20Sopenharmony_ci		head = bh;
8638c2ecf20Sopenharmony_ci
8648c2ecf20Sopenharmony_ci		bh->b_size = size;
8658c2ecf20Sopenharmony_ci
8668c2ecf20Sopenharmony_ci		/* Link the buffer to its page */
8678c2ecf20Sopenharmony_ci		set_bh_page(bh, page, offset);
8688c2ecf20Sopenharmony_ci	}
8698c2ecf20Sopenharmony_ciout:
8708c2ecf20Sopenharmony_ci	set_active_memcg(old_memcg);
8718c2ecf20Sopenharmony_ci	mem_cgroup_put(memcg);
8728c2ecf20Sopenharmony_ci	return head;
8738c2ecf20Sopenharmony_ci/*
8748c2ecf20Sopenharmony_ci * In case anything failed, we just free everything we got.
8758c2ecf20Sopenharmony_ci */
8768c2ecf20Sopenharmony_cino_grow:
8778c2ecf20Sopenharmony_ci	if (head) {
8788c2ecf20Sopenharmony_ci		do {
8798c2ecf20Sopenharmony_ci			bh = head;
8808c2ecf20Sopenharmony_ci			head = head->b_this_page;
8818c2ecf20Sopenharmony_ci			free_buffer_head(bh);
8828c2ecf20Sopenharmony_ci		} while (head);
8838c2ecf20Sopenharmony_ci	}
8848c2ecf20Sopenharmony_ci
8858c2ecf20Sopenharmony_ci	goto out;
8868c2ecf20Sopenharmony_ci}
8878c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(alloc_page_buffers);
8888c2ecf20Sopenharmony_ci
8898c2ecf20Sopenharmony_cistatic inline void
8908c2ecf20Sopenharmony_cilink_dev_buffers(struct page *page, struct buffer_head *head)
8918c2ecf20Sopenharmony_ci{
8928c2ecf20Sopenharmony_ci	struct buffer_head *bh, *tail;
8938c2ecf20Sopenharmony_ci
8948c2ecf20Sopenharmony_ci	bh = head;
8958c2ecf20Sopenharmony_ci	do {
8968c2ecf20Sopenharmony_ci		tail = bh;
8978c2ecf20Sopenharmony_ci		bh = bh->b_this_page;
8988c2ecf20Sopenharmony_ci	} while (bh);
8998c2ecf20Sopenharmony_ci	tail->b_this_page = head;
9008c2ecf20Sopenharmony_ci	attach_page_private(page, head);
9018c2ecf20Sopenharmony_ci}
9028c2ecf20Sopenharmony_ci
9038c2ecf20Sopenharmony_cistatic sector_t blkdev_max_block(struct block_device *bdev, unsigned int size)
9048c2ecf20Sopenharmony_ci{
9058c2ecf20Sopenharmony_ci	sector_t retval = ~((sector_t)0);
9068c2ecf20Sopenharmony_ci	loff_t sz = i_size_read(bdev->bd_inode);
9078c2ecf20Sopenharmony_ci
9088c2ecf20Sopenharmony_ci	if (sz) {
9098c2ecf20Sopenharmony_ci		unsigned int sizebits = blksize_bits(size);
9108c2ecf20Sopenharmony_ci		retval = (sz >> sizebits);
9118c2ecf20Sopenharmony_ci	}
9128c2ecf20Sopenharmony_ci	return retval;
9138c2ecf20Sopenharmony_ci}
9148c2ecf20Sopenharmony_ci
9158c2ecf20Sopenharmony_ci/*
9168c2ecf20Sopenharmony_ci * Initialise the state of a blockdev page's buffers.
9178c2ecf20Sopenharmony_ci */
9188c2ecf20Sopenharmony_cistatic sector_t
9198c2ecf20Sopenharmony_ciinit_page_buffers(struct page *page, struct block_device *bdev,
9208c2ecf20Sopenharmony_ci			sector_t block, int size)
9218c2ecf20Sopenharmony_ci{
9228c2ecf20Sopenharmony_ci	struct buffer_head *head = page_buffers(page);
9238c2ecf20Sopenharmony_ci	struct buffer_head *bh = head;
9248c2ecf20Sopenharmony_ci	int uptodate = PageUptodate(page);
9258c2ecf20Sopenharmony_ci	sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode), size);
9268c2ecf20Sopenharmony_ci
9278c2ecf20Sopenharmony_ci	do {
9288c2ecf20Sopenharmony_ci		if (!buffer_mapped(bh)) {
9298c2ecf20Sopenharmony_ci			bh->b_end_io = NULL;
9308c2ecf20Sopenharmony_ci			bh->b_private = NULL;
9318c2ecf20Sopenharmony_ci			bh->b_bdev = bdev;
9328c2ecf20Sopenharmony_ci			bh->b_blocknr = block;
9338c2ecf20Sopenharmony_ci			if (uptodate)
9348c2ecf20Sopenharmony_ci				set_buffer_uptodate(bh);
9358c2ecf20Sopenharmony_ci			if (block < end_block)
9368c2ecf20Sopenharmony_ci				set_buffer_mapped(bh);
9378c2ecf20Sopenharmony_ci		}
9388c2ecf20Sopenharmony_ci		block++;
9398c2ecf20Sopenharmony_ci		bh = bh->b_this_page;
9408c2ecf20Sopenharmony_ci	} while (bh != head);
9418c2ecf20Sopenharmony_ci
9428c2ecf20Sopenharmony_ci	/*
9438c2ecf20Sopenharmony_ci	 * Caller needs to validate requested block against end of device.
9448c2ecf20Sopenharmony_ci	 */
9458c2ecf20Sopenharmony_ci	return end_block;
9468c2ecf20Sopenharmony_ci}
9478c2ecf20Sopenharmony_ci
9488c2ecf20Sopenharmony_ci/*
9498c2ecf20Sopenharmony_ci * Create the page-cache page that contains the requested block.
9508c2ecf20Sopenharmony_ci *
9518c2ecf20Sopenharmony_ci * This is used purely for blockdev mappings.
9528c2ecf20Sopenharmony_ci */
9538c2ecf20Sopenharmony_cistatic int
9548c2ecf20Sopenharmony_cigrow_dev_page(struct block_device *bdev, sector_t block,
9558c2ecf20Sopenharmony_ci	      pgoff_t index, int size, int sizebits, gfp_t gfp)
9568c2ecf20Sopenharmony_ci{
9578c2ecf20Sopenharmony_ci	struct inode *inode = bdev->bd_inode;
9588c2ecf20Sopenharmony_ci	struct page *page;
9598c2ecf20Sopenharmony_ci	struct buffer_head *bh;
9608c2ecf20Sopenharmony_ci	sector_t end_block;
9618c2ecf20Sopenharmony_ci	int ret = 0;
9628c2ecf20Sopenharmony_ci	gfp_t gfp_mask;
9638c2ecf20Sopenharmony_ci
9648c2ecf20Sopenharmony_ci	gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
9658c2ecf20Sopenharmony_ci
9668c2ecf20Sopenharmony_ci	/*
9678c2ecf20Sopenharmony_ci	 * XXX: __getblk_slow() can not really deal with failure and
9688c2ecf20Sopenharmony_ci	 * will endlessly loop on improvised global reclaim.  Prefer
9698c2ecf20Sopenharmony_ci	 * looping in the allocator rather than here, at least that
9708c2ecf20Sopenharmony_ci	 * code knows what it's doing.
9718c2ecf20Sopenharmony_ci	 */
9728c2ecf20Sopenharmony_ci	gfp_mask |= __GFP_NOFAIL;
9738c2ecf20Sopenharmony_ci
9748c2ecf20Sopenharmony_ci	page = find_or_create_page(inode->i_mapping, index, gfp_mask);
9758c2ecf20Sopenharmony_ci
9768c2ecf20Sopenharmony_ci	BUG_ON(!PageLocked(page));
9778c2ecf20Sopenharmony_ci
9788c2ecf20Sopenharmony_ci	if (page_has_buffers(page)) {
9798c2ecf20Sopenharmony_ci		bh = page_buffers(page);
9808c2ecf20Sopenharmony_ci		if (bh->b_size == size) {
9818c2ecf20Sopenharmony_ci			end_block = init_page_buffers(page, bdev,
9828c2ecf20Sopenharmony_ci						(sector_t)index << sizebits,
9838c2ecf20Sopenharmony_ci						size);
9848c2ecf20Sopenharmony_ci			goto done;
9858c2ecf20Sopenharmony_ci		}
9868c2ecf20Sopenharmony_ci		if (!try_to_free_buffers(page))
9878c2ecf20Sopenharmony_ci			goto failed;
9888c2ecf20Sopenharmony_ci	}
9898c2ecf20Sopenharmony_ci
9908c2ecf20Sopenharmony_ci	/*
9918c2ecf20Sopenharmony_ci	 * Allocate some buffers for this page
9928c2ecf20Sopenharmony_ci	 */
9938c2ecf20Sopenharmony_ci	bh = alloc_page_buffers(page, size, true);
9948c2ecf20Sopenharmony_ci
9958c2ecf20Sopenharmony_ci	/*
9968c2ecf20Sopenharmony_ci	 * Link the page to the buffers and initialise them.  Take the
9978c2ecf20Sopenharmony_ci	 * lock to be atomic wrt __find_get_block(), which does not
9988c2ecf20Sopenharmony_ci	 * run under the page lock.
9998c2ecf20Sopenharmony_ci	 */
10008c2ecf20Sopenharmony_ci	spin_lock(&inode->i_mapping->private_lock);
10018c2ecf20Sopenharmony_ci	link_dev_buffers(page, bh);
10028c2ecf20Sopenharmony_ci	end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
10038c2ecf20Sopenharmony_ci			size);
10048c2ecf20Sopenharmony_ci	spin_unlock(&inode->i_mapping->private_lock);
10058c2ecf20Sopenharmony_cidone:
10068c2ecf20Sopenharmony_ci	ret = (block < end_block) ? 1 : -ENXIO;
10078c2ecf20Sopenharmony_cifailed:
10088c2ecf20Sopenharmony_ci	unlock_page(page);
10098c2ecf20Sopenharmony_ci	put_page(page);
10108c2ecf20Sopenharmony_ci	return ret;
10118c2ecf20Sopenharmony_ci}
10128c2ecf20Sopenharmony_ci
10138c2ecf20Sopenharmony_ci/*
10148c2ecf20Sopenharmony_ci * Create buffers for the specified block device block's page.  If
10158c2ecf20Sopenharmony_ci * that page was dirty, the buffers are set dirty also.
10168c2ecf20Sopenharmony_ci */
10178c2ecf20Sopenharmony_cistatic int
10188c2ecf20Sopenharmony_cigrow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp)
10198c2ecf20Sopenharmony_ci{
10208c2ecf20Sopenharmony_ci	pgoff_t index;
10218c2ecf20Sopenharmony_ci	int sizebits;
10228c2ecf20Sopenharmony_ci
10238c2ecf20Sopenharmony_ci	sizebits = -1;
10248c2ecf20Sopenharmony_ci	do {
10258c2ecf20Sopenharmony_ci		sizebits++;
10268c2ecf20Sopenharmony_ci	} while ((size << sizebits) < PAGE_SIZE);
10278c2ecf20Sopenharmony_ci
10288c2ecf20Sopenharmony_ci	index = block >> sizebits;
10298c2ecf20Sopenharmony_ci
10308c2ecf20Sopenharmony_ci	/*
10318c2ecf20Sopenharmony_ci	 * Check for a block which wants to lie outside our maximum possible
10328c2ecf20Sopenharmony_ci	 * pagecache index.  (this comparison is done using sector_t types).
10338c2ecf20Sopenharmony_ci	 */
10348c2ecf20Sopenharmony_ci	if (unlikely(index != block >> sizebits)) {
10358c2ecf20Sopenharmony_ci		printk(KERN_ERR "%s: requested out-of-range block %llu for "
10368c2ecf20Sopenharmony_ci			"device %pg\n",
10378c2ecf20Sopenharmony_ci			__func__, (unsigned long long)block,
10388c2ecf20Sopenharmony_ci			bdev);
10398c2ecf20Sopenharmony_ci		return -EIO;
10408c2ecf20Sopenharmony_ci	}
10418c2ecf20Sopenharmony_ci
10428c2ecf20Sopenharmony_ci	/* Create a page with the proper size buffers.. */
10438c2ecf20Sopenharmony_ci	return grow_dev_page(bdev, block, index, size, sizebits, gfp);
10448c2ecf20Sopenharmony_ci}
10458c2ecf20Sopenharmony_ci
10468c2ecf20Sopenharmony_cistatic struct buffer_head *
10478c2ecf20Sopenharmony_ci__getblk_slow(struct block_device *bdev, sector_t block,
10488c2ecf20Sopenharmony_ci	     unsigned size, gfp_t gfp)
10498c2ecf20Sopenharmony_ci{
10508c2ecf20Sopenharmony_ci	/* Size must be multiple of hard sectorsize */
10518c2ecf20Sopenharmony_ci	if (unlikely(size & (bdev_logical_block_size(bdev)-1) ||
10528c2ecf20Sopenharmony_ci			(size < 512 || size > PAGE_SIZE))) {
10538c2ecf20Sopenharmony_ci		printk(KERN_ERR "getblk(): invalid block size %d requested\n",
10548c2ecf20Sopenharmony_ci					size);
10558c2ecf20Sopenharmony_ci		printk(KERN_ERR "logical block size: %d\n",
10568c2ecf20Sopenharmony_ci					bdev_logical_block_size(bdev));
10578c2ecf20Sopenharmony_ci
10588c2ecf20Sopenharmony_ci		dump_stack();
10598c2ecf20Sopenharmony_ci		return NULL;
10608c2ecf20Sopenharmony_ci	}
10618c2ecf20Sopenharmony_ci
10628c2ecf20Sopenharmony_ci	for (;;) {
10638c2ecf20Sopenharmony_ci		struct buffer_head *bh;
10648c2ecf20Sopenharmony_ci		int ret;
10658c2ecf20Sopenharmony_ci
10668c2ecf20Sopenharmony_ci		bh = __find_get_block(bdev, block, size);
10678c2ecf20Sopenharmony_ci		if (bh)
10688c2ecf20Sopenharmony_ci			return bh;
10698c2ecf20Sopenharmony_ci
10708c2ecf20Sopenharmony_ci		ret = grow_buffers(bdev, block, size, gfp);
10718c2ecf20Sopenharmony_ci		if (ret < 0)
10728c2ecf20Sopenharmony_ci			return NULL;
10738c2ecf20Sopenharmony_ci	}
10748c2ecf20Sopenharmony_ci}
10758c2ecf20Sopenharmony_ci
10768c2ecf20Sopenharmony_ci/*
10778c2ecf20Sopenharmony_ci * The relationship between dirty buffers and dirty pages:
10788c2ecf20Sopenharmony_ci *
10798c2ecf20Sopenharmony_ci * Whenever a page has any dirty buffers, the page's dirty bit is set, and
10808c2ecf20Sopenharmony_ci * the page is tagged dirty in the page cache.
10818c2ecf20Sopenharmony_ci *
10828c2ecf20Sopenharmony_ci * At all times, the dirtiness of the buffers represents the dirtiness of
10838c2ecf20Sopenharmony_ci * subsections of the page.  If the page has buffers, the page dirty bit is
10848c2ecf20Sopenharmony_ci * merely a hint about the true dirty state.
10858c2ecf20Sopenharmony_ci *
10868c2ecf20Sopenharmony_ci * When a page is set dirty in its entirety, all its buffers are marked dirty
10878c2ecf20Sopenharmony_ci * (if the page has buffers).
10888c2ecf20Sopenharmony_ci *
10898c2ecf20Sopenharmony_ci * When a buffer is marked dirty, its page is dirtied, but the page's other
10908c2ecf20Sopenharmony_ci * buffers are not.
10918c2ecf20Sopenharmony_ci *
10928c2ecf20Sopenharmony_ci * Also.  When blockdev buffers are explicitly read with bread(), they
10938c2ecf20Sopenharmony_ci * individually become uptodate.  But their backing page remains not
10948c2ecf20Sopenharmony_ci * uptodate - even if all of its buffers are uptodate.  A subsequent
10958c2ecf20Sopenharmony_ci * block_read_full_page() against that page will discover all the uptodate
10968c2ecf20Sopenharmony_ci * buffers, will set the page uptodate and will perform no I/O.
10978c2ecf20Sopenharmony_ci */
10988c2ecf20Sopenharmony_ci
10998c2ecf20Sopenharmony_ci/**
11008c2ecf20Sopenharmony_ci * mark_buffer_dirty - mark a buffer_head as needing writeout
11018c2ecf20Sopenharmony_ci * @bh: the buffer_head to mark dirty
11028c2ecf20Sopenharmony_ci *
11038c2ecf20Sopenharmony_ci * mark_buffer_dirty() will set the dirty bit against the buffer, then set
11048c2ecf20Sopenharmony_ci * its backing page dirty, then tag the page as dirty in the page cache
11058c2ecf20Sopenharmony_ci * and then attach the address_space's inode to its superblock's dirty
11068c2ecf20Sopenharmony_ci * inode list.
11078c2ecf20Sopenharmony_ci *
11088c2ecf20Sopenharmony_ci * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
11098c2ecf20Sopenharmony_ci * i_pages lock and mapping->host->i_lock.
11108c2ecf20Sopenharmony_ci */
11118c2ecf20Sopenharmony_civoid mark_buffer_dirty(struct buffer_head *bh)
11128c2ecf20Sopenharmony_ci{
11138c2ecf20Sopenharmony_ci	WARN_ON_ONCE(!buffer_uptodate(bh));
11148c2ecf20Sopenharmony_ci
11158c2ecf20Sopenharmony_ci	trace_block_dirty_buffer(bh);
11168c2ecf20Sopenharmony_ci
11178c2ecf20Sopenharmony_ci	/*
11188c2ecf20Sopenharmony_ci	 * Very *carefully* optimize the it-is-already-dirty case.
11198c2ecf20Sopenharmony_ci	 *
11208c2ecf20Sopenharmony_ci	 * Don't let the final "is it dirty" escape to before we
11218c2ecf20Sopenharmony_ci	 * perhaps modified the buffer.
11228c2ecf20Sopenharmony_ci	 */
11238c2ecf20Sopenharmony_ci	if (buffer_dirty(bh)) {
11248c2ecf20Sopenharmony_ci		smp_mb();
11258c2ecf20Sopenharmony_ci		if (buffer_dirty(bh))
11268c2ecf20Sopenharmony_ci			return;
11278c2ecf20Sopenharmony_ci	}
11288c2ecf20Sopenharmony_ci
11298c2ecf20Sopenharmony_ci	if (!test_set_buffer_dirty(bh)) {
11308c2ecf20Sopenharmony_ci		struct page *page = bh->b_page;
11318c2ecf20Sopenharmony_ci		struct address_space *mapping = NULL;
11328c2ecf20Sopenharmony_ci
11338c2ecf20Sopenharmony_ci		lock_page_memcg(page);
11348c2ecf20Sopenharmony_ci		if (!TestSetPageDirty(page)) {
11358c2ecf20Sopenharmony_ci			mapping = page_mapping(page);
11368c2ecf20Sopenharmony_ci			if (mapping)
11378c2ecf20Sopenharmony_ci				__set_page_dirty(page, mapping, 0);
11388c2ecf20Sopenharmony_ci		}
11398c2ecf20Sopenharmony_ci		unlock_page_memcg(page);
11408c2ecf20Sopenharmony_ci		if (mapping)
11418c2ecf20Sopenharmony_ci			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
11428c2ecf20Sopenharmony_ci	}
11438c2ecf20Sopenharmony_ci}
11448c2ecf20Sopenharmony_ciEXPORT_SYMBOL(mark_buffer_dirty);
11458c2ecf20Sopenharmony_ci
11468c2ecf20Sopenharmony_civoid mark_buffer_write_io_error(struct buffer_head *bh)
11478c2ecf20Sopenharmony_ci{
11488c2ecf20Sopenharmony_ci	struct super_block *sb;
11498c2ecf20Sopenharmony_ci
11508c2ecf20Sopenharmony_ci	set_buffer_write_io_error(bh);
11518c2ecf20Sopenharmony_ci	/* FIXME: do we need to set this in both places? */
11528c2ecf20Sopenharmony_ci	if (bh->b_page && bh->b_page->mapping)
11538c2ecf20Sopenharmony_ci		mapping_set_error(bh->b_page->mapping, -EIO);
11548c2ecf20Sopenharmony_ci	if (bh->b_assoc_map)
11558c2ecf20Sopenharmony_ci		mapping_set_error(bh->b_assoc_map, -EIO);
11568c2ecf20Sopenharmony_ci	rcu_read_lock();
11578c2ecf20Sopenharmony_ci	sb = READ_ONCE(bh->b_bdev->bd_super);
11588c2ecf20Sopenharmony_ci	if (sb)
11598c2ecf20Sopenharmony_ci		errseq_set(&sb->s_wb_err, -EIO);
11608c2ecf20Sopenharmony_ci	rcu_read_unlock();
11618c2ecf20Sopenharmony_ci}
11628c2ecf20Sopenharmony_ciEXPORT_SYMBOL(mark_buffer_write_io_error);
11638c2ecf20Sopenharmony_ci
11648c2ecf20Sopenharmony_ci/*
11658c2ecf20Sopenharmony_ci * Decrement a buffer_head's reference count.  If all buffers against a page
11668c2ecf20Sopenharmony_ci * have zero reference count, are clean and unlocked, and if the page is clean
11678c2ecf20Sopenharmony_ci * and unlocked then try_to_free_buffers() may strip the buffers from the page
11688c2ecf20Sopenharmony_ci * in preparation for freeing it (sometimes, rarely, buffers are removed from
11698c2ecf20Sopenharmony_ci * a page but it ends up not being freed, and buffers may later be reattached).
11708c2ecf20Sopenharmony_ci */
11718c2ecf20Sopenharmony_civoid __brelse(struct buffer_head * buf)
11728c2ecf20Sopenharmony_ci{
11738c2ecf20Sopenharmony_ci	if (atomic_read(&buf->b_count)) {
11748c2ecf20Sopenharmony_ci		put_bh(buf);
11758c2ecf20Sopenharmony_ci		return;
11768c2ecf20Sopenharmony_ci	}
11778c2ecf20Sopenharmony_ci	WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
11788c2ecf20Sopenharmony_ci}
11798c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__brelse);
11808c2ecf20Sopenharmony_ci
11818c2ecf20Sopenharmony_ci/*
11828c2ecf20Sopenharmony_ci * bforget() is like brelse(), except it discards any
11838c2ecf20Sopenharmony_ci * potentially dirty data.
11848c2ecf20Sopenharmony_ci */
11858c2ecf20Sopenharmony_civoid __bforget(struct buffer_head *bh)
11868c2ecf20Sopenharmony_ci{
11878c2ecf20Sopenharmony_ci	clear_buffer_dirty(bh);
11888c2ecf20Sopenharmony_ci	if (bh->b_assoc_map) {
11898c2ecf20Sopenharmony_ci		struct address_space *buffer_mapping = bh->b_page->mapping;
11908c2ecf20Sopenharmony_ci
11918c2ecf20Sopenharmony_ci		spin_lock(&buffer_mapping->private_lock);
11928c2ecf20Sopenharmony_ci		list_del_init(&bh->b_assoc_buffers);
11938c2ecf20Sopenharmony_ci		bh->b_assoc_map = NULL;
11948c2ecf20Sopenharmony_ci		spin_unlock(&buffer_mapping->private_lock);
11958c2ecf20Sopenharmony_ci	}
11968c2ecf20Sopenharmony_ci	__brelse(bh);
11978c2ecf20Sopenharmony_ci}
11988c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__bforget);
11998c2ecf20Sopenharmony_ci
12008c2ecf20Sopenharmony_cistatic struct buffer_head *__bread_slow(struct buffer_head *bh)
12018c2ecf20Sopenharmony_ci{
12028c2ecf20Sopenharmony_ci	lock_buffer(bh);
12038c2ecf20Sopenharmony_ci	if (buffer_uptodate(bh)) {
12048c2ecf20Sopenharmony_ci		unlock_buffer(bh);
12058c2ecf20Sopenharmony_ci		return bh;
12068c2ecf20Sopenharmony_ci	} else {
12078c2ecf20Sopenharmony_ci		get_bh(bh);
12088c2ecf20Sopenharmony_ci		bh->b_end_io = end_buffer_read_sync;
12098c2ecf20Sopenharmony_ci		submit_bh(REQ_OP_READ, 0, bh);
12108c2ecf20Sopenharmony_ci		wait_on_buffer(bh);
12118c2ecf20Sopenharmony_ci		if (buffer_uptodate(bh))
12128c2ecf20Sopenharmony_ci			return bh;
12138c2ecf20Sopenharmony_ci	}
12148c2ecf20Sopenharmony_ci	brelse(bh);
12158c2ecf20Sopenharmony_ci	return NULL;
12168c2ecf20Sopenharmony_ci}
12178c2ecf20Sopenharmony_ci
12188c2ecf20Sopenharmony_ci/*
12198c2ecf20Sopenharmony_ci * Per-cpu buffer LRU implementation.  To reduce the cost of __find_get_block().
12208c2ecf20Sopenharmony_ci * The bhs[] array is sorted - newest buffer is at bhs[0].  Buffers have their
12218c2ecf20Sopenharmony_ci * refcount elevated by one when they're in an LRU.  A buffer can only appear
12228c2ecf20Sopenharmony_ci * once in a particular CPU's LRU.  A single buffer can be present in multiple
12238c2ecf20Sopenharmony_ci * CPU's LRUs at the same time.
12248c2ecf20Sopenharmony_ci *
12258c2ecf20Sopenharmony_ci * This is a transparent caching front-end to sb_bread(), sb_getblk() and
12268c2ecf20Sopenharmony_ci * sb_find_get_block().
12278c2ecf20Sopenharmony_ci *
12288c2ecf20Sopenharmony_ci * The LRUs themselves only need locking against invalidate_bh_lrus.  We use
12298c2ecf20Sopenharmony_ci * a local interrupt disable for that.
12308c2ecf20Sopenharmony_ci */
12318c2ecf20Sopenharmony_ci
12328c2ecf20Sopenharmony_ci#define BH_LRU_SIZE	16
12338c2ecf20Sopenharmony_ci
12348c2ecf20Sopenharmony_cistruct bh_lru {
12358c2ecf20Sopenharmony_ci	struct buffer_head *bhs[BH_LRU_SIZE];
12368c2ecf20Sopenharmony_ci};
12378c2ecf20Sopenharmony_ci
12388c2ecf20Sopenharmony_cistatic DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }};
12398c2ecf20Sopenharmony_ci
12408c2ecf20Sopenharmony_ci#ifdef CONFIG_SMP
12418c2ecf20Sopenharmony_ci#define bh_lru_lock()	local_irq_disable()
12428c2ecf20Sopenharmony_ci#define bh_lru_unlock()	local_irq_enable()
12438c2ecf20Sopenharmony_ci#else
12448c2ecf20Sopenharmony_ci#define bh_lru_lock()	preempt_disable()
12458c2ecf20Sopenharmony_ci#define bh_lru_unlock()	preempt_enable()
12468c2ecf20Sopenharmony_ci#endif
12478c2ecf20Sopenharmony_ci
12488c2ecf20Sopenharmony_cistatic inline void check_irqs_on(void)
12498c2ecf20Sopenharmony_ci{
12508c2ecf20Sopenharmony_ci#ifdef irqs_disabled
12518c2ecf20Sopenharmony_ci	BUG_ON(irqs_disabled());
12528c2ecf20Sopenharmony_ci#endif
12538c2ecf20Sopenharmony_ci}
12548c2ecf20Sopenharmony_ci
12558c2ecf20Sopenharmony_ci/*
12568c2ecf20Sopenharmony_ci * Install a buffer_head into this cpu's LRU.  If not already in the LRU, it is
12578c2ecf20Sopenharmony_ci * inserted at the front, and the buffer_head at the back if any is evicted.
12588c2ecf20Sopenharmony_ci * Or, if already in the LRU it is moved to the front.
12598c2ecf20Sopenharmony_ci */
12608c2ecf20Sopenharmony_cistatic void bh_lru_install(struct buffer_head *bh)
12618c2ecf20Sopenharmony_ci{
12628c2ecf20Sopenharmony_ci	struct buffer_head *evictee = bh;
12638c2ecf20Sopenharmony_ci	struct bh_lru *b;
12648c2ecf20Sopenharmony_ci	int i;
12658c2ecf20Sopenharmony_ci
12668c2ecf20Sopenharmony_ci	check_irqs_on();
12678c2ecf20Sopenharmony_ci	bh_lru_lock();
12688c2ecf20Sopenharmony_ci
12698c2ecf20Sopenharmony_ci	b = this_cpu_ptr(&bh_lrus);
12708c2ecf20Sopenharmony_ci	for (i = 0; i < BH_LRU_SIZE; i++) {
12718c2ecf20Sopenharmony_ci		swap(evictee, b->bhs[i]);
12728c2ecf20Sopenharmony_ci		if (evictee == bh) {
12738c2ecf20Sopenharmony_ci			bh_lru_unlock();
12748c2ecf20Sopenharmony_ci			return;
12758c2ecf20Sopenharmony_ci		}
12768c2ecf20Sopenharmony_ci	}
12778c2ecf20Sopenharmony_ci
12788c2ecf20Sopenharmony_ci	get_bh(bh);
12798c2ecf20Sopenharmony_ci	bh_lru_unlock();
12808c2ecf20Sopenharmony_ci	brelse(evictee);
12818c2ecf20Sopenharmony_ci}
12828c2ecf20Sopenharmony_ci
12838c2ecf20Sopenharmony_ci/*
12848c2ecf20Sopenharmony_ci * Look up the bh in this cpu's LRU.  If it's there, move it to the head.
12858c2ecf20Sopenharmony_ci */
12868c2ecf20Sopenharmony_cistatic struct buffer_head *
12878c2ecf20Sopenharmony_cilookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
12888c2ecf20Sopenharmony_ci{
12898c2ecf20Sopenharmony_ci	struct buffer_head *ret = NULL;
12908c2ecf20Sopenharmony_ci	unsigned int i;
12918c2ecf20Sopenharmony_ci
12928c2ecf20Sopenharmony_ci	check_irqs_on();
12938c2ecf20Sopenharmony_ci	bh_lru_lock();
12948c2ecf20Sopenharmony_ci	for (i = 0; i < BH_LRU_SIZE; i++) {
12958c2ecf20Sopenharmony_ci		struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
12968c2ecf20Sopenharmony_ci
12978c2ecf20Sopenharmony_ci		if (bh && bh->b_blocknr == block && bh->b_bdev == bdev &&
12988c2ecf20Sopenharmony_ci		    bh->b_size == size) {
12998c2ecf20Sopenharmony_ci			if (i) {
13008c2ecf20Sopenharmony_ci				while (i) {
13018c2ecf20Sopenharmony_ci					__this_cpu_write(bh_lrus.bhs[i],
13028c2ecf20Sopenharmony_ci						__this_cpu_read(bh_lrus.bhs[i - 1]));
13038c2ecf20Sopenharmony_ci					i--;
13048c2ecf20Sopenharmony_ci				}
13058c2ecf20Sopenharmony_ci				__this_cpu_write(bh_lrus.bhs[0], bh);
13068c2ecf20Sopenharmony_ci			}
13078c2ecf20Sopenharmony_ci			get_bh(bh);
13088c2ecf20Sopenharmony_ci			ret = bh;
13098c2ecf20Sopenharmony_ci			break;
13108c2ecf20Sopenharmony_ci		}
13118c2ecf20Sopenharmony_ci	}
13128c2ecf20Sopenharmony_ci	bh_lru_unlock();
13138c2ecf20Sopenharmony_ci	return ret;
13148c2ecf20Sopenharmony_ci}
13158c2ecf20Sopenharmony_ci
13168c2ecf20Sopenharmony_ci/*
13178c2ecf20Sopenharmony_ci * Perform a pagecache lookup for the matching buffer.  If it's there, refresh
13188c2ecf20Sopenharmony_ci * it in the LRU and mark it as accessed.  If it is not present then return
13198c2ecf20Sopenharmony_ci * NULL
13208c2ecf20Sopenharmony_ci */
13218c2ecf20Sopenharmony_cistruct buffer_head *
13228c2ecf20Sopenharmony_ci__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
13238c2ecf20Sopenharmony_ci{
13248c2ecf20Sopenharmony_ci	struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
13258c2ecf20Sopenharmony_ci
13268c2ecf20Sopenharmony_ci	if (bh == NULL) {
13278c2ecf20Sopenharmony_ci		/* __find_get_block_slow will mark the page accessed */
13288c2ecf20Sopenharmony_ci		bh = __find_get_block_slow(bdev, block);
13298c2ecf20Sopenharmony_ci		if (bh)
13308c2ecf20Sopenharmony_ci			bh_lru_install(bh);
13318c2ecf20Sopenharmony_ci	} else
13328c2ecf20Sopenharmony_ci		touch_buffer(bh);
13338c2ecf20Sopenharmony_ci
13348c2ecf20Sopenharmony_ci	return bh;
13358c2ecf20Sopenharmony_ci}
13368c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__find_get_block);
13378c2ecf20Sopenharmony_ci
13388c2ecf20Sopenharmony_ci/*
13398c2ecf20Sopenharmony_ci * __getblk_gfp() will locate (and, if necessary, create) the buffer_head
13408c2ecf20Sopenharmony_ci * which corresponds to the passed block_device, block and size. The
13418c2ecf20Sopenharmony_ci * returned buffer has its reference count incremented.
13428c2ecf20Sopenharmony_ci *
13438c2ecf20Sopenharmony_ci * __getblk_gfp() will lock up the machine if grow_dev_page's
13448c2ecf20Sopenharmony_ci * try_to_free_buffers() attempt is failing.  FIXME, perhaps?
13458c2ecf20Sopenharmony_ci */
13468c2ecf20Sopenharmony_cistruct buffer_head *
13478c2ecf20Sopenharmony_ci__getblk_gfp(struct block_device *bdev, sector_t block,
13488c2ecf20Sopenharmony_ci	     unsigned size, gfp_t gfp)
13498c2ecf20Sopenharmony_ci{
13508c2ecf20Sopenharmony_ci	struct buffer_head *bh = __find_get_block(bdev, block, size);
13518c2ecf20Sopenharmony_ci
13528c2ecf20Sopenharmony_ci	might_sleep();
13538c2ecf20Sopenharmony_ci	if (bh == NULL)
13548c2ecf20Sopenharmony_ci		bh = __getblk_slow(bdev, block, size, gfp);
13558c2ecf20Sopenharmony_ci	return bh;
13568c2ecf20Sopenharmony_ci}
13578c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__getblk_gfp);
13588c2ecf20Sopenharmony_ci
13598c2ecf20Sopenharmony_ci/*
13608c2ecf20Sopenharmony_ci * Do async read-ahead on a buffer..
13618c2ecf20Sopenharmony_ci */
13628c2ecf20Sopenharmony_civoid __breadahead(struct block_device *bdev, sector_t block, unsigned size)
13638c2ecf20Sopenharmony_ci{
13648c2ecf20Sopenharmony_ci	struct buffer_head *bh = __getblk(bdev, block, size);
13658c2ecf20Sopenharmony_ci	if (likely(bh)) {
13668c2ecf20Sopenharmony_ci		ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
13678c2ecf20Sopenharmony_ci		brelse(bh);
13688c2ecf20Sopenharmony_ci	}
13698c2ecf20Sopenharmony_ci}
13708c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__breadahead);
13718c2ecf20Sopenharmony_ci
13728c2ecf20Sopenharmony_civoid __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size,
13738c2ecf20Sopenharmony_ci		      gfp_t gfp)
13748c2ecf20Sopenharmony_ci{
13758c2ecf20Sopenharmony_ci	struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
13768c2ecf20Sopenharmony_ci	if (likely(bh)) {
13778c2ecf20Sopenharmony_ci		ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh);
13788c2ecf20Sopenharmony_ci		brelse(bh);
13798c2ecf20Sopenharmony_ci	}
13808c2ecf20Sopenharmony_ci}
13818c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__breadahead_gfp);
13828c2ecf20Sopenharmony_ci
13838c2ecf20Sopenharmony_ci/**
13848c2ecf20Sopenharmony_ci *  __bread_gfp() - reads a specified block and returns the bh
13858c2ecf20Sopenharmony_ci *  @bdev: the block_device to read from
13868c2ecf20Sopenharmony_ci *  @block: number of block
13878c2ecf20Sopenharmony_ci *  @size: size (in bytes) to read
13888c2ecf20Sopenharmony_ci *  @gfp: page allocation flag
13898c2ecf20Sopenharmony_ci *
13908c2ecf20Sopenharmony_ci *  Reads a specified block, and returns buffer head that contains it.
13918c2ecf20Sopenharmony_ci *  The page cache can be allocated from non-movable area
13928c2ecf20Sopenharmony_ci *  not to prevent page migration if you set gfp to zero.
13938c2ecf20Sopenharmony_ci *  It returns NULL if the block was unreadable.
13948c2ecf20Sopenharmony_ci */
13958c2ecf20Sopenharmony_cistruct buffer_head *
13968c2ecf20Sopenharmony_ci__bread_gfp(struct block_device *bdev, sector_t block,
13978c2ecf20Sopenharmony_ci		   unsigned size, gfp_t gfp)
13988c2ecf20Sopenharmony_ci{
13998c2ecf20Sopenharmony_ci	struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp);
14008c2ecf20Sopenharmony_ci
14018c2ecf20Sopenharmony_ci	if (likely(bh) && !buffer_uptodate(bh))
14028c2ecf20Sopenharmony_ci		bh = __bread_slow(bh);
14038c2ecf20Sopenharmony_ci	return bh;
14048c2ecf20Sopenharmony_ci}
14058c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__bread_gfp);
14068c2ecf20Sopenharmony_ci
14078c2ecf20Sopenharmony_ci/*
14088c2ecf20Sopenharmony_ci * invalidate_bh_lrus() is called rarely - but not only at unmount.
14098c2ecf20Sopenharmony_ci * This doesn't race because it runs in each cpu either in irq
14108c2ecf20Sopenharmony_ci * or with preempt disabled.
14118c2ecf20Sopenharmony_ci */
14128c2ecf20Sopenharmony_cistatic void invalidate_bh_lru(void *arg)
14138c2ecf20Sopenharmony_ci{
14148c2ecf20Sopenharmony_ci	struct bh_lru *b = &get_cpu_var(bh_lrus);
14158c2ecf20Sopenharmony_ci	int i;
14168c2ecf20Sopenharmony_ci
14178c2ecf20Sopenharmony_ci	for (i = 0; i < BH_LRU_SIZE; i++) {
14188c2ecf20Sopenharmony_ci		brelse(b->bhs[i]);
14198c2ecf20Sopenharmony_ci		b->bhs[i] = NULL;
14208c2ecf20Sopenharmony_ci	}
14218c2ecf20Sopenharmony_ci	put_cpu_var(bh_lrus);
14228c2ecf20Sopenharmony_ci}
14238c2ecf20Sopenharmony_ci
14248c2ecf20Sopenharmony_cistatic bool has_bh_in_lru(int cpu, void *dummy)
14258c2ecf20Sopenharmony_ci{
14268c2ecf20Sopenharmony_ci	struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
14278c2ecf20Sopenharmony_ci	int i;
14288c2ecf20Sopenharmony_ci
14298c2ecf20Sopenharmony_ci	for (i = 0; i < BH_LRU_SIZE; i++) {
14308c2ecf20Sopenharmony_ci		if (b->bhs[i])
14318c2ecf20Sopenharmony_ci			return true;
14328c2ecf20Sopenharmony_ci	}
14338c2ecf20Sopenharmony_ci
14348c2ecf20Sopenharmony_ci	return false;
14358c2ecf20Sopenharmony_ci}
14368c2ecf20Sopenharmony_ci
14378c2ecf20Sopenharmony_civoid invalidate_bh_lrus(void)
14388c2ecf20Sopenharmony_ci{
14398c2ecf20Sopenharmony_ci	on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1);
14408c2ecf20Sopenharmony_ci}
14418c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(invalidate_bh_lrus);
14428c2ecf20Sopenharmony_ci
14438c2ecf20Sopenharmony_civoid set_bh_page(struct buffer_head *bh,
14448c2ecf20Sopenharmony_ci		struct page *page, unsigned long offset)
14458c2ecf20Sopenharmony_ci{
14468c2ecf20Sopenharmony_ci	bh->b_page = page;
14478c2ecf20Sopenharmony_ci	BUG_ON(offset >= PAGE_SIZE);
14488c2ecf20Sopenharmony_ci	if (PageHighMem(page))
14498c2ecf20Sopenharmony_ci		/*
14508c2ecf20Sopenharmony_ci		 * This catches illegal uses and preserves the offset:
14518c2ecf20Sopenharmony_ci		 */
14528c2ecf20Sopenharmony_ci		bh->b_data = (char *)(0 + offset);
14538c2ecf20Sopenharmony_ci	else
14548c2ecf20Sopenharmony_ci		bh->b_data = page_address(page) + offset;
14558c2ecf20Sopenharmony_ci}
14568c2ecf20Sopenharmony_ciEXPORT_SYMBOL(set_bh_page);
14578c2ecf20Sopenharmony_ci
14588c2ecf20Sopenharmony_ci/*
14598c2ecf20Sopenharmony_ci * Called when truncating a buffer on a page completely.
14608c2ecf20Sopenharmony_ci */
14618c2ecf20Sopenharmony_ci
14628c2ecf20Sopenharmony_ci/* Bits that are cleared during an invalidate */
14638c2ecf20Sopenharmony_ci#define BUFFER_FLAGS_DISCARD \
14648c2ecf20Sopenharmony_ci	(1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
14658c2ecf20Sopenharmony_ci	 1 << BH_Delay | 1 << BH_Unwritten)
14668c2ecf20Sopenharmony_ci
14678c2ecf20Sopenharmony_cistatic void discard_buffer(struct buffer_head * bh)
14688c2ecf20Sopenharmony_ci{
14698c2ecf20Sopenharmony_ci	unsigned long b_state, b_state_old;
14708c2ecf20Sopenharmony_ci
14718c2ecf20Sopenharmony_ci	lock_buffer(bh);
14728c2ecf20Sopenharmony_ci	clear_buffer_dirty(bh);
14738c2ecf20Sopenharmony_ci	bh->b_bdev = NULL;
14748c2ecf20Sopenharmony_ci	b_state = bh->b_state;
14758c2ecf20Sopenharmony_ci	for (;;) {
14768c2ecf20Sopenharmony_ci		b_state_old = cmpxchg(&bh->b_state, b_state,
14778c2ecf20Sopenharmony_ci				      (b_state & ~BUFFER_FLAGS_DISCARD));
14788c2ecf20Sopenharmony_ci		if (b_state_old == b_state)
14798c2ecf20Sopenharmony_ci			break;
14808c2ecf20Sopenharmony_ci		b_state = b_state_old;
14818c2ecf20Sopenharmony_ci	}
14828c2ecf20Sopenharmony_ci	unlock_buffer(bh);
14838c2ecf20Sopenharmony_ci}
14848c2ecf20Sopenharmony_ci
14858c2ecf20Sopenharmony_ci/**
14868c2ecf20Sopenharmony_ci * block_invalidatepage - invalidate part or all of a buffer-backed page
14878c2ecf20Sopenharmony_ci *
14888c2ecf20Sopenharmony_ci * @page: the page which is affected
14898c2ecf20Sopenharmony_ci * @offset: start of the range to invalidate
14908c2ecf20Sopenharmony_ci * @length: length of the range to invalidate
14918c2ecf20Sopenharmony_ci *
14928c2ecf20Sopenharmony_ci * block_invalidatepage() is called when all or part of the page has become
14938c2ecf20Sopenharmony_ci * invalidated by a truncate operation.
14948c2ecf20Sopenharmony_ci *
14958c2ecf20Sopenharmony_ci * block_invalidatepage() does not have to release all buffers, but it must
14968c2ecf20Sopenharmony_ci * ensure that no dirty buffer is left outside @offset and that no I/O
14978c2ecf20Sopenharmony_ci * is underway against any of the blocks which are outside the truncation
14988c2ecf20Sopenharmony_ci * point.  Because the caller is about to free (and possibly reuse) those
14998c2ecf20Sopenharmony_ci * blocks on-disk.
15008c2ecf20Sopenharmony_ci */
15018c2ecf20Sopenharmony_civoid block_invalidatepage(struct page *page, unsigned int offset,
15028c2ecf20Sopenharmony_ci			  unsigned int length)
15038c2ecf20Sopenharmony_ci{
15048c2ecf20Sopenharmony_ci	struct buffer_head *head, *bh, *next;
15058c2ecf20Sopenharmony_ci	unsigned int curr_off = 0;
15068c2ecf20Sopenharmony_ci	unsigned int stop = length + offset;
15078c2ecf20Sopenharmony_ci
15088c2ecf20Sopenharmony_ci	BUG_ON(!PageLocked(page));
15098c2ecf20Sopenharmony_ci	if (!page_has_buffers(page))
15108c2ecf20Sopenharmony_ci		goto out;
15118c2ecf20Sopenharmony_ci
15128c2ecf20Sopenharmony_ci	/*
15138c2ecf20Sopenharmony_ci	 * Check for overflow
15148c2ecf20Sopenharmony_ci	 */
15158c2ecf20Sopenharmony_ci	BUG_ON(stop > PAGE_SIZE || stop < length);
15168c2ecf20Sopenharmony_ci
15178c2ecf20Sopenharmony_ci	head = page_buffers(page);
15188c2ecf20Sopenharmony_ci	bh = head;
15198c2ecf20Sopenharmony_ci	do {
15208c2ecf20Sopenharmony_ci		unsigned int next_off = curr_off + bh->b_size;
15218c2ecf20Sopenharmony_ci		next = bh->b_this_page;
15228c2ecf20Sopenharmony_ci
15238c2ecf20Sopenharmony_ci		/*
15248c2ecf20Sopenharmony_ci		 * Are we still fully in range ?
15258c2ecf20Sopenharmony_ci		 */
15268c2ecf20Sopenharmony_ci		if (next_off > stop)
15278c2ecf20Sopenharmony_ci			goto out;
15288c2ecf20Sopenharmony_ci
15298c2ecf20Sopenharmony_ci		/*
15308c2ecf20Sopenharmony_ci		 * is this block fully invalidated?
15318c2ecf20Sopenharmony_ci		 */
15328c2ecf20Sopenharmony_ci		if (offset <= curr_off)
15338c2ecf20Sopenharmony_ci			discard_buffer(bh);
15348c2ecf20Sopenharmony_ci		curr_off = next_off;
15358c2ecf20Sopenharmony_ci		bh = next;
15368c2ecf20Sopenharmony_ci	} while (bh != head);
15378c2ecf20Sopenharmony_ci
15388c2ecf20Sopenharmony_ci	/*
15398c2ecf20Sopenharmony_ci	 * We release buffers only if the entire page is being invalidated.
15408c2ecf20Sopenharmony_ci	 * The get_block cached value has been unconditionally invalidated,
15418c2ecf20Sopenharmony_ci	 * so real IO is not possible anymore.
15428c2ecf20Sopenharmony_ci	 */
15438c2ecf20Sopenharmony_ci	if (length == PAGE_SIZE)
15448c2ecf20Sopenharmony_ci		try_to_release_page(page, 0);
15458c2ecf20Sopenharmony_ciout:
15468c2ecf20Sopenharmony_ci	return;
15478c2ecf20Sopenharmony_ci}
15488c2ecf20Sopenharmony_ciEXPORT_SYMBOL(block_invalidatepage);
15498c2ecf20Sopenharmony_ci
15508c2ecf20Sopenharmony_ci
15518c2ecf20Sopenharmony_ci/*
15528c2ecf20Sopenharmony_ci * We attach and possibly dirty the buffers atomically wrt
15538c2ecf20Sopenharmony_ci * __set_page_dirty_buffers() via private_lock.  try_to_free_buffers
15548c2ecf20Sopenharmony_ci * is already excluded via the page lock.
15558c2ecf20Sopenharmony_ci */
15568c2ecf20Sopenharmony_civoid create_empty_buffers(struct page *page,
15578c2ecf20Sopenharmony_ci			unsigned long blocksize, unsigned long b_state)
15588c2ecf20Sopenharmony_ci{
15598c2ecf20Sopenharmony_ci	struct buffer_head *bh, *head, *tail;
15608c2ecf20Sopenharmony_ci
15618c2ecf20Sopenharmony_ci	head = alloc_page_buffers(page, blocksize, true);
15628c2ecf20Sopenharmony_ci	bh = head;
15638c2ecf20Sopenharmony_ci	do {
15648c2ecf20Sopenharmony_ci		bh->b_state |= b_state;
15658c2ecf20Sopenharmony_ci		tail = bh;
15668c2ecf20Sopenharmony_ci		bh = bh->b_this_page;
15678c2ecf20Sopenharmony_ci	} while (bh);
15688c2ecf20Sopenharmony_ci	tail->b_this_page = head;
15698c2ecf20Sopenharmony_ci
15708c2ecf20Sopenharmony_ci	spin_lock(&page->mapping->private_lock);
15718c2ecf20Sopenharmony_ci	if (PageUptodate(page) || PageDirty(page)) {
15728c2ecf20Sopenharmony_ci		bh = head;
15738c2ecf20Sopenharmony_ci		do {
15748c2ecf20Sopenharmony_ci			if (PageDirty(page))
15758c2ecf20Sopenharmony_ci				set_buffer_dirty(bh);
15768c2ecf20Sopenharmony_ci			if (PageUptodate(page))
15778c2ecf20Sopenharmony_ci				set_buffer_uptodate(bh);
15788c2ecf20Sopenharmony_ci			bh = bh->b_this_page;
15798c2ecf20Sopenharmony_ci		} while (bh != head);
15808c2ecf20Sopenharmony_ci	}
15818c2ecf20Sopenharmony_ci	attach_page_private(page, head);
15828c2ecf20Sopenharmony_ci	spin_unlock(&page->mapping->private_lock);
15838c2ecf20Sopenharmony_ci}
15848c2ecf20Sopenharmony_ciEXPORT_SYMBOL(create_empty_buffers);
15858c2ecf20Sopenharmony_ci
15868c2ecf20Sopenharmony_ci/**
15878c2ecf20Sopenharmony_ci * clean_bdev_aliases: clean a range of buffers in block device
15888c2ecf20Sopenharmony_ci * @bdev: Block device to clean buffers in
15898c2ecf20Sopenharmony_ci * @block: Start of a range of blocks to clean
15908c2ecf20Sopenharmony_ci * @len: Number of blocks to clean
15918c2ecf20Sopenharmony_ci *
15928c2ecf20Sopenharmony_ci * We are taking a range of blocks for data and we don't want writeback of any
15938c2ecf20Sopenharmony_ci * buffer-cache aliases starting from return from this function and until the
15948c2ecf20Sopenharmony_ci * moment when something will explicitly mark the buffer dirty (hopefully that
15958c2ecf20Sopenharmony_ci * will not happen until we will free that block ;-) We don't even need to mark
15968c2ecf20Sopenharmony_ci * it not-uptodate - nobody can expect anything from a newly allocated buffer
15978c2ecf20Sopenharmony_ci * anyway. We used to use unmap_buffer() for such invalidation, but that was
15988c2ecf20Sopenharmony_ci * wrong. We definitely don't want to mark the alias unmapped, for example - it
15998c2ecf20Sopenharmony_ci * would confuse anyone who might pick it with bread() afterwards...
16008c2ecf20Sopenharmony_ci *
16018c2ecf20Sopenharmony_ci * Also..  Note that bforget() doesn't lock the buffer.  So there can be
16028c2ecf20Sopenharmony_ci * writeout I/O going on against recently-freed buffers.  We don't wait on that
16038c2ecf20Sopenharmony_ci * I/O in bforget() - it's more efficient to wait on the I/O only if we really
16048c2ecf20Sopenharmony_ci * need to.  That happens here.
16058c2ecf20Sopenharmony_ci */
16068c2ecf20Sopenharmony_civoid clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
16078c2ecf20Sopenharmony_ci{
16088c2ecf20Sopenharmony_ci	struct inode *bd_inode = bdev->bd_inode;
16098c2ecf20Sopenharmony_ci	struct address_space *bd_mapping = bd_inode->i_mapping;
16108c2ecf20Sopenharmony_ci	struct pagevec pvec;
16118c2ecf20Sopenharmony_ci	pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
16128c2ecf20Sopenharmony_ci	pgoff_t end;
16138c2ecf20Sopenharmony_ci	int i, count;
16148c2ecf20Sopenharmony_ci	struct buffer_head *bh;
16158c2ecf20Sopenharmony_ci	struct buffer_head *head;
16168c2ecf20Sopenharmony_ci
16178c2ecf20Sopenharmony_ci	end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
16188c2ecf20Sopenharmony_ci	pagevec_init(&pvec);
16198c2ecf20Sopenharmony_ci	while (pagevec_lookup_range(&pvec, bd_mapping, &index, end)) {
16208c2ecf20Sopenharmony_ci		count = pagevec_count(&pvec);
16218c2ecf20Sopenharmony_ci		for (i = 0; i < count; i++) {
16228c2ecf20Sopenharmony_ci			struct page *page = pvec.pages[i];
16238c2ecf20Sopenharmony_ci
16248c2ecf20Sopenharmony_ci			if (!page_has_buffers(page))
16258c2ecf20Sopenharmony_ci				continue;
16268c2ecf20Sopenharmony_ci			/*
16278c2ecf20Sopenharmony_ci			 * We use page lock instead of bd_mapping->private_lock
16288c2ecf20Sopenharmony_ci			 * to pin buffers here since we can afford to sleep and
16298c2ecf20Sopenharmony_ci			 * it scales better than a global spinlock lock.
16308c2ecf20Sopenharmony_ci			 */
16318c2ecf20Sopenharmony_ci			lock_page(page);
16328c2ecf20Sopenharmony_ci			/* Recheck when the page is locked which pins bhs */
16338c2ecf20Sopenharmony_ci			if (!page_has_buffers(page))
16348c2ecf20Sopenharmony_ci				goto unlock_page;
16358c2ecf20Sopenharmony_ci			head = page_buffers(page);
16368c2ecf20Sopenharmony_ci			bh = head;
16378c2ecf20Sopenharmony_ci			do {
16388c2ecf20Sopenharmony_ci				if (!buffer_mapped(bh) || (bh->b_blocknr < block))
16398c2ecf20Sopenharmony_ci					goto next;
16408c2ecf20Sopenharmony_ci				if (bh->b_blocknr >= block + len)
16418c2ecf20Sopenharmony_ci					break;
16428c2ecf20Sopenharmony_ci				clear_buffer_dirty(bh);
16438c2ecf20Sopenharmony_ci				wait_on_buffer(bh);
16448c2ecf20Sopenharmony_ci				clear_buffer_req(bh);
16458c2ecf20Sopenharmony_cinext:
16468c2ecf20Sopenharmony_ci				bh = bh->b_this_page;
16478c2ecf20Sopenharmony_ci			} while (bh != head);
16488c2ecf20Sopenharmony_ciunlock_page:
16498c2ecf20Sopenharmony_ci			unlock_page(page);
16508c2ecf20Sopenharmony_ci		}
16518c2ecf20Sopenharmony_ci		pagevec_release(&pvec);
16528c2ecf20Sopenharmony_ci		cond_resched();
16538c2ecf20Sopenharmony_ci		/* End of range already reached? */
16548c2ecf20Sopenharmony_ci		if (index > end || !index)
16558c2ecf20Sopenharmony_ci			break;
16568c2ecf20Sopenharmony_ci	}
16578c2ecf20Sopenharmony_ci}
16588c2ecf20Sopenharmony_ciEXPORT_SYMBOL(clean_bdev_aliases);
16598c2ecf20Sopenharmony_ci
16608c2ecf20Sopenharmony_ci/*
16618c2ecf20Sopenharmony_ci * Size is a power-of-two in the range 512..PAGE_SIZE,
16628c2ecf20Sopenharmony_ci * and the case we care about most is PAGE_SIZE.
16638c2ecf20Sopenharmony_ci *
16648c2ecf20Sopenharmony_ci * So this *could* possibly be written with those
16658c2ecf20Sopenharmony_ci * constraints in mind (relevant mostly if some
16668c2ecf20Sopenharmony_ci * architecture has a slow bit-scan instruction)
16678c2ecf20Sopenharmony_ci */
16688c2ecf20Sopenharmony_cistatic inline int block_size_bits(unsigned int blocksize)
16698c2ecf20Sopenharmony_ci{
16708c2ecf20Sopenharmony_ci	return ilog2(blocksize);
16718c2ecf20Sopenharmony_ci}
16728c2ecf20Sopenharmony_ci
16738c2ecf20Sopenharmony_cistatic struct buffer_head *create_page_buffers(struct page *page, struct inode *inode, unsigned int b_state)
16748c2ecf20Sopenharmony_ci{
16758c2ecf20Sopenharmony_ci	BUG_ON(!PageLocked(page));
16768c2ecf20Sopenharmony_ci
16778c2ecf20Sopenharmony_ci	if (!page_has_buffers(page))
16788c2ecf20Sopenharmony_ci		create_empty_buffers(page, 1 << READ_ONCE(inode->i_blkbits),
16798c2ecf20Sopenharmony_ci				     b_state);
16808c2ecf20Sopenharmony_ci	return page_buffers(page);
16818c2ecf20Sopenharmony_ci}
16828c2ecf20Sopenharmony_ci
16838c2ecf20Sopenharmony_ci/*
16848c2ecf20Sopenharmony_ci * NOTE! All mapped/uptodate combinations are valid:
16858c2ecf20Sopenharmony_ci *
16868c2ecf20Sopenharmony_ci *	Mapped	Uptodate	Meaning
16878c2ecf20Sopenharmony_ci *
16888c2ecf20Sopenharmony_ci *	No	No		"unknown" - must do get_block()
16898c2ecf20Sopenharmony_ci *	No	Yes		"hole" - zero-filled
16908c2ecf20Sopenharmony_ci *	Yes	No		"allocated" - allocated on disk, not read in
16918c2ecf20Sopenharmony_ci *	Yes	Yes		"valid" - allocated and up-to-date in memory.
16928c2ecf20Sopenharmony_ci *
16938c2ecf20Sopenharmony_ci * "Dirty" is valid only with the last case (mapped+uptodate).
16948c2ecf20Sopenharmony_ci */
16958c2ecf20Sopenharmony_ci
16968c2ecf20Sopenharmony_ci/*
16978c2ecf20Sopenharmony_ci * While block_write_full_page is writing back the dirty buffers under
16988c2ecf20Sopenharmony_ci * the page lock, whoever dirtied the buffers may decide to clean them
16998c2ecf20Sopenharmony_ci * again at any time.  We handle that by only looking at the buffer
17008c2ecf20Sopenharmony_ci * state inside lock_buffer().
17018c2ecf20Sopenharmony_ci *
17028c2ecf20Sopenharmony_ci * If block_write_full_page() is called for regular writeback
17038c2ecf20Sopenharmony_ci * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
17048c2ecf20Sopenharmony_ci * locked buffer.   This only can happen if someone has written the buffer
17058c2ecf20Sopenharmony_ci * directly, with submit_bh().  At the address_space level PageWriteback
17068c2ecf20Sopenharmony_ci * prevents this contention from occurring.
17078c2ecf20Sopenharmony_ci *
17088c2ecf20Sopenharmony_ci * If block_write_full_page() is called with wbc->sync_mode ==
17098c2ecf20Sopenharmony_ci * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this
17108c2ecf20Sopenharmony_ci * causes the writes to be flagged as synchronous writes.
17118c2ecf20Sopenharmony_ci */
17128c2ecf20Sopenharmony_ciint __block_write_full_page(struct inode *inode, struct page *page,
17138c2ecf20Sopenharmony_ci			get_block_t *get_block, struct writeback_control *wbc,
17148c2ecf20Sopenharmony_ci			bh_end_io_t *handler)
17158c2ecf20Sopenharmony_ci{
17168c2ecf20Sopenharmony_ci	int err;
17178c2ecf20Sopenharmony_ci	sector_t block;
17188c2ecf20Sopenharmony_ci	sector_t last_block;
17198c2ecf20Sopenharmony_ci	struct buffer_head *bh, *head;
17208c2ecf20Sopenharmony_ci	unsigned int blocksize, bbits;
17218c2ecf20Sopenharmony_ci	int nr_underway = 0;
17228c2ecf20Sopenharmony_ci	int write_flags = wbc_to_write_flags(wbc);
17238c2ecf20Sopenharmony_ci
17248c2ecf20Sopenharmony_ci	head = create_page_buffers(page, inode,
17258c2ecf20Sopenharmony_ci					(1 << BH_Dirty)|(1 << BH_Uptodate));
17268c2ecf20Sopenharmony_ci
17278c2ecf20Sopenharmony_ci	/*
17288c2ecf20Sopenharmony_ci	 * Be very careful.  We have no exclusion from __set_page_dirty_buffers
17298c2ecf20Sopenharmony_ci	 * here, and the (potentially unmapped) buffers may become dirty at
17308c2ecf20Sopenharmony_ci	 * any time.  If a buffer becomes dirty here after we've inspected it
17318c2ecf20Sopenharmony_ci	 * then we just miss that fact, and the page stays dirty.
17328c2ecf20Sopenharmony_ci	 *
17338c2ecf20Sopenharmony_ci	 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
17348c2ecf20Sopenharmony_ci	 * handle that here by just cleaning them.
17358c2ecf20Sopenharmony_ci	 */
17368c2ecf20Sopenharmony_ci
17378c2ecf20Sopenharmony_ci	bh = head;
17388c2ecf20Sopenharmony_ci	blocksize = bh->b_size;
17398c2ecf20Sopenharmony_ci	bbits = block_size_bits(blocksize);
17408c2ecf20Sopenharmony_ci
17418c2ecf20Sopenharmony_ci	block = (sector_t)page->index << (PAGE_SHIFT - bbits);
17428c2ecf20Sopenharmony_ci	last_block = (i_size_read(inode) - 1) >> bbits;
17438c2ecf20Sopenharmony_ci
17448c2ecf20Sopenharmony_ci	/*
17458c2ecf20Sopenharmony_ci	 * Get all the dirty buffers mapped to disk addresses and
17468c2ecf20Sopenharmony_ci	 * handle any aliases from the underlying blockdev's mapping.
17478c2ecf20Sopenharmony_ci	 */
17488c2ecf20Sopenharmony_ci	do {
17498c2ecf20Sopenharmony_ci		if (block > last_block) {
17508c2ecf20Sopenharmony_ci			/*
17518c2ecf20Sopenharmony_ci			 * mapped buffers outside i_size will occur, because
17528c2ecf20Sopenharmony_ci			 * this page can be outside i_size when there is a
17538c2ecf20Sopenharmony_ci			 * truncate in progress.
17548c2ecf20Sopenharmony_ci			 */
17558c2ecf20Sopenharmony_ci			/*
17568c2ecf20Sopenharmony_ci			 * The buffer was zeroed by block_write_full_page()
17578c2ecf20Sopenharmony_ci			 */
17588c2ecf20Sopenharmony_ci			clear_buffer_dirty(bh);
17598c2ecf20Sopenharmony_ci			set_buffer_uptodate(bh);
17608c2ecf20Sopenharmony_ci		} else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
17618c2ecf20Sopenharmony_ci			   buffer_dirty(bh)) {
17628c2ecf20Sopenharmony_ci			WARN_ON(bh->b_size != blocksize);
17638c2ecf20Sopenharmony_ci			err = get_block(inode, block, bh, 1);
17648c2ecf20Sopenharmony_ci			if (err)
17658c2ecf20Sopenharmony_ci				goto recover;
17668c2ecf20Sopenharmony_ci			clear_buffer_delay(bh);
17678c2ecf20Sopenharmony_ci			if (buffer_new(bh)) {
17688c2ecf20Sopenharmony_ci				/* blockdev mappings never come here */
17698c2ecf20Sopenharmony_ci				clear_buffer_new(bh);
17708c2ecf20Sopenharmony_ci				clean_bdev_bh_alias(bh);
17718c2ecf20Sopenharmony_ci			}
17728c2ecf20Sopenharmony_ci		}
17738c2ecf20Sopenharmony_ci		bh = bh->b_this_page;
17748c2ecf20Sopenharmony_ci		block++;
17758c2ecf20Sopenharmony_ci	} while (bh != head);
17768c2ecf20Sopenharmony_ci
17778c2ecf20Sopenharmony_ci	do {
17788c2ecf20Sopenharmony_ci		if (!buffer_mapped(bh))
17798c2ecf20Sopenharmony_ci			continue;
17808c2ecf20Sopenharmony_ci		/*
17818c2ecf20Sopenharmony_ci		 * If it's a fully non-blocking write attempt and we cannot
17828c2ecf20Sopenharmony_ci		 * lock the buffer then redirty the page.  Note that this can
17838c2ecf20Sopenharmony_ci		 * potentially cause a busy-wait loop from writeback threads
17848c2ecf20Sopenharmony_ci		 * and kswapd activity, but those code paths have their own
17858c2ecf20Sopenharmony_ci		 * higher-level throttling.
17868c2ecf20Sopenharmony_ci		 */
17878c2ecf20Sopenharmony_ci		if (wbc->sync_mode != WB_SYNC_NONE) {
17888c2ecf20Sopenharmony_ci			lock_buffer(bh);
17898c2ecf20Sopenharmony_ci		} else if (!trylock_buffer(bh)) {
17908c2ecf20Sopenharmony_ci			redirty_page_for_writepage(wbc, page);
17918c2ecf20Sopenharmony_ci			continue;
17928c2ecf20Sopenharmony_ci		}
17938c2ecf20Sopenharmony_ci		if (test_clear_buffer_dirty(bh)) {
17948c2ecf20Sopenharmony_ci			mark_buffer_async_write_endio(bh, handler);
17958c2ecf20Sopenharmony_ci		} else {
17968c2ecf20Sopenharmony_ci			unlock_buffer(bh);
17978c2ecf20Sopenharmony_ci		}
17988c2ecf20Sopenharmony_ci	} while ((bh = bh->b_this_page) != head);
17998c2ecf20Sopenharmony_ci
18008c2ecf20Sopenharmony_ci	/*
18018c2ecf20Sopenharmony_ci	 * The page and its buffers are protected by PageWriteback(), so we can
18028c2ecf20Sopenharmony_ci	 * drop the bh refcounts early.
18038c2ecf20Sopenharmony_ci	 */
18048c2ecf20Sopenharmony_ci	BUG_ON(PageWriteback(page));
18058c2ecf20Sopenharmony_ci	set_page_writeback(page);
18068c2ecf20Sopenharmony_ci
18078c2ecf20Sopenharmony_ci	do {
18088c2ecf20Sopenharmony_ci		struct buffer_head *next = bh->b_this_page;
18098c2ecf20Sopenharmony_ci		if (buffer_async_write(bh)) {
18108c2ecf20Sopenharmony_ci			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
18118c2ecf20Sopenharmony_ci					inode->i_write_hint, wbc);
18128c2ecf20Sopenharmony_ci			nr_underway++;
18138c2ecf20Sopenharmony_ci		}
18148c2ecf20Sopenharmony_ci		bh = next;
18158c2ecf20Sopenharmony_ci	} while (bh != head);
18168c2ecf20Sopenharmony_ci	unlock_page(page);
18178c2ecf20Sopenharmony_ci
18188c2ecf20Sopenharmony_ci	err = 0;
18198c2ecf20Sopenharmony_cidone:
18208c2ecf20Sopenharmony_ci	if (nr_underway == 0) {
18218c2ecf20Sopenharmony_ci		/*
18228c2ecf20Sopenharmony_ci		 * The page was marked dirty, but the buffers were
18238c2ecf20Sopenharmony_ci		 * clean.  Someone wrote them back by hand with
18248c2ecf20Sopenharmony_ci		 * ll_rw_block/submit_bh.  A rare case.
18258c2ecf20Sopenharmony_ci		 */
18268c2ecf20Sopenharmony_ci		end_page_writeback(page);
18278c2ecf20Sopenharmony_ci
18288c2ecf20Sopenharmony_ci		/*
18298c2ecf20Sopenharmony_ci		 * The page and buffer_heads can be released at any time from
18308c2ecf20Sopenharmony_ci		 * here on.
18318c2ecf20Sopenharmony_ci		 */
18328c2ecf20Sopenharmony_ci	}
18338c2ecf20Sopenharmony_ci	return err;
18348c2ecf20Sopenharmony_ci
18358c2ecf20Sopenharmony_cirecover:
18368c2ecf20Sopenharmony_ci	/*
18378c2ecf20Sopenharmony_ci	 * ENOSPC, or some other error.  We may already have added some
18388c2ecf20Sopenharmony_ci	 * blocks to the file, so we need to write these out to avoid
18398c2ecf20Sopenharmony_ci	 * exposing stale data.
18408c2ecf20Sopenharmony_ci	 * The page is currently locked and not marked for writeback
18418c2ecf20Sopenharmony_ci	 */
18428c2ecf20Sopenharmony_ci	bh = head;
18438c2ecf20Sopenharmony_ci	/* Recovery: lock and submit the mapped buffers */
18448c2ecf20Sopenharmony_ci	do {
18458c2ecf20Sopenharmony_ci		if (buffer_mapped(bh) && buffer_dirty(bh) &&
18468c2ecf20Sopenharmony_ci		    !buffer_delay(bh)) {
18478c2ecf20Sopenharmony_ci			lock_buffer(bh);
18488c2ecf20Sopenharmony_ci			mark_buffer_async_write_endio(bh, handler);
18498c2ecf20Sopenharmony_ci		} else {
18508c2ecf20Sopenharmony_ci			/*
18518c2ecf20Sopenharmony_ci			 * The buffer may have been set dirty during
18528c2ecf20Sopenharmony_ci			 * attachment to a dirty page.
18538c2ecf20Sopenharmony_ci			 */
18548c2ecf20Sopenharmony_ci			clear_buffer_dirty(bh);
18558c2ecf20Sopenharmony_ci		}
18568c2ecf20Sopenharmony_ci	} while ((bh = bh->b_this_page) != head);
18578c2ecf20Sopenharmony_ci	SetPageError(page);
18588c2ecf20Sopenharmony_ci	BUG_ON(PageWriteback(page));
18598c2ecf20Sopenharmony_ci	mapping_set_error(page->mapping, err);
18608c2ecf20Sopenharmony_ci	set_page_writeback(page);
18618c2ecf20Sopenharmony_ci	do {
18628c2ecf20Sopenharmony_ci		struct buffer_head *next = bh->b_this_page;
18638c2ecf20Sopenharmony_ci		if (buffer_async_write(bh)) {
18648c2ecf20Sopenharmony_ci			clear_buffer_dirty(bh);
18658c2ecf20Sopenharmony_ci			submit_bh_wbc(REQ_OP_WRITE, write_flags, bh,
18668c2ecf20Sopenharmony_ci					inode->i_write_hint, wbc);
18678c2ecf20Sopenharmony_ci			nr_underway++;
18688c2ecf20Sopenharmony_ci		}
18698c2ecf20Sopenharmony_ci		bh = next;
18708c2ecf20Sopenharmony_ci	} while (bh != head);
18718c2ecf20Sopenharmony_ci	unlock_page(page);
18728c2ecf20Sopenharmony_ci	goto done;
18738c2ecf20Sopenharmony_ci}
18748c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__block_write_full_page);
18758c2ecf20Sopenharmony_ci
18768c2ecf20Sopenharmony_ci/*
18778c2ecf20Sopenharmony_ci * If a page has any new buffers, zero them out here, and mark them uptodate
18788c2ecf20Sopenharmony_ci * and dirty so they'll be written out (in order to prevent uninitialised
18798c2ecf20Sopenharmony_ci * block data from leaking). And clear the new bit.
18808c2ecf20Sopenharmony_ci */
18818c2ecf20Sopenharmony_civoid page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
18828c2ecf20Sopenharmony_ci{
18838c2ecf20Sopenharmony_ci	unsigned int block_start, block_end;
18848c2ecf20Sopenharmony_ci	struct buffer_head *head, *bh;
18858c2ecf20Sopenharmony_ci
18868c2ecf20Sopenharmony_ci	BUG_ON(!PageLocked(page));
18878c2ecf20Sopenharmony_ci	if (!page_has_buffers(page))
18888c2ecf20Sopenharmony_ci		return;
18898c2ecf20Sopenharmony_ci
18908c2ecf20Sopenharmony_ci	bh = head = page_buffers(page);
18918c2ecf20Sopenharmony_ci	block_start = 0;
18928c2ecf20Sopenharmony_ci	do {
18938c2ecf20Sopenharmony_ci		block_end = block_start + bh->b_size;
18948c2ecf20Sopenharmony_ci
18958c2ecf20Sopenharmony_ci		if (buffer_new(bh)) {
18968c2ecf20Sopenharmony_ci			if (block_end > from && block_start < to) {
18978c2ecf20Sopenharmony_ci				if (!PageUptodate(page)) {
18988c2ecf20Sopenharmony_ci					unsigned start, size;
18998c2ecf20Sopenharmony_ci
19008c2ecf20Sopenharmony_ci					start = max(from, block_start);
19018c2ecf20Sopenharmony_ci					size = min(to, block_end) - start;
19028c2ecf20Sopenharmony_ci
19038c2ecf20Sopenharmony_ci					zero_user(page, start, size);
19048c2ecf20Sopenharmony_ci					set_buffer_uptodate(bh);
19058c2ecf20Sopenharmony_ci				}
19068c2ecf20Sopenharmony_ci
19078c2ecf20Sopenharmony_ci				clear_buffer_new(bh);
19088c2ecf20Sopenharmony_ci				mark_buffer_dirty(bh);
19098c2ecf20Sopenharmony_ci			}
19108c2ecf20Sopenharmony_ci		}
19118c2ecf20Sopenharmony_ci
19128c2ecf20Sopenharmony_ci		block_start = block_end;
19138c2ecf20Sopenharmony_ci		bh = bh->b_this_page;
19148c2ecf20Sopenharmony_ci	} while (bh != head);
19158c2ecf20Sopenharmony_ci}
19168c2ecf20Sopenharmony_ciEXPORT_SYMBOL(page_zero_new_buffers);
19178c2ecf20Sopenharmony_ci
19188c2ecf20Sopenharmony_cistatic void
19198c2ecf20Sopenharmony_ciiomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
19208c2ecf20Sopenharmony_ci		struct iomap *iomap)
19218c2ecf20Sopenharmony_ci{
19228c2ecf20Sopenharmony_ci	loff_t offset = block << inode->i_blkbits;
19238c2ecf20Sopenharmony_ci
19248c2ecf20Sopenharmony_ci	bh->b_bdev = iomap->bdev;
19258c2ecf20Sopenharmony_ci
19268c2ecf20Sopenharmony_ci	/*
19278c2ecf20Sopenharmony_ci	 * Block points to offset in file we need to map, iomap contains
19288c2ecf20Sopenharmony_ci	 * the offset at which the map starts. If the map ends before the
19298c2ecf20Sopenharmony_ci	 * current block, then do not map the buffer and let the caller
19308c2ecf20Sopenharmony_ci	 * handle it.
19318c2ecf20Sopenharmony_ci	 */
19328c2ecf20Sopenharmony_ci	BUG_ON(offset >= iomap->offset + iomap->length);
19338c2ecf20Sopenharmony_ci
19348c2ecf20Sopenharmony_ci	switch (iomap->type) {
19358c2ecf20Sopenharmony_ci	case IOMAP_HOLE:
19368c2ecf20Sopenharmony_ci		/*
19378c2ecf20Sopenharmony_ci		 * If the buffer is not up to date or beyond the current EOF,
19388c2ecf20Sopenharmony_ci		 * we need to mark it as new to ensure sub-block zeroing is
19398c2ecf20Sopenharmony_ci		 * executed if necessary.
19408c2ecf20Sopenharmony_ci		 */
19418c2ecf20Sopenharmony_ci		if (!buffer_uptodate(bh) ||
19428c2ecf20Sopenharmony_ci		    (offset >= i_size_read(inode)))
19438c2ecf20Sopenharmony_ci			set_buffer_new(bh);
19448c2ecf20Sopenharmony_ci		break;
19458c2ecf20Sopenharmony_ci	case IOMAP_DELALLOC:
19468c2ecf20Sopenharmony_ci		if (!buffer_uptodate(bh) ||
19478c2ecf20Sopenharmony_ci		    (offset >= i_size_read(inode)))
19488c2ecf20Sopenharmony_ci			set_buffer_new(bh);
19498c2ecf20Sopenharmony_ci		set_buffer_uptodate(bh);
19508c2ecf20Sopenharmony_ci		set_buffer_mapped(bh);
19518c2ecf20Sopenharmony_ci		set_buffer_delay(bh);
19528c2ecf20Sopenharmony_ci		break;
19538c2ecf20Sopenharmony_ci	case IOMAP_UNWRITTEN:
19548c2ecf20Sopenharmony_ci		/*
19558c2ecf20Sopenharmony_ci		 * For unwritten regions, we always need to ensure that regions
19568c2ecf20Sopenharmony_ci		 * in the block we are not writing to are zeroed. Mark the
19578c2ecf20Sopenharmony_ci		 * buffer as new to ensure this.
19588c2ecf20Sopenharmony_ci		 */
19598c2ecf20Sopenharmony_ci		set_buffer_new(bh);
19608c2ecf20Sopenharmony_ci		set_buffer_unwritten(bh);
19618c2ecf20Sopenharmony_ci		fallthrough;
19628c2ecf20Sopenharmony_ci	case IOMAP_MAPPED:
19638c2ecf20Sopenharmony_ci		if ((iomap->flags & IOMAP_F_NEW) ||
19648c2ecf20Sopenharmony_ci		    offset >= i_size_read(inode))
19658c2ecf20Sopenharmony_ci			set_buffer_new(bh);
19668c2ecf20Sopenharmony_ci		bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
19678c2ecf20Sopenharmony_ci				inode->i_blkbits;
19688c2ecf20Sopenharmony_ci		set_buffer_mapped(bh);
19698c2ecf20Sopenharmony_ci		break;
19708c2ecf20Sopenharmony_ci	}
19718c2ecf20Sopenharmony_ci}
19728c2ecf20Sopenharmony_ci
19738c2ecf20Sopenharmony_ciint __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
19748c2ecf20Sopenharmony_ci		get_block_t *get_block, struct iomap *iomap)
19758c2ecf20Sopenharmony_ci{
19768c2ecf20Sopenharmony_ci	unsigned from = pos & (PAGE_SIZE - 1);
19778c2ecf20Sopenharmony_ci	unsigned to = from + len;
19788c2ecf20Sopenharmony_ci	struct inode *inode = page->mapping->host;
19798c2ecf20Sopenharmony_ci	unsigned block_start, block_end;
19808c2ecf20Sopenharmony_ci	sector_t block;
19818c2ecf20Sopenharmony_ci	int err = 0;
19828c2ecf20Sopenharmony_ci	unsigned blocksize, bbits;
19838c2ecf20Sopenharmony_ci	struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
19848c2ecf20Sopenharmony_ci
19858c2ecf20Sopenharmony_ci	BUG_ON(!PageLocked(page));
19868c2ecf20Sopenharmony_ci	BUG_ON(from > PAGE_SIZE);
19878c2ecf20Sopenharmony_ci	BUG_ON(to > PAGE_SIZE);
19888c2ecf20Sopenharmony_ci	BUG_ON(from > to);
19898c2ecf20Sopenharmony_ci
19908c2ecf20Sopenharmony_ci	head = create_page_buffers(page, inode, 0);
19918c2ecf20Sopenharmony_ci	blocksize = head->b_size;
19928c2ecf20Sopenharmony_ci	bbits = block_size_bits(blocksize);
19938c2ecf20Sopenharmony_ci
19948c2ecf20Sopenharmony_ci	block = (sector_t)page->index << (PAGE_SHIFT - bbits);
19958c2ecf20Sopenharmony_ci
19968c2ecf20Sopenharmony_ci	for(bh = head, block_start = 0; bh != head || !block_start;
19978c2ecf20Sopenharmony_ci	    block++, block_start=block_end, bh = bh->b_this_page) {
19988c2ecf20Sopenharmony_ci		block_end = block_start + blocksize;
19998c2ecf20Sopenharmony_ci		if (block_end <= from || block_start >= to) {
20008c2ecf20Sopenharmony_ci			if (PageUptodate(page)) {
20018c2ecf20Sopenharmony_ci				if (!buffer_uptodate(bh))
20028c2ecf20Sopenharmony_ci					set_buffer_uptodate(bh);
20038c2ecf20Sopenharmony_ci			}
20048c2ecf20Sopenharmony_ci			continue;
20058c2ecf20Sopenharmony_ci		}
20068c2ecf20Sopenharmony_ci		if (buffer_new(bh))
20078c2ecf20Sopenharmony_ci			clear_buffer_new(bh);
20088c2ecf20Sopenharmony_ci		if (!buffer_mapped(bh)) {
20098c2ecf20Sopenharmony_ci			WARN_ON(bh->b_size != blocksize);
20108c2ecf20Sopenharmony_ci			if (get_block) {
20118c2ecf20Sopenharmony_ci				err = get_block(inode, block, bh, 1);
20128c2ecf20Sopenharmony_ci				if (err)
20138c2ecf20Sopenharmony_ci					break;
20148c2ecf20Sopenharmony_ci			} else {
20158c2ecf20Sopenharmony_ci				iomap_to_bh(inode, block, bh, iomap);
20168c2ecf20Sopenharmony_ci			}
20178c2ecf20Sopenharmony_ci
20188c2ecf20Sopenharmony_ci			if (buffer_new(bh)) {
20198c2ecf20Sopenharmony_ci				clean_bdev_bh_alias(bh);
20208c2ecf20Sopenharmony_ci				if (PageUptodate(page)) {
20218c2ecf20Sopenharmony_ci					clear_buffer_new(bh);
20228c2ecf20Sopenharmony_ci					set_buffer_uptodate(bh);
20238c2ecf20Sopenharmony_ci					mark_buffer_dirty(bh);
20248c2ecf20Sopenharmony_ci					continue;
20258c2ecf20Sopenharmony_ci				}
20268c2ecf20Sopenharmony_ci				if (block_end > to || block_start < from)
20278c2ecf20Sopenharmony_ci					zero_user_segments(page,
20288c2ecf20Sopenharmony_ci						to, block_end,
20298c2ecf20Sopenharmony_ci						block_start, from);
20308c2ecf20Sopenharmony_ci				continue;
20318c2ecf20Sopenharmony_ci			}
20328c2ecf20Sopenharmony_ci		}
20338c2ecf20Sopenharmony_ci		if (PageUptodate(page)) {
20348c2ecf20Sopenharmony_ci			if (!buffer_uptodate(bh))
20358c2ecf20Sopenharmony_ci				set_buffer_uptodate(bh);
20368c2ecf20Sopenharmony_ci			continue;
20378c2ecf20Sopenharmony_ci		}
20388c2ecf20Sopenharmony_ci		if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
20398c2ecf20Sopenharmony_ci		    !buffer_unwritten(bh) &&
20408c2ecf20Sopenharmony_ci		     (block_start < from || block_end > to)) {
20418c2ecf20Sopenharmony_ci			ll_rw_block(REQ_OP_READ, 0, 1, &bh);
20428c2ecf20Sopenharmony_ci			*wait_bh++=bh;
20438c2ecf20Sopenharmony_ci		}
20448c2ecf20Sopenharmony_ci	}
20458c2ecf20Sopenharmony_ci	/*
20468c2ecf20Sopenharmony_ci	 * If we issued read requests - let them complete.
20478c2ecf20Sopenharmony_ci	 */
20488c2ecf20Sopenharmony_ci	while(wait_bh > wait) {
20498c2ecf20Sopenharmony_ci		wait_on_buffer(*--wait_bh);
20508c2ecf20Sopenharmony_ci		if (!buffer_uptodate(*wait_bh))
20518c2ecf20Sopenharmony_ci			err = -EIO;
20528c2ecf20Sopenharmony_ci	}
20538c2ecf20Sopenharmony_ci	if (unlikely(err))
20548c2ecf20Sopenharmony_ci		page_zero_new_buffers(page, from, to);
20558c2ecf20Sopenharmony_ci	return err;
20568c2ecf20Sopenharmony_ci}
20578c2ecf20Sopenharmony_ci
20588c2ecf20Sopenharmony_ciint __block_write_begin(struct page *page, loff_t pos, unsigned len,
20598c2ecf20Sopenharmony_ci		get_block_t *get_block)
20608c2ecf20Sopenharmony_ci{
20618c2ecf20Sopenharmony_ci	return __block_write_begin_int(page, pos, len, get_block, NULL);
20628c2ecf20Sopenharmony_ci}
20638c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__block_write_begin);
20648c2ecf20Sopenharmony_ci
20658c2ecf20Sopenharmony_cistatic int __block_commit_write(struct inode *inode, struct page *page,
20668c2ecf20Sopenharmony_ci		unsigned from, unsigned to)
20678c2ecf20Sopenharmony_ci{
20688c2ecf20Sopenharmony_ci	unsigned block_start, block_end;
20698c2ecf20Sopenharmony_ci	int partial = 0;
20708c2ecf20Sopenharmony_ci	unsigned blocksize;
20718c2ecf20Sopenharmony_ci	struct buffer_head *bh, *head;
20728c2ecf20Sopenharmony_ci
20738c2ecf20Sopenharmony_ci	bh = head = page_buffers(page);
20748c2ecf20Sopenharmony_ci	blocksize = bh->b_size;
20758c2ecf20Sopenharmony_ci
20768c2ecf20Sopenharmony_ci	block_start = 0;
20778c2ecf20Sopenharmony_ci	do {
20788c2ecf20Sopenharmony_ci		block_end = block_start + blocksize;
20798c2ecf20Sopenharmony_ci		if (block_end <= from || block_start >= to) {
20808c2ecf20Sopenharmony_ci			if (!buffer_uptodate(bh))
20818c2ecf20Sopenharmony_ci				partial = 1;
20828c2ecf20Sopenharmony_ci		} else {
20838c2ecf20Sopenharmony_ci			set_buffer_uptodate(bh);
20848c2ecf20Sopenharmony_ci			mark_buffer_dirty(bh);
20858c2ecf20Sopenharmony_ci		}
20868c2ecf20Sopenharmony_ci		clear_buffer_new(bh);
20878c2ecf20Sopenharmony_ci
20888c2ecf20Sopenharmony_ci		block_start = block_end;
20898c2ecf20Sopenharmony_ci		bh = bh->b_this_page;
20908c2ecf20Sopenharmony_ci	} while (bh != head);
20918c2ecf20Sopenharmony_ci
20928c2ecf20Sopenharmony_ci	/*
20938c2ecf20Sopenharmony_ci	 * If this is a partial write which happened to make all buffers
20948c2ecf20Sopenharmony_ci	 * uptodate then we can optimize away a bogus readpage() for
20958c2ecf20Sopenharmony_ci	 * the next read(). Here we 'discover' whether the page went
20968c2ecf20Sopenharmony_ci	 * uptodate as a result of this (potentially partial) write.
20978c2ecf20Sopenharmony_ci	 */
20988c2ecf20Sopenharmony_ci	if (!partial)
20998c2ecf20Sopenharmony_ci		SetPageUptodate(page);
21008c2ecf20Sopenharmony_ci	return 0;
21018c2ecf20Sopenharmony_ci}
21028c2ecf20Sopenharmony_ci
21038c2ecf20Sopenharmony_ci/*
21048c2ecf20Sopenharmony_ci * block_write_begin takes care of the basic task of block allocation and
21058c2ecf20Sopenharmony_ci * bringing partial write blocks uptodate first.
21068c2ecf20Sopenharmony_ci *
21078c2ecf20Sopenharmony_ci * The filesystem needs to handle block truncation upon failure.
21088c2ecf20Sopenharmony_ci */
21098c2ecf20Sopenharmony_ciint block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
21108c2ecf20Sopenharmony_ci		unsigned flags, struct page **pagep, get_block_t *get_block)
21118c2ecf20Sopenharmony_ci{
21128c2ecf20Sopenharmony_ci	pgoff_t index = pos >> PAGE_SHIFT;
21138c2ecf20Sopenharmony_ci	struct page *page;
21148c2ecf20Sopenharmony_ci	int status;
21158c2ecf20Sopenharmony_ci
21168c2ecf20Sopenharmony_ci	page = grab_cache_page_write_begin(mapping, index, flags);
21178c2ecf20Sopenharmony_ci	if (!page)
21188c2ecf20Sopenharmony_ci		return -ENOMEM;
21198c2ecf20Sopenharmony_ci
21208c2ecf20Sopenharmony_ci	status = __block_write_begin(page, pos, len, get_block);
21218c2ecf20Sopenharmony_ci	if (unlikely(status)) {
21228c2ecf20Sopenharmony_ci		unlock_page(page);
21238c2ecf20Sopenharmony_ci		put_page(page);
21248c2ecf20Sopenharmony_ci		page = NULL;
21258c2ecf20Sopenharmony_ci	}
21268c2ecf20Sopenharmony_ci
21278c2ecf20Sopenharmony_ci	*pagep = page;
21288c2ecf20Sopenharmony_ci	return status;
21298c2ecf20Sopenharmony_ci}
21308c2ecf20Sopenharmony_ciEXPORT_SYMBOL(block_write_begin);
21318c2ecf20Sopenharmony_ci
21328c2ecf20Sopenharmony_ciint block_write_end(struct file *file, struct address_space *mapping,
21338c2ecf20Sopenharmony_ci			loff_t pos, unsigned len, unsigned copied,
21348c2ecf20Sopenharmony_ci			struct page *page, void *fsdata)
21358c2ecf20Sopenharmony_ci{
21368c2ecf20Sopenharmony_ci	struct inode *inode = mapping->host;
21378c2ecf20Sopenharmony_ci	unsigned start;
21388c2ecf20Sopenharmony_ci
21398c2ecf20Sopenharmony_ci	start = pos & (PAGE_SIZE - 1);
21408c2ecf20Sopenharmony_ci
21418c2ecf20Sopenharmony_ci	if (unlikely(copied < len)) {
21428c2ecf20Sopenharmony_ci		/*
21438c2ecf20Sopenharmony_ci		 * The buffers that were written will now be uptodate, so we
21448c2ecf20Sopenharmony_ci		 * don't have to worry about a readpage reading them and
21458c2ecf20Sopenharmony_ci		 * overwriting a partial write. However if we have encountered
21468c2ecf20Sopenharmony_ci		 * a short write and only partially written into a buffer, it
21478c2ecf20Sopenharmony_ci		 * will not be marked uptodate, so a readpage might come in and
21488c2ecf20Sopenharmony_ci		 * destroy our partial write.
21498c2ecf20Sopenharmony_ci		 *
21508c2ecf20Sopenharmony_ci		 * Do the simplest thing, and just treat any short write to a
21518c2ecf20Sopenharmony_ci		 * non uptodate page as a zero-length write, and force the
21528c2ecf20Sopenharmony_ci		 * caller to redo the whole thing.
21538c2ecf20Sopenharmony_ci		 */
21548c2ecf20Sopenharmony_ci		if (!PageUptodate(page))
21558c2ecf20Sopenharmony_ci			copied = 0;
21568c2ecf20Sopenharmony_ci
21578c2ecf20Sopenharmony_ci		page_zero_new_buffers(page, start+copied, start+len);
21588c2ecf20Sopenharmony_ci	}
21598c2ecf20Sopenharmony_ci	flush_dcache_page(page);
21608c2ecf20Sopenharmony_ci
21618c2ecf20Sopenharmony_ci	/* This could be a short (even 0-length) commit */
21628c2ecf20Sopenharmony_ci	__block_commit_write(inode, page, start, start+copied);
21638c2ecf20Sopenharmony_ci
21648c2ecf20Sopenharmony_ci	return copied;
21658c2ecf20Sopenharmony_ci}
21668c2ecf20Sopenharmony_ciEXPORT_SYMBOL(block_write_end);
21678c2ecf20Sopenharmony_ci
21688c2ecf20Sopenharmony_ciint generic_write_end(struct file *file, struct address_space *mapping,
21698c2ecf20Sopenharmony_ci			loff_t pos, unsigned len, unsigned copied,
21708c2ecf20Sopenharmony_ci			struct page *page, void *fsdata)
21718c2ecf20Sopenharmony_ci{
21728c2ecf20Sopenharmony_ci	struct inode *inode = mapping->host;
21738c2ecf20Sopenharmony_ci	loff_t old_size = inode->i_size;
21748c2ecf20Sopenharmony_ci	bool i_size_changed = false;
21758c2ecf20Sopenharmony_ci
21768c2ecf20Sopenharmony_ci	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
21778c2ecf20Sopenharmony_ci
21788c2ecf20Sopenharmony_ci	/*
21798c2ecf20Sopenharmony_ci	 * No need to use i_size_read() here, the i_size cannot change under us
21808c2ecf20Sopenharmony_ci	 * because we hold i_rwsem.
21818c2ecf20Sopenharmony_ci	 *
21828c2ecf20Sopenharmony_ci	 * But it's important to update i_size while still holding page lock:
21838c2ecf20Sopenharmony_ci	 * page writeout could otherwise come in and zero beyond i_size.
21848c2ecf20Sopenharmony_ci	 */
21858c2ecf20Sopenharmony_ci	if (pos + copied > inode->i_size) {
21868c2ecf20Sopenharmony_ci		i_size_write(inode, pos + copied);
21878c2ecf20Sopenharmony_ci		i_size_changed = true;
21888c2ecf20Sopenharmony_ci	}
21898c2ecf20Sopenharmony_ci
21908c2ecf20Sopenharmony_ci	unlock_page(page);
21918c2ecf20Sopenharmony_ci	put_page(page);
21928c2ecf20Sopenharmony_ci
21938c2ecf20Sopenharmony_ci	if (old_size < pos)
21948c2ecf20Sopenharmony_ci		pagecache_isize_extended(inode, old_size, pos);
21958c2ecf20Sopenharmony_ci	/*
21968c2ecf20Sopenharmony_ci	 * Don't mark the inode dirty under page lock. First, it unnecessarily
21978c2ecf20Sopenharmony_ci	 * makes the holding time of page lock longer. Second, it forces lock
21988c2ecf20Sopenharmony_ci	 * ordering of page lock and transaction start for journaling
21998c2ecf20Sopenharmony_ci	 * filesystems.
22008c2ecf20Sopenharmony_ci	 */
22018c2ecf20Sopenharmony_ci	if (i_size_changed)
22028c2ecf20Sopenharmony_ci		mark_inode_dirty(inode);
22038c2ecf20Sopenharmony_ci	return copied;
22048c2ecf20Sopenharmony_ci}
22058c2ecf20Sopenharmony_ciEXPORT_SYMBOL(generic_write_end);
22068c2ecf20Sopenharmony_ci
22078c2ecf20Sopenharmony_ci/*
22088c2ecf20Sopenharmony_ci * block_is_partially_uptodate checks whether buffers within a page are
22098c2ecf20Sopenharmony_ci * uptodate or not.
22108c2ecf20Sopenharmony_ci *
22118c2ecf20Sopenharmony_ci * Returns true if all buffers which correspond to a file portion
22128c2ecf20Sopenharmony_ci * we want to read are uptodate.
22138c2ecf20Sopenharmony_ci */
22148c2ecf20Sopenharmony_ciint block_is_partially_uptodate(struct page *page, unsigned long from,
22158c2ecf20Sopenharmony_ci					unsigned long count)
22168c2ecf20Sopenharmony_ci{
22178c2ecf20Sopenharmony_ci	unsigned block_start, block_end, blocksize;
22188c2ecf20Sopenharmony_ci	unsigned to;
22198c2ecf20Sopenharmony_ci	struct buffer_head *bh, *head;
22208c2ecf20Sopenharmony_ci	int ret = 1;
22218c2ecf20Sopenharmony_ci
22228c2ecf20Sopenharmony_ci	if (!page_has_buffers(page))
22238c2ecf20Sopenharmony_ci		return 0;
22248c2ecf20Sopenharmony_ci
22258c2ecf20Sopenharmony_ci	head = page_buffers(page);
22268c2ecf20Sopenharmony_ci	blocksize = head->b_size;
22278c2ecf20Sopenharmony_ci	to = min_t(unsigned, PAGE_SIZE - from, count);
22288c2ecf20Sopenharmony_ci	to = from + to;
22298c2ecf20Sopenharmony_ci	if (from < blocksize && to > PAGE_SIZE - blocksize)
22308c2ecf20Sopenharmony_ci		return 0;
22318c2ecf20Sopenharmony_ci
22328c2ecf20Sopenharmony_ci	bh = head;
22338c2ecf20Sopenharmony_ci	block_start = 0;
22348c2ecf20Sopenharmony_ci	do {
22358c2ecf20Sopenharmony_ci		block_end = block_start + blocksize;
22368c2ecf20Sopenharmony_ci		if (block_end > from && block_start < to) {
22378c2ecf20Sopenharmony_ci			if (!buffer_uptodate(bh)) {
22388c2ecf20Sopenharmony_ci				ret = 0;
22398c2ecf20Sopenharmony_ci				break;
22408c2ecf20Sopenharmony_ci			}
22418c2ecf20Sopenharmony_ci			if (block_end >= to)
22428c2ecf20Sopenharmony_ci				break;
22438c2ecf20Sopenharmony_ci		}
22448c2ecf20Sopenharmony_ci		block_start = block_end;
22458c2ecf20Sopenharmony_ci		bh = bh->b_this_page;
22468c2ecf20Sopenharmony_ci	} while (bh != head);
22478c2ecf20Sopenharmony_ci
22488c2ecf20Sopenharmony_ci	return ret;
22498c2ecf20Sopenharmony_ci}
22508c2ecf20Sopenharmony_ciEXPORT_SYMBOL(block_is_partially_uptodate);
22518c2ecf20Sopenharmony_ci
22528c2ecf20Sopenharmony_ci/*
22538c2ecf20Sopenharmony_ci * Generic "read page" function for block devices that have the normal
22548c2ecf20Sopenharmony_ci * get_block functionality. This is most of the block device filesystems.
22558c2ecf20Sopenharmony_ci * Reads the page asynchronously --- the unlock_buffer() and
22568c2ecf20Sopenharmony_ci * set/clear_buffer_uptodate() functions propagate buffer state into the
22578c2ecf20Sopenharmony_ci * page struct once IO has completed.
22588c2ecf20Sopenharmony_ci */
22598c2ecf20Sopenharmony_ciint block_read_full_page(struct page *page, get_block_t *get_block)
22608c2ecf20Sopenharmony_ci{
22618c2ecf20Sopenharmony_ci	struct inode *inode = page->mapping->host;
22628c2ecf20Sopenharmony_ci	sector_t iblock, lblock;
22638c2ecf20Sopenharmony_ci	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
22648c2ecf20Sopenharmony_ci	unsigned int blocksize, bbits;
22658c2ecf20Sopenharmony_ci	int nr, i;
22668c2ecf20Sopenharmony_ci	int fully_mapped = 1;
22678c2ecf20Sopenharmony_ci
22688c2ecf20Sopenharmony_ci	head = create_page_buffers(page, inode, 0);
22698c2ecf20Sopenharmony_ci	blocksize = head->b_size;
22708c2ecf20Sopenharmony_ci	bbits = block_size_bits(blocksize);
22718c2ecf20Sopenharmony_ci
22728c2ecf20Sopenharmony_ci	iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
22738c2ecf20Sopenharmony_ci	lblock = (i_size_read(inode)+blocksize-1) >> bbits;
22748c2ecf20Sopenharmony_ci	bh = head;
22758c2ecf20Sopenharmony_ci	nr = 0;
22768c2ecf20Sopenharmony_ci	i = 0;
22778c2ecf20Sopenharmony_ci
22788c2ecf20Sopenharmony_ci	do {
22798c2ecf20Sopenharmony_ci		if (buffer_uptodate(bh))
22808c2ecf20Sopenharmony_ci			continue;
22818c2ecf20Sopenharmony_ci
22828c2ecf20Sopenharmony_ci		if (!buffer_mapped(bh)) {
22838c2ecf20Sopenharmony_ci			int err = 0;
22848c2ecf20Sopenharmony_ci
22858c2ecf20Sopenharmony_ci			fully_mapped = 0;
22868c2ecf20Sopenharmony_ci			if (iblock < lblock) {
22878c2ecf20Sopenharmony_ci				WARN_ON(bh->b_size != blocksize);
22888c2ecf20Sopenharmony_ci				err = get_block(inode, iblock, bh, 0);
22898c2ecf20Sopenharmony_ci				if (err)
22908c2ecf20Sopenharmony_ci					SetPageError(page);
22918c2ecf20Sopenharmony_ci			}
22928c2ecf20Sopenharmony_ci			if (!buffer_mapped(bh)) {
22938c2ecf20Sopenharmony_ci				zero_user(page, i * blocksize, blocksize);
22948c2ecf20Sopenharmony_ci				if (!err)
22958c2ecf20Sopenharmony_ci					set_buffer_uptodate(bh);
22968c2ecf20Sopenharmony_ci				continue;
22978c2ecf20Sopenharmony_ci			}
22988c2ecf20Sopenharmony_ci			/*
22998c2ecf20Sopenharmony_ci			 * get_block() might have updated the buffer
23008c2ecf20Sopenharmony_ci			 * synchronously
23018c2ecf20Sopenharmony_ci			 */
23028c2ecf20Sopenharmony_ci			if (buffer_uptodate(bh))
23038c2ecf20Sopenharmony_ci				continue;
23048c2ecf20Sopenharmony_ci		}
23058c2ecf20Sopenharmony_ci		arr[nr++] = bh;
23068c2ecf20Sopenharmony_ci	} while (i++, iblock++, (bh = bh->b_this_page) != head);
23078c2ecf20Sopenharmony_ci
23088c2ecf20Sopenharmony_ci	if (fully_mapped)
23098c2ecf20Sopenharmony_ci		SetPageMappedToDisk(page);
23108c2ecf20Sopenharmony_ci
23118c2ecf20Sopenharmony_ci	if (!nr) {
23128c2ecf20Sopenharmony_ci		/*
23138c2ecf20Sopenharmony_ci		 * All buffers are uptodate - we can set the page uptodate
23148c2ecf20Sopenharmony_ci		 * as well. But not if get_block() returned an error.
23158c2ecf20Sopenharmony_ci		 */
23168c2ecf20Sopenharmony_ci		if (!PageError(page))
23178c2ecf20Sopenharmony_ci			SetPageUptodate(page);
23188c2ecf20Sopenharmony_ci		unlock_page(page);
23198c2ecf20Sopenharmony_ci		return 0;
23208c2ecf20Sopenharmony_ci	}
23218c2ecf20Sopenharmony_ci
23228c2ecf20Sopenharmony_ci	/* Stage two: lock the buffers */
23238c2ecf20Sopenharmony_ci	for (i = 0; i < nr; i++) {
23248c2ecf20Sopenharmony_ci		bh = arr[i];
23258c2ecf20Sopenharmony_ci		lock_buffer(bh);
23268c2ecf20Sopenharmony_ci		mark_buffer_async_read(bh);
23278c2ecf20Sopenharmony_ci	}
23288c2ecf20Sopenharmony_ci
23298c2ecf20Sopenharmony_ci	/*
23308c2ecf20Sopenharmony_ci	 * Stage 3: start the IO.  Check for uptodateness
23318c2ecf20Sopenharmony_ci	 * inside the buffer lock in case another process reading
23328c2ecf20Sopenharmony_ci	 * the underlying blockdev brought it uptodate (the sct fix).
23338c2ecf20Sopenharmony_ci	 */
23348c2ecf20Sopenharmony_ci	for (i = 0; i < nr; i++) {
23358c2ecf20Sopenharmony_ci		bh = arr[i];
23368c2ecf20Sopenharmony_ci		if (buffer_uptodate(bh))
23378c2ecf20Sopenharmony_ci			end_buffer_async_read(bh, 1);
23388c2ecf20Sopenharmony_ci		else
23398c2ecf20Sopenharmony_ci			submit_bh(REQ_OP_READ, 0, bh);
23408c2ecf20Sopenharmony_ci	}
23418c2ecf20Sopenharmony_ci	return 0;
23428c2ecf20Sopenharmony_ci}
23438c2ecf20Sopenharmony_ciEXPORT_SYMBOL(block_read_full_page);
23448c2ecf20Sopenharmony_ci
23458c2ecf20Sopenharmony_ci/* utility function for filesystems that need to do work on expanding
23468c2ecf20Sopenharmony_ci * truncates.  Uses filesystem pagecache writes to allow the filesystem to
23478c2ecf20Sopenharmony_ci * deal with the hole.
23488c2ecf20Sopenharmony_ci */
23498c2ecf20Sopenharmony_ciint generic_cont_expand_simple(struct inode *inode, loff_t size)
23508c2ecf20Sopenharmony_ci{
23518c2ecf20Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
23528c2ecf20Sopenharmony_ci	struct page *page;
23538c2ecf20Sopenharmony_ci	void *fsdata = NULL;
23548c2ecf20Sopenharmony_ci	int err;
23558c2ecf20Sopenharmony_ci
23568c2ecf20Sopenharmony_ci	err = inode_newsize_ok(inode, size);
23578c2ecf20Sopenharmony_ci	if (err)
23588c2ecf20Sopenharmony_ci		goto out;
23598c2ecf20Sopenharmony_ci
23608c2ecf20Sopenharmony_ci	err = pagecache_write_begin(NULL, mapping, size, 0,
23618c2ecf20Sopenharmony_ci				    AOP_FLAG_CONT_EXPAND, &page, &fsdata);
23628c2ecf20Sopenharmony_ci	if (err)
23638c2ecf20Sopenharmony_ci		goto out;
23648c2ecf20Sopenharmony_ci
23658c2ecf20Sopenharmony_ci	err = pagecache_write_end(NULL, mapping, size, 0, 0, page, fsdata);
23668c2ecf20Sopenharmony_ci	BUG_ON(err > 0);
23678c2ecf20Sopenharmony_ci
23688c2ecf20Sopenharmony_ciout:
23698c2ecf20Sopenharmony_ci	return err;
23708c2ecf20Sopenharmony_ci}
23718c2ecf20Sopenharmony_ciEXPORT_SYMBOL(generic_cont_expand_simple);
23728c2ecf20Sopenharmony_ci
23738c2ecf20Sopenharmony_cistatic int cont_expand_zero(struct file *file, struct address_space *mapping,
23748c2ecf20Sopenharmony_ci			    loff_t pos, loff_t *bytes)
23758c2ecf20Sopenharmony_ci{
23768c2ecf20Sopenharmony_ci	struct inode *inode = mapping->host;
23778c2ecf20Sopenharmony_ci	unsigned int blocksize = i_blocksize(inode);
23788c2ecf20Sopenharmony_ci	struct page *page;
23798c2ecf20Sopenharmony_ci	void *fsdata = NULL;
23808c2ecf20Sopenharmony_ci	pgoff_t index, curidx;
23818c2ecf20Sopenharmony_ci	loff_t curpos;
23828c2ecf20Sopenharmony_ci	unsigned zerofrom, offset, len;
23838c2ecf20Sopenharmony_ci	int err = 0;
23848c2ecf20Sopenharmony_ci
23858c2ecf20Sopenharmony_ci	index = pos >> PAGE_SHIFT;
23868c2ecf20Sopenharmony_ci	offset = pos & ~PAGE_MASK;
23878c2ecf20Sopenharmony_ci
23888c2ecf20Sopenharmony_ci	while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) {
23898c2ecf20Sopenharmony_ci		zerofrom = curpos & ~PAGE_MASK;
23908c2ecf20Sopenharmony_ci		if (zerofrom & (blocksize-1)) {
23918c2ecf20Sopenharmony_ci			*bytes |= (blocksize-1);
23928c2ecf20Sopenharmony_ci			(*bytes)++;
23938c2ecf20Sopenharmony_ci		}
23948c2ecf20Sopenharmony_ci		len = PAGE_SIZE - zerofrom;
23958c2ecf20Sopenharmony_ci
23968c2ecf20Sopenharmony_ci		err = pagecache_write_begin(file, mapping, curpos, len, 0,
23978c2ecf20Sopenharmony_ci					    &page, &fsdata);
23988c2ecf20Sopenharmony_ci		if (err)
23998c2ecf20Sopenharmony_ci			goto out;
24008c2ecf20Sopenharmony_ci		zero_user(page, zerofrom, len);
24018c2ecf20Sopenharmony_ci		err = pagecache_write_end(file, mapping, curpos, len, len,
24028c2ecf20Sopenharmony_ci						page, fsdata);
24038c2ecf20Sopenharmony_ci		if (err < 0)
24048c2ecf20Sopenharmony_ci			goto out;
24058c2ecf20Sopenharmony_ci		BUG_ON(err != len);
24068c2ecf20Sopenharmony_ci		err = 0;
24078c2ecf20Sopenharmony_ci
24088c2ecf20Sopenharmony_ci		balance_dirty_pages_ratelimited(mapping);
24098c2ecf20Sopenharmony_ci
24108c2ecf20Sopenharmony_ci		if (fatal_signal_pending(current)) {
24118c2ecf20Sopenharmony_ci			err = -EINTR;
24128c2ecf20Sopenharmony_ci			goto out;
24138c2ecf20Sopenharmony_ci		}
24148c2ecf20Sopenharmony_ci	}
24158c2ecf20Sopenharmony_ci
24168c2ecf20Sopenharmony_ci	/* page covers the boundary, find the boundary offset */
24178c2ecf20Sopenharmony_ci	if (index == curidx) {
24188c2ecf20Sopenharmony_ci		zerofrom = curpos & ~PAGE_MASK;
24198c2ecf20Sopenharmony_ci		/* if we will expand the thing last block will be filled */
24208c2ecf20Sopenharmony_ci		if (offset <= zerofrom) {
24218c2ecf20Sopenharmony_ci			goto out;
24228c2ecf20Sopenharmony_ci		}
24238c2ecf20Sopenharmony_ci		if (zerofrom & (blocksize-1)) {
24248c2ecf20Sopenharmony_ci			*bytes |= (blocksize-1);
24258c2ecf20Sopenharmony_ci			(*bytes)++;
24268c2ecf20Sopenharmony_ci		}
24278c2ecf20Sopenharmony_ci		len = offset - zerofrom;
24288c2ecf20Sopenharmony_ci
24298c2ecf20Sopenharmony_ci		err = pagecache_write_begin(file, mapping, curpos, len, 0,
24308c2ecf20Sopenharmony_ci					    &page, &fsdata);
24318c2ecf20Sopenharmony_ci		if (err)
24328c2ecf20Sopenharmony_ci			goto out;
24338c2ecf20Sopenharmony_ci		zero_user(page, zerofrom, len);
24348c2ecf20Sopenharmony_ci		err = pagecache_write_end(file, mapping, curpos, len, len,
24358c2ecf20Sopenharmony_ci						page, fsdata);
24368c2ecf20Sopenharmony_ci		if (err < 0)
24378c2ecf20Sopenharmony_ci			goto out;
24388c2ecf20Sopenharmony_ci		BUG_ON(err != len);
24398c2ecf20Sopenharmony_ci		err = 0;
24408c2ecf20Sopenharmony_ci	}
24418c2ecf20Sopenharmony_ciout:
24428c2ecf20Sopenharmony_ci	return err;
24438c2ecf20Sopenharmony_ci}
24448c2ecf20Sopenharmony_ci
24458c2ecf20Sopenharmony_ci/*
24468c2ecf20Sopenharmony_ci * For moronic filesystems that do not allow holes in file.
24478c2ecf20Sopenharmony_ci * We may have to extend the file.
24488c2ecf20Sopenharmony_ci */
24498c2ecf20Sopenharmony_ciint cont_write_begin(struct file *file, struct address_space *mapping,
24508c2ecf20Sopenharmony_ci			loff_t pos, unsigned len, unsigned flags,
24518c2ecf20Sopenharmony_ci			struct page **pagep, void **fsdata,
24528c2ecf20Sopenharmony_ci			get_block_t *get_block, loff_t *bytes)
24538c2ecf20Sopenharmony_ci{
24548c2ecf20Sopenharmony_ci	struct inode *inode = mapping->host;
24558c2ecf20Sopenharmony_ci	unsigned int blocksize = i_blocksize(inode);
24568c2ecf20Sopenharmony_ci	unsigned int zerofrom;
24578c2ecf20Sopenharmony_ci	int err;
24588c2ecf20Sopenharmony_ci
24598c2ecf20Sopenharmony_ci	err = cont_expand_zero(file, mapping, pos, bytes);
24608c2ecf20Sopenharmony_ci	if (err)
24618c2ecf20Sopenharmony_ci		return err;
24628c2ecf20Sopenharmony_ci
24638c2ecf20Sopenharmony_ci	zerofrom = *bytes & ~PAGE_MASK;
24648c2ecf20Sopenharmony_ci	if (pos+len > *bytes && zerofrom & (blocksize-1)) {
24658c2ecf20Sopenharmony_ci		*bytes |= (blocksize-1);
24668c2ecf20Sopenharmony_ci		(*bytes)++;
24678c2ecf20Sopenharmony_ci	}
24688c2ecf20Sopenharmony_ci
24698c2ecf20Sopenharmony_ci	return block_write_begin(mapping, pos, len, flags, pagep, get_block);
24708c2ecf20Sopenharmony_ci}
24718c2ecf20Sopenharmony_ciEXPORT_SYMBOL(cont_write_begin);
24728c2ecf20Sopenharmony_ci
24738c2ecf20Sopenharmony_ciint block_commit_write(struct page *page, unsigned from, unsigned to)
24748c2ecf20Sopenharmony_ci{
24758c2ecf20Sopenharmony_ci	struct inode *inode = page->mapping->host;
24768c2ecf20Sopenharmony_ci	__block_commit_write(inode,page,from,to);
24778c2ecf20Sopenharmony_ci	return 0;
24788c2ecf20Sopenharmony_ci}
24798c2ecf20Sopenharmony_ciEXPORT_SYMBOL(block_commit_write);
24808c2ecf20Sopenharmony_ci
24818c2ecf20Sopenharmony_ci/*
24828c2ecf20Sopenharmony_ci * block_page_mkwrite() is not allowed to change the file size as it gets
24838c2ecf20Sopenharmony_ci * called from a page fault handler when a page is first dirtied. Hence we must
24848c2ecf20Sopenharmony_ci * be careful to check for EOF conditions here. We set the page up correctly
24858c2ecf20Sopenharmony_ci * for a written page which means we get ENOSPC checking when writing into
24868c2ecf20Sopenharmony_ci * holes and correct delalloc and unwritten extent mapping on filesystems that
24878c2ecf20Sopenharmony_ci * support these features.
24888c2ecf20Sopenharmony_ci *
24898c2ecf20Sopenharmony_ci * We are not allowed to take the i_mutex here so we have to play games to
24908c2ecf20Sopenharmony_ci * protect against truncate races as the page could now be beyond EOF.  Because
24918c2ecf20Sopenharmony_ci * truncate writes the inode size before removing pages, once we have the
24928c2ecf20Sopenharmony_ci * page lock we can determine safely if the page is beyond EOF. If it is not
24938c2ecf20Sopenharmony_ci * beyond EOF, then the page is guaranteed safe against truncation until we
24948c2ecf20Sopenharmony_ci * unlock the page.
24958c2ecf20Sopenharmony_ci *
24968c2ecf20Sopenharmony_ci * Direct callers of this function should protect against filesystem freezing
24978c2ecf20Sopenharmony_ci * using sb_start_pagefault() - sb_end_pagefault() functions.
24988c2ecf20Sopenharmony_ci */
24998c2ecf20Sopenharmony_ciint block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
25008c2ecf20Sopenharmony_ci			 get_block_t get_block)
25018c2ecf20Sopenharmony_ci{
25028c2ecf20Sopenharmony_ci	struct page *page = vmf->page;
25038c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(vma->vm_file);
25048c2ecf20Sopenharmony_ci	unsigned long end;
25058c2ecf20Sopenharmony_ci	loff_t size;
25068c2ecf20Sopenharmony_ci	int ret;
25078c2ecf20Sopenharmony_ci
25088c2ecf20Sopenharmony_ci	lock_page(page);
25098c2ecf20Sopenharmony_ci	size = i_size_read(inode);
25108c2ecf20Sopenharmony_ci	if ((page->mapping != inode->i_mapping) ||
25118c2ecf20Sopenharmony_ci	    (page_offset(page) > size)) {
25128c2ecf20Sopenharmony_ci		/* We overload EFAULT to mean page got truncated */
25138c2ecf20Sopenharmony_ci		ret = -EFAULT;
25148c2ecf20Sopenharmony_ci		goto out_unlock;
25158c2ecf20Sopenharmony_ci	}
25168c2ecf20Sopenharmony_ci
25178c2ecf20Sopenharmony_ci	/* page is wholly or partially inside EOF */
25188c2ecf20Sopenharmony_ci	if (((page->index + 1) << PAGE_SHIFT) > size)
25198c2ecf20Sopenharmony_ci		end = size & ~PAGE_MASK;
25208c2ecf20Sopenharmony_ci	else
25218c2ecf20Sopenharmony_ci		end = PAGE_SIZE;
25228c2ecf20Sopenharmony_ci
25238c2ecf20Sopenharmony_ci	ret = __block_write_begin(page, 0, end, get_block);
25248c2ecf20Sopenharmony_ci	if (!ret)
25258c2ecf20Sopenharmony_ci		ret = block_commit_write(page, 0, end);
25268c2ecf20Sopenharmony_ci
25278c2ecf20Sopenharmony_ci	if (unlikely(ret < 0))
25288c2ecf20Sopenharmony_ci		goto out_unlock;
25298c2ecf20Sopenharmony_ci	set_page_dirty(page);
25308c2ecf20Sopenharmony_ci	wait_for_stable_page(page);
25318c2ecf20Sopenharmony_ci	return 0;
25328c2ecf20Sopenharmony_ciout_unlock:
25338c2ecf20Sopenharmony_ci	unlock_page(page);
25348c2ecf20Sopenharmony_ci	return ret;
25358c2ecf20Sopenharmony_ci}
25368c2ecf20Sopenharmony_ciEXPORT_SYMBOL(block_page_mkwrite);
25378c2ecf20Sopenharmony_ci
25388c2ecf20Sopenharmony_ci/*
25398c2ecf20Sopenharmony_ci * nobh_write_begin()'s prereads are special: the buffer_heads are freed
25408c2ecf20Sopenharmony_ci * immediately, while under the page lock.  So it needs a special end_io
25418c2ecf20Sopenharmony_ci * handler which does not touch the bh after unlocking it.
25428c2ecf20Sopenharmony_ci */
25438c2ecf20Sopenharmony_cistatic void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
25448c2ecf20Sopenharmony_ci{
25458c2ecf20Sopenharmony_ci	__end_buffer_read_notouch(bh, uptodate);
25468c2ecf20Sopenharmony_ci}
25478c2ecf20Sopenharmony_ci
25488c2ecf20Sopenharmony_ci/*
25498c2ecf20Sopenharmony_ci * Attach the singly-linked list of buffers created by nobh_write_begin, to
25508c2ecf20Sopenharmony_ci * the page (converting it to circular linked list and taking care of page
25518c2ecf20Sopenharmony_ci * dirty races).
25528c2ecf20Sopenharmony_ci */
25538c2ecf20Sopenharmony_cistatic void attach_nobh_buffers(struct page *page, struct buffer_head *head)
25548c2ecf20Sopenharmony_ci{
25558c2ecf20Sopenharmony_ci	struct buffer_head *bh;
25568c2ecf20Sopenharmony_ci
25578c2ecf20Sopenharmony_ci	BUG_ON(!PageLocked(page));
25588c2ecf20Sopenharmony_ci
25598c2ecf20Sopenharmony_ci	spin_lock(&page->mapping->private_lock);
25608c2ecf20Sopenharmony_ci	bh = head;
25618c2ecf20Sopenharmony_ci	do {
25628c2ecf20Sopenharmony_ci		if (PageDirty(page))
25638c2ecf20Sopenharmony_ci			set_buffer_dirty(bh);
25648c2ecf20Sopenharmony_ci		if (!bh->b_this_page)
25658c2ecf20Sopenharmony_ci			bh->b_this_page = head;
25668c2ecf20Sopenharmony_ci		bh = bh->b_this_page;
25678c2ecf20Sopenharmony_ci	} while (bh != head);
25688c2ecf20Sopenharmony_ci	attach_page_private(page, head);
25698c2ecf20Sopenharmony_ci	spin_unlock(&page->mapping->private_lock);
25708c2ecf20Sopenharmony_ci}
25718c2ecf20Sopenharmony_ci
25728c2ecf20Sopenharmony_ci/*
25738c2ecf20Sopenharmony_ci * On entry, the page is fully not uptodate.
25748c2ecf20Sopenharmony_ci * On exit the page is fully uptodate in the areas outside (from,to)
25758c2ecf20Sopenharmony_ci * The filesystem needs to handle block truncation upon failure.
25768c2ecf20Sopenharmony_ci */
25778c2ecf20Sopenharmony_ciint nobh_write_begin(struct address_space *mapping,
25788c2ecf20Sopenharmony_ci			loff_t pos, unsigned len, unsigned flags,
25798c2ecf20Sopenharmony_ci			struct page **pagep, void **fsdata,
25808c2ecf20Sopenharmony_ci			get_block_t *get_block)
25818c2ecf20Sopenharmony_ci{
25828c2ecf20Sopenharmony_ci	struct inode *inode = mapping->host;
25838c2ecf20Sopenharmony_ci	const unsigned blkbits = inode->i_blkbits;
25848c2ecf20Sopenharmony_ci	const unsigned blocksize = 1 << blkbits;
25858c2ecf20Sopenharmony_ci	struct buffer_head *head, *bh;
25868c2ecf20Sopenharmony_ci	struct page *page;
25878c2ecf20Sopenharmony_ci	pgoff_t index;
25888c2ecf20Sopenharmony_ci	unsigned from, to;
25898c2ecf20Sopenharmony_ci	unsigned block_in_page;
25908c2ecf20Sopenharmony_ci	unsigned block_start, block_end;
25918c2ecf20Sopenharmony_ci	sector_t block_in_file;
25928c2ecf20Sopenharmony_ci	int nr_reads = 0;
25938c2ecf20Sopenharmony_ci	int ret = 0;
25948c2ecf20Sopenharmony_ci	int is_mapped_to_disk = 1;
25958c2ecf20Sopenharmony_ci
25968c2ecf20Sopenharmony_ci	index = pos >> PAGE_SHIFT;
25978c2ecf20Sopenharmony_ci	from = pos & (PAGE_SIZE - 1);
25988c2ecf20Sopenharmony_ci	to = from + len;
25998c2ecf20Sopenharmony_ci
26008c2ecf20Sopenharmony_ci	page = grab_cache_page_write_begin(mapping, index, flags);
26018c2ecf20Sopenharmony_ci	if (!page)
26028c2ecf20Sopenharmony_ci		return -ENOMEM;
26038c2ecf20Sopenharmony_ci	*pagep = page;
26048c2ecf20Sopenharmony_ci	*fsdata = NULL;
26058c2ecf20Sopenharmony_ci
26068c2ecf20Sopenharmony_ci	if (page_has_buffers(page)) {
26078c2ecf20Sopenharmony_ci		ret = __block_write_begin(page, pos, len, get_block);
26088c2ecf20Sopenharmony_ci		if (unlikely(ret))
26098c2ecf20Sopenharmony_ci			goto out_release;
26108c2ecf20Sopenharmony_ci		return ret;
26118c2ecf20Sopenharmony_ci	}
26128c2ecf20Sopenharmony_ci
26138c2ecf20Sopenharmony_ci	if (PageMappedToDisk(page))
26148c2ecf20Sopenharmony_ci		return 0;
26158c2ecf20Sopenharmony_ci
26168c2ecf20Sopenharmony_ci	/*
26178c2ecf20Sopenharmony_ci	 * Allocate buffers so that we can keep track of state, and potentially
26188c2ecf20Sopenharmony_ci	 * attach them to the page if an error occurs. In the common case of
26198c2ecf20Sopenharmony_ci	 * no error, they will just be freed again without ever being attached
26208c2ecf20Sopenharmony_ci	 * to the page (which is all OK, because we're under the page lock).
26218c2ecf20Sopenharmony_ci	 *
26228c2ecf20Sopenharmony_ci	 * Be careful: the buffer linked list is a NULL terminated one, rather
26238c2ecf20Sopenharmony_ci	 * than the circular one we're used to.
26248c2ecf20Sopenharmony_ci	 */
26258c2ecf20Sopenharmony_ci	head = alloc_page_buffers(page, blocksize, false);
26268c2ecf20Sopenharmony_ci	if (!head) {
26278c2ecf20Sopenharmony_ci		ret = -ENOMEM;
26288c2ecf20Sopenharmony_ci		goto out_release;
26298c2ecf20Sopenharmony_ci	}
26308c2ecf20Sopenharmony_ci
26318c2ecf20Sopenharmony_ci	block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
26328c2ecf20Sopenharmony_ci
26338c2ecf20Sopenharmony_ci	/*
26348c2ecf20Sopenharmony_ci	 * We loop across all blocks in the page, whether or not they are
26358c2ecf20Sopenharmony_ci	 * part of the affected region.  This is so we can discover if the
26368c2ecf20Sopenharmony_ci	 * page is fully mapped-to-disk.
26378c2ecf20Sopenharmony_ci	 */
26388c2ecf20Sopenharmony_ci	for (block_start = 0, block_in_page = 0, bh = head;
26398c2ecf20Sopenharmony_ci		  block_start < PAGE_SIZE;
26408c2ecf20Sopenharmony_ci		  block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
26418c2ecf20Sopenharmony_ci		int create;
26428c2ecf20Sopenharmony_ci
26438c2ecf20Sopenharmony_ci		block_end = block_start + blocksize;
26448c2ecf20Sopenharmony_ci		bh->b_state = 0;
26458c2ecf20Sopenharmony_ci		create = 1;
26468c2ecf20Sopenharmony_ci		if (block_start >= to)
26478c2ecf20Sopenharmony_ci			create = 0;
26488c2ecf20Sopenharmony_ci		ret = get_block(inode, block_in_file + block_in_page,
26498c2ecf20Sopenharmony_ci					bh, create);
26508c2ecf20Sopenharmony_ci		if (ret)
26518c2ecf20Sopenharmony_ci			goto failed;
26528c2ecf20Sopenharmony_ci		if (!buffer_mapped(bh))
26538c2ecf20Sopenharmony_ci			is_mapped_to_disk = 0;
26548c2ecf20Sopenharmony_ci		if (buffer_new(bh))
26558c2ecf20Sopenharmony_ci			clean_bdev_bh_alias(bh);
26568c2ecf20Sopenharmony_ci		if (PageUptodate(page)) {
26578c2ecf20Sopenharmony_ci			set_buffer_uptodate(bh);
26588c2ecf20Sopenharmony_ci			continue;
26598c2ecf20Sopenharmony_ci		}
26608c2ecf20Sopenharmony_ci		if (buffer_new(bh) || !buffer_mapped(bh)) {
26618c2ecf20Sopenharmony_ci			zero_user_segments(page, block_start, from,
26628c2ecf20Sopenharmony_ci							to, block_end);
26638c2ecf20Sopenharmony_ci			continue;
26648c2ecf20Sopenharmony_ci		}
26658c2ecf20Sopenharmony_ci		if (buffer_uptodate(bh))
26668c2ecf20Sopenharmony_ci			continue;	/* reiserfs does this */
26678c2ecf20Sopenharmony_ci		if (block_start < from || block_end > to) {
26688c2ecf20Sopenharmony_ci			lock_buffer(bh);
26698c2ecf20Sopenharmony_ci			bh->b_end_io = end_buffer_read_nobh;
26708c2ecf20Sopenharmony_ci			submit_bh(REQ_OP_READ, 0, bh);
26718c2ecf20Sopenharmony_ci			nr_reads++;
26728c2ecf20Sopenharmony_ci		}
26738c2ecf20Sopenharmony_ci	}
26748c2ecf20Sopenharmony_ci
26758c2ecf20Sopenharmony_ci	if (nr_reads) {
26768c2ecf20Sopenharmony_ci		/*
26778c2ecf20Sopenharmony_ci		 * The page is locked, so these buffers are protected from
26788c2ecf20Sopenharmony_ci		 * any VM or truncate activity.  Hence we don't need to care
26798c2ecf20Sopenharmony_ci		 * for the buffer_head refcounts.
26808c2ecf20Sopenharmony_ci		 */
26818c2ecf20Sopenharmony_ci		for (bh = head; bh; bh = bh->b_this_page) {
26828c2ecf20Sopenharmony_ci			wait_on_buffer(bh);
26838c2ecf20Sopenharmony_ci			if (!buffer_uptodate(bh))
26848c2ecf20Sopenharmony_ci				ret = -EIO;
26858c2ecf20Sopenharmony_ci		}
26868c2ecf20Sopenharmony_ci		if (ret)
26878c2ecf20Sopenharmony_ci			goto failed;
26888c2ecf20Sopenharmony_ci	}
26898c2ecf20Sopenharmony_ci
26908c2ecf20Sopenharmony_ci	if (is_mapped_to_disk)
26918c2ecf20Sopenharmony_ci		SetPageMappedToDisk(page);
26928c2ecf20Sopenharmony_ci
26938c2ecf20Sopenharmony_ci	*fsdata = head; /* to be released by nobh_write_end */
26948c2ecf20Sopenharmony_ci
26958c2ecf20Sopenharmony_ci	return 0;
26968c2ecf20Sopenharmony_ci
26978c2ecf20Sopenharmony_cifailed:
26988c2ecf20Sopenharmony_ci	BUG_ON(!ret);
26998c2ecf20Sopenharmony_ci	/*
27008c2ecf20Sopenharmony_ci	 * Error recovery is a bit difficult. We need to zero out blocks that
27018c2ecf20Sopenharmony_ci	 * were newly allocated, and dirty them to ensure they get written out.
27028c2ecf20Sopenharmony_ci	 * Buffers need to be attached to the page at this point, otherwise
27038c2ecf20Sopenharmony_ci	 * the handling of potential IO errors during writeout would be hard
27048c2ecf20Sopenharmony_ci	 * (could try doing synchronous writeout, but what if that fails too?)
27058c2ecf20Sopenharmony_ci	 */
27068c2ecf20Sopenharmony_ci	attach_nobh_buffers(page, head);
27078c2ecf20Sopenharmony_ci	page_zero_new_buffers(page, from, to);
27088c2ecf20Sopenharmony_ci
27098c2ecf20Sopenharmony_ciout_release:
27108c2ecf20Sopenharmony_ci	unlock_page(page);
27118c2ecf20Sopenharmony_ci	put_page(page);
27128c2ecf20Sopenharmony_ci	*pagep = NULL;
27138c2ecf20Sopenharmony_ci
27148c2ecf20Sopenharmony_ci	return ret;
27158c2ecf20Sopenharmony_ci}
27168c2ecf20Sopenharmony_ciEXPORT_SYMBOL(nobh_write_begin);
27178c2ecf20Sopenharmony_ci
27188c2ecf20Sopenharmony_ciint nobh_write_end(struct file *file, struct address_space *mapping,
27198c2ecf20Sopenharmony_ci			loff_t pos, unsigned len, unsigned copied,
27208c2ecf20Sopenharmony_ci			struct page *page, void *fsdata)
27218c2ecf20Sopenharmony_ci{
27228c2ecf20Sopenharmony_ci	struct inode *inode = page->mapping->host;
27238c2ecf20Sopenharmony_ci	struct buffer_head *head = fsdata;
27248c2ecf20Sopenharmony_ci	struct buffer_head *bh;
27258c2ecf20Sopenharmony_ci	BUG_ON(fsdata != NULL && page_has_buffers(page));
27268c2ecf20Sopenharmony_ci
27278c2ecf20Sopenharmony_ci	if (unlikely(copied < len) && head)
27288c2ecf20Sopenharmony_ci		attach_nobh_buffers(page, head);
27298c2ecf20Sopenharmony_ci	if (page_has_buffers(page))
27308c2ecf20Sopenharmony_ci		return generic_write_end(file, mapping, pos, len,
27318c2ecf20Sopenharmony_ci					copied, page, fsdata);
27328c2ecf20Sopenharmony_ci
27338c2ecf20Sopenharmony_ci	SetPageUptodate(page);
27348c2ecf20Sopenharmony_ci	set_page_dirty(page);
27358c2ecf20Sopenharmony_ci	if (pos+copied > inode->i_size) {
27368c2ecf20Sopenharmony_ci		i_size_write(inode, pos+copied);
27378c2ecf20Sopenharmony_ci		mark_inode_dirty(inode);
27388c2ecf20Sopenharmony_ci	}
27398c2ecf20Sopenharmony_ci
27408c2ecf20Sopenharmony_ci	unlock_page(page);
27418c2ecf20Sopenharmony_ci	put_page(page);
27428c2ecf20Sopenharmony_ci
27438c2ecf20Sopenharmony_ci	while (head) {
27448c2ecf20Sopenharmony_ci		bh = head;
27458c2ecf20Sopenharmony_ci		head = head->b_this_page;
27468c2ecf20Sopenharmony_ci		free_buffer_head(bh);
27478c2ecf20Sopenharmony_ci	}
27488c2ecf20Sopenharmony_ci
27498c2ecf20Sopenharmony_ci	return copied;
27508c2ecf20Sopenharmony_ci}
27518c2ecf20Sopenharmony_ciEXPORT_SYMBOL(nobh_write_end);
27528c2ecf20Sopenharmony_ci
27538c2ecf20Sopenharmony_ci/*
27548c2ecf20Sopenharmony_ci * nobh_writepage() - based on block_full_write_page() except
27558c2ecf20Sopenharmony_ci * that it tries to operate without attaching bufferheads to
27568c2ecf20Sopenharmony_ci * the page.
27578c2ecf20Sopenharmony_ci */
27588c2ecf20Sopenharmony_ciint nobh_writepage(struct page *page, get_block_t *get_block,
27598c2ecf20Sopenharmony_ci			struct writeback_control *wbc)
27608c2ecf20Sopenharmony_ci{
27618c2ecf20Sopenharmony_ci	struct inode * const inode = page->mapping->host;
27628c2ecf20Sopenharmony_ci	loff_t i_size = i_size_read(inode);
27638c2ecf20Sopenharmony_ci	const pgoff_t end_index = i_size >> PAGE_SHIFT;
27648c2ecf20Sopenharmony_ci	unsigned offset;
27658c2ecf20Sopenharmony_ci	int ret;
27668c2ecf20Sopenharmony_ci
27678c2ecf20Sopenharmony_ci	/* Is the page fully inside i_size? */
27688c2ecf20Sopenharmony_ci	if (page->index < end_index)
27698c2ecf20Sopenharmony_ci		goto out;
27708c2ecf20Sopenharmony_ci
27718c2ecf20Sopenharmony_ci	/* Is the page fully outside i_size? (truncate in progress) */
27728c2ecf20Sopenharmony_ci	offset = i_size & (PAGE_SIZE-1);
27738c2ecf20Sopenharmony_ci	if (page->index >= end_index+1 || !offset) {
27748c2ecf20Sopenharmony_ci		unlock_page(page);
27758c2ecf20Sopenharmony_ci		return 0; /* don't care */
27768c2ecf20Sopenharmony_ci	}
27778c2ecf20Sopenharmony_ci
27788c2ecf20Sopenharmony_ci	/*
27798c2ecf20Sopenharmony_ci	 * The page straddles i_size.  It must be zeroed out on each and every
27808c2ecf20Sopenharmony_ci	 * writepage invocation because it may be mmapped.  "A file is mapped
27818c2ecf20Sopenharmony_ci	 * in multiples of the page size.  For a file that is not a multiple of
27828c2ecf20Sopenharmony_ci	 * the  page size, the remaining memory is zeroed when mapped, and
27838c2ecf20Sopenharmony_ci	 * writes to that region are not written out to the file."
27848c2ecf20Sopenharmony_ci	 */
27858c2ecf20Sopenharmony_ci	zero_user_segment(page, offset, PAGE_SIZE);
27868c2ecf20Sopenharmony_ciout:
27878c2ecf20Sopenharmony_ci	ret = mpage_writepage(page, get_block, wbc);
27888c2ecf20Sopenharmony_ci	if (ret == -EAGAIN)
27898c2ecf20Sopenharmony_ci		ret = __block_write_full_page(inode, page, get_block, wbc,
27908c2ecf20Sopenharmony_ci					      end_buffer_async_write);
27918c2ecf20Sopenharmony_ci	return ret;
27928c2ecf20Sopenharmony_ci}
27938c2ecf20Sopenharmony_ciEXPORT_SYMBOL(nobh_writepage);
27948c2ecf20Sopenharmony_ci
27958c2ecf20Sopenharmony_ciint nobh_truncate_page(struct address_space *mapping,
27968c2ecf20Sopenharmony_ci			loff_t from, get_block_t *get_block)
27978c2ecf20Sopenharmony_ci{
27988c2ecf20Sopenharmony_ci	pgoff_t index = from >> PAGE_SHIFT;
27998c2ecf20Sopenharmony_ci	unsigned offset = from & (PAGE_SIZE-1);
28008c2ecf20Sopenharmony_ci	unsigned blocksize;
28018c2ecf20Sopenharmony_ci	sector_t iblock;
28028c2ecf20Sopenharmony_ci	unsigned length, pos;
28038c2ecf20Sopenharmony_ci	struct inode *inode = mapping->host;
28048c2ecf20Sopenharmony_ci	struct page *page;
28058c2ecf20Sopenharmony_ci	struct buffer_head map_bh;
28068c2ecf20Sopenharmony_ci	int err;
28078c2ecf20Sopenharmony_ci
28088c2ecf20Sopenharmony_ci	blocksize = i_blocksize(inode);
28098c2ecf20Sopenharmony_ci	length = offset & (blocksize - 1);
28108c2ecf20Sopenharmony_ci
28118c2ecf20Sopenharmony_ci	/* Block boundary? Nothing to do */
28128c2ecf20Sopenharmony_ci	if (!length)
28138c2ecf20Sopenharmony_ci		return 0;
28148c2ecf20Sopenharmony_ci
28158c2ecf20Sopenharmony_ci	length = blocksize - length;
28168c2ecf20Sopenharmony_ci	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
28178c2ecf20Sopenharmony_ci
28188c2ecf20Sopenharmony_ci	page = grab_cache_page(mapping, index);
28198c2ecf20Sopenharmony_ci	err = -ENOMEM;
28208c2ecf20Sopenharmony_ci	if (!page)
28218c2ecf20Sopenharmony_ci		goto out;
28228c2ecf20Sopenharmony_ci
28238c2ecf20Sopenharmony_ci	if (page_has_buffers(page)) {
28248c2ecf20Sopenharmony_cihas_buffers:
28258c2ecf20Sopenharmony_ci		unlock_page(page);
28268c2ecf20Sopenharmony_ci		put_page(page);
28278c2ecf20Sopenharmony_ci		return block_truncate_page(mapping, from, get_block);
28288c2ecf20Sopenharmony_ci	}
28298c2ecf20Sopenharmony_ci
28308c2ecf20Sopenharmony_ci	/* Find the buffer that contains "offset" */
28318c2ecf20Sopenharmony_ci	pos = blocksize;
28328c2ecf20Sopenharmony_ci	while (offset >= pos) {
28338c2ecf20Sopenharmony_ci		iblock++;
28348c2ecf20Sopenharmony_ci		pos += blocksize;
28358c2ecf20Sopenharmony_ci	}
28368c2ecf20Sopenharmony_ci
28378c2ecf20Sopenharmony_ci	map_bh.b_size = blocksize;
28388c2ecf20Sopenharmony_ci	map_bh.b_state = 0;
28398c2ecf20Sopenharmony_ci	err = get_block(inode, iblock, &map_bh, 0);
28408c2ecf20Sopenharmony_ci	if (err)
28418c2ecf20Sopenharmony_ci		goto unlock;
28428c2ecf20Sopenharmony_ci	/* unmapped? It's a hole - nothing to do */
28438c2ecf20Sopenharmony_ci	if (!buffer_mapped(&map_bh))
28448c2ecf20Sopenharmony_ci		goto unlock;
28458c2ecf20Sopenharmony_ci
28468c2ecf20Sopenharmony_ci	/* Ok, it's mapped. Make sure it's up-to-date */
28478c2ecf20Sopenharmony_ci	if (!PageUptodate(page)) {
28488c2ecf20Sopenharmony_ci		err = mapping->a_ops->readpage(NULL, page);
28498c2ecf20Sopenharmony_ci		if (err) {
28508c2ecf20Sopenharmony_ci			put_page(page);
28518c2ecf20Sopenharmony_ci			goto out;
28528c2ecf20Sopenharmony_ci		}
28538c2ecf20Sopenharmony_ci		lock_page(page);
28548c2ecf20Sopenharmony_ci		if (!PageUptodate(page)) {
28558c2ecf20Sopenharmony_ci			err = -EIO;
28568c2ecf20Sopenharmony_ci			goto unlock;
28578c2ecf20Sopenharmony_ci		}
28588c2ecf20Sopenharmony_ci		if (page_has_buffers(page))
28598c2ecf20Sopenharmony_ci			goto has_buffers;
28608c2ecf20Sopenharmony_ci	}
28618c2ecf20Sopenharmony_ci	zero_user(page, offset, length);
28628c2ecf20Sopenharmony_ci	set_page_dirty(page);
28638c2ecf20Sopenharmony_ci	err = 0;
28648c2ecf20Sopenharmony_ci
28658c2ecf20Sopenharmony_ciunlock:
28668c2ecf20Sopenharmony_ci	unlock_page(page);
28678c2ecf20Sopenharmony_ci	put_page(page);
28688c2ecf20Sopenharmony_ciout:
28698c2ecf20Sopenharmony_ci	return err;
28708c2ecf20Sopenharmony_ci}
28718c2ecf20Sopenharmony_ciEXPORT_SYMBOL(nobh_truncate_page);
28728c2ecf20Sopenharmony_ci
28738c2ecf20Sopenharmony_ciint block_truncate_page(struct address_space *mapping,
28748c2ecf20Sopenharmony_ci			loff_t from, get_block_t *get_block)
28758c2ecf20Sopenharmony_ci{
28768c2ecf20Sopenharmony_ci	pgoff_t index = from >> PAGE_SHIFT;
28778c2ecf20Sopenharmony_ci	unsigned offset = from & (PAGE_SIZE-1);
28788c2ecf20Sopenharmony_ci	unsigned blocksize;
28798c2ecf20Sopenharmony_ci	sector_t iblock;
28808c2ecf20Sopenharmony_ci	unsigned length, pos;
28818c2ecf20Sopenharmony_ci	struct inode *inode = mapping->host;
28828c2ecf20Sopenharmony_ci	struct page *page;
28838c2ecf20Sopenharmony_ci	struct buffer_head *bh;
28848c2ecf20Sopenharmony_ci	int err;
28858c2ecf20Sopenharmony_ci
28868c2ecf20Sopenharmony_ci	blocksize = i_blocksize(inode);
28878c2ecf20Sopenharmony_ci	length = offset & (blocksize - 1);
28888c2ecf20Sopenharmony_ci
28898c2ecf20Sopenharmony_ci	/* Block boundary? Nothing to do */
28908c2ecf20Sopenharmony_ci	if (!length)
28918c2ecf20Sopenharmony_ci		return 0;
28928c2ecf20Sopenharmony_ci
28938c2ecf20Sopenharmony_ci	length = blocksize - length;
28948c2ecf20Sopenharmony_ci	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
28958c2ecf20Sopenharmony_ci
28968c2ecf20Sopenharmony_ci	page = grab_cache_page(mapping, index);
28978c2ecf20Sopenharmony_ci	err = -ENOMEM;
28988c2ecf20Sopenharmony_ci	if (!page)
28998c2ecf20Sopenharmony_ci		goto out;
29008c2ecf20Sopenharmony_ci
29018c2ecf20Sopenharmony_ci	if (!page_has_buffers(page))
29028c2ecf20Sopenharmony_ci		create_empty_buffers(page, blocksize, 0);
29038c2ecf20Sopenharmony_ci
29048c2ecf20Sopenharmony_ci	/* Find the buffer that contains "offset" */
29058c2ecf20Sopenharmony_ci	bh = page_buffers(page);
29068c2ecf20Sopenharmony_ci	pos = blocksize;
29078c2ecf20Sopenharmony_ci	while (offset >= pos) {
29088c2ecf20Sopenharmony_ci		bh = bh->b_this_page;
29098c2ecf20Sopenharmony_ci		iblock++;
29108c2ecf20Sopenharmony_ci		pos += blocksize;
29118c2ecf20Sopenharmony_ci	}
29128c2ecf20Sopenharmony_ci
29138c2ecf20Sopenharmony_ci	err = 0;
29148c2ecf20Sopenharmony_ci	if (!buffer_mapped(bh)) {
29158c2ecf20Sopenharmony_ci		WARN_ON(bh->b_size != blocksize);
29168c2ecf20Sopenharmony_ci		err = get_block(inode, iblock, bh, 0);
29178c2ecf20Sopenharmony_ci		if (err)
29188c2ecf20Sopenharmony_ci			goto unlock;
29198c2ecf20Sopenharmony_ci		/* unmapped? It's a hole - nothing to do */
29208c2ecf20Sopenharmony_ci		if (!buffer_mapped(bh))
29218c2ecf20Sopenharmony_ci			goto unlock;
29228c2ecf20Sopenharmony_ci	}
29238c2ecf20Sopenharmony_ci
29248c2ecf20Sopenharmony_ci	/* Ok, it's mapped. Make sure it's up-to-date */
29258c2ecf20Sopenharmony_ci	if (PageUptodate(page))
29268c2ecf20Sopenharmony_ci		set_buffer_uptodate(bh);
29278c2ecf20Sopenharmony_ci
29288c2ecf20Sopenharmony_ci	if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
29298c2ecf20Sopenharmony_ci		err = -EIO;
29308c2ecf20Sopenharmony_ci		ll_rw_block(REQ_OP_READ, 0, 1, &bh);
29318c2ecf20Sopenharmony_ci		wait_on_buffer(bh);
29328c2ecf20Sopenharmony_ci		/* Uhhuh. Read error. Complain and punt. */
29338c2ecf20Sopenharmony_ci		if (!buffer_uptodate(bh))
29348c2ecf20Sopenharmony_ci			goto unlock;
29358c2ecf20Sopenharmony_ci	}
29368c2ecf20Sopenharmony_ci
29378c2ecf20Sopenharmony_ci	zero_user(page, offset, length);
29388c2ecf20Sopenharmony_ci	mark_buffer_dirty(bh);
29398c2ecf20Sopenharmony_ci	err = 0;
29408c2ecf20Sopenharmony_ci
29418c2ecf20Sopenharmony_ciunlock:
29428c2ecf20Sopenharmony_ci	unlock_page(page);
29438c2ecf20Sopenharmony_ci	put_page(page);
29448c2ecf20Sopenharmony_ciout:
29458c2ecf20Sopenharmony_ci	return err;
29468c2ecf20Sopenharmony_ci}
29478c2ecf20Sopenharmony_ciEXPORT_SYMBOL(block_truncate_page);
29488c2ecf20Sopenharmony_ci
29498c2ecf20Sopenharmony_ci/*
29508c2ecf20Sopenharmony_ci * The generic ->writepage function for buffer-backed address_spaces
29518c2ecf20Sopenharmony_ci */
29528c2ecf20Sopenharmony_ciint block_write_full_page(struct page *page, get_block_t *get_block,
29538c2ecf20Sopenharmony_ci			struct writeback_control *wbc)
29548c2ecf20Sopenharmony_ci{
29558c2ecf20Sopenharmony_ci	struct inode * const inode = page->mapping->host;
29568c2ecf20Sopenharmony_ci	loff_t i_size = i_size_read(inode);
29578c2ecf20Sopenharmony_ci	const pgoff_t end_index = i_size >> PAGE_SHIFT;
29588c2ecf20Sopenharmony_ci	unsigned offset;
29598c2ecf20Sopenharmony_ci
29608c2ecf20Sopenharmony_ci	/* Is the page fully inside i_size? */
29618c2ecf20Sopenharmony_ci	if (page->index < end_index)
29628c2ecf20Sopenharmony_ci		return __block_write_full_page(inode, page, get_block, wbc,
29638c2ecf20Sopenharmony_ci					       end_buffer_async_write);
29648c2ecf20Sopenharmony_ci
29658c2ecf20Sopenharmony_ci	/* Is the page fully outside i_size? (truncate in progress) */
29668c2ecf20Sopenharmony_ci	offset = i_size & (PAGE_SIZE-1);
29678c2ecf20Sopenharmony_ci	if (page->index >= end_index+1 || !offset) {
29688c2ecf20Sopenharmony_ci		unlock_page(page);
29698c2ecf20Sopenharmony_ci		return 0; /* don't care */
29708c2ecf20Sopenharmony_ci	}
29718c2ecf20Sopenharmony_ci
29728c2ecf20Sopenharmony_ci	/*
29738c2ecf20Sopenharmony_ci	 * The page straddles i_size.  It must be zeroed out on each and every
29748c2ecf20Sopenharmony_ci	 * writepage invocation because it may be mmapped.  "A file is mapped
29758c2ecf20Sopenharmony_ci	 * in multiples of the page size.  For a file that is not a multiple of
29768c2ecf20Sopenharmony_ci	 * the  page size, the remaining memory is zeroed when mapped, and
29778c2ecf20Sopenharmony_ci	 * writes to that region are not written out to the file."
29788c2ecf20Sopenharmony_ci	 */
29798c2ecf20Sopenharmony_ci	zero_user_segment(page, offset, PAGE_SIZE);
29808c2ecf20Sopenharmony_ci	return __block_write_full_page(inode, page, get_block, wbc,
29818c2ecf20Sopenharmony_ci							end_buffer_async_write);
29828c2ecf20Sopenharmony_ci}
29838c2ecf20Sopenharmony_ciEXPORT_SYMBOL(block_write_full_page);
29848c2ecf20Sopenharmony_ci
29858c2ecf20Sopenharmony_cisector_t generic_block_bmap(struct address_space *mapping, sector_t block,
29868c2ecf20Sopenharmony_ci			    get_block_t *get_block)
29878c2ecf20Sopenharmony_ci{
29888c2ecf20Sopenharmony_ci	struct inode *inode = mapping->host;
29898c2ecf20Sopenharmony_ci	struct buffer_head tmp = {
29908c2ecf20Sopenharmony_ci		.b_size = i_blocksize(inode),
29918c2ecf20Sopenharmony_ci	};
29928c2ecf20Sopenharmony_ci
29938c2ecf20Sopenharmony_ci	get_block(inode, block, &tmp, 0);
29948c2ecf20Sopenharmony_ci	return tmp.b_blocknr;
29958c2ecf20Sopenharmony_ci}
29968c2ecf20Sopenharmony_ciEXPORT_SYMBOL(generic_block_bmap);
29978c2ecf20Sopenharmony_ci
29988c2ecf20Sopenharmony_cistatic void end_bio_bh_io_sync(struct bio *bio)
29998c2ecf20Sopenharmony_ci{
30008c2ecf20Sopenharmony_ci	struct buffer_head *bh = bio->bi_private;
30018c2ecf20Sopenharmony_ci
30028c2ecf20Sopenharmony_ci	if (unlikely(bio_flagged(bio, BIO_QUIET)))
30038c2ecf20Sopenharmony_ci		set_bit(BH_Quiet, &bh->b_state);
30048c2ecf20Sopenharmony_ci
30058c2ecf20Sopenharmony_ci	bh->b_end_io(bh, !bio->bi_status);
30068c2ecf20Sopenharmony_ci	bio_put(bio);
30078c2ecf20Sopenharmony_ci}
30088c2ecf20Sopenharmony_ci
/*
 * submit_bh_wbc - build and submit a bio for one locked buffer_head.
 * @op: REQ_OP_READ or REQ_OP_WRITE
 * @op_flags: extra request flags (REQ_SYNC, REQ_RAHEAD, ...)
 * @bh: locked, mapped buffer to perform I/O on
 * @write_hint: write-lifetime hint passed through to the bio
 * @wbc: optional writeback control for cgroup accounting (may be NULL)
 *
 * Completion is routed through end_bio_bh_io_sync(), which invokes
 * bh->b_end_io().  Always returns 0.
 */
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
			 enum rw_hint write_hint, struct writeback_control *wbc)
{
	struct bio *bio;

	/* The caller must hand us a locked, mapped, fully-resolved buffer. */
	BUG_ON(!buffer_locked(bh));
	BUG_ON(!buffer_mapped(bh));
	BUG_ON(!bh->b_end_io);
	BUG_ON(buffer_delay(bh));
	BUG_ON(buffer_unwritten(bh));

	/*
	 * Only clear out a write error when rewriting
	 */
	if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
		clear_buffer_write_io_error(bh);

	/* GFP_NOIO: this path may run under memory reclaim. */
	bio = bio_alloc(GFP_NOIO, 1);

	fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);

	/* Convert the block number into a 512-byte sector offset. */
	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio_set_dev(bio, bh->b_bdev);
	bio->bi_write_hint = write_hint;

	/* A single bh always fits in one bio segment. */
	bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
	BUG_ON(bio->bi_iter.bi_size != bh->b_size);

	bio->bi_end_io = end_bio_bh_io_sync;
	bio->bi_private = bh;

	/* Propagate per-buffer priority hints into the request flags. */
	if (buffer_meta(bh))
		op_flags |= REQ_META;
	if (buffer_prio(bh))
		op_flags |= REQ_PRIO;
	bio_set_op_attrs(bio, op, op_flags);

	/* Take care of bh's that straddle the end of the device */
	guard_bio_eod(bio);

	if (wbc) {
		wbc_init_bio(wbc, bio);
		wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size);
	}

	submit_bio(bio);
	return 0;
}
30578c2ecf20Sopenharmony_ci
/*
 * submit_bh - submit a buffer_head for I/O.
 * @op: REQ_OP_READ or REQ_OP_WRITE
 * @op_flags: extra request flags
 * @bh: locked, mapped buffer with b_end_io set
 *
 * Thin wrapper around submit_bh_wbc() with no write hint and no
 * writeback control.  Always returns 0.
 */
int submit_bh(int op, int op_flags, struct buffer_head *bh)
{
	return submit_bh_wbc(op, op_flags, bh, 0, NULL);
}
EXPORT_SYMBOL(submit_bh);
30638c2ecf20Sopenharmony_ci
30648c2ecf20Sopenharmony_ci/**
30658c2ecf20Sopenharmony_ci * ll_rw_block: low-level access to block devices (DEPRECATED)
30668c2ecf20Sopenharmony_ci * @op: whether to %READ or %WRITE
30678c2ecf20Sopenharmony_ci * @op_flags: req_flag_bits
30688c2ecf20Sopenharmony_ci * @nr: number of &struct buffer_heads in the array
30698c2ecf20Sopenharmony_ci * @bhs: array of pointers to &struct buffer_head
30708c2ecf20Sopenharmony_ci *
30718c2ecf20Sopenharmony_ci * ll_rw_block() takes an array of pointers to &struct buffer_heads, and
30728c2ecf20Sopenharmony_ci * requests an I/O operation on them, either a %REQ_OP_READ or a %REQ_OP_WRITE.
30738c2ecf20Sopenharmony_ci * @op_flags contains flags modifying the detailed I/O behavior, most notably
30748c2ecf20Sopenharmony_ci * %REQ_RAHEAD.
30758c2ecf20Sopenharmony_ci *
30768c2ecf20Sopenharmony_ci * This function drops any buffer that it cannot get a lock on (with the
30778c2ecf20Sopenharmony_ci * BH_Lock state bit), any buffer that appears to be clean when doing a write
30788c2ecf20Sopenharmony_ci * request, and any buffer that appears to be up-to-date when doing read
30798c2ecf20Sopenharmony_ci * request.  Further it marks as clean buffers that are processed for
30808c2ecf20Sopenharmony_ci * writing (the buffer cache won't assume that they are actually clean
30818c2ecf20Sopenharmony_ci * until the buffer gets unlocked).
30828c2ecf20Sopenharmony_ci *
30838c2ecf20Sopenharmony_ci * ll_rw_block sets b_end_io to simple completion handler that marks
30848c2ecf20Sopenharmony_ci * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
30858c2ecf20Sopenharmony_ci * any waiters.
30868c2ecf20Sopenharmony_ci *
30878c2ecf20Sopenharmony_ci * All of the buffers must be for the same device, and must also be a
30888c2ecf20Sopenharmony_ci * multiple of the current approved size for the device.
30898c2ecf20Sopenharmony_ci */
30908c2ecf20Sopenharmony_civoid ll_rw_block(int op, int op_flags,  int nr, struct buffer_head *bhs[])
30918c2ecf20Sopenharmony_ci{
30928c2ecf20Sopenharmony_ci	int i;
30938c2ecf20Sopenharmony_ci
30948c2ecf20Sopenharmony_ci	for (i = 0; i < nr; i++) {
30958c2ecf20Sopenharmony_ci		struct buffer_head *bh = bhs[i];
30968c2ecf20Sopenharmony_ci
30978c2ecf20Sopenharmony_ci		if (!trylock_buffer(bh))
30988c2ecf20Sopenharmony_ci			continue;
30998c2ecf20Sopenharmony_ci		if (op == WRITE) {
31008c2ecf20Sopenharmony_ci			if (test_clear_buffer_dirty(bh)) {
31018c2ecf20Sopenharmony_ci				bh->b_end_io = end_buffer_write_sync;
31028c2ecf20Sopenharmony_ci				get_bh(bh);
31038c2ecf20Sopenharmony_ci				submit_bh(op, op_flags, bh);
31048c2ecf20Sopenharmony_ci				continue;
31058c2ecf20Sopenharmony_ci			}
31068c2ecf20Sopenharmony_ci		} else {
31078c2ecf20Sopenharmony_ci			if (!buffer_uptodate(bh)) {
31088c2ecf20Sopenharmony_ci				bh->b_end_io = end_buffer_read_sync;
31098c2ecf20Sopenharmony_ci				get_bh(bh);
31108c2ecf20Sopenharmony_ci				submit_bh(op, op_flags, bh);
31118c2ecf20Sopenharmony_ci				continue;
31128c2ecf20Sopenharmony_ci			}
31138c2ecf20Sopenharmony_ci		}
31148c2ecf20Sopenharmony_ci		unlock_buffer(bh);
31158c2ecf20Sopenharmony_ci	}
31168c2ecf20Sopenharmony_ci}
31178c2ecf20Sopenharmony_ciEXPORT_SYMBOL(ll_rw_block);
31188c2ecf20Sopenharmony_ci
31198c2ecf20Sopenharmony_civoid write_dirty_buffer(struct buffer_head *bh, int op_flags)
31208c2ecf20Sopenharmony_ci{
31218c2ecf20Sopenharmony_ci	lock_buffer(bh);
31228c2ecf20Sopenharmony_ci	if (!test_clear_buffer_dirty(bh)) {
31238c2ecf20Sopenharmony_ci		unlock_buffer(bh);
31248c2ecf20Sopenharmony_ci		return;
31258c2ecf20Sopenharmony_ci	}
31268c2ecf20Sopenharmony_ci	bh->b_end_io = end_buffer_write_sync;
31278c2ecf20Sopenharmony_ci	get_bh(bh);
31288c2ecf20Sopenharmony_ci	submit_bh(REQ_OP_WRITE, op_flags, bh);
31298c2ecf20Sopenharmony_ci}
31308c2ecf20Sopenharmony_ciEXPORT_SYMBOL(write_dirty_buffer);
31318c2ecf20Sopenharmony_ci
31328c2ecf20Sopenharmony_ci/*
31338c2ecf20Sopenharmony_ci * For a data-integrity writeout, we need to wait upon any in-progress I/O
31348c2ecf20Sopenharmony_ci * and then start new I/O and then wait upon it.  The caller must have a ref on
31358c2ecf20Sopenharmony_ci * the buffer_head.
31368c2ecf20Sopenharmony_ci */
31378c2ecf20Sopenharmony_ciint __sync_dirty_buffer(struct buffer_head *bh, int op_flags)
31388c2ecf20Sopenharmony_ci{
31398c2ecf20Sopenharmony_ci	int ret = 0;
31408c2ecf20Sopenharmony_ci
31418c2ecf20Sopenharmony_ci	WARN_ON(atomic_read(&bh->b_count) < 1);
31428c2ecf20Sopenharmony_ci	lock_buffer(bh);
31438c2ecf20Sopenharmony_ci	if (test_clear_buffer_dirty(bh)) {
31448c2ecf20Sopenharmony_ci		/*
31458c2ecf20Sopenharmony_ci		 * The bh should be mapped, but it might not be if the
31468c2ecf20Sopenharmony_ci		 * device was hot-removed. Not much we can do but fail the I/O.
31478c2ecf20Sopenharmony_ci		 */
31488c2ecf20Sopenharmony_ci		if (!buffer_mapped(bh)) {
31498c2ecf20Sopenharmony_ci			unlock_buffer(bh);
31508c2ecf20Sopenharmony_ci			return -EIO;
31518c2ecf20Sopenharmony_ci		}
31528c2ecf20Sopenharmony_ci
31538c2ecf20Sopenharmony_ci		get_bh(bh);
31548c2ecf20Sopenharmony_ci		bh->b_end_io = end_buffer_write_sync;
31558c2ecf20Sopenharmony_ci		ret = submit_bh(REQ_OP_WRITE, op_flags, bh);
31568c2ecf20Sopenharmony_ci		wait_on_buffer(bh);
31578c2ecf20Sopenharmony_ci		if (!ret && !buffer_uptodate(bh))
31588c2ecf20Sopenharmony_ci			ret = -EIO;
31598c2ecf20Sopenharmony_ci	} else {
31608c2ecf20Sopenharmony_ci		unlock_buffer(bh);
31618c2ecf20Sopenharmony_ci	}
31628c2ecf20Sopenharmony_ci	return ret;
31638c2ecf20Sopenharmony_ci}
31648c2ecf20Sopenharmony_ciEXPORT_SYMBOL(__sync_dirty_buffer);
31658c2ecf20Sopenharmony_ci
/*
 * sync_dirty_buffer - write a dirty buffer synchronously with REQ_SYNC.
 * @bh: the buffer to write; caller must hold a reference
 *
 * Returns 0 on success or -EIO (see __sync_dirty_buffer()).
 */
int sync_dirty_buffer(struct buffer_head *bh)
{
	return __sync_dirty_buffer(bh, REQ_SYNC);
}
EXPORT_SYMBOL(sync_dirty_buffer);
31718c2ecf20Sopenharmony_ci
31728c2ecf20Sopenharmony_ci/*
31738c2ecf20Sopenharmony_ci * try_to_free_buffers() checks if all the buffers on this particular page
31748c2ecf20Sopenharmony_ci * are unused, and releases them if so.
31758c2ecf20Sopenharmony_ci *
31768c2ecf20Sopenharmony_ci * Exclusion against try_to_free_buffers may be obtained by either
31778c2ecf20Sopenharmony_ci * locking the page or by holding its mapping's private_lock.
31788c2ecf20Sopenharmony_ci *
31798c2ecf20Sopenharmony_ci * If the page is dirty but all the buffers are clean then we need to
31808c2ecf20Sopenharmony_ci * be sure to mark the page clean as well.  This is because the page
31818c2ecf20Sopenharmony_ci * may be against a block device, and a later reattachment of buffers
31828c2ecf20Sopenharmony_ci * to a dirty page will set *all* buffers dirty.  Which would corrupt
31838c2ecf20Sopenharmony_ci * filesystem data on the same device.
31848c2ecf20Sopenharmony_ci *
31858c2ecf20Sopenharmony_ci * The same applies to regular filesystem pages: if all the buffers are
31868c2ecf20Sopenharmony_ci * clean then we set the page clean and proceed.  To do that, we require
31878c2ecf20Sopenharmony_ci * total exclusion from __set_page_dirty_buffers().  That is obtained with
31888c2ecf20Sopenharmony_ci * private_lock.
31898c2ecf20Sopenharmony_ci *
31908c2ecf20Sopenharmony_ci * try_to_free_buffers() is non-blocking.
31918c2ecf20Sopenharmony_ci */
31928c2ecf20Sopenharmony_cistatic inline int buffer_busy(struct buffer_head *bh)
31938c2ecf20Sopenharmony_ci{
31948c2ecf20Sopenharmony_ci	return atomic_read(&bh->b_count) |
31958c2ecf20Sopenharmony_ci		(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
31968c2ecf20Sopenharmony_ci}
31978c2ecf20Sopenharmony_ci
31988c2ecf20Sopenharmony_cistatic int
31998c2ecf20Sopenharmony_cidrop_buffers(struct page *page, struct buffer_head **buffers_to_free)
32008c2ecf20Sopenharmony_ci{
32018c2ecf20Sopenharmony_ci	struct buffer_head *head = page_buffers(page);
32028c2ecf20Sopenharmony_ci	struct buffer_head *bh;
32038c2ecf20Sopenharmony_ci
32048c2ecf20Sopenharmony_ci	bh = head;
32058c2ecf20Sopenharmony_ci	do {
32068c2ecf20Sopenharmony_ci		if (buffer_busy(bh))
32078c2ecf20Sopenharmony_ci			goto failed;
32088c2ecf20Sopenharmony_ci		bh = bh->b_this_page;
32098c2ecf20Sopenharmony_ci	} while (bh != head);
32108c2ecf20Sopenharmony_ci
32118c2ecf20Sopenharmony_ci	do {
32128c2ecf20Sopenharmony_ci		struct buffer_head *next = bh->b_this_page;
32138c2ecf20Sopenharmony_ci
32148c2ecf20Sopenharmony_ci		if (bh->b_assoc_map)
32158c2ecf20Sopenharmony_ci			__remove_assoc_queue(bh);
32168c2ecf20Sopenharmony_ci		bh = next;
32178c2ecf20Sopenharmony_ci	} while (bh != head);
32188c2ecf20Sopenharmony_ci	*buffers_to_free = head;
32198c2ecf20Sopenharmony_ci	detach_page_private(page);
32208c2ecf20Sopenharmony_ci	return 1;
32218c2ecf20Sopenharmony_cifailed:
32228c2ecf20Sopenharmony_ci	return 0;
32238c2ecf20Sopenharmony_ci}
32248c2ecf20Sopenharmony_ci
/*
 * try_to_free_buffers - release a page's buffer heads if all are unused.
 * @page: locked page whose buffers should be dropped
 *
 * Returns 1 if the buffers were detached and freed, 0 otherwise.
 * See the block comment above for the locking rules.
 */
int try_to_free_buffers(struct page *page)
{
	struct address_space * const mapping = page->mapping;
	struct buffer_head *buffers_to_free = NULL;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	/* Buffers under writeback are in use by definition. */
	if (PageWriteback(page))
		return 0;

	if (mapping == NULL) {		/* can this still happen? */
		ret = drop_buffers(page, &buffers_to_free);
		goto out;
	}

	spin_lock(&mapping->private_lock);
	ret = drop_buffers(page, &buffers_to_free);

	/*
	 * If the filesystem writes its buffers by hand (eg ext3)
	 * then we can have clean buffers against a dirty page.  We
	 * clean the page here; otherwise the VM will never notice
	 * that the filesystem did any IO at all.
	 *
	 * Also, during truncate, discard_buffer will have marked all
	 * the page's buffers clean.  We discover that here and clean
	 * the page also.
	 *
	 * private_lock must be held over this entire operation in order
	 * to synchronise against __set_page_dirty_buffers and prevent the
	 * dirty bit from being lost.
	 */
	if (ret)
		cancel_dirty_page(page);
	spin_unlock(&mapping->private_lock);
out:
	/* Free the detached ring of buffer heads outside the lock. */
	if (buffers_to_free) {
		struct buffer_head *bh = buffers_to_free;

		do {
			struct buffer_head *next = bh->b_this_page;
			free_buffer_head(bh);
			bh = next;
		} while (bh != buffers_to_free);
	}
	return ret;
}
EXPORT_SYMBOL(try_to_free_buffers);
32738c2ecf20Sopenharmony_ci
32748c2ecf20Sopenharmony_ci/*
32758c2ecf20Sopenharmony_ci * There are no bdflush tunables left.  But distributions are
32768c2ecf20Sopenharmony_ci * still running obsolete flush daemons, so we terminate them here.
32778c2ecf20Sopenharmony_ci *
32788c2ecf20Sopenharmony_ci * Use of bdflush() is deprecated and will be removed in a future kernel.
32798c2ecf20Sopenharmony_ci * The `flush-X' kernel threads fully replace bdflush daemons and this call.
32808c2ecf20Sopenharmony_ci */
SYSCALL_DEFINE2(bdflush, int, func, long, data)
{
	/* Number of deprecation warnings already printed (capped at 5). */
	static int msg_count;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (msg_count < 5) {
		msg_count++;
		printk(KERN_INFO
			"warning: process `%s' used the obsolete bdflush"
			" system call\n", current->comm);
		printk(KERN_INFO "Fix your initscripts?\n");
	}

	/* func == 1: terminate the calling (obsolete) flush daemon. */
	if (func == 1)
		do_exit(0);
	return 0;
}
33008c2ecf20Sopenharmony_ci
/*
 * Buffer-head allocation
 */
static struct kmem_cache *bh_cachep __read_mostly;

/*
 * Once the number of bh's in the machine exceeds this level, we start
 * stripping them in writeback.
 */
static unsigned long max_buffer_heads;

/* Set by recalc_bh_state() when the live bh count exceeds max_buffer_heads. */
int buffer_heads_over_limit;

/* Per-cpu bh counters, periodically summed by recalc_bh_state(). */
struct bh_accounting {
	int nr;			/* Number of live bh's */
	int ratelimit;		/* Limit cacheline bouncing */
};

static DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0};
33208c2ecf20Sopenharmony_ci
33218c2ecf20Sopenharmony_cistatic void recalc_bh_state(void)
33228c2ecf20Sopenharmony_ci{
33238c2ecf20Sopenharmony_ci	int i;
33248c2ecf20Sopenharmony_ci	int tot = 0;
33258c2ecf20Sopenharmony_ci
33268c2ecf20Sopenharmony_ci	if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
33278c2ecf20Sopenharmony_ci		return;
33288c2ecf20Sopenharmony_ci	__this_cpu_write(bh_accounting.ratelimit, 0);
33298c2ecf20Sopenharmony_ci	for_each_online_cpu(i)
33308c2ecf20Sopenharmony_ci		tot += per_cpu(bh_accounting, i).nr;
33318c2ecf20Sopenharmony_ci	buffer_heads_over_limit = (tot > max_buffer_heads);
33328c2ecf20Sopenharmony_ci}
33338c2ecf20Sopenharmony_ci
33348c2ecf20Sopenharmony_cistruct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
33358c2ecf20Sopenharmony_ci{
33368c2ecf20Sopenharmony_ci	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
33378c2ecf20Sopenharmony_ci	if (ret) {
33388c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&ret->b_assoc_buffers);
33398c2ecf20Sopenharmony_ci		spin_lock_init(&ret->b_uptodate_lock);
33408c2ecf20Sopenharmony_ci		preempt_disable();
33418c2ecf20Sopenharmony_ci		__this_cpu_inc(bh_accounting.nr);
33428c2ecf20Sopenharmony_ci		recalc_bh_state();
33438c2ecf20Sopenharmony_ci		preempt_enable();
33448c2ecf20Sopenharmony_ci	}
33458c2ecf20Sopenharmony_ci	return ret;
33468c2ecf20Sopenharmony_ci}
33478c2ecf20Sopenharmony_ciEXPORT_SYMBOL(alloc_buffer_head);
33488c2ecf20Sopenharmony_ci
33498c2ecf20Sopenharmony_civoid free_buffer_head(struct buffer_head *bh)
33508c2ecf20Sopenharmony_ci{
33518c2ecf20Sopenharmony_ci	BUG_ON(!list_empty(&bh->b_assoc_buffers));
33528c2ecf20Sopenharmony_ci	kmem_cache_free(bh_cachep, bh);
33538c2ecf20Sopenharmony_ci	preempt_disable();
33548c2ecf20Sopenharmony_ci	__this_cpu_dec(bh_accounting.nr);
33558c2ecf20Sopenharmony_ci	recalc_bh_state();
33568c2ecf20Sopenharmony_ci	preempt_enable();
33578c2ecf20Sopenharmony_ci}
33588c2ecf20Sopenharmony_ciEXPORT_SYMBOL(free_buffer_head);
33598c2ecf20Sopenharmony_ci
33608c2ecf20Sopenharmony_cistatic int buffer_exit_cpu_dead(unsigned int cpu)
33618c2ecf20Sopenharmony_ci{
33628c2ecf20Sopenharmony_ci	int i;
33638c2ecf20Sopenharmony_ci	struct bh_lru *b = &per_cpu(bh_lrus, cpu);
33648c2ecf20Sopenharmony_ci
33658c2ecf20Sopenharmony_ci	for (i = 0; i < BH_LRU_SIZE; i++) {
33668c2ecf20Sopenharmony_ci		brelse(b->bhs[i]);
33678c2ecf20Sopenharmony_ci		b->bhs[i] = NULL;
33688c2ecf20Sopenharmony_ci	}
33698c2ecf20Sopenharmony_ci	this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
33708c2ecf20Sopenharmony_ci	per_cpu(bh_accounting, cpu).nr = 0;
33718c2ecf20Sopenharmony_ci	return 0;
33728c2ecf20Sopenharmony_ci}
33738c2ecf20Sopenharmony_ci
33748c2ecf20Sopenharmony_ci/**
33758c2ecf20Sopenharmony_ci * bh_uptodate_or_lock - Test whether the buffer is uptodate
33768c2ecf20Sopenharmony_ci * @bh: struct buffer_head
33778c2ecf20Sopenharmony_ci *
33788c2ecf20Sopenharmony_ci * Return true if the buffer is up-to-date and false,
33798c2ecf20Sopenharmony_ci * with the buffer locked, if not.
33808c2ecf20Sopenharmony_ci */
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	/* Fast path: nothing to do for an already-valid buffer. */
	if (buffer_uptodate(bh))
		return 1;

	lock_buffer(bh);
	/* Re-check: another thread may have brought it up to date. */
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 1;
	}
	/* Not up to date: return 0 with the buffer still locked. */
	return 0;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);
33928c2ecf20Sopenharmony_ci
33938c2ecf20Sopenharmony_ci/**
33948c2ecf20Sopenharmony_ci * bh_submit_read - Submit a locked buffer for reading
33958c2ecf20Sopenharmony_ci * @bh: struct buffer_head
33968c2ecf20Sopenharmony_ci *
33978c2ecf20Sopenharmony_ci * Returns zero on success and -EIO on error.
33988c2ecf20Sopenharmony_ci */
33998c2ecf20Sopenharmony_ciint bh_submit_read(struct buffer_head *bh)
34008c2ecf20Sopenharmony_ci{
34018c2ecf20Sopenharmony_ci	BUG_ON(!buffer_locked(bh));
34028c2ecf20Sopenharmony_ci
34038c2ecf20Sopenharmony_ci	if (buffer_uptodate(bh)) {
34048c2ecf20Sopenharmony_ci		unlock_buffer(bh);
34058c2ecf20Sopenharmony_ci		return 0;
34068c2ecf20Sopenharmony_ci	}
34078c2ecf20Sopenharmony_ci
34088c2ecf20Sopenharmony_ci	get_bh(bh);
34098c2ecf20Sopenharmony_ci	bh->b_end_io = end_buffer_read_sync;
34108c2ecf20Sopenharmony_ci	submit_bh(REQ_OP_READ, 0, bh);
34118c2ecf20Sopenharmony_ci	wait_on_buffer(bh);
34128c2ecf20Sopenharmony_ci	if (buffer_uptodate(bh))
34138c2ecf20Sopenharmony_ci		return 0;
34148c2ecf20Sopenharmony_ci	return -EIO;
34158c2ecf20Sopenharmony_ci}
34168c2ecf20Sopenharmony_ciEXPORT_SYMBOL(bh_submit_read);
34178c2ecf20Sopenharmony_ci
34188c2ecf20Sopenharmony_civoid __init buffer_init(void)
34198c2ecf20Sopenharmony_ci{
34208c2ecf20Sopenharmony_ci	unsigned long nrpages;
34218c2ecf20Sopenharmony_ci	int ret;
34228c2ecf20Sopenharmony_ci
34238c2ecf20Sopenharmony_ci	bh_cachep = kmem_cache_create("buffer_head",
34248c2ecf20Sopenharmony_ci			sizeof(struct buffer_head), 0,
34258c2ecf20Sopenharmony_ci				(SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
34268c2ecf20Sopenharmony_ci				SLAB_MEM_SPREAD),
34278c2ecf20Sopenharmony_ci				NULL);
34288c2ecf20Sopenharmony_ci
34298c2ecf20Sopenharmony_ci	/*
34308c2ecf20Sopenharmony_ci	 * Limit the bh occupancy to 10% of ZONE_NORMAL
34318c2ecf20Sopenharmony_ci	 */
34328c2ecf20Sopenharmony_ci	nrpages = (nr_free_buffer_pages() * 10) / 100;
34338c2ecf20Sopenharmony_ci	max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head));
34348c2ecf20Sopenharmony_ci	ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead",
34358c2ecf20Sopenharmony_ci					NULL, buffer_exit_cpu_dead);
34368c2ecf20Sopenharmony_ci	WARN_ON(ret < 0);
34378c2ecf20Sopenharmony_ci}
3438