// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * io.c
 *
 * Buffer cache handling
 *
 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
 */
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci#include <linux/fs.h>
138c2ecf20Sopenharmony_ci#include <linux/types.h>
148c2ecf20Sopenharmony_ci#include <linux/highmem.h>
158c2ecf20Sopenharmony_ci#include <linux/bio.h>
168c2ecf20Sopenharmony_ci
178c2ecf20Sopenharmony_ci#include <cluster/masklog.h>
188c2ecf20Sopenharmony_ci
198c2ecf20Sopenharmony_ci#include "ocfs2.h"
208c2ecf20Sopenharmony_ci
218c2ecf20Sopenharmony_ci#include "alloc.h"
228c2ecf20Sopenharmony_ci#include "inode.h"
238c2ecf20Sopenharmony_ci#include "journal.h"
248c2ecf20Sopenharmony_ci#include "uptodate.h"
258c2ecf20Sopenharmony_ci#include "buffer_head_io.h"
268c2ecf20Sopenharmony_ci#include "ocfs2_trace.h"
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_ci/*
298c2ecf20Sopenharmony_ci * Bits on bh->b_state used by ocfs2.
308c2ecf20Sopenharmony_ci *
318c2ecf20Sopenharmony_ci * These MUST be after the JBD2 bits.  Hence, we use BH_JBDPrivateStart.
328c2ecf20Sopenharmony_ci */
338c2ecf20Sopenharmony_cienum ocfs2_state_bits {
348c2ecf20Sopenharmony_ci	BH_NeedsValidate = BH_JBDPrivateStart,
358c2ecf20Sopenharmony_ci};
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_ci/* Expand the magic b_state functions */
388c2ecf20Sopenharmony_ciBUFFER_FNS(NeedsValidate, needs_validate);
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_ciint ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
418c2ecf20Sopenharmony_ci		      struct ocfs2_caching_info *ci)
428c2ecf20Sopenharmony_ci{
438c2ecf20Sopenharmony_ci	int ret = 0;
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci	trace_ocfs2_write_block((unsigned long long)bh->b_blocknr, ci);
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ci	BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO);
488c2ecf20Sopenharmony_ci	BUG_ON(buffer_jbd(bh));
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ci	/* No need to check for a soft readonly file system here. non
518c2ecf20Sopenharmony_ci	 * journalled writes are only ever done on system files which
528c2ecf20Sopenharmony_ci	 * can get modified during recovery even if read-only. */
538c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb)) {
548c2ecf20Sopenharmony_ci		ret = -EROFS;
558c2ecf20Sopenharmony_ci		mlog_errno(ret);
568c2ecf20Sopenharmony_ci		goto out;
578c2ecf20Sopenharmony_ci	}
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci	ocfs2_metadata_cache_io_lock(ci);
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_ci	lock_buffer(bh);
628c2ecf20Sopenharmony_ci	set_buffer_uptodate(bh);
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci	/* remove from dirty list before I/O. */
658c2ecf20Sopenharmony_ci	clear_buffer_dirty(bh);
668c2ecf20Sopenharmony_ci
678c2ecf20Sopenharmony_ci	get_bh(bh); /* for end_buffer_write_sync() */
688c2ecf20Sopenharmony_ci	bh->b_end_io = end_buffer_write_sync;
698c2ecf20Sopenharmony_ci	submit_bh(REQ_OP_WRITE, 0, bh);
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci	wait_on_buffer(bh);
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci	if (buffer_uptodate(bh)) {
748c2ecf20Sopenharmony_ci		ocfs2_set_buffer_uptodate(ci, bh);
758c2ecf20Sopenharmony_ci	} else {
768c2ecf20Sopenharmony_ci		/* We don't need to remove the clustered uptodate
778c2ecf20Sopenharmony_ci		 * information for this bh as it's not marked locally
788c2ecf20Sopenharmony_ci		 * uptodate. */
798c2ecf20Sopenharmony_ci		ret = -EIO;
808c2ecf20Sopenharmony_ci		mlog_errno(ret);
818c2ecf20Sopenharmony_ci	}
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_ci	ocfs2_metadata_cache_io_unlock(ci);
848c2ecf20Sopenharmony_ciout:
858c2ecf20Sopenharmony_ci	return ret;
868c2ecf20Sopenharmony_ci}
878c2ecf20Sopenharmony_ci
888c2ecf20Sopenharmony_ci/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it
898c2ecf20Sopenharmony_ci * will be easier to handle read failure.
908c2ecf20Sopenharmony_ci */
918c2ecf20Sopenharmony_ciint ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
928c2ecf20Sopenharmony_ci			   unsigned int nr, struct buffer_head *bhs[])
938c2ecf20Sopenharmony_ci{
948c2ecf20Sopenharmony_ci	int status = 0;
958c2ecf20Sopenharmony_ci	unsigned int i;
968c2ecf20Sopenharmony_ci	struct buffer_head *bh;
978c2ecf20Sopenharmony_ci	int new_bh = 0;
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ci	trace_ocfs2_read_blocks_sync((unsigned long long)block, nr);
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_ci	if (!nr)
1028c2ecf20Sopenharmony_ci		goto bail;
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_ci	/* Don't put buffer head and re-assign it to NULL if it is allocated
1058c2ecf20Sopenharmony_ci	 * outside since the caller can't be aware of this alternation!
1068c2ecf20Sopenharmony_ci	 */
1078c2ecf20Sopenharmony_ci	new_bh = (bhs[0] == NULL);
1088c2ecf20Sopenharmony_ci
1098c2ecf20Sopenharmony_ci	for (i = 0 ; i < nr ; i++) {
1108c2ecf20Sopenharmony_ci		if (bhs[i] == NULL) {
1118c2ecf20Sopenharmony_ci			bhs[i] = sb_getblk(osb->sb, block++);
1128c2ecf20Sopenharmony_ci			if (bhs[i] == NULL) {
1138c2ecf20Sopenharmony_ci				status = -ENOMEM;
1148c2ecf20Sopenharmony_ci				mlog_errno(status);
1158c2ecf20Sopenharmony_ci				break;
1168c2ecf20Sopenharmony_ci			}
1178c2ecf20Sopenharmony_ci		}
1188c2ecf20Sopenharmony_ci		bh = bhs[i];
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci		if (buffer_jbd(bh)) {
1218c2ecf20Sopenharmony_ci			trace_ocfs2_read_blocks_sync_jbd(
1228c2ecf20Sopenharmony_ci					(unsigned long long)bh->b_blocknr);
1238c2ecf20Sopenharmony_ci			continue;
1248c2ecf20Sopenharmony_ci		}
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ci		if (buffer_dirty(bh)) {
1278c2ecf20Sopenharmony_ci			/* This should probably be a BUG, or
1288c2ecf20Sopenharmony_ci			 * at least return an error. */
1298c2ecf20Sopenharmony_ci			mlog(ML_ERROR,
1308c2ecf20Sopenharmony_ci			     "trying to sync read a dirty "
1318c2ecf20Sopenharmony_ci			     "buffer! (blocknr = %llu), skipping\n",
1328c2ecf20Sopenharmony_ci			     (unsigned long long)bh->b_blocknr);
1338c2ecf20Sopenharmony_ci			continue;
1348c2ecf20Sopenharmony_ci		}
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci		lock_buffer(bh);
1378c2ecf20Sopenharmony_ci		if (buffer_jbd(bh)) {
1388c2ecf20Sopenharmony_ci#ifdef CATCH_BH_JBD_RACES
1398c2ecf20Sopenharmony_ci			mlog(ML_ERROR,
1408c2ecf20Sopenharmony_ci			     "block %llu had the JBD bit set "
1418c2ecf20Sopenharmony_ci			     "while I was in lock_buffer!",
1428c2ecf20Sopenharmony_ci			     (unsigned long long)bh->b_blocknr);
1438c2ecf20Sopenharmony_ci			BUG();
1448c2ecf20Sopenharmony_ci#else
1458c2ecf20Sopenharmony_ci			unlock_buffer(bh);
1468c2ecf20Sopenharmony_ci			continue;
1478c2ecf20Sopenharmony_ci#endif
1488c2ecf20Sopenharmony_ci		}
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci		get_bh(bh); /* for end_buffer_read_sync() */
1518c2ecf20Sopenharmony_ci		bh->b_end_io = end_buffer_read_sync;
1528c2ecf20Sopenharmony_ci		submit_bh(REQ_OP_READ, 0, bh);
1538c2ecf20Sopenharmony_ci	}
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ciread_failure:
1568c2ecf20Sopenharmony_ci	for (i = nr; i > 0; i--) {
1578c2ecf20Sopenharmony_ci		bh = bhs[i - 1];
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_ci		if (unlikely(status)) {
1608c2ecf20Sopenharmony_ci			if (new_bh && bh) {
1618c2ecf20Sopenharmony_ci				/* If middle bh fails, let previous bh
1628c2ecf20Sopenharmony_ci				 * finish its read and then put it to
1638c2ecf20Sopenharmony_ci				 * aovoid bh leak
1648c2ecf20Sopenharmony_ci				 */
1658c2ecf20Sopenharmony_ci				if (!buffer_jbd(bh))
1668c2ecf20Sopenharmony_ci					wait_on_buffer(bh);
1678c2ecf20Sopenharmony_ci				put_bh(bh);
1688c2ecf20Sopenharmony_ci				bhs[i - 1] = NULL;
1698c2ecf20Sopenharmony_ci			} else if (bh && buffer_uptodate(bh)) {
1708c2ecf20Sopenharmony_ci				clear_buffer_uptodate(bh);
1718c2ecf20Sopenharmony_ci			}
1728c2ecf20Sopenharmony_ci			continue;
1738c2ecf20Sopenharmony_ci		}
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci		/* No need to wait on the buffer if it's managed by JBD. */
1768c2ecf20Sopenharmony_ci		if (!buffer_jbd(bh))
1778c2ecf20Sopenharmony_ci			wait_on_buffer(bh);
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci		if (!buffer_uptodate(bh)) {
1808c2ecf20Sopenharmony_ci			/* Status won't be cleared from here on out,
1818c2ecf20Sopenharmony_ci			 * so we can safely record this and loop back
1828c2ecf20Sopenharmony_ci			 * to cleanup the other buffers. */
1838c2ecf20Sopenharmony_ci			status = -EIO;
1848c2ecf20Sopenharmony_ci			goto read_failure;
1858c2ecf20Sopenharmony_ci		}
1868c2ecf20Sopenharmony_ci	}
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_cibail:
1898c2ecf20Sopenharmony_ci	return status;
1908c2ecf20Sopenharmony_ci}
1918c2ecf20Sopenharmony_ci
1928c2ecf20Sopenharmony_ci/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it
1938c2ecf20Sopenharmony_ci * will be easier to handle read failure.
1948c2ecf20Sopenharmony_ci */
1958c2ecf20Sopenharmony_ciint ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
1968c2ecf20Sopenharmony_ci		      struct buffer_head *bhs[], int flags,
1978c2ecf20Sopenharmony_ci		      int (*validate)(struct super_block *sb,
1988c2ecf20Sopenharmony_ci				      struct buffer_head *bh))
1998c2ecf20Sopenharmony_ci{
2008c2ecf20Sopenharmony_ci	int status = 0;
2018c2ecf20Sopenharmony_ci	int i, ignore_cache = 0;
2028c2ecf20Sopenharmony_ci	struct buffer_head *bh;
2038c2ecf20Sopenharmony_ci	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
2048c2ecf20Sopenharmony_ci	int new_bh = 0;
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci	trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags);
2078c2ecf20Sopenharmony_ci
2088c2ecf20Sopenharmony_ci	BUG_ON(!ci);
2098c2ecf20Sopenharmony_ci	BUG_ON((flags & OCFS2_BH_READAHEAD) &&
2108c2ecf20Sopenharmony_ci	       (flags & OCFS2_BH_IGNORE_CACHE));
2118c2ecf20Sopenharmony_ci
2128c2ecf20Sopenharmony_ci	if (bhs == NULL) {
2138c2ecf20Sopenharmony_ci		status = -EINVAL;
2148c2ecf20Sopenharmony_ci		mlog_errno(status);
2158c2ecf20Sopenharmony_ci		goto bail;
2168c2ecf20Sopenharmony_ci	}
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci	if (nr < 0) {
2198c2ecf20Sopenharmony_ci		mlog(ML_ERROR, "asked to read %d blocks!\n", nr);
2208c2ecf20Sopenharmony_ci		status = -EINVAL;
2218c2ecf20Sopenharmony_ci		mlog_errno(status);
2228c2ecf20Sopenharmony_ci		goto bail;
2238c2ecf20Sopenharmony_ci	}
2248c2ecf20Sopenharmony_ci
2258c2ecf20Sopenharmony_ci	if (nr == 0) {
2268c2ecf20Sopenharmony_ci		status = 0;
2278c2ecf20Sopenharmony_ci		goto bail;
2288c2ecf20Sopenharmony_ci	}
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_ci	/* Don't put buffer head and re-assign it to NULL if it is allocated
2318c2ecf20Sopenharmony_ci	 * outside since the caller can't be aware of this alternation!
2328c2ecf20Sopenharmony_ci	 */
2338c2ecf20Sopenharmony_ci	new_bh = (bhs[0] == NULL);
2348c2ecf20Sopenharmony_ci
2358c2ecf20Sopenharmony_ci	ocfs2_metadata_cache_io_lock(ci);
2368c2ecf20Sopenharmony_ci	for (i = 0 ; i < nr ; i++) {
2378c2ecf20Sopenharmony_ci		if (bhs[i] == NULL) {
2388c2ecf20Sopenharmony_ci			bhs[i] = sb_getblk(sb, block++);
2398c2ecf20Sopenharmony_ci			if (bhs[i] == NULL) {
2408c2ecf20Sopenharmony_ci				ocfs2_metadata_cache_io_unlock(ci);
2418c2ecf20Sopenharmony_ci				status = -ENOMEM;
2428c2ecf20Sopenharmony_ci				mlog_errno(status);
2438c2ecf20Sopenharmony_ci				/* Don't forget to put previous bh! */
2448c2ecf20Sopenharmony_ci				break;
2458c2ecf20Sopenharmony_ci			}
2468c2ecf20Sopenharmony_ci		}
2478c2ecf20Sopenharmony_ci		bh = bhs[i];
2488c2ecf20Sopenharmony_ci		ignore_cache = (flags & OCFS2_BH_IGNORE_CACHE);
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_ci		/* There are three read-ahead cases here which we need to
2518c2ecf20Sopenharmony_ci		 * be concerned with. All three assume a buffer has
2528c2ecf20Sopenharmony_ci		 * previously been submitted with OCFS2_BH_READAHEAD
2538c2ecf20Sopenharmony_ci		 * and it hasn't yet completed I/O.
2548c2ecf20Sopenharmony_ci		 *
2558c2ecf20Sopenharmony_ci		 * 1) The current request is sync to disk. This rarely
2568c2ecf20Sopenharmony_ci		 *    happens these days, and never when performance
2578c2ecf20Sopenharmony_ci		 *    matters - the code can just wait on the buffer
2588c2ecf20Sopenharmony_ci		 *    lock and re-submit.
2598c2ecf20Sopenharmony_ci		 *
2608c2ecf20Sopenharmony_ci		 * 2) The current request is cached, but not
2618c2ecf20Sopenharmony_ci		 *    readahead. ocfs2_buffer_uptodate() will return
2628c2ecf20Sopenharmony_ci		 *    false anyway, so we'll wind up waiting on the
2638c2ecf20Sopenharmony_ci		 *    buffer lock to do I/O. We re-check the request
2648c2ecf20Sopenharmony_ci		 *    with after getting the lock to avoid a re-submit.
2658c2ecf20Sopenharmony_ci		 *
2668c2ecf20Sopenharmony_ci		 * 3) The current request is readahead (and so must
2678c2ecf20Sopenharmony_ci		 *    also be a caching one). We short circuit if the
2688c2ecf20Sopenharmony_ci		 *    buffer is locked (under I/O) and if it's in the
2698c2ecf20Sopenharmony_ci		 *    uptodate cache. The re-check from #2 catches the
2708c2ecf20Sopenharmony_ci		 *    case that the previous read-ahead completes just
2718c2ecf20Sopenharmony_ci		 *    before our is-it-in-flight check.
2728c2ecf20Sopenharmony_ci		 */
2738c2ecf20Sopenharmony_ci
2748c2ecf20Sopenharmony_ci		if (!ignore_cache && !ocfs2_buffer_uptodate(ci, bh)) {
2758c2ecf20Sopenharmony_ci			trace_ocfs2_read_blocks_from_disk(
2768c2ecf20Sopenharmony_ci			     (unsigned long long)bh->b_blocknr,
2778c2ecf20Sopenharmony_ci			     (unsigned long long)ocfs2_metadata_cache_owner(ci));
2788c2ecf20Sopenharmony_ci			/* We're using ignore_cache here to say
2798c2ecf20Sopenharmony_ci			 * "go to disk" */
2808c2ecf20Sopenharmony_ci			ignore_cache = 1;
2818c2ecf20Sopenharmony_ci		}
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ci		trace_ocfs2_read_blocks_bh((unsigned long long)bh->b_blocknr,
2848c2ecf20Sopenharmony_ci			ignore_cache, buffer_jbd(bh), buffer_dirty(bh));
2858c2ecf20Sopenharmony_ci
2868c2ecf20Sopenharmony_ci		if (buffer_jbd(bh)) {
2878c2ecf20Sopenharmony_ci			continue;
2888c2ecf20Sopenharmony_ci		}
2898c2ecf20Sopenharmony_ci
2908c2ecf20Sopenharmony_ci		if (ignore_cache) {
2918c2ecf20Sopenharmony_ci			if (buffer_dirty(bh)) {
2928c2ecf20Sopenharmony_ci				/* This should probably be a BUG, or
2938c2ecf20Sopenharmony_ci				 * at least return an error. */
2948c2ecf20Sopenharmony_ci				continue;
2958c2ecf20Sopenharmony_ci			}
2968c2ecf20Sopenharmony_ci
2978c2ecf20Sopenharmony_ci			/* A read-ahead request was made - if the
2988c2ecf20Sopenharmony_ci			 * buffer is already under read-ahead from a
2998c2ecf20Sopenharmony_ci			 * previously submitted request than we are
3008c2ecf20Sopenharmony_ci			 * done here. */
3018c2ecf20Sopenharmony_ci			if ((flags & OCFS2_BH_READAHEAD)
3028c2ecf20Sopenharmony_ci			    && ocfs2_buffer_read_ahead(ci, bh))
3038c2ecf20Sopenharmony_ci				continue;
3048c2ecf20Sopenharmony_ci
3058c2ecf20Sopenharmony_ci			lock_buffer(bh);
3068c2ecf20Sopenharmony_ci			if (buffer_jbd(bh)) {
3078c2ecf20Sopenharmony_ci#ifdef CATCH_BH_JBD_RACES
3088c2ecf20Sopenharmony_ci				mlog(ML_ERROR, "block %llu had the JBD bit set "
3098c2ecf20Sopenharmony_ci					       "while I was in lock_buffer!",
3108c2ecf20Sopenharmony_ci				     (unsigned long long)bh->b_blocknr);
3118c2ecf20Sopenharmony_ci				BUG();
3128c2ecf20Sopenharmony_ci#else
3138c2ecf20Sopenharmony_ci				unlock_buffer(bh);
3148c2ecf20Sopenharmony_ci				continue;
3158c2ecf20Sopenharmony_ci#endif
3168c2ecf20Sopenharmony_ci			}
3178c2ecf20Sopenharmony_ci
3188c2ecf20Sopenharmony_ci			/* Re-check ocfs2_buffer_uptodate() as a
3198c2ecf20Sopenharmony_ci			 * previously read-ahead buffer may have
3208c2ecf20Sopenharmony_ci			 * completed I/O while we were waiting for the
3218c2ecf20Sopenharmony_ci			 * buffer lock. */
3228c2ecf20Sopenharmony_ci			if (!(flags & OCFS2_BH_IGNORE_CACHE)
3238c2ecf20Sopenharmony_ci			    && !(flags & OCFS2_BH_READAHEAD)
3248c2ecf20Sopenharmony_ci			    && ocfs2_buffer_uptodate(ci, bh)) {
3258c2ecf20Sopenharmony_ci				unlock_buffer(bh);
3268c2ecf20Sopenharmony_ci				continue;
3278c2ecf20Sopenharmony_ci			}
3288c2ecf20Sopenharmony_ci
3298c2ecf20Sopenharmony_ci			get_bh(bh); /* for end_buffer_read_sync() */
3308c2ecf20Sopenharmony_ci			if (validate)
3318c2ecf20Sopenharmony_ci				set_buffer_needs_validate(bh);
3328c2ecf20Sopenharmony_ci			bh->b_end_io = end_buffer_read_sync;
3338c2ecf20Sopenharmony_ci			submit_bh(REQ_OP_READ, 0, bh);
3348c2ecf20Sopenharmony_ci			continue;
3358c2ecf20Sopenharmony_ci		}
3368c2ecf20Sopenharmony_ci	}
3378c2ecf20Sopenharmony_ci
3388c2ecf20Sopenharmony_ciread_failure:
3398c2ecf20Sopenharmony_ci	for (i = (nr - 1); i >= 0; i--) {
3408c2ecf20Sopenharmony_ci		bh = bhs[i];
3418c2ecf20Sopenharmony_ci
3428c2ecf20Sopenharmony_ci		if (!(flags & OCFS2_BH_READAHEAD)) {
3438c2ecf20Sopenharmony_ci			if (unlikely(status)) {
3448c2ecf20Sopenharmony_ci				/* Clear the buffers on error including those
3458c2ecf20Sopenharmony_ci				 * ever succeeded in reading
3468c2ecf20Sopenharmony_ci				 */
3478c2ecf20Sopenharmony_ci				if (new_bh && bh) {
3488c2ecf20Sopenharmony_ci					/* If middle bh fails, let previous bh
3498c2ecf20Sopenharmony_ci					 * finish its read and then put it to
3508c2ecf20Sopenharmony_ci					 * aovoid bh leak
3518c2ecf20Sopenharmony_ci					 */
3528c2ecf20Sopenharmony_ci					if (!buffer_jbd(bh))
3538c2ecf20Sopenharmony_ci						wait_on_buffer(bh);
3548c2ecf20Sopenharmony_ci					put_bh(bh);
3558c2ecf20Sopenharmony_ci					bhs[i] = NULL;
3568c2ecf20Sopenharmony_ci				} else if (bh && buffer_uptodate(bh)) {
3578c2ecf20Sopenharmony_ci					clear_buffer_uptodate(bh);
3588c2ecf20Sopenharmony_ci				}
3598c2ecf20Sopenharmony_ci				continue;
3608c2ecf20Sopenharmony_ci			}
3618c2ecf20Sopenharmony_ci			/* We know this can't have changed as we hold the
3628c2ecf20Sopenharmony_ci			 * owner sem. Avoid doing any work on the bh if the
3638c2ecf20Sopenharmony_ci			 * journal has it. */
3648c2ecf20Sopenharmony_ci			if (!buffer_jbd(bh))
3658c2ecf20Sopenharmony_ci				wait_on_buffer(bh);
3668c2ecf20Sopenharmony_ci
3678c2ecf20Sopenharmony_ci			if (!buffer_uptodate(bh)) {
3688c2ecf20Sopenharmony_ci				/* Status won't be cleared from here on out,
3698c2ecf20Sopenharmony_ci				 * so we can safely record this and loop back
3708c2ecf20Sopenharmony_ci				 * to cleanup the other buffers. Don't need to
3718c2ecf20Sopenharmony_ci				 * remove the clustered uptodate information
3728c2ecf20Sopenharmony_ci				 * for this bh as it's not marked locally
3738c2ecf20Sopenharmony_ci				 * uptodate. */
3748c2ecf20Sopenharmony_ci				status = -EIO;
3758c2ecf20Sopenharmony_ci				clear_buffer_needs_validate(bh);
3768c2ecf20Sopenharmony_ci				goto read_failure;
3778c2ecf20Sopenharmony_ci			}
3788c2ecf20Sopenharmony_ci
3798c2ecf20Sopenharmony_ci			if (buffer_needs_validate(bh)) {
3808c2ecf20Sopenharmony_ci				/* We never set NeedsValidate if the
3818c2ecf20Sopenharmony_ci				 * buffer was held by the journal, so
3828c2ecf20Sopenharmony_ci				 * that better not have changed */
3838c2ecf20Sopenharmony_ci				BUG_ON(buffer_jbd(bh));
3848c2ecf20Sopenharmony_ci				clear_buffer_needs_validate(bh);
3858c2ecf20Sopenharmony_ci				status = validate(sb, bh);
3868c2ecf20Sopenharmony_ci				if (status)
3878c2ecf20Sopenharmony_ci					goto read_failure;
3888c2ecf20Sopenharmony_ci			}
3898c2ecf20Sopenharmony_ci		}
3908c2ecf20Sopenharmony_ci
3918c2ecf20Sopenharmony_ci		/* Always set the buffer in the cache, even if it was
3928c2ecf20Sopenharmony_ci		 * a forced read, or read-ahead which hasn't yet
3938c2ecf20Sopenharmony_ci		 * completed. */
3948c2ecf20Sopenharmony_ci		ocfs2_set_buffer_uptodate(ci, bh);
3958c2ecf20Sopenharmony_ci	}
3968c2ecf20Sopenharmony_ci	ocfs2_metadata_cache_io_unlock(ci);
3978c2ecf20Sopenharmony_ci
3988c2ecf20Sopenharmony_ci	trace_ocfs2_read_blocks_end((unsigned long long)block, nr,
3998c2ecf20Sopenharmony_ci				    flags, ignore_cache);
4008c2ecf20Sopenharmony_ci
4018c2ecf20Sopenharmony_cibail:
4028c2ecf20Sopenharmony_ci
4038c2ecf20Sopenharmony_ci	return status;
4048c2ecf20Sopenharmony_ci}
4058c2ecf20Sopenharmony_ci
4068c2ecf20Sopenharmony_ci/* Check whether the blkno is the super block or one of the backups. */
4078c2ecf20Sopenharmony_cistatic void ocfs2_check_super_or_backup(struct super_block *sb,
4088c2ecf20Sopenharmony_ci					sector_t blkno)
4098c2ecf20Sopenharmony_ci{
4108c2ecf20Sopenharmony_ci	int i;
4118c2ecf20Sopenharmony_ci	u64 backup_blkno;
4128c2ecf20Sopenharmony_ci
4138c2ecf20Sopenharmony_ci	if (blkno == OCFS2_SUPER_BLOCK_BLKNO)
4148c2ecf20Sopenharmony_ci		return;
4158c2ecf20Sopenharmony_ci
4168c2ecf20Sopenharmony_ci	for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
4178c2ecf20Sopenharmony_ci		backup_blkno = ocfs2_backup_super_blkno(sb, i);
4188c2ecf20Sopenharmony_ci		if (backup_blkno == blkno)
4198c2ecf20Sopenharmony_ci			return;
4208c2ecf20Sopenharmony_ci	}
4218c2ecf20Sopenharmony_ci
4228c2ecf20Sopenharmony_ci	BUG();
4238c2ecf20Sopenharmony_ci}
4248c2ecf20Sopenharmony_ci
4258c2ecf20Sopenharmony_ci/*
4268c2ecf20Sopenharmony_ci * Write super block and backups doesn't need to collaborate with journal,
4278c2ecf20Sopenharmony_ci * so we don't need to lock ip_io_mutex and ci doesn't need to bea passed
4288c2ecf20Sopenharmony_ci * into this function.
4298c2ecf20Sopenharmony_ci */
4308c2ecf20Sopenharmony_ciint ocfs2_write_super_or_backup(struct ocfs2_super *osb,
4318c2ecf20Sopenharmony_ci				struct buffer_head *bh)
4328c2ecf20Sopenharmony_ci{
4338c2ecf20Sopenharmony_ci	int ret = 0;
4348c2ecf20Sopenharmony_ci	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
4358c2ecf20Sopenharmony_ci
4368c2ecf20Sopenharmony_ci	BUG_ON(buffer_jbd(bh));
4378c2ecf20Sopenharmony_ci	ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr);
4388c2ecf20Sopenharmony_ci
4398c2ecf20Sopenharmony_ci	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) {
4408c2ecf20Sopenharmony_ci		ret = -EROFS;
4418c2ecf20Sopenharmony_ci		mlog_errno(ret);
4428c2ecf20Sopenharmony_ci		goto out;
4438c2ecf20Sopenharmony_ci	}
4448c2ecf20Sopenharmony_ci
4458c2ecf20Sopenharmony_ci	lock_buffer(bh);
4468c2ecf20Sopenharmony_ci	set_buffer_uptodate(bh);
4478c2ecf20Sopenharmony_ci
4488c2ecf20Sopenharmony_ci	/* remove from dirty list before I/O. */
4498c2ecf20Sopenharmony_ci	clear_buffer_dirty(bh);
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_ci	get_bh(bh); /* for end_buffer_write_sync() */
4528c2ecf20Sopenharmony_ci	bh->b_end_io = end_buffer_write_sync;
4538c2ecf20Sopenharmony_ci	ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check);
4548c2ecf20Sopenharmony_ci	submit_bh(REQ_OP_WRITE, 0, bh);
4558c2ecf20Sopenharmony_ci
4568c2ecf20Sopenharmony_ci	wait_on_buffer(bh);
4578c2ecf20Sopenharmony_ci
4588c2ecf20Sopenharmony_ci	if (!buffer_uptodate(bh)) {
4598c2ecf20Sopenharmony_ci		ret = -EIO;
4608c2ecf20Sopenharmony_ci		mlog_errno(ret);
4618c2ecf20Sopenharmony_ci	}
4628c2ecf20Sopenharmony_ci
4638c2ecf20Sopenharmony_ciout:
4648c2ecf20Sopenharmony_ci	return ret;
4658c2ecf20Sopenharmony_ci}
466