18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* -*- mode: c; c-basic-offset: 8; -*- 38c2ecf20Sopenharmony_ci * vim: noexpandtab sw=8 ts=8 sts=0: 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * io.c 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Buffer cache handling 88c2ecf20Sopenharmony_ci * 98c2ecf20Sopenharmony_ci * Copyright (C) 2002, 2004 Oracle. All rights reserved. 108c2ecf20Sopenharmony_ci */ 118c2ecf20Sopenharmony_ci 128c2ecf20Sopenharmony_ci#include <linux/fs.h> 138c2ecf20Sopenharmony_ci#include <linux/types.h> 148c2ecf20Sopenharmony_ci#include <linux/highmem.h> 158c2ecf20Sopenharmony_ci#include <linux/bio.h> 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_ci#include <cluster/masklog.h> 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci#include "ocfs2.h" 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_ci#include "alloc.h" 228c2ecf20Sopenharmony_ci#include "inode.h" 238c2ecf20Sopenharmony_ci#include "journal.h" 248c2ecf20Sopenharmony_ci#include "uptodate.h" 258c2ecf20Sopenharmony_ci#include "buffer_head_io.h" 268c2ecf20Sopenharmony_ci#include "ocfs2_trace.h" 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_ci/* 298c2ecf20Sopenharmony_ci * Bits on bh->b_state used by ocfs2. 308c2ecf20Sopenharmony_ci * 318c2ecf20Sopenharmony_ci * These MUST be after the JBD2 bits. Hence, we use BH_JBDPrivateStart. 328c2ecf20Sopenharmony_ci */ 338c2ecf20Sopenharmony_cienum ocfs2_state_bits { 348c2ecf20Sopenharmony_ci BH_NeedsValidate = BH_JBDPrivateStart, 358c2ecf20Sopenharmony_ci}; 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci/* Expand the magic b_state functions */ 388c2ecf20Sopenharmony_ciBUFFER_FNS(NeedsValidate, needs_validate); 398c2ecf20Sopenharmony_ci 408c2ecf20Sopenharmony_ciint ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, 418c2ecf20Sopenharmony_ci struct ocfs2_caching_info *ci) 428c2ecf20Sopenharmony_ci{ 438c2ecf20Sopenharmony_ci int ret = 0; 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci trace_ocfs2_write_block((unsigned long long)bh->b_blocknr, ci); 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_ci BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO); 488c2ecf20Sopenharmony_ci BUG_ON(buffer_jbd(bh)); 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci /* No need to check for a soft readonly file system here. non 518c2ecf20Sopenharmony_ci * journalled writes are only ever done on system files which 528c2ecf20Sopenharmony_ci * can get modified during recovery even if read-only. */ 538c2ecf20Sopenharmony_ci if (ocfs2_is_hard_readonly(osb)) { 548c2ecf20Sopenharmony_ci ret = -EROFS; 558c2ecf20Sopenharmony_ci mlog_errno(ret); 568c2ecf20Sopenharmony_ci goto out; 578c2ecf20Sopenharmony_ci } 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_ci ocfs2_metadata_cache_io_lock(ci); 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci lock_buffer(bh); 628c2ecf20Sopenharmony_ci set_buffer_uptodate(bh); 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci /* remove from dirty list before I/O. */ 658c2ecf20Sopenharmony_ci clear_buffer_dirty(bh); 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ci get_bh(bh); /* for end_buffer_write_sync() */ 688c2ecf20Sopenharmony_ci bh->b_end_io = end_buffer_write_sync; 698c2ecf20Sopenharmony_ci submit_bh(REQ_OP_WRITE, 0, bh); 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci wait_on_buffer(bh); 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_ci if (buffer_uptodate(bh)) { 748c2ecf20Sopenharmony_ci ocfs2_set_buffer_uptodate(ci, bh); 758c2ecf20Sopenharmony_ci } else { 768c2ecf20Sopenharmony_ci /* We don't need to remove the clustered uptodate 778c2ecf20Sopenharmony_ci * information for this bh as it's not marked locally 788c2ecf20Sopenharmony_ci * uptodate. */ 798c2ecf20Sopenharmony_ci ret = -EIO; 808c2ecf20Sopenharmony_ci mlog_errno(ret); 818c2ecf20Sopenharmony_ci } 828c2ecf20Sopenharmony_ci 838c2ecf20Sopenharmony_ci ocfs2_metadata_cache_io_unlock(ci); 848c2ecf20Sopenharmony_ciout: 858c2ecf20Sopenharmony_ci return ret; 868c2ecf20Sopenharmony_ci} 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_ci/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it 898c2ecf20Sopenharmony_ci * will be easier to handle read failure. 908c2ecf20Sopenharmony_ci */ 918c2ecf20Sopenharmony_ciint ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, 928c2ecf20Sopenharmony_ci unsigned int nr, struct buffer_head *bhs[]) 938c2ecf20Sopenharmony_ci{ 948c2ecf20Sopenharmony_ci int status = 0; 958c2ecf20Sopenharmony_ci unsigned int i; 968c2ecf20Sopenharmony_ci struct buffer_head *bh; 978c2ecf20Sopenharmony_ci int new_bh = 0; 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci trace_ocfs2_read_blocks_sync((unsigned long long)block, nr); 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci if (!nr) 1028c2ecf20Sopenharmony_ci goto bail; 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_ci /* Don't put buffer head and re-assign it to NULL if it is allocated 1058c2ecf20Sopenharmony_ci * outside since the caller can't be aware of this alternation! 1068c2ecf20Sopenharmony_ci */ 1078c2ecf20Sopenharmony_ci new_bh = (bhs[0] == NULL); 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci for (i = 0 ; i < nr ; i++) { 1108c2ecf20Sopenharmony_ci if (bhs[i] == NULL) { 1118c2ecf20Sopenharmony_ci bhs[i] = sb_getblk(osb->sb, block++); 1128c2ecf20Sopenharmony_ci if (bhs[i] == NULL) { 1138c2ecf20Sopenharmony_ci status = -ENOMEM; 1148c2ecf20Sopenharmony_ci mlog_errno(status); 1158c2ecf20Sopenharmony_ci break; 1168c2ecf20Sopenharmony_ci } 1178c2ecf20Sopenharmony_ci } 1188c2ecf20Sopenharmony_ci bh = bhs[i]; 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci if (buffer_jbd(bh)) { 1218c2ecf20Sopenharmony_ci trace_ocfs2_read_blocks_sync_jbd( 1228c2ecf20Sopenharmony_ci (unsigned long long)bh->b_blocknr); 1238c2ecf20Sopenharmony_ci continue; 1248c2ecf20Sopenharmony_ci } 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_ci if (buffer_dirty(bh)) { 1278c2ecf20Sopenharmony_ci /* This should probably be a BUG, or 1288c2ecf20Sopenharmony_ci * at least return an error. */ 1298c2ecf20Sopenharmony_ci mlog(ML_ERROR, 1308c2ecf20Sopenharmony_ci "trying to sync read a dirty " 1318c2ecf20Sopenharmony_ci "buffer! (blocknr = %llu), skipping\n", 1328c2ecf20Sopenharmony_ci (unsigned long long)bh->b_blocknr); 1338c2ecf20Sopenharmony_ci continue; 1348c2ecf20Sopenharmony_ci } 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_ci lock_buffer(bh); 1378c2ecf20Sopenharmony_ci if (buffer_jbd(bh)) { 1388c2ecf20Sopenharmony_ci#ifdef CATCH_BH_JBD_RACES 1398c2ecf20Sopenharmony_ci mlog(ML_ERROR, 1408c2ecf20Sopenharmony_ci "block %llu had the JBD bit set " 1418c2ecf20Sopenharmony_ci "while I was in lock_buffer!", 1428c2ecf20Sopenharmony_ci (unsigned long long)bh->b_blocknr); 1438c2ecf20Sopenharmony_ci BUG(); 1448c2ecf20Sopenharmony_ci#else 1458c2ecf20Sopenharmony_ci unlock_buffer(bh); 1468c2ecf20Sopenharmony_ci continue; 1478c2ecf20Sopenharmony_ci#endif 1488c2ecf20Sopenharmony_ci } 1498c2ecf20Sopenharmony_ci 1508c2ecf20Sopenharmony_ci get_bh(bh); /* for end_buffer_read_sync() */ 1518c2ecf20Sopenharmony_ci bh->b_end_io = end_buffer_read_sync; 1528c2ecf20Sopenharmony_ci submit_bh(REQ_OP_READ, 0, bh); 1538c2ecf20Sopenharmony_ci } 1548c2ecf20Sopenharmony_ci 1558c2ecf20Sopenharmony_ciread_failure: 1568c2ecf20Sopenharmony_ci for (i = nr; i > 0; i--) { 1578c2ecf20Sopenharmony_ci bh = bhs[i - 1]; 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci if (unlikely(status)) { 1608c2ecf20Sopenharmony_ci if (new_bh && bh) { 1618c2ecf20Sopenharmony_ci /* If middle bh fails, let previous bh 1628c2ecf20Sopenharmony_ci * finish its read and then put it to 1638c2ecf20Sopenharmony_ci * aovoid bh leak 1648c2ecf20Sopenharmony_ci */ 1658c2ecf20Sopenharmony_ci if (!buffer_jbd(bh)) 1668c2ecf20Sopenharmony_ci wait_on_buffer(bh); 1678c2ecf20Sopenharmony_ci put_bh(bh); 1688c2ecf20Sopenharmony_ci bhs[i - 1] = NULL; 1698c2ecf20Sopenharmony_ci } else if (bh && buffer_uptodate(bh)) { 1708c2ecf20Sopenharmony_ci clear_buffer_uptodate(bh); 1718c2ecf20Sopenharmony_ci } 1728c2ecf20Sopenharmony_ci continue; 1738c2ecf20Sopenharmony_ci } 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_ci /* No need to wait on the buffer if it's managed by JBD. */ 1768c2ecf20Sopenharmony_ci if (!buffer_jbd(bh)) 1778c2ecf20Sopenharmony_ci wait_on_buffer(bh); 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci if (!buffer_uptodate(bh)) { 1808c2ecf20Sopenharmony_ci /* Status won't be cleared from here on out, 1818c2ecf20Sopenharmony_ci * so we can safely record this and loop back 1828c2ecf20Sopenharmony_ci * to cleanup the other buffers. */ 1838c2ecf20Sopenharmony_ci status = -EIO; 1848c2ecf20Sopenharmony_ci goto read_failure; 1858c2ecf20Sopenharmony_ci } 1868c2ecf20Sopenharmony_ci } 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_cibail: 1898c2ecf20Sopenharmony_ci return status; 1908c2ecf20Sopenharmony_ci} 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_ci/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it 1938c2ecf20Sopenharmony_ci * will be easier to handle read failure. 1948c2ecf20Sopenharmony_ci */ 1958c2ecf20Sopenharmony_ciint ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, 1968c2ecf20Sopenharmony_ci struct buffer_head *bhs[], int flags, 1978c2ecf20Sopenharmony_ci int (*validate)(struct super_block *sb, 1988c2ecf20Sopenharmony_ci struct buffer_head *bh)) 1998c2ecf20Sopenharmony_ci{ 2008c2ecf20Sopenharmony_ci int status = 0; 2018c2ecf20Sopenharmony_ci int i, ignore_cache = 0; 2028c2ecf20Sopenharmony_ci struct buffer_head *bh; 2038c2ecf20Sopenharmony_ci struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 2048c2ecf20Sopenharmony_ci int new_bh = 0; 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_ci trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags); 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_ci BUG_ON(!ci); 2098c2ecf20Sopenharmony_ci BUG_ON((flags & OCFS2_BH_READAHEAD) && 2108c2ecf20Sopenharmony_ci (flags & OCFS2_BH_IGNORE_CACHE)); 2118c2ecf20Sopenharmony_ci 2128c2ecf20Sopenharmony_ci if (bhs == NULL) { 2138c2ecf20Sopenharmony_ci status = -EINVAL; 2148c2ecf20Sopenharmony_ci mlog_errno(status); 2158c2ecf20Sopenharmony_ci goto bail; 2168c2ecf20Sopenharmony_ci } 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci if (nr < 0) { 2198c2ecf20Sopenharmony_ci mlog(ML_ERROR, "asked to read %d blocks!\n", nr); 2208c2ecf20Sopenharmony_ci status = -EINVAL; 2218c2ecf20Sopenharmony_ci mlog_errno(status); 2228c2ecf20Sopenharmony_ci goto bail; 2238c2ecf20Sopenharmony_ci } 2248c2ecf20Sopenharmony_ci 2258c2ecf20Sopenharmony_ci if (nr == 0) { 2268c2ecf20Sopenharmony_ci status = 0; 2278c2ecf20Sopenharmony_ci goto bail; 2288c2ecf20Sopenharmony_ci } 2298c2ecf20Sopenharmony_ci 2308c2ecf20Sopenharmony_ci /* Don't put buffer head and re-assign it to NULL if it is allocated 2318c2ecf20Sopenharmony_ci * outside since the caller can't be aware of this alternation! 2328c2ecf20Sopenharmony_ci */ 2338c2ecf20Sopenharmony_ci new_bh = (bhs[0] == NULL); 2348c2ecf20Sopenharmony_ci 2358c2ecf20Sopenharmony_ci ocfs2_metadata_cache_io_lock(ci); 2368c2ecf20Sopenharmony_ci for (i = 0 ; i < nr ; i++) { 2378c2ecf20Sopenharmony_ci if (bhs[i] == NULL) { 2388c2ecf20Sopenharmony_ci bhs[i] = sb_getblk(sb, block++); 2398c2ecf20Sopenharmony_ci if (bhs[i] == NULL) { 2408c2ecf20Sopenharmony_ci ocfs2_metadata_cache_io_unlock(ci); 2418c2ecf20Sopenharmony_ci status = -ENOMEM; 2428c2ecf20Sopenharmony_ci mlog_errno(status); 2438c2ecf20Sopenharmony_ci /* Don't forget to put previous bh! */ 2448c2ecf20Sopenharmony_ci break; 2458c2ecf20Sopenharmony_ci } 2468c2ecf20Sopenharmony_ci } 2478c2ecf20Sopenharmony_ci bh = bhs[i]; 2488c2ecf20Sopenharmony_ci ignore_cache = (flags & OCFS2_BH_IGNORE_CACHE); 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_ci /* There are three read-ahead cases here which we need to 2518c2ecf20Sopenharmony_ci * be concerned with. All three assume a buffer has 2528c2ecf20Sopenharmony_ci * previously been submitted with OCFS2_BH_READAHEAD 2538c2ecf20Sopenharmony_ci * and it hasn't yet completed I/O. 2548c2ecf20Sopenharmony_ci * 2558c2ecf20Sopenharmony_ci * 1) The current request is sync to disk. This rarely 2568c2ecf20Sopenharmony_ci * happens these days, and never when performance 2578c2ecf20Sopenharmony_ci * matters - the code can just wait on the buffer 2588c2ecf20Sopenharmony_ci * lock and re-submit. 2598c2ecf20Sopenharmony_ci * 2608c2ecf20Sopenharmony_ci * 2) The current request is cached, but not 2618c2ecf20Sopenharmony_ci * readahead. ocfs2_buffer_uptodate() will return 2628c2ecf20Sopenharmony_ci * false anyway, so we'll wind up waiting on the 2638c2ecf20Sopenharmony_ci * buffer lock to do I/O. We re-check the request 2648c2ecf20Sopenharmony_ci * with after getting the lock to avoid a re-submit. 2658c2ecf20Sopenharmony_ci * 2668c2ecf20Sopenharmony_ci * 3) The current request is readahead (and so must 2678c2ecf20Sopenharmony_ci * also be a caching one). We short circuit if the 2688c2ecf20Sopenharmony_ci * buffer is locked (under I/O) and if it's in the 2698c2ecf20Sopenharmony_ci * uptodate cache. The re-check from #2 catches the 2708c2ecf20Sopenharmony_ci * case that the previous read-ahead completes just 2718c2ecf20Sopenharmony_ci * before our is-it-in-flight check. 2728c2ecf20Sopenharmony_ci */ 2738c2ecf20Sopenharmony_ci 2748c2ecf20Sopenharmony_ci if (!ignore_cache && !ocfs2_buffer_uptodate(ci, bh)) { 2758c2ecf20Sopenharmony_ci trace_ocfs2_read_blocks_from_disk( 2768c2ecf20Sopenharmony_ci (unsigned long long)bh->b_blocknr, 2778c2ecf20Sopenharmony_ci (unsigned long long)ocfs2_metadata_cache_owner(ci)); 2788c2ecf20Sopenharmony_ci /* We're using ignore_cache here to say 2798c2ecf20Sopenharmony_ci * "go to disk" */ 2808c2ecf20Sopenharmony_ci ignore_cache = 1; 2818c2ecf20Sopenharmony_ci } 2828c2ecf20Sopenharmony_ci 2838c2ecf20Sopenharmony_ci trace_ocfs2_read_blocks_bh((unsigned long long)bh->b_blocknr, 2848c2ecf20Sopenharmony_ci ignore_cache, buffer_jbd(bh), buffer_dirty(bh)); 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ci if (buffer_jbd(bh)) { 2878c2ecf20Sopenharmony_ci continue; 2888c2ecf20Sopenharmony_ci } 2898c2ecf20Sopenharmony_ci 2908c2ecf20Sopenharmony_ci if (ignore_cache) { 2918c2ecf20Sopenharmony_ci if (buffer_dirty(bh)) { 2928c2ecf20Sopenharmony_ci /* This should probably be a BUG, or 2938c2ecf20Sopenharmony_ci * at least return an error. */ 2948c2ecf20Sopenharmony_ci continue; 2958c2ecf20Sopenharmony_ci } 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_ci /* A read-ahead request was made - if the 2988c2ecf20Sopenharmony_ci * buffer is already under read-ahead from a 2998c2ecf20Sopenharmony_ci * previously submitted request than we are 3008c2ecf20Sopenharmony_ci * done here. */ 3018c2ecf20Sopenharmony_ci if ((flags & OCFS2_BH_READAHEAD) 3028c2ecf20Sopenharmony_ci && ocfs2_buffer_read_ahead(ci, bh)) 3038c2ecf20Sopenharmony_ci continue; 3048c2ecf20Sopenharmony_ci 3058c2ecf20Sopenharmony_ci lock_buffer(bh); 3068c2ecf20Sopenharmony_ci if (buffer_jbd(bh)) { 3078c2ecf20Sopenharmony_ci#ifdef CATCH_BH_JBD_RACES 3088c2ecf20Sopenharmony_ci mlog(ML_ERROR, "block %llu had the JBD bit set " 3098c2ecf20Sopenharmony_ci "while I was in lock_buffer!", 3108c2ecf20Sopenharmony_ci (unsigned long long)bh->b_blocknr); 3118c2ecf20Sopenharmony_ci BUG(); 3128c2ecf20Sopenharmony_ci#else 3138c2ecf20Sopenharmony_ci unlock_buffer(bh); 3148c2ecf20Sopenharmony_ci continue; 3158c2ecf20Sopenharmony_ci#endif 3168c2ecf20Sopenharmony_ci } 3178c2ecf20Sopenharmony_ci 3188c2ecf20Sopenharmony_ci /* Re-check ocfs2_buffer_uptodate() as a 3198c2ecf20Sopenharmony_ci * previously read-ahead buffer may have 3208c2ecf20Sopenharmony_ci * completed I/O while we were waiting for the 3218c2ecf20Sopenharmony_ci * buffer lock. */ 3228c2ecf20Sopenharmony_ci if (!(flags & OCFS2_BH_IGNORE_CACHE) 3238c2ecf20Sopenharmony_ci && !(flags & OCFS2_BH_READAHEAD) 3248c2ecf20Sopenharmony_ci && ocfs2_buffer_uptodate(ci, bh)) { 3258c2ecf20Sopenharmony_ci unlock_buffer(bh); 3268c2ecf20Sopenharmony_ci continue; 3278c2ecf20Sopenharmony_ci } 3288c2ecf20Sopenharmony_ci 3298c2ecf20Sopenharmony_ci get_bh(bh); /* for end_buffer_read_sync() */ 3308c2ecf20Sopenharmony_ci if (validate) 3318c2ecf20Sopenharmony_ci set_buffer_needs_validate(bh); 3328c2ecf20Sopenharmony_ci bh->b_end_io = end_buffer_read_sync; 3338c2ecf20Sopenharmony_ci submit_bh(REQ_OP_READ, 0, bh); 3348c2ecf20Sopenharmony_ci continue; 3358c2ecf20Sopenharmony_ci } 3368c2ecf20Sopenharmony_ci } 3378c2ecf20Sopenharmony_ci 3388c2ecf20Sopenharmony_ciread_failure: 3398c2ecf20Sopenharmony_ci for (i = (nr - 1); i >= 0; i--) { 3408c2ecf20Sopenharmony_ci bh = bhs[i]; 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ci if (!(flags & OCFS2_BH_READAHEAD)) { 3438c2ecf20Sopenharmony_ci if (unlikely(status)) { 3448c2ecf20Sopenharmony_ci /* Clear the buffers on error including those 3458c2ecf20Sopenharmony_ci * ever succeeded in reading 3468c2ecf20Sopenharmony_ci */ 3478c2ecf20Sopenharmony_ci if (new_bh && bh) { 3488c2ecf20Sopenharmony_ci /* If middle bh fails, let previous bh 3498c2ecf20Sopenharmony_ci * finish its read and then put it to 3508c2ecf20Sopenharmony_ci * aovoid bh leak 3518c2ecf20Sopenharmony_ci */ 3528c2ecf20Sopenharmony_ci if (!buffer_jbd(bh)) 3538c2ecf20Sopenharmony_ci wait_on_buffer(bh); 3548c2ecf20Sopenharmony_ci put_bh(bh); 3558c2ecf20Sopenharmony_ci bhs[i] = NULL; 3568c2ecf20Sopenharmony_ci } else if (bh && buffer_uptodate(bh)) { 3578c2ecf20Sopenharmony_ci clear_buffer_uptodate(bh); 3588c2ecf20Sopenharmony_ci } 3598c2ecf20Sopenharmony_ci continue; 3608c2ecf20Sopenharmony_ci } 3618c2ecf20Sopenharmony_ci /* We know this can't have changed as we hold the 3628c2ecf20Sopenharmony_ci * owner sem. Avoid doing any work on the bh if the 3638c2ecf20Sopenharmony_ci * journal has it. */ 3648c2ecf20Sopenharmony_ci if (!buffer_jbd(bh)) 3658c2ecf20Sopenharmony_ci wait_on_buffer(bh); 3668c2ecf20Sopenharmony_ci 3678c2ecf20Sopenharmony_ci if (!buffer_uptodate(bh)) { 3688c2ecf20Sopenharmony_ci /* Status won't be cleared from here on out, 3698c2ecf20Sopenharmony_ci * so we can safely record this and loop back 3708c2ecf20Sopenharmony_ci * to cleanup the other buffers. Don't need to 3718c2ecf20Sopenharmony_ci * remove the clustered uptodate information 3728c2ecf20Sopenharmony_ci * for this bh as it's not marked locally 3738c2ecf20Sopenharmony_ci * uptodate. */ 3748c2ecf20Sopenharmony_ci status = -EIO; 3758c2ecf20Sopenharmony_ci clear_buffer_needs_validate(bh); 3768c2ecf20Sopenharmony_ci goto read_failure; 3778c2ecf20Sopenharmony_ci } 3788c2ecf20Sopenharmony_ci 3798c2ecf20Sopenharmony_ci if (buffer_needs_validate(bh)) { 3808c2ecf20Sopenharmony_ci /* We never set NeedsValidate if the 3818c2ecf20Sopenharmony_ci * buffer was held by the journal, so 3828c2ecf20Sopenharmony_ci * that better not have changed */ 3838c2ecf20Sopenharmony_ci BUG_ON(buffer_jbd(bh)); 3848c2ecf20Sopenharmony_ci clear_buffer_needs_validate(bh); 3858c2ecf20Sopenharmony_ci status = validate(sb, bh); 3868c2ecf20Sopenharmony_ci if (status) 3878c2ecf20Sopenharmony_ci goto read_failure; 3888c2ecf20Sopenharmony_ci } 3898c2ecf20Sopenharmony_ci } 3908c2ecf20Sopenharmony_ci 3918c2ecf20Sopenharmony_ci /* Always set the buffer in the cache, even if it was 3928c2ecf20Sopenharmony_ci * a forced read, or read-ahead which hasn't yet 3938c2ecf20Sopenharmony_ci * completed. */ 3948c2ecf20Sopenharmony_ci ocfs2_set_buffer_uptodate(ci, bh); 3958c2ecf20Sopenharmony_ci } 3968c2ecf20Sopenharmony_ci ocfs2_metadata_cache_io_unlock(ci); 3978c2ecf20Sopenharmony_ci 3988c2ecf20Sopenharmony_ci trace_ocfs2_read_blocks_end((unsigned long long)block, nr, 3998c2ecf20Sopenharmony_ci flags, ignore_cache); 4008c2ecf20Sopenharmony_ci 4018c2ecf20Sopenharmony_cibail: 4028c2ecf20Sopenharmony_ci 4038c2ecf20Sopenharmony_ci return status; 4048c2ecf20Sopenharmony_ci} 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_ci/* Check whether the blkno is the super block or one of the backups. */ 4078c2ecf20Sopenharmony_cistatic void ocfs2_check_super_or_backup(struct super_block *sb, 4088c2ecf20Sopenharmony_ci sector_t blkno) 4098c2ecf20Sopenharmony_ci{ 4108c2ecf20Sopenharmony_ci int i; 4118c2ecf20Sopenharmony_ci u64 backup_blkno; 4128c2ecf20Sopenharmony_ci 4138c2ecf20Sopenharmony_ci if (blkno == OCFS2_SUPER_BLOCK_BLKNO) 4148c2ecf20Sopenharmony_ci return; 4158c2ecf20Sopenharmony_ci 4168c2ecf20Sopenharmony_ci for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) { 4178c2ecf20Sopenharmony_ci backup_blkno = ocfs2_backup_super_blkno(sb, i); 4188c2ecf20Sopenharmony_ci if (backup_blkno == blkno) 4198c2ecf20Sopenharmony_ci return; 4208c2ecf20Sopenharmony_ci } 4218c2ecf20Sopenharmony_ci 4228c2ecf20Sopenharmony_ci BUG(); 4238c2ecf20Sopenharmony_ci} 4248c2ecf20Sopenharmony_ci 4258c2ecf20Sopenharmony_ci/* 4268c2ecf20Sopenharmony_ci * Write super block and backups doesn't need to collaborate with journal, 4278c2ecf20Sopenharmony_ci * so we don't need to lock ip_io_mutex and ci doesn't need to bea passed 4288c2ecf20Sopenharmony_ci * into this function. 4298c2ecf20Sopenharmony_ci */ 4308c2ecf20Sopenharmony_ciint ocfs2_write_super_or_backup(struct ocfs2_super *osb, 4318c2ecf20Sopenharmony_ci struct buffer_head *bh) 4328c2ecf20Sopenharmony_ci{ 4338c2ecf20Sopenharmony_ci int ret = 0; 4348c2ecf20Sopenharmony_ci struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 4358c2ecf20Sopenharmony_ci 4368c2ecf20Sopenharmony_ci BUG_ON(buffer_jbd(bh)); 4378c2ecf20Sopenharmony_ci ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr); 4388c2ecf20Sopenharmony_ci 4398c2ecf20Sopenharmony_ci if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) { 4408c2ecf20Sopenharmony_ci ret = -EROFS; 4418c2ecf20Sopenharmony_ci mlog_errno(ret); 4428c2ecf20Sopenharmony_ci goto out; 4438c2ecf20Sopenharmony_ci } 4448c2ecf20Sopenharmony_ci 4458c2ecf20Sopenharmony_ci lock_buffer(bh); 4468c2ecf20Sopenharmony_ci set_buffer_uptodate(bh); 4478c2ecf20Sopenharmony_ci 4488c2ecf20Sopenharmony_ci /* remove from dirty list before I/O. */ 4498c2ecf20Sopenharmony_ci clear_buffer_dirty(bh); 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_ci get_bh(bh); /* for end_buffer_write_sync() */ 4528c2ecf20Sopenharmony_ci bh->b_end_io = end_buffer_write_sync; 4538c2ecf20Sopenharmony_ci ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check); 4548c2ecf20Sopenharmony_ci submit_bh(REQ_OP_WRITE, 0, bh); 4558c2ecf20Sopenharmony_ci 4568c2ecf20Sopenharmony_ci wait_on_buffer(bh); 4578c2ecf20Sopenharmony_ci 4588c2ecf20Sopenharmony_ci if (!buffer_uptodate(bh)) { 4598c2ecf20Sopenharmony_ci ret = -EIO; 4608c2ecf20Sopenharmony_ci mlog_errno(ret); 4618c2ecf20Sopenharmony_ci } 4628c2ecf20Sopenharmony_ci 4638c2ecf20Sopenharmony_ciout: 4648c2ecf20Sopenharmony_ci return ret; 4658c2ecf20Sopenharmony_ci} 466