162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * linux/fs/buffer.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (C) 1991, 1992, 2002 Linus Torvalds 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci/* 962306a36Sopenharmony_ci * Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95 1062306a36Sopenharmony_ci * 1162306a36Sopenharmony_ci * Removed a lot of unnecessary code and simplified things now that 1262306a36Sopenharmony_ci * the buffer cache isn't our primary cache - Andrew Tridgell 12/96 1362306a36Sopenharmony_ci * 1462306a36Sopenharmony_ci * Speed up hash, lru, and free list operations. Use gfp() for allocating 1562306a36Sopenharmony_ci * hash table, use SLAB cache for buffer heads. SMP threading. -DaveM 1662306a36Sopenharmony_ci * 1762306a36Sopenharmony_ci * Added 32k buffer block sizes - these are required older ARM systems. - RMK 1862306a36Sopenharmony_ci * 1962306a36Sopenharmony_ci * async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de> 2062306a36Sopenharmony_ci */ 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_ci#include <linux/kernel.h> 2362306a36Sopenharmony_ci#include <linux/sched/signal.h> 2462306a36Sopenharmony_ci#include <linux/syscalls.h> 2562306a36Sopenharmony_ci#include <linux/fs.h> 2662306a36Sopenharmony_ci#include <linux/iomap.h> 2762306a36Sopenharmony_ci#include <linux/mm.h> 2862306a36Sopenharmony_ci#include <linux/percpu.h> 2962306a36Sopenharmony_ci#include <linux/slab.h> 3062306a36Sopenharmony_ci#include <linux/capability.h> 3162306a36Sopenharmony_ci#include <linux/blkdev.h> 3262306a36Sopenharmony_ci#include <linux/file.h> 3362306a36Sopenharmony_ci#include <linux/quotaops.h> 3462306a36Sopenharmony_ci#include <linux/highmem.h> 3562306a36Sopenharmony_ci#include <linux/export.h> 3662306a36Sopenharmony_ci#include <linux/backing-dev.h> 3762306a36Sopenharmony_ci#include <linux/writeback.h> 
3862306a36Sopenharmony_ci#include <linux/hash.h> 3962306a36Sopenharmony_ci#include <linux/suspend.h> 4062306a36Sopenharmony_ci#include <linux/buffer_head.h> 4162306a36Sopenharmony_ci#include <linux/task_io_accounting_ops.h> 4262306a36Sopenharmony_ci#include <linux/bio.h> 4362306a36Sopenharmony_ci#include <linux/cpu.h> 4462306a36Sopenharmony_ci#include <linux/bitops.h> 4562306a36Sopenharmony_ci#include <linux/mpage.h> 4662306a36Sopenharmony_ci#include <linux/bit_spinlock.h> 4762306a36Sopenharmony_ci#include <linux/pagevec.h> 4862306a36Sopenharmony_ci#include <linux/sched/mm.h> 4962306a36Sopenharmony_ci#include <trace/events/block.h> 5062306a36Sopenharmony_ci#include <linux/fscrypt.h> 5162306a36Sopenharmony_ci#include <linux/fsverity.h> 5262306a36Sopenharmony_ci#include <linux/sched/isolation.h> 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_ci#include "internal.h" 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_cistatic int fsync_buffers_list(spinlock_t *lock, struct list_head *list); 5762306a36Sopenharmony_cistatic void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, 5862306a36Sopenharmony_ci struct writeback_control *wbc); 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ciinline void touch_buffer(struct buffer_head *bh) 6362306a36Sopenharmony_ci{ 6462306a36Sopenharmony_ci trace_block_touch_buffer(bh); 6562306a36Sopenharmony_ci folio_mark_accessed(bh->b_folio); 6662306a36Sopenharmony_ci} 6762306a36Sopenharmony_ciEXPORT_SYMBOL(touch_buffer); 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_civoid __lock_buffer(struct buffer_head *bh) 7062306a36Sopenharmony_ci{ 7162306a36Sopenharmony_ci wait_on_bit_lock_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE); 7262306a36Sopenharmony_ci} 7362306a36Sopenharmony_ciEXPORT_SYMBOL(__lock_buffer); 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_civoid unlock_buffer(struct buffer_head *bh) 
7662306a36Sopenharmony_ci{ 7762306a36Sopenharmony_ci clear_bit_unlock(BH_Lock, &bh->b_state); 7862306a36Sopenharmony_ci smp_mb__after_atomic(); 7962306a36Sopenharmony_ci wake_up_bit(&bh->b_state, BH_Lock); 8062306a36Sopenharmony_ci} 8162306a36Sopenharmony_ciEXPORT_SYMBOL(unlock_buffer); 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci/* 8462306a36Sopenharmony_ci * Returns if the folio has dirty or writeback buffers. If all the buffers 8562306a36Sopenharmony_ci * are unlocked and clean then the folio_test_dirty information is stale. If 8662306a36Sopenharmony_ci * any of the buffers are locked, it is assumed they are locked for IO. 8762306a36Sopenharmony_ci */ 8862306a36Sopenharmony_civoid buffer_check_dirty_writeback(struct folio *folio, 8962306a36Sopenharmony_ci bool *dirty, bool *writeback) 9062306a36Sopenharmony_ci{ 9162306a36Sopenharmony_ci struct buffer_head *head, *bh; 9262306a36Sopenharmony_ci *dirty = false; 9362306a36Sopenharmony_ci *writeback = false; 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_ci BUG_ON(!folio_test_locked(folio)); 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci head = folio_buffers(folio); 9862306a36Sopenharmony_ci if (!head) 9962306a36Sopenharmony_ci return; 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci if (folio_test_writeback(folio)) 10262306a36Sopenharmony_ci *writeback = true; 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ci bh = head; 10562306a36Sopenharmony_ci do { 10662306a36Sopenharmony_ci if (buffer_locked(bh)) 10762306a36Sopenharmony_ci *writeback = true; 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci if (buffer_dirty(bh)) 11062306a36Sopenharmony_ci *dirty = true; 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci bh = bh->b_this_page; 11362306a36Sopenharmony_ci } while (bh != head); 11462306a36Sopenharmony_ci} 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci/* 11762306a36Sopenharmony_ci * Block until a buffer comes unlocked. 
This doesn't stop it 11862306a36Sopenharmony_ci * from becoming locked again - you have to lock it yourself 11962306a36Sopenharmony_ci * if you want to preserve its state. 12062306a36Sopenharmony_ci */ 12162306a36Sopenharmony_civoid __wait_on_buffer(struct buffer_head * bh) 12262306a36Sopenharmony_ci{ 12362306a36Sopenharmony_ci wait_on_bit_io(&bh->b_state, BH_Lock, TASK_UNINTERRUPTIBLE); 12462306a36Sopenharmony_ci} 12562306a36Sopenharmony_ciEXPORT_SYMBOL(__wait_on_buffer); 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_cistatic void buffer_io_error(struct buffer_head *bh, char *msg) 12862306a36Sopenharmony_ci{ 12962306a36Sopenharmony_ci if (!test_bit(BH_Quiet, &bh->b_state)) 13062306a36Sopenharmony_ci printk_ratelimited(KERN_ERR 13162306a36Sopenharmony_ci "Buffer I/O error on dev %pg, logical block %llu%s\n", 13262306a36Sopenharmony_ci bh->b_bdev, (unsigned long long)bh->b_blocknr, msg); 13362306a36Sopenharmony_ci} 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci/* 13662306a36Sopenharmony_ci * End-of-IO handler helper function which does not touch the bh after 13762306a36Sopenharmony_ci * unlocking it. 13862306a36Sopenharmony_ci * Note: unlock_buffer() sort-of does touch the bh after unlocking it, but 13962306a36Sopenharmony_ci * a race there is benign: unlock_buffer() only use the bh's address for 14062306a36Sopenharmony_ci * hashing after unlocking the buffer, so it doesn't actually touch the bh 14162306a36Sopenharmony_ci * itself. 14262306a36Sopenharmony_ci */ 14362306a36Sopenharmony_cistatic void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate) 14462306a36Sopenharmony_ci{ 14562306a36Sopenharmony_ci if (uptodate) { 14662306a36Sopenharmony_ci set_buffer_uptodate(bh); 14762306a36Sopenharmony_ci } else { 14862306a36Sopenharmony_ci /* This happens, due to failed read-ahead attempts. 
*/ 14962306a36Sopenharmony_ci clear_buffer_uptodate(bh); 15062306a36Sopenharmony_ci } 15162306a36Sopenharmony_ci unlock_buffer(bh); 15262306a36Sopenharmony_ci} 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci/* 15562306a36Sopenharmony_ci * Default synchronous end-of-IO handler.. Just mark it up-to-date and 15662306a36Sopenharmony_ci * unlock the buffer. 15762306a36Sopenharmony_ci */ 15862306a36Sopenharmony_civoid end_buffer_read_sync(struct buffer_head *bh, int uptodate) 15962306a36Sopenharmony_ci{ 16062306a36Sopenharmony_ci __end_buffer_read_notouch(bh, uptodate); 16162306a36Sopenharmony_ci put_bh(bh); 16262306a36Sopenharmony_ci} 16362306a36Sopenharmony_ciEXPORT_SYMBOL(end_buffer_read_sync); 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_civoid end_buffer_write_sync(struct buffer_head *bh, int uptodate) 16662306a36Sopenharmony_ci{ 16762306a36Sopenharmony_ci if (uptodate) { 16862306a36Sopenharmony_ci set_buffer_uptodate(bh); 16962306a36Sopenharmony_ci } else { 17062306a36Sopenharmony_ci buffer_io_error(bh, ", lost sync page write"); 17162306a36Sopenharmony_ci mark_buffer_write_io_error(bh); 17262306a36Sopenharmony_ci clear_buffer_uptodate(bh); 17362306a36Sopenharmony_ci } 17462306a36Sopenharmony_ci unlock_buffer(bh); 17562306a36Sopenharmony_ci put_bh(bh); 17662306a36Sopenharmony_ci} 17762306a36Sopenharmony_ciEXPORT_SYMBOL(end_buffer_write_sync); 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci/* 18062306a36Sopenharmony_ci * Various filesystems appear to want __find_get_block to be non-blocking. 18162306a36Sopenharmony_ci * But it's the page lock which protects the buffers. To get around this, 18262306a36Sopenharmony_ci * we get exclusion from try_to_free_buffers with the blockdev mapping's 18362306a36Sopenharmony_ci * private_lock. 18462306a36Sopenharmony_ci * 18562306a36Sopenharmony_ci * Hack idea: for the blockdev mapping, private_lock contention 18662306a36Sopenharmony_ci * may be quite high. 
This code could TryLock the page, and if that 18762306a36Sopenharmony_ci * succeeds, there is no need to take private_lock. 18862306a36Sopenharmony_ci */ 18962306a36Sopenharmony_cistatic struct buffer_head * 19062306a36Sopenharmony_ci__find_get_block_slow(struct block_device *bdev, sector_t block) 19162306a36Sopenharmony_ci{ 19262306a36Sopenharmony_ci struct inode *bd_inode = bdev->bd_inode; 19362306a36Sopenharmony_ci struct address_space *bd_mapping = bd_inode->i_mapping; 19462306a36Sopenharmony_ci struct buffer_head *ret = NULL; 19562306a36Sopenharmony_ci pgoff_t index; 19662306a36Sopenharmony_ci struct buffer_head *bh; 19762306a36Sopenharmony_ci struct buffer_head *head; 19862306a36Sopenharmony_ci struct folio *folio; 19962306a36Sopenharmony_ci int all_mapped = 1; 20062306a36Sopenharmony_ci static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1); 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_ci index = block >> (PAGE_SHIFT - bd_inode->i_blkbits); 20362306a36Sopenharmony_ci folio = __filemap_get_folio(bd_mapping, index, FGP_ACCESSED, 0); 20462306a36Sopenharmony_ci if (IS_ERR(folio)) 20562306a36Sopenharmony_ci goto out; 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_ci spin_lock(&bd_mapping->private_lock); 20862306a36Sopenharmony_ci head = folio_buffers(folio); 20962306a36Sopenharmony_ci if (!head) 21062306a36Sopenharmony_ci goto out_unlock; 21162306a36Sopenharmony_ci bh = head; 21262306a36Sopenharmony_ci do { 21362306a36Sopenharmony_ci if (!buffer_mapped(bh)) 21462306a36Sopenharmony_ci all_mapped = 0; 21562306a36Sopenharmony_ci else if (bh->b_blocknr == block) { 21662306a36Sopenharmony_ci ret = bh; 21762306a36Sopenharmony_ci get_bh(bh); 21862306a36Sopenharmony_ci goto out_unlock; 21962306a36Sopenharmony_ci } 22062306a36Sopenharmony_ci bh = bh->b_this_page; 22162306a36Sopenharmony_ci } while (bh != head); 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_ci /* we might be here because some of the buffers on this page are 22462306a36Sopenharmony_ci * not 
mapped. This is due to various races between 22562306a36Sopenharmony_ci * file io on the block device and getblk. It gets dealt with 22662306a36Sopenharmony_ci * elsewhere, don't buffer_error if we had some unmapped buffers 22762306a36Sopenharmony_ci */ 22862306a36Sopenharmony_ci ratelimit_set_flags(&last_warned, RATELIMIT_MSG_ON_RELEASE); 22962306a36Sopenharmony_ci if (all_mapped && __ratelimit(&last_warned)) { 23062306a36Sopenharmony_ci printk("__find_get_block_slow() failed. block=%llu, " 23162306a36Sopenharmony_ci "b_blocknr=%llu, b_state=0x%08lx, b_size=%zu, " 23262306a36Sopenharmony_ci "device %pg blocksize: %d\n", 23362306a36Sopenharmony_ci (unsigned long long)block, 23462306a36Sopenharmony_ci (unsigned long long)bh->b_blocknr, 23562306a36Sopenharmony_ci bh->b_state, bh->b_size, bdev, 23662306a36Sopenharmony_ci 1 << bd_inode->i_blkbits); 23762306a36Sopenharmony_ci } 23862306a36Sopenharmony_ciout_unlock: 23962306a36Sopenharmony_ci spin_unlock(&bd_mapping->private_lock); 24062306a36Sopenharmony_ci folio_put(folio); 24162306a36Sopenharmony_ciout: 24262306a36Sopenharmony_ci return ret; 24362306a36Sopenharmony_ci} 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_cistatic void end_buffer_async_read(struct buffer_head *bh, int uptodate) 24662306a36Sopenharmony_ci{ 24762306a36Sopenharmony_ci unsigned long flags; 24862306a36Sopenharmony_ci struct buffer_head *first; 24962306a36Sopenharmony_ci struct buffer_head *tmp; 25062306a36Sopenharmony_ci struct folio *folio; 25162306a36Sopenharmony_ci int folio_uptodate = 1; 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci BUG_ON(!buffer_async_read(bh)); 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_ci folio = bh->b_folio; 25662306a36Sopenharmony_ci if (uptodate) { 25762306a36Sopenharmony_ci set_buffer_uptodate(bh); 25862306a36Sopenharmony_ci } else { 25962306a36Sopenharmony_ci clear_buffer_uptodate(bh); 26062306a36Sopenharmony_ci buffer_io_error(bh, ", async page read"); 26162306a36Sopenharmony_ci 
folio_set_error(folio); 26262306a36Sopenharmony_ci } 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_ci /* 26562306a36Sopenharmony_ci * Be _very_ careful from here on. Bad things can happen if 26662306a36Sopenharmony_ci * two buffer heads end IO at almost the same time and both 26762306a36Sopenharmony_ci * decide that the page is now completely done. 26862306a36Sopenharmony_ci */ 26962306a36Sopenharmony_ci first = folio_buffers(folio); 27062306a36Sopenharmony_ci spin_lock_irqsave(&first->b_uptodate_lock, flags); 27162306a36Sopenharmony_ci clear_buffer_async_read(bh); 27262306a36Sopenharmony_ci unlock_buffer(bh); 27362306a36Sopenharmony_ci tmp = bh; 27462306a36Sopenharmony_ci do { 27562306a36Sopenharmony_ci if (!buffer_uptodate(tmp)) 27662306a36Sopenharmony_ci folio_uptodate = 0; 27762306a36Sopenharmony_ci if (buffer_async_read(tmp)) { 27862306a36Sopenharmony_ci BUG_ON(!buffer_locked(tmp)); 27962306a36Sopenharmony_ci goto still_busy; 28062306a36Sopenharmony_ci } 28162306a36Sopenharmony_ci tmp = tmp->b_this_page; 28262306a36Sopenharmony_ci } while (tmp != bh); 28362306a36Sopenharmony_ci spin_unlock_irqrestore(&first->b_uptodate_lock, flags); 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci /* 28662306a36Sopenharmony_ci * If all of the buffers are uptodate then we can set the page 28762306a36Sopenharmony_ci * uptodate. 
28862306a36Sopenharmony_ci */ 28962306a36Sopenharmony_ci if (folio_uptodate) 29062306a36Sopenharmony_ci folio_mark_uptodate(folio); 29162306a36Sopenharmony_ci folio_unlock(folio); 29262306a36Sopenharmony_ci return; 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_cistill_busy: 29562306a36Sopenharmony_ci spin_unlock_irqrestore(&first->b_uptodate_lock, flags); 29662306a36Sopenharmony_ci return; 29762306a36Sopenharmony_ci} 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_cistruct postprocess_bh_ctx { 30062306a36Sopenharmony_ci struct work_struct work; 30162306a36Sopenharmony_ci struct buffer_head *bh; 30262306a36Sopenharmony_ci}; 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_cistatic void verify_bh(struct work_struct *work) 30562306a36Sopenharmony_ci{ 30662306a36Sopenharmony_ci struct postprocess_bh_ctx *ctx = 30762306a36Sopenharmony_ci container_of(work, struct postprocess_bh_ctx, work); 30862306a36Sopenharmony_ci struct buffer_head *bh = ctx->bh; 30962306a36Sopenharmony_ci bool valid; 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci valid = fsverity_verify_blocks(bh->b_folio, bh->b_size, bh_offset(bh)); 31262306a36Sopenharmony_ci end_buffer_async_read(bh, valid); 31362306a36Sopenharmony_ci kfree(ctx); 31462306a36Sopenharmony_ci} 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_cistatic bool need_fsverity(struct buffer_head *bh) 31762306a36Sopenharmony_ci{ 31862306a36Sopenharmony_ci struct folio *folio = bh->b_folio; 31962306a36Sopenharmony_ci struct inode *inode = folio->mapping->host; 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci return fsverity_active(inode) && 32262306a36Sopenharmony_ci /* needed by ext4 */ 32362306a36Sopenharmony_ci folio->index < DIV_ROUND_UP(inode->i_size, PAGE_SIZE); 32462306a36Sopenharmony_ci} 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_cistatic void decrypt_bh(struct work_struct *work) 32762306a36Sopenharmony_ci{ 32862306a36Sopenharmony_ci struct postprocess_bh_ctx *ctx = 32962306a36Sopenharmony_ci 
container_of(work, struct postprocess_bh_ctx, work); 33062306a36Sopenharmony_ci struct buffer_head *bh = ctx->bh; 33162306a36Sopenharmony_ci int err; 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci err = fscrypt_decrypt_pagecache_blocks(bh->b_folio, bh->b_size, 33462306a36Sopenharmony_ci bh_offset(bh)); 33562306a36Sopenharmony_ci if (err == 0 && need_fsverity(bh)) { 33662306a36Sopenharmony_ci /* 33762306a36Sopenharmony_ci * We use different work queues for decryption and for verity 33862306a36Sopenharmony_ci * because verity may require reading metadata pages that need 33962306a36Sopenharmony_ci * decryption, and we shouldn't recurse to the same workqueue. 34062306a36Sopenharmony_ci */ 34162306a36Sopenharmony_ci INIT_WORK(&ctx->work, verify_bh); 34262306a36Sopenharmony_ci fsverity_enqueue_verify_work(&ctx->work); 34362306a36Sopenharmony_ci return; 34462306a36Sopenharmony_ci } 34562306a36Sopenharmony_ci end_buffer_async_read(bh, err == 0); 34662306a36Sopenharmony_ci kfree(ctx); 34762306a36Sopenharmony_ci} 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_ci/* 35062306a36Sopenharmony_ci * I/O completion handler for block_read_full_folio() - pages 35162306a36Sopenharmony_ci * which come unlocked at the end of I/O. 35262306a36Sopenharmony_ci */ 35362306a36Sopenharmony_cistatic void end_buffer_async_read_io(struct buffer_head *bh, int uptodate) 35462306a36Sopenharmony_ci{ 35562306a36Sopenharmony_ci struct inode *inode = bh->b_folio->mapping->host; 35662306a36Sopenharmony_ci bool decrypt = fscrypt_inode_uses_fs_layer_crypto(inode); 35762306a36Sopenharmony_ci bool verify = need_fsverity(bh); 35862306a36Sopenharmony_ci 35962306a36Sopenharmony_ci /* Decrypt (with fscrypt) and/or verify (with fsverity) if needed. 
*/ 36062306a36Sopenharmony_ci if (uptodate && (decrypt || verify)) { 36162306a36Sopenharmony_ci struct postprocess_bh_ctx *ctx = 36262306a36Sopenharmony_ci kmalloc(sizeof(*ctx), GFP_ATOMIC); 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ci if (ctx) { 36562306a36Sopenharmony_ci ctx->bh = bh; 36662306a36Sopenharmony_ci if (decrypt) { 36762306a36Sopenharmony_ci INIT_WORK(&ctx->work, decrypt_bh); 36862306a36Sopenharmony_ci fscrypt_enqueue_decrypt_work(&ctx->work); 36962306a36Sopenharmony_ci } else { 37062306a36Sopenharmony_ci INIT_WORK(&ctx->work, verify_bh); 37162306a36Sopenharmony_ci fsverity_enqueue_verify_work(&ctx->work); 37262306a36Sopenharmony_ci } 37362306a36Sopenharmony_ci return; 37462306a36Sopenharmony_ci } 37562306a36Sopenharmony_ci uptodate = 0; 37662306a36Sopenharmony_ci } 37762306a36Sopenharmony_ci end_buffer_async_read(bh, uptodate); 37862306a36Sopenharmony_ci} 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci/* 38162306a36Sopenharmony_ci * Completion handler for block_write_full_page() - pages which are unlocked 38262306a36Sopenharmony_ci * during I/O, and which have PageWriteback cleared upon I/O completion. 
38362306a36Sopenharmony_ci */ 38462306a36Sopenharmony_civoid end_buffer_async_write(struct buffer_head *bh, int uptodate) 38562306a36Sopenharmony_ci{ 38662306a36Sopenharmony_ci unsigned long flags; 38762306a36Sopenharmony_ci struct buffer_head *first; 38862306a36Sopenharmony_ci struct buffer_head *tmp; 38962306a36Sopenharmony_ci struct folio *folio; 39062306a36Sopenharmony_ci 39162306a36Sopenharmony_ci BUG_ON(!buffer_async_write(bh)); 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci folio = bh->b_folio; 39462306a36Sopenharmony_ci if (uptodate) { 39562306a36Sopenharmony_ci set_buffer_uptodate(bh); 39662306a36Sopenharmony_ci } else { 39762306a36Sopenharmony_ci buffer_io_error(bh, ", lost async page write"); 39862306a36Sopenharmony_ci mark_buffer_write_io_error(bh); 39962306a36Sopenharmony_ci clear_buffer_uptodate(bh); 40062306a36Sopenharmony_ci folio_set_error(folio); 40162306a36Sopenharmony_ci } 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci first = folio_buffers(folio); 40462306a36Sopenharmony_ci spin_lock_irqsave(&first->b_uptodate_lock, flags); 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_ci clear_buffer_async_write(bh); 40762306a36Sopenharmony_ci unlock_buffer(bh); 40862306a36Sopenharmony_ci tmp = bh->b_this_page; 40962306a36Sopenharmony_ci while (tmp != bh) { 41062306a36Sopenharmony_ci if (buffer_async_write(tmp)) { 41162306a36Sopenharmony_ci BUG_ON(!buffer_locked(tmp)); 41262306a36Sopenharmony_ci goto still_busy; 41362306a36Sopenharmony_ci } 41462306a36Sopenharmony_ci tmp = tmp->b_this_page; 41562306a36Sopenharmony_ci } 41662306a36Sopenharmony_ci spin_unlock_irqrestore(&first->b_uptodate_lock, flags); 41762306a36Sopenharmony_ci folio_end_writeback(folio); 41862306a36Sopenharmony_ci return; 41962306a36Sopenharmony_ci 42062306a36Sopenharmony_cistill_busy: 42162306a36Sopenharmony_ci spin_unlock_irqrestore(&first->b_uptodate_lock, flags); 42262306a36Sopenharmony_ci return; 42362306a36Sopenharmony_ci} 
42462306a36Sopenharmony_ciEXPORT_SYMBOL(end_buffer_async_write); 42562306a36Sopenharmony_ci 42662306a36Sopenharmony_ci/* 42762306a36Sopenharmony_ci * If a page's buffers are under async readin (end_buffer_async_read 42862306a36Sopenharmony_ci * completion) then there is a possibility that another thread of 42962306a36Sopenharmony_ci * control could lock one of the buffers after it has completed 43062306a36Sopenharmony_ci * but while some of the other buffers have not completed. This 43162306a36Sopenharmony_ci * locked buffer would confuse end_buffer_async_read() into not unlocking 43262306a36Sopenharmony_ci * the page. So the absence of BH_Async_Read tells end_buffer_async_read() 43362306a36Sopenharmony_ci * that this buffer is not under async I/O. 43462306a36Sopenharmony_ci * 43562306a36Sopenharmony_ci * The page comes unlocked when it has no locked buffer_async buffers 43662306a36Sopenharmony_ci * left. 43762306a36Sopenharmony_ci * 43862306a36Sopenharmony_ci * PageLocked prevents anyone starting new async I/O reads any of 43962306a36Sopenharmony_ci * the buffers. 44062306a36Sopenharmony_ci * 44162306a36Sopenharmony_ci * PageWriteback is used to prevent simultaneous writeout of the same 44262306a36Sopenharmony_ci * page. 44362306a36Sopenharmony_ci * 44462306a36Sopenharmony_ci * PageLocked prevents anyone from starting writeback of a page which is 44562306a36Sopenharmony_ci * under read I/O (PageWriteback is only ever set against a locked page). 
44662306a36Sopenharmony_ci */ 44762306a36Sopenharmony_cistatic void mark_buffer_async_read(struct buffer_head *bh) 44862306a36Sopenharmony_ci{ 44962306a36Sopenharmony_ci bh->b_end_io = end_buffer_async_read_io; 45062306a36Sopenharmony_ci set_buffer_async_read(bh); 45162306a36Sopenharmony_ci} 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_cistatic void mark_buffer_async_write_endio(struct buffer_head *bh, 45462306a36Sopenharmony_ci bh_end_io_t *handler) 45562306a36Sopenharmony_ci{ 45662306a36Sopenharmony_ci bh->b_end_io = handler; 45762306a36Sopenharmony_ci set_buffer_async_write(bh); 45862306a36Sopenharmony_ci} 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_civoid mark_buffer_async_write(struct buffer_head *bh) 46162306a36Sopenharmony_ci{ 46262306a36Sopenharmony_ci mark_buffer_async_write_endio(bh, end_buffer_async_write); 46362306a36Sopenharmony_ci} 46462306a36Sopenharmony_ciEXPORT_SYMBOL(mark_buffer_async_write); 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_ci/* 46862306a36Sopenharmony_ci * fs/buffer.c contains helper functions for buffer-backed address space's 46962306a36Sopenharmony_ci * fsync functions. A common requirement for buffer-based filesystems is 47062306a36Sopenharmony_ci * that certain data from the backing blockdev needs to be written out for 47162306a36Sopenharmony_ci * a successful fsync(). For example, ext2 indirect blocks need to be 47262306a36Sopenharmony_ci * written back and waited upon before fsync() returns. 47362306a36Sopenharmony_ci * 47462306a36Sopenharmony_ci * The functions mark_buffer_inode_dirty(), fsync_inode_buffers(), 47562306a36Sopenharmony_ci * inode_has_buffers() and invalidate_inode_buffers() are provided for the 47662306a36Sopenharmony_ci * management of a list of dependent buffers at ->i_mapping->private_list. 
 *
 * Locking is a little subtle: try_to_free_buffers() will remove buffers
 * from their controlling inode's queue when they are being freed. But
 * try_to_free_buffers() will be operating against the *blockdev* mapping
 * at the time, not against the S_ISREG file which depends on those buffers.
 * So the locking for private_list is via the private_lock in the address_space
 * which backs the buffers. Which is different from the address_space
 * against which the buffers are listed. So for a particular address_space,
 * mapping->private_lock does *not* protect mapping->private_list! In fact,
 * mapping->private_list will always be protected by the backing blockdev's
 * ->private_lock.
 *
 * Which introduces a requirement: all buffers on an address_space's
 * ->private_list must be from the same address_space: the blockdev's.
 *
 * address_spaces which do not place buffers at ->private_list via these
 * utility functions are free to use private_lock and private_list for
 * whatever they want. The only requirement is that list_empty(private_list)
 * be true at clear_inode() time.
 *
 * FIXME: clear_inode should not call invalidate_inode_buffers(). The
 * filesystems should do that. invalidate_inode_buffers() should just go
 * BUG_ON(!list_empty).
 *
 * FIXME: mark_buffer_dirty_inode() is a data-plane operation.
It should 50262306a36Sopenharmony_ci * take an address_space, not an inode. And it should be called 50362306a36Sopenharmony_ci * mark_buffer_dirty_fsync() to clearly define why those buffers are being 50462306a36Sopenharmony_ci * queued up. 50562306a36Sopenharmony_ci * 50662306a36Sopenharmony_ci * FIXME: mark_buffer_dirty_inode() doesn't need to add the buffer to the 50762306a36Sopenharmony_ci * list if it is already on a list. Because if the buffer is on a list, 50862306a36Sopenharmony_ci * it *must* already be on the right one. If not, the filesystem is being 50962306a36Sopenharmony_ci * silly. This will save a ton of locking. But first we have to ensure 51062306a36Sopenharmony_ci * that buffers are taken *off* the old inode's list when they are freed 51162306a36Sopenharmony_ci * (presumably in truncate). That requires careful auditing of all 51262306a36Sopenharmony_ci * filesystems (do it inside bforget()). It could also be done by bringing 51362306a36Sopenharmony_ci * b_inode back. 51462306a36Sopenharmony_ci */ 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci/* 51762306a36Sopenharmony_ci * The buffer's backing address_space's private_lock must be held 51862306a36Sopenharmony_ci */ 51962306a36Sopenharmony_cistatic void __remove_assoc_queue(struct buffer_head *bh) 52062306a36Sopenharmony_ci{ 52162306a36Sopenharmony_ci list_del_init(&bh->b_assoc_buffers); 52262306a36Sopenharmony_ci WARN_ON(!bh->b_assoc_map); 52362306a36Sopenharmony_ci bh->b_assoc_map = NULL; 52462306a36Sopenharmony_ci} 52562306a36Sopenharmony_ci 52662306a36Sopenharmony_ciint inode_has_buffers(struct inode *inode) 52762306a36Sopenharmony_ci{ 52862306a36Sopenharmony_ci return !list_empty(&inode->i_data.private_list); 52962306a36Sopenharmony_ci} 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci/* 53262306a36Sopenharmony_ci * osync is designed to support O_SYNC io. 
It waits synchronously for 53362306a36Sopenharmony_ci * all already-submitted IO to complete, but does not queue any new 53462306a36Sopenharmony_ci * writes to the disk. 53562306a36Sopenharmony_ci * 53662306a36Sopenharmony_ci * To do O_SYNC writes, just queue the buffer writes with write_dirty_buffer 53762306a36Sopenharmony_ci * as you dirty the buffers, and then use osync_inode_buffers to wait for 53862306a36Sopenharmony_ci * completion. Any other dirty buffers which are not yet queued for 53962306a36Sopenharmony_ci * write will not be flushed to disk by the osync. 54062306a36Sopenharmony_ci */ 54162306a36Sopenharmony_cistatic int osync_buffers_list(spinlock_t *lock, struct list_head *list) 54262306a36Sopenharmony_ci{ 54362306a36Sopenharmony_ci struct buffer_head *bh; 54462306a36Sopenharmony_ci struct list_head *p; 54562306a36Sopenharmony_ci int err = 0; 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci spin_lock(lock); 54862306a36Sopenharmony_cirepeat: 54962306a36Sopenharmony_ci list_for_each_prev(p, list) { 55062306a36Sopenharmony_ci bh = BH_ENTRY(p); 55162306a36Sopenharmony_ci if (buffer_locked(bh)) { 55262306a36Sopenharmony_ci get_bh(bh); 55362306a36Sopenharmony_ci spin_unlock(lock); 55462306a36Sopenharmony_ci wait_on_buffer(bh); 55562306a36Sopenharmony_ci if (!buffer_uptodate(bh)) 55662306a36Sopenharmony_ci err = -EIO; 55762306a36Sopenharmony_ci brelse(bh); 55862306a36Sopenharmony_ci spin_lock(lock); 55962306a36Sopenharmony_ci goto repeat; 56062306a36Sopenharmony_ci } 56162306a36Sopenharmony_ci } 56262306a36Sopenharmony_ci spin_unlock(lock); 56362306a36Sopenharmony_ci return err; 56462306a36Sopenharmony_ci} 56562306a36Sopenharmony_ci 56662306a36Sopenharmony_ci/** 56762306a36Sopenharmony_ci * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers 56862306a36Sopenharmony_ci * @mapping: the mapping which wants those buffers written 56962306a36Sopenharmony_ci * 57062306a36Sopenharmony_ci * Starts I/O against the buffers at 
mapping->private_list, and waits upon 57162306a36Sopenharmony_ci * that I/O. 57262306a36Sopenharmony_ci * 57362306a36Sopenharmony_ci * Basically, this is a convenience function for fsync(). 57462306a36Sopenharmony_ci * @mapping is a file or directory which needs those buffers to be written for 57562306a36Sopenharmony_ci * a successful fsync(). 57662306a36Sopenharmony_ci */ 57762306a36Sopenharmony_ciint sync_mapping_buffers(struct address_space *mapping) 57862306a36Sopenharmony_ci{ 57962306a36Sopenharmony_ci struct address_space *buffer_mapping = mapping->private_data; 58062306a36Sopenharmony_ci 58162306a36Sopenharmony_ci if (buffer_mapping == NULL || list_empty(&mapping->private_list)) 58262306a36Sopenharmony_ci return 0; 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci return fsync_buffers_list(&buffer_mapping->private_lock, 58562306a36Sopenharmony_ci &mapping->private_list); 58662306a36Sopenharmony_ci} 58762306a36Sopenharmony_ciEXPORT_SYMBOL(sync_mapping_buffers); 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci/** 59062306a36Sopenharmony_ci * generic_buffers_fsync_noflush - generic buffer fsync implementation 59162306a36Sopenharmony_ci * for simple filesystems with no inode lock 59262306a36Sopenharmony_ci * 59362306a36Sopenharmony_ci * @file: file to synchronize 59462306a36Sopenharmony_ci * @start: start offset in bytes 59562306a36Sopenharmony_ci * @end: end offset in bytes (inclusive) 59662306a36Sopenharmony_ci * @datasync: only synchronize essential metadata if true 59762306a36Sopenharmony_ci * 59862306a36Sopenharmony_ci * This is a generic implementation of the fsync method for simple 59962306a36Sopenharmony_ci * filesystems which track all non-inode metadata in the buffers list 60062306a36Sopenharmony_ci * hanging off the address_space structure. 
60162306a36Sopenharmony_ci */ 60262306a36Sopenharmony_ciint generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end, 60362306a36Sopenharmony_ci bool datasync) 60462306a36Sopenharmony_ci{ 60562306a36Sopenharmony_ci struct inode *inode = file->f_mapping->host; 60662306a36Sopenharmony_ci int err; 60762306a36Sopenharmony_ci int ret; 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_ci err = file_write_and_wait_range(file, start, end); 61062306a36Sopenharmony_ci if (err) 61162306a36Sopenharmony_ci return err; 61262306a36Sopenharmony_ci 61362306a36Sopenharmony_ci ret = sync_mapping_buffers(inode->i_mapping); 61462306a36Sopenharmony_ci if (!(inode->i_state & I_DIRTY_ALL)) 61562306a36Sopenharmony_ci goto out; 61662306a36Sopenharmony_ci if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 61762306a36Sopenharmony_ci goto out; 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_ci err = sync_inode_metadata(inode, 1); 62062306a36Sopenharmony_ci if (ret == 0) 62162306a36Sopenharmony_ci ret = err; 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ciout: 62462306a36Sopenharmony_ci /* check and advance again to catch errors after syncing out buffers */ 62562306a36Sopenharmony_ci err = file_check_and_advance_wb_err(file); 62662306a36Sopenharmony_ci if (ret == 0) 62762306a36Sopenharmony_ci ret = err; 62862306a36Sopenharmony_ci return ret; 62962306a36Sopenharmony_ci} 63062306a36Sopenharmony_ciEXPORT_SYMBOL(generic_buffers_fsync_noflush); 63162306a36Sopenharmony_ci 63262306a36Sopenharmony_ci/** 63362306a36Sopenharmony_ci * generic_buffers_fsync - generic buffer fsync implementation 63462306a36Sopenharmony_ci * for simple filesystems with no inode lock 63562306a36Sopenharmony_ci * 63662306a36Sopenharmony_ci * @file: file to synchronize 63762306a36Sopenharmony_ci * @start: start offset in bytes 63862306a36Sopenharmony_ci * @end: end offset in bytes (inclusive) 63962306a36Sopenharmony_ci * @datasync: only synchronize essential metadata if true 
64062306a36Sopenharmony_ci * 64162306a36Sopenharmony_ci * This is a generic implementation of the fsync method for simple 64262306a36Sopenharmony_ci * filesystems which track all non-inode metadata in the buffers list 64362306a36Sopenharmony_ci * hanging off the address_space structure. This also makes sure that 64462306a36Sopenharmony_ci * a device cache flush operation is called at the end. 64562306a36Sopenharmony_ci */ 64662306a36Sopenharmony_ciint generic_buffers_fsync(struct file *file, loff_t start, loff_t end, 64762306a36Sopenharmony_ci bool datasync) 64862306a36Sopenharmony_ci{ 64962306a36Sopenharmony_ci struct inode *inode = file->f_mapping->host; 65062306a36Sopenharmony_ci int ret; 65162306a36Sopenharmony_ci 65262306a36Sopenharmony_ci ret = generic_buffers_fsync_noflush(file, start, end, datasync); 65362306a36Sopenharmony_ci if (!ret) 65462306a36Sopenharmony_ci ret = blkdev_issue_flush(inode->i_sb->s_bdev); 65562306a36Sopenharmony_ci return ret; 65662306a36Sopenharmony_ci} 65762306a36Sopenharmony_ciEXPORT_SYMBOL(generic_buffers_fsync); 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci/* 66062306a36Sopenharmony_ci * Called when we've recently written block `bblock', and it is known that 66162306a36Sopenharmony_ci * `bblock' was for a buffer_boundary() buffer. This means that the block at 66262306a36Sopenharmony_ci * `bblock + 1' is probably a dirty indirect block. Hunt it down and, if it's 66362306a36Sopenharmony_ci * dirty, schedule it for IO. So that indirects merge nicely with their data. 
66462306a36Sopenharmony_ci */ 66562306a36Sopenharmony_civoid write_boundary_block(struct block_device *bdev, 66662306a36Sopenharmony_ci sector_t bblock, unsigned blocksize) 66762306a36Sopenharmony_ci{ 66862306a36Sopenharmony_ci struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize); 66962306a36Sopenharmony_ci if (bh) { 67062306a36Sopenharmony_ci if (buffer_dirty(bh)) 67162306a36Sopenharmony_ci write_dirty_buffer(bh, 0); 67262306a36Sopenharmony_ci put_bh(bh); 67362306a36Sopenharmony_ci } 67462306a36Sopenharmony_ci} 67562306a36Sopenharmony_ci 67662306a36Sopenharmony_civoid mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) 67762306a36Sopenharmony_ci{ 67862306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 67962306a36Sopenharmony_ci struct address_space *buffer_mapping = bh->b_folio->mapping; 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_ci mark_buffer_dirty(bh); 68262306a36Sopenharmony_ci if (!mapping->private_data) { 68362306a36Sopenharmony_ci mapping->private_data = buffer_mapping; 68462306a36Sopenharmony_ci } else { 68562306a36Sopenharmony_ci BUG_ON(mapping->private_data != buffer_mapping); 68662306a36Sopenharmony_ci } 68762306a36Sopenharmony_ci if (!bh->b_assoc_map) { 68862306a36Sopenharmony_ci spin_lock(&buffer_mapping->private_lock); 68962306a36Sopenharmony_ci list_move_tail(&bh->b_assoc_buffers, 69062306a36Sopenharmony_ci &mapping->private_list); 69162306a36Sopenharmony_ci bh->b_assoc_map = mapping; 69262306a36Sopenharmony_ci spin_unlock(&buffer_mapping->private_lock); 69362306a36Sopenharmony_ci } 69462306a36Sopenharmony_ci} 69562306a36Sopenharmony_ciEXPORT_SYMBOL(mark_buffer_dirty_inode); 69662306a36Sopenharmony_ci 69762306a36Sopenharmony_ci/* 69862306a36Sopenharmony_ci * Add a page to the dirty page list. 69962306a36Sopenharmony_ci * 70062306a36Sopenharmony_ci * It is a sad fact of life that this function is called from several places 70162306a36Sopenharmony_ci * deeply under spinlocking. 
It may not sleep.
 *
 * If the page has buffers, the uptodate buffers are set dirty, to preserve
 * dirty-state coherency between the page and the buffers.  If the page does
 * not have buffers then when they are later attached they will all be set
 * dirty.
 *
 * The buffers are dirtied before the page is dirtied.  There's a small race
 * window in which a writepage caller may see the page cleanness but not the
 * buffer dirtiness.  That's fine.  If this code were to set the page dirty
 * before the buffers, a concurrent writepage caller could clear the page dirty
 * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean
 * page on the dirty page list.
 *
 * We use private_lock to lock against try_to_free_buffers while using the
 * page's buffer list.  Also use this to protect against clean buffers being
 * added to the page after it was set dirty.
 *
 * FIXME: may need to call ->reservepage here as well.  That's rather up to the
 * address_space though.
72162306a36Sopenharmony_ci */ 72262306a36Sopenharmony_cibool block_dirty_folio(struct address_space *mapping, struct folio *folio) 72362306a36Sopenharmony_ci{ 72462306a36Sopenharmony_ci struct buffer_head *head; 72562306a36Sopenharmony_ci bool newly_dirty; 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_ci spin_lock(&mapping->private_lock); 72862306a36Sopenharmony_ci head = folio_buffers(folio); 72962306a36Sopenharmony_ci if (head) { 73062306a36Sopenharmony_ci struct buffer_head *bh = head; 73162306a36Sopenharmony_ci 73262306a36Sopenharmony_ci do { 73362306a36Sopenharmony_ci set_buffer_dirty(bh); 73462306a36Sopenharmony_ci bh = bh->b_this_page; 73562306a36Sopenharmony_ci } while (bh != head); 73662306a36Sopenharmony_ci } 73762306a36Sopenharmony_ci /* 73862306a36Sopenharmony_ci * Lock out page's memcg migration to keep PageDirty 73962306a36Sopenharmony_ci * synchronized with per-memcg dirty page counters. 74062306a36Sopenharmony_ci */ 74162306a36Sopenharmony_ci folio_memcg_lock(folio); 74262306a36Sopenharmony_ci newly_dirty = !folio_test_set_dirty(folio); 74362306a36Sopenharmony_ci spin_unlock(&mapping->private_lock); 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_ci if (newly_dirty) 74662306a36Sopenharmony_ci __folio_mark_dirty(folio, mapping, 1); 74762306a36Sopenharmony_ci 74862306a36Sopenharmony_ci folio_memcg_unlock(folio); 74962306a36Sopenharmony_ci 75062306a36Sopenharmony_ci if (newly_dirty) 75162306a36Sopenharmony_ci __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 75262306a36Sopenharmony_ci 75362306a36Sopenharmony_ci return newly_dirty; 75462306a36Sopenharmony_ci} 75562306a36Sopenharmony_ciEXPORT_SYMBOL(block_dirty_folio); 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_ci/* 75862306a36Sopenharmony_ci * Write out and wait upon a list of buffers. 
 *
 * We have conflicting pressures: we want to make sure that all
 * initially dirty buffers get waited on, but that any subsequently
 * dirtied buffers don't.  After all, we don't want fsync to last
 * forever if somebody is actively writing to the file.
 *
 * Do this in two main stages: first we copy dirty buffers to a
 * temporary inode list, queueing the writes as we go.  Then we clean
 * up, waiting for those writes to complete.
 *
 * During this second stage, any subsequent updates to the file may end
 * up refiling the buffer on the original inode's dirty list again, so
 * there is a chance we will end up with a buffer queued for write but
 * not yet completed on that list.  So, as a final cleanup we go through
 * the osync code to catch these locked, dirty buffers without requeuing
 * any newly dirty buffers for write.
77562306a36Sopenharmony_ci */ 77662306a36Sopenharmony_cistatic int fsync_buffers_list(spinlock_t *lock, struct list_head *list) 77762306a36Sopenharmony_ci{ 77862306a36Sopenharmony_ci struct buffer_head *bh; 77962306a36Sopenharmony_ci struct list_head tmp; 78062306a36Sopenharmony_ci struct address_space *mapping; 78162306a36Sopenharmony_ci int err = 0, err2; 78262306a36Sopenharmony_ci struct blk_plug plug; 78362306a36Sopenharmony_ci 78462306a36Sopenharmony_ci INIT_LIST_HEAD(&tmp); 78562306a36Sopenharmony_ci blk_start_plug(&plug); 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci spin_lock(lock); 78862306a36Sopenharmony_ci while (!list_empty(list)) { 78962306a36Sopenharmony_ci bh = BH_ENTRY(list->next); 79062306a36Sopenharmony_ci mapping = bh->b_assoc_map; 79162306a36Sopenharmony_ci __remove_assoc_queue(bh); 79262306a36Sopenharmony_ci /* Avoid race with mark_buffer_dirty_inode() which does 79362306a36Sopenharmony_ci * a lockless check and we rely on seeing the dirty bit */ 79462306a36Sopenharmony_ci smp_mb(); 79562306a36Sopenharmony_ci if (buffer_dirty(bh) || buffer_locked(bh)) { 79662306a36Sopenharmony_ci list_add(&bh->b_assoc_buffers, &tmp); 79762306a36Sopenharmony_ci bh->b_assoc_map = mapping; 79862306a36Sopenharmony_ci if (buffer_dirty(bh)) { 79962306a36Sopenharmony_ci get_bh(bh); 80062306a36Sopenharmony_ci spin_unlock(lock); 80162306a36Sopenharmony_ci /* 80262306a36Sopenharmony_ci * Ensure any pending I/O completes so that 80362306a36Sopenharmony_ci * write_dirty_buffer() actually writes the 80462306a36Sopenharmony_ci * current contents - it is a noop if I/O is 80562306a36Sopenharmony_ci * still in flight on potentially older 80662306a36Sopenharmony_ci * contents. 80762306a36Sopenharmony_ci */ 80862306a36Sopenharmony_ci write_dirty_buffer(bh, REQ_SYNC); 80962306a36Sopenharmony_ci 81062306a36Sopenharmony_ci /* 81162306a36Sopenharmony_ci * Kick off IO for the previous mapping. 
Note 81262306a36Sopenharmony_ci * that we will not run the very last mapping, 81362306a36Sopenharmony_ci * wait_on_buffer() will do that for us 81462306a36Sopenharmony_ci * through sync_buffer(). 81562306a36Sopenharmony_ci */ 81662306a36Sopenharmony_ci brelse(bh); 81762306a36Sopenharmony_ci spin_lock(lock); 81862306a36Sopenharmony_ci } 81962306a36Sopenharmony_ci } 82062306a36Sopenharmony_ci } 82162306a36Sopenharmony_ci 82262306a36Sopenharmony_ci spin_unlock(lock); 82362306a36Sopenharmony_ci blk_finish_plug(&plug); 82462306a36Sopenharmony_ci spin_lock(lock); 82562306a36Sopenharmony_ci 82662306a36Sopenharmony_ci while (!list_empty(&tmp)) { 82762306a36Sopenharmony_ci bh = BH_ENTRY(tmp.prev); 82862306a36Sopenharmony_ci get_bh(bh); 82962306a36Sopenharmony_ci mapping = bh->b_assoc_map; 83062306a36Sopenharmony_ci __remove_assoc_queue(bh); 83162306a36Sopenharmony_ci /* Avoid race with mark_buffer_dirty_inode() which does 83262306a36Sopenharmony_ci * a lockless check and we rely on seeing the dirty bit */ 83362306a36Sopenharmony_ci smp_mb(); 83462306a36Sopenharmony_ci if (buffer_dirty(bh)) { 83562306a36Sopenharmony_ci list_add(&bh->b_assoc_buffers, 83662306a36Sopenharmony_ci &mapping->private_list); 83762306a36Sopenharmony_ci bh->b_assoc_map = mapping; 83862306a36Sopenharmony_ci } 83962306a36Sopenharmony_ci spin_unlock(lock); 84062306a36Sopenharmony_ci wait_on_buffer(bh); 84162306a36Sopenharmony_ci if (!buffer_uptodate(bh)) 84262306a36Sopenharmony_ci err = -EIO; 84362306a36Sopenharmony_ci brelse(bh); 84462306a36Sopenharmony_ci spin_lock(lock); 84562306a36Sopenharmony_ci } 84662306a36Sopenharmony_ci 84762306a36Sopenharmony_ci spin_unlock(lock); 84862306a36Sopenharmony_ci err2 = osync_buffers_list(lock, list); 84962306a36Sopenharmony_ci if (err) 85062306a36Sopenharmony_ci return err; 85162306a36Sopenharmony_ci else 85262306a36Sopenharmony_ci return err2; 85362306a36Sopenharmony_ci} 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_ci/* 85662306a36Sopenharmony_ci * Invalidate 
any and all dirty buffers on a given inode. We are 85762306a36Sopenharmony_ci * probably unmounting the fs, but that doesn't mean we have already 85862306a36Sopenharmony_ci * done a sync(). Just drop the buffers from the inode list. 85962306a36Sopenharmony_ci * 86062306a36Sopenharmony_ci * NOTE: we take the inode's blockdev's mapping's private_lock. Which 86162306a36Sopenharmony_ci * assumes that all the buffers are against the blockdev. Not true 86262306a36Sopenharmony_ci * for reiserfs. 86362306a36Sopenharmony_ci */ 86462306a36Sopenharmony_civoid invalidate_inode_buffers(struct inode *inode) 86562306a36Sopenharmony_ci{ 86662306a36Sopenharmony_ci if (inode_has_buffers(inode)) { 86762306a36Sopenharmony_ci struct address_space *mapping = &inode->i_data; 86862306a36Sopenharmony_ci struct list_head *list = &mapping->private_list; 86962306a36Sopenharmony_ci struct address_space *buffer_mapping = mapping->private_data; 87062306a36Sopenharmony_ci 87162306a36Sopenharmony_ci spin_lock(&buffer_mapping->private_lock); 87262306a36Sopenharmony_ci while (!list_empty(list)) 87362306a36Sopenharmony_ci __remove_assoc_queue(BH_ENTRY(list->next)); 87462306a36Sopenharmony_ci spin_unlock(&buffer_mapping->private_lock); 87562306a36Sopenharmony_ci } 87662306a36Sopenharmony_ci} 87762306a36Sopenharmony_ciEXPORT_SYMBOL(invalidate_inode_buffers); 87862306a36Sopenharmony_ci 87962306a36Sopenharmony_ci/* 88062306a36Sopenharmony_ci * Remove any clean buffers from the inode's buffer list. This is called 88162306a36Sopenharmony_ci * when we're trying to free the inode itself. Those buffers can pin it. 88262306a36Sopenharmony_ci * 88362306a36Sopenharmony_ci * Returns true if all buffers were removed. 
88462306a36Sopenharmony_ci */ 88562306a36Sopenharmony_ciint remove_inode_buffers(struct inode *inode) 88662306a36Sopenharmony_ci{ 88762306a36Sopenharmony_ci int ret = 1; 88862306a36Sopenharmony_ci 88962306a36Sopenharmony_ci if (inode_has_buffers(inode)) { 89062306a36Sopenharmony_ci struct address_space *mapping = &inode->i_data; 89162306a36Sopenharmony_ci struct list_head *list = &mapping->private_list; 89262306a36Sopenharmony_ci struct address_space *buffer_mapping = mapping->private_data; 89362306a36Sopenharmony_ci 89462306a36Sopenharmony_ci spin_lock(&buffer_mapping->private_lock); 89562306a36Sopenharmony_ci while (!list_empty(list)) { 89662306a36Sopenharmony_ci struct buffer_head *bh = BH_ENTRY(list->next); 89762306a36Sopenharmony_ci if (buffer_dirty(bh)) { 89862306a36Sopenharmony_ci ret = 0; 89962306a36Sopenharmony_ci break; 90062306a36Sopenharmony_ci } 90162306a36Sopenharmony_ci __remove_assoc_queue(bh); 90262306a36Sopenharmony_ci } 90362306a36Sopenharmony_ci spin_unlock(&buffer_mapping->private_lock); 90462306a36Sopenharmony_ci } 90562306a36Sopenharmony_ci return ret; 90662306a36Sopenharmony_ci} 90762306a36Sopenharmony_ci 90862306a36Sopenharmony_ci/* 90962306a36Sopenharmony_ci * Create the appropriate buffers when given a folio for data area and 91062306a36Sopenharmony_ci * the size of each buffer.. Use the bh->b_this_page linked list to 91162306a36Sopenharmony_ci * follow the buffers created. Return NULL if unable to create more 91262306a36Sopenharmony_ci * buffers. 91362306a36Sopenharmony_ci * 91462306a36Sopenharmony_ci * The retry flag is used to differentiate async IO (paging, swapping) 91562306a36Sopenharmony_ci * which may not fail from ordinary buffer allocations. 
91662306a36Sopenharmony_ci */ 91762306a36Sopenharmony_cistruct buffer_head *folio_alloc_buffers(struct folio *folio, unsigned long size, 91862306a36Sopenharmony_ci bool retry) 91962306a36Sopenharmony_ci{ 92062306a36Sopenharmony_ci struct buffer_head *bh, *head; 92162306a36Sopenharmony_ci gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT; 92262306a36Sopenharmony_ci long offset; 92362306a36Sopenharmony_ci struct mem_cgroup *memcg, *old_memcg; 92462306a36Sopenharmony_ci 92562306a36Sopenharmony_ci if (retry) 92662306a36Sopenharmony_ci gfp |= __GFP_NOFAIL; 92762306a36Sopenharmony_ci 92862306a36Sopenharmony_ci /* The folio lock pins the memcg */ 92962306a36Sopenharmony_ci memcg = folio_memcg(folio); 93062306a36Sopenharmony_ci old_memcg = set_active_memcg(memcg); 93162306a36Sopenharmony_ci 93262306a36Sopenharmony_ci head = NULL; 93362306a36Sopenharmony_ci offset = folio_size(folio); 93462306a36Sopenharmony_ci while ((offset -= size) >= 0) { 93562306a36Sopenharmony_ci bh = alloc_buffer_head(gfp); 93662306a36Sopenharmony_ci if (!bh) 93762306a36Sopenharmony_ci goto no_grow; 93862306a36Sopenharmony_ci 93962306a36Sopenharmony_ci bh->b_this_page = head; 94062306a36Sopenharmony_ci bh->b_blocknr = -1; 94162306a36Sopenharmony_ci head = bh; 94262306a36Sopenharmony_ci 94362306a36Sopenharmony_ci bh->b_size = size; 94462306a36Sopenharmony_ci 94562306a36Sopenharmony_ci /* Link the buffer to its folio */ 94662306a36Sopenharmony_ci folio_set_bh(bh, folio, offset); 94762306a36Sopenharmony_ci } 94862306a36Sopenharmony_ciout: 94962306a36Sopenharmony_ci set_active_memcg(old_memcg); 95062306a36Sopenharmony_ci return head; 95162306a36Sopenharmony_ci/* 95262306a36Sopenharmony_ci * In case anything failed, we just free everything we got. 
95362306a36Sopenharmony_ci */ 95462306a36Sopenharmony_cino_grow: 95562306a36Sopenharmony_ci if (head) { 95662306a36Sopenharmony_ci do { 95762306a36Sopenharmony_ci bh = head; 95862306a36Sopenharmony_ci head = head->b_this_page; 95962306a36Sopenharmony_ci free_buffer_head(bh); 96062306a36Sopenharmony_ci } while (head); 96162306a36Sopenharmony_ci } 96262306a36Sopenharmony_ci 96362306a36Sopenharmony_ci goto out; 96462306a36Sopenharmony_ci} 96562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(folio_alloc_buffers); 96662306a36Sopenharmony_ci 96762306a36Sopenharmony_cistruct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, 96862306a36Sopenharmony_ci bool retry) 96962306a36Sopenharmony_ci{ 97062306a36Sopenharmony_ci return folio_alloc_buffers(page_folio(page), size, retry); 97162306a36Sopenharmony_ci} 97262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(alloc_page_buffers); 97362306a36Sopenharmony_ci 97462306a36Sopenharmony_cistatic inline void link_dev_buffers(struct folio *folio, 97562306a36Sopenharmony_ci struct buffer_head *head) 97662306a36Sopenharmony_ci{ 97762306a36Sopenharmony_ci struct buffer_head *bh, *tail; 97862306a36Sopenharmony_ci 97962306a36Sopenharmony_ci bh = head; 98062306a36Sopenharmony_ci do { 98162306a36Sopenharmony_ci tail = bh; 98262306a36Sopenharmony_ci bh = bh->b_this_page; 98362306a36Sopenharmony_ci } while (bh); 98462306a36Sopenharmony_ci tail->b_this_page = head; 98562306a36Sopenharmony_ci folio_attach_private(folio, head); 98662306a36Sopenharmony_ci} 98762306a36Sopenharmony_ci 98862306a36Sopenharmony_cistatic sector_t blkdev_max_block(struct block_device *bdev, unsigned int size) 98962306a36Sopenharmony_ci{ 99062306a36Sopenharmony_ci sector_t retval = ~((sector_t)0); 99162306a36Sopenharmony_ci loff_t sz = bdev_nr_bytes(bdev); 99262306a36Sopenharmony_ci 99362306a36Sopenharmony_ci if (sz) { 99462306a36Sopenharmony_ci unsigned int sizebits = blksize_bits(size); 99562306a36Sopenharmony_ci retval = (sz >> sizebits); 99662306a36Sopenharmony_ci } 
99762306a36Sopenharmony_ci return retval; 99862306a36Sopenharmony_ci} 99962306a36Sopenharmony_ci 100062306a36Sopenharmony_ci/* 100162306a36Sopenharmony_ci * Initialise the state of a blockdev folio's buffers. 100262306a36Sopenharmony_ci */ 100362306a36Sopenharmony_cistatic sector_t folio_init_buffers(struct folio *folio, 100462306a36Sopenharmony_ci struct block_device *bdev, sector_t block, int size) 100562306a36Sopenharmony_ci{ 100662306a36Sopenharmony_ci struct buffer_head *head = folio_buffers(folio); 100762306a36Sopenharmony_ci struct buffer_head *bh = head; 100862306a36Sopenharmony_ci bool uptodate = folio_test_uptodate(folio); 100962306a36Sopenharmony_ci sector_t end_block = blkdev_max_block(bdev, size); 101062306a36Sopenharmony_ci 101162306a36Sopenharmony_ci do { 101262306a36Sopenharmony_ci if (!buffer_mapped(bh)) { 101362306a36Sopenharmony_ci bh->b_end_io = NULL; 101462306a36Sopenharmony_ci bh->b_private = NULL; 101562306a36Sopenharmony_ci bh->b_bdev = bdev; 101662306a36Sopenharmony_ci bh->b_blocknr = block; 101762306a36Sopenharmony_ci if (uptodate) 101862306a36Sopenharmony_ci set_buffer_uptodate(bh); 101962306a36Sopenharmony_ci if (block < end_block) 102062306a36Sopenharmony_ci set_buffer_mapped(bh); 102162306a36Sopenharmony_ci } 102262306a36Sopenharmony_ci block++; 102362306a36Sopenharmony_ci bh = bh->b_this_page; 102462306a36Sopenharmony_ci } while (bh != head); 102562306a36Sopenharmony_ci 102662306a36Sopenharmony_ci /* 102762306a36Sopenharmony_ci * Caller needs to validate requested block against end of device. 102862306a36Sopenharmony_ci */ 102962306a36Sopenharmony_ci return end_block; 103062306a36Sopenharmony_ci} 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_ci/* 103362306a36Sopenharmony_ci * Create the page-cache page that contains the requested block. 103462306a36Sopenharmony_ci * 103562306a36Sopenharmony_ci * This is used purely for blockdev mappings. 
103662306a36Sopenharmony_ci */ 103762306a36Sopenharmony_cistatic int 103862306a36Sopenharmony_cigrow_dev_page(struct block_device *bdev, sector_t block, 103962306a36Sopenharmony_ci pgoff_t index, int size, int sizebits, gfp_t gfp) 104062306a36Sopenharmony_ci{ 104162306a36Sopenharmony_ci struct inode *inode = bdev->bd_inode; 104262306a36Sopenharmony_ci struct folio *folio; 104362306a36Sopenharmony_ci struct buffer_head *bh; 104462306a36Sopenharmony_ci sector_t end_block; 104562306a36Sopenharmony_ci int ret = 0; 104662306a36Sopenharmony_ci gfp_t gfp_mask; 104762306a36Sopenharmony_ci 104862306a36Sopenharmony_ci gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp; 104962306a36Sopenharmony_ci 105062306a36Sopenharmony_ci /* 105162306a36Sopenharmony_ci * XXX: __getblk_slow() can not really deal with failure and 105262306a36Sopenharmony_ci * will endlessly loop on improvised global reclaim. Prefer 105362306a36Sopenharmony_ci * looping in the allocator rather than here, at least that 105462306a36Sopenharmony_ci * code knows what it's doing. 
105562306a36Sopenharmony_ci */ 105662306a36Sopenharmony_ci gfp_mask |= __GFP_NOFAIL; 105762306a36Sopenharmony_ci 105862306a36Sopenharmony_ci folio = __filemap_get_folio(inode->i_mapping, index, 105962306a36Sopenharmony_ci FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp_mask); 106062306a36Sopenharmony_ci 106162306a36Sopenharmony_ci bh = folio_buffers(folio); 106262306a36Sopenharmony_ci if (bh) { 106362306a36Sopenharmony_ci if (bh->b_size == size) { 106462306a36Sopenharmony_ci end_block = folio_init_buffers(folio, bdev, 106562306a36Sopenharmony_ci (sector_t)index << sizebits, size); 106662306a36Sopenharmony_ci goto done; 106762306a36Sopenharmony_ci } 106862306a36Sopenharmony_ci if (!try_to_free_buffers(folio)) 106962306a36Sopenharmony_ci goto failed; 107062306a36Sopenharmony_ci } 107162306a36Sopenharmony_ci 107262306a36Sopenharmony_ci bh = folio_alloc_buffers(folio, size, true); 107362306a36Sopenharmony_ci 107462306a36Sopenharmony_ci /* 107562306a36Sopenharmony_ci * Link the folio to the buffers and initialise them. Take the 107662306a36Sopenharmony_ci * lock to be atomic wrt __find_get_block(), which does not 107762306a36Sopenharmony_ci * run under the folio lock. 107862306a36Sopenharmony_ci */ 107962306a36Sopenharmony_ci spin_lock(&inode->i_mapping->private_lock); 108062306a36Sopenharmony_ci link_dev_buffers(folio, bh); 108162306a36Sopenharmony_ci end_block = folio_init_buffers(folio, bdev, 108262306a36Sopenharmony_ci (sector_t)index << sizebits, size); 108362306a36Sopenharmony_ci spin_unlock(&inode->i_mapping->private_lock); 108462306a36Sopenharmony_cidone: 108562306a36Sopenharmony_ci ret = (block < end_block) ? 1 : -ENXIO; 108662306a36Sopenharmony_cifailed: 108762306a36Sopenharmony_ci folio_unlock(folio); 108862306a36Sopenharmony_ci folio_put(folio); 108962306a36Sopenharmony_ci return ret; 109062306a36Sopenharmony_ci} 109162306a36Sopenharmony_ci 109262306a36Sopenharmony_ci/* 109362306a36Sopenharmony_ci * Create buffers for the specified block device block's page. 
If 109462306a36Sopenharmony_ci * that page was dirty, the buffers are set dirty also. 109562306a36Sopenharmony_ci */ 109662306a36Sopenharmony_cistatic int 109762306a36Sopenharmony_cigrow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp) 109862306a36Sopenharmony_ci{ 109962306a36Sopenharmony_ci pgoff_t index; 110062306a36Sopenharmony_ci int sizebits; 110162306a36Sopenharmony_ci 110262306a36Sopenharmony_ci sizebits = PAGE_SHIFT - __ffs(size); 110362306a36Sopenharmony_ci index = block >> sizebits; 110462306a36Sopenharmony_ci 110562306a36Sopenharmony_ci /* 110662306a36Sopenharmony_ci * Check for a block which wants to lie outside our maximum possible 110762306a36Sopenharmony_ci * pagecache index. (this comparison is done using sector_t types). 110862306a36Sopenharmony_ci */ 110962306a36Sopenharmony_ci if (unlikely(index != block >> sizebits)) { 111062306a36Sopenharmony_ci printk(KERN_ERR "%s: requested out-of-range block %llu for " 111162306a36Sopenharmony_ci "device %pg\n", 111262306a36Sopenharmony_ci __func__, (unsigned long long)block, 111362306a36Sopenharmony_ci bdev); 111462306a36Sopenharmony_ci return -EIO; 111562306a36Sopenharmony_ci } 111662306a36Sopenharmony_ci 111762306a36Sopenharmony_ci /* Create a page with the proper size buffers.. 
*/ 111862306a36Sopenharmony_ci return grow_dev_page(bdev, block, index, size, sizebits, gfp); 111962306a36Sopenharmony_ci} 112062306a36Sopenharmony_ci 112162306a36Sopenharmony_cistatic struct buffer_head * 112262306a36Sopenharmony_ci__getblk_slow(struct block_device *bdev, sector_t block, 112362306a36Sopenharmony_ci unsigned size, gfp_t gfp) 112462306a36Sopenharmony_ci{ 112562306a36Sopenharmony_ci /* Size must be multiple of hard sectorsize */ 112662306a36Sopenharmony_ci if (unlikely(size & (bdev_logical_block_size(bdev)-1) || 112762306a36Sopenharmony_ci (size < 512 || size > PAGE_SIZE))) { 112862306a36Sopenharmony_ci printk(KERN_ERR "getblk(): invalid block size %d requested\n", 112962306a36Sopenharmony_ci size); 113062306a36Sopenharmony_ci printk(KERN_ERR "logical block size: %d\n", 113162306a36Sopenharmony_ci bdev_logical_block_size(bdev)); 113262306a36Sopenharmony_ci 113362306a36Sopenharmony_ci dump_stack(); 113462306a36Sopenharmony_ci return NULL; 113562306a36Sopenharmony_ci } 113662306a36Sopenharmony_ci 113762306a36Sopenharmony_ci for (;;) { 113862306a36Sopenharmony_ci struct buffer_head *bh; 113962306a36Sopenharmony_ci int ret; 114062306a36Sopenharmony_ci 114162306a36Sopenharmony_ci bh = __find_get_block(bdev, block, size); 114262306a36Sopenharmony_ci if (bh) 114362306a36Sopenharmony_ci return bh; 114462306a36Sopenharmony_ci 114562306a36Sopenharmony_ci ret = grow_buffers(bdev, block, size, gfp); 114662306a36Sopenharmony_ci if (ret < 0) 114762306a36Sopenharmony_ci return NULL; 114862306a36Sopenharmony_ci } 114962306a36Sopenharmony_ci} 115062306a36Sopenharmony_ci 115162306a36Sopenharmony_ci/* 115262306a36Sopenharmony_ci * The relationship between dirty buffers and dirty pages: 115362306a36Sopenharmony_ci * 115462306a36Sopenharmony_ci * Whenever a page has any dirty buffers, the page's dirty bit is set, and 115562306a36Sopenharmony_ci * the page is tagged dirty in the page cache. 
 *
 * At all times, the dirtiness of the buffers represents the dirtiness of
 * subsections of the page.  If the page has buffers, the page dirty bit is
 * merely a hint about the true dirty state.
 *
 * When a page is set dirty in its entirety, all its buffers are marked dirty
 * (if the page has buffers).
 *
 * When a buffer is marked dirty, its page is dirtied, but the page's other
 * buffers are not.
 *
 * Also.  When blockdev buffers are explicitly read with bread(), they
 * individually become uptodate.  But their backing page remains not
 * uptodate - even if all of its buffers are uptodate.  A subsequent
 * block_read_full_folio() against that folio will discover all the uptodate
 * buffers, will set the folio uptodate and will perform no I/O.
 */

/**
 * mark_buffer_dirty - mark a buffer_head as needing writeout
 * @bh: the buffer_head to mark dirty
 *
 * mark_buffer_dirty() will set the dirty bit against the buffer, then set
 * its backing page dirty, then tag the page as dirty in the page cache
 * and then attach the address_space's inode to its superblock's dirty
 * inode list.
 *
 * mark_buffer_dirty() is atomic.
It takes bh->b_folio->mapping->private_lock, 118462306a36Sopenharmony_ci * i_pages lock and mapping->host->i_lock. 118562306a36Sopenharmony_ci */ 118662306a36Sopenharmony_civoid mark_buffer_dirty(struct buffer_head *bh) 118762306a36Sopenharmony_ci{ 118862306a36Sopenharmony_ci WARN_ON_ONCE(!buffer_uptodate(bh)); 118962306a36Sopenharmony_ci 119062306a36Sopenharmony_ci trace_block_dirty_buffer(bh); 119162306a36Sopenharmony_ci 119262306a36Sopenharmony_ci /* 119362306a36Sopenharmony_ci * Very *carefully* optimize the it-is-already-dirty case. 119462306a36Sopenharmony_ci * 119562306a36Sopenharmony_ci * Don't let the final "is it dirty" escape to before we 119662306a36Sopenharmony_ci * perhaps modified the buffer. 119762306a36Sopenharmony_ci */ 119862306a36Sopenharmony_ci if (buffer_dirty(bh)) { 119962306a36Sopenharmony_ci smp_mb(); 120062306a36Sopenharmony_ci if (buffer_dirty(bh)) 120162306a36Sopenharmony_ci return; 120262306a36Sopenharmony_ci } 120362306a36Sopenharmony_ci 120462306a36Sopenharmony_ci if (!test_set_buffer_dirty(bh)) { 120562306a36Sopenharmony_ci struct folio *folio = bh->b_folio; 120662306a36Sopenharmony_ci struct address_space *mapping = NULL; 120762306a36Sopenharmony_ci 120862306a36Sopenharmony_ci folio_memcg_lock(folio); 120962306a36Sopenharmony_ci if (!folio_test_set_dirty(folio)) { 121062306a36Sopenharmony_ci mapping = folio->mapping; 121162306a36Sopenharmony_ci if (mapping) 121262306a36Sopenharmony_ci __folio_mark_dirty(folio, mapping, 0); 121362306a36Sopenharmony_ci } 121462306a36Sopenharmony_ci folio_memcg_unlock(folio); 121562306a36Sopenharmony_ci if (mapping) 121662306a36Sopenharmony_ci __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 121762306a36Sopenharmony_ci } 121862306a36Sopenharmony_ci} 121962306a36Sopenharmony_ciEXPORT_SYMBOL(mark_buffer_dirty); 122062306a36Sopenharmony_ci 122162306a36Sopenharmony_civoid mark_buffer_write_io_error(struct buffer_head *bh) 122262306a36Sopenharmony_ci{ 122362306a36Sopenharmony_ci 
set_buffer_write_io_error(bh); 122462306a36Sopenharmony_ci /* FIXME: do we need to set this in both places? */ 122562306a36Sopenharmony_ci if (bh->b_folio && bh->b_folio->mapping) 122662306a36Sopenharmony_ci mapping_set_error(bh->b_folio->mapping, -EIO); 122762306a36Sopenharmony_ci if (bh->b_assoc_map) { 122862306a36Sopenharmony_ci mapping_set_error(bh->b_assoc_map, -EIO); 122962306a36Sopenharmony_ci errseq_set(&bh->b_assoc_map->host->i_sb->s_wb_err, -EIO); 123062306a36Sopenharmony_ci } 123162306a36Sopenharmony_ci} 123262306a36Sopenharmony_ciEXPORT_SYMBOL(mark_buffer_write_io_error); 123362306a36Sopenharmony_ci 123462306a36Sopenharmony_ci/* 123562306a36Sopenharmony_ci * Decrement a buffer_head's reference count. If all buffers against a page 123662306a36Sopenharmony_ci * have zero reference count, are clean and unlocked, and if the page is clean 123762306a36Sopenharmony_ci * and unlocked then try_to_free_buffers() may strip the buffers from the page 123862306a36Sopenharmony_ci * in preparation for freeing it (sometimes, rarely, buffers are removed from 123962306a36Sopenharmony_ci * a page but it ends up not being freed, and buffers may later be reattached). 124062306a36Sopenharmony_ci */ 124162306a36Sopenharmony_civoid __brelse(struct buffer_head * buf) 124262306a36Sopenharmony_ci{ 124362306a36Sopenharmony_ci if (atomic_read(&buf->b_count)) { 124462306a36Sopenharmony_ci put_bh(buf); 124562306a36Sopenharmony_ci return; 124662306a36Sopenharmony_ci } 124762306a36Sopenharmony_ci WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n"); 124862306a36Sopenharmony_ci} 124962306a36Sopenharmony_ciEXPORT_SYMBOL(__brelse); 125062306a36Sopenharmony_ci 125162306a36Sopenharmony_ci/* 125262306a36Sopenharmony_ci * bforget() is like brelse(), except it discards any 125362306a36Sopenharmony_ci * potentially dirty data. 
125462306a36Sopenharmony_ci */ 125562306a36Sopenharmony_civoid __bforget(struct buffer_head *bh) 125662306a36Sopenharmony_ci{ 125762306a36Sopenharmony_ci clear_buffer_dirty(bh); 125862306a36Sopenharmony_ci if (bh->b_assoc_map) { 125962306a36Sopenharmony_ci struct address_space *buffer_mapping = bh->b_folio->mapping; 126062306a36Sopenharmony_ci 126162306a36Sopenharmony_ci spin_lock(&buffer_mapping->private_lock); 126262306a36Sopenharmony_ci list_del_init(&bh->b_assoc_buffers); 126362306a36Sopenharmony_ci bh->b_assoc_map = NULL; 126462306a36Sopenharmony_ci spin_unlock(&buffer_mapping->private_lock); 126562306a36Sopenharmony_ci } 126662306a36Sopenharmony_ci __brelse(bh); 126762306a36Sopenharmony_ci} 126862306a36Sopenharmony_ciEXPORT_SYMBOL(__bforget); 126962306a36Sopenharmony_ci 127062306a36Sopenharmony_cistatic struct buffer_head *__bread_slow(struct buffer_head *bh) 127162306a36Sopenharmony_ci{ 127262306a36Sopenharmony_ci lock_buffer(bh); 127362306a36Sopenharmony_ci if (buffer_uptodate(bh)) { 127462306a36Sopenharmony_ci unlock_buffer(bh); 127562306a36Sopenharmony_ci return bh; 127662306a36Sopenharmony_ci } else { 127762306a36Sopenharmony_ci get_bh(bh); 127862306a36Sopenharmony_ci bh->b_end_io = end_buffer_read_sync; 127962306a36Sopenharmony_ci submit_bh(REQ_OP_READ, bh); 128062306a36Sopenharmony_ci wait_on_buffer(bh); 128162306a36Sopenharmony_ci if (buffer_uptodate(bh)) 128262306a36Sopenharmony_ci return bh; 128362306a36Sopenharmony_ci } 128462306a36Sopenharmony_ci brelse(bh); 128562306a36Sopenharmony_ci return NULL; 128662306a36Sopenharmony_ci} 128762306a36Sopenharmony_ci 128862306a36Sopenharmony_ci/* 128962306a36Sopenharmony_ci * Per-cpu buffer LRU implementation. To reduce the cost of __find_get_block(). 129062306a36Sopenharmony_ci * The bhs[] array is sorted - newest buffer is at bhs[0]. Buffers have their 129162306a36Sopenharmony_ci * refcount elevated by one when they're in an LRU. 
A buffer can only appear 129262306a36Sopenharmony_ci * once in a particular CPU's LRU. A single buffer can be present in multiple 129362306a36Sopenharmony_ci * CPU's LRUs at the same time. 129462306a36Sopenharmony_ci * 129562306a36Sopenharmony_ci * This is a transparent caching front-end to sb_bread(), sb_getblk() and 129662306a36Sopenharmony_ci * sb_find_get_block(). 129762306a36Sopenharmony_ci * 129862306a36Sopenharmony_ci * The LRUs themselves only need locking against invalidate_bh_lrus. We use 129962306a36Sopenharmony_ci * a local interrupt disable for that. 130062306a36Sopenharmony_ci */ 130162306a36Sopenharmony_ci 130262306a36Sopenharmony_ci#define BH_LRU_SIZE 16 130362306a36Sopenharmony_ci 130462306a36Sopenharmony_cistruct bh_lru { 130562306a36Sopenharmony_ci struct buffer_head *bhs[BH_LRU_SIZE]; 130662306a36Sopenharmony_ci}; 130762306a36Sopenharmony_ci 130862306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct bh_lru, bh_lrus) = {{ NULL }}; 130962306a36Sopenharmony_ci 131062306a36Sopenharmony_ci#ifdef CONFIG_SMP 131162306a36Sopenharmony_ci#define bh_lru_lock() local_irq_disable() 131262306a36Sopenharmony_ci#define bh_lru_unlock() local_irq_enable() 131362306a36Sopenharmony_ci#else 131462306a36Sopenharmony_ci#define bh_lru_lock() preempt_disable() 131562306a36Sopenharmony_ci#define bh_lru_unlock() preempt_enable() 131662306a36Sopenharmony_ci#endif 131762306a36Sopenharmony_ci 131862306a36Sopenharmony_cistatic inline void check_irqs_on(void) 131962306a36Sopenharmony_ci{ 132062306a36Sopenharmony_ci#ifdef irqs_disabled 132162306a36Sopenharmony_ci BUG_ON(irqs_disabled()); 132262306a36Sopenharmony_ci#endif 132362306a36Sopenharmony_ci} 132462306a36Sopenharmony_ci 132562306a36Sopenharmony_ci/* 132662306a36Sopenharmony_ci * Install a buffer_head into this cpu's LRU. If not already in the LRU, it is 132762306a36Sopenharmony_ci * inserted at the front, and the buffer_head at the back if any is evicted. 
132862306a36Sopenharmony_ci * Or, if already in the LRU it is moved to the front. 132962306a36Sopenharmony_ci */ 133062306a36Sopenharmony_cistatic void bh_lru_install(struct buffer_head *bh) 133162306a36Sopenharmony_ci{ 133262306a36Sopenharmony_ci struct buffer_head *evictee = bh; 133362306a36Sopenharmony_ci struct bh_lru *b; 133462306a36Sopenharmony_ci int i; 133562306a36Sopenharmony_ci 133662306a36Sopenharmony_ci check_irqs_on(); 133762306a36Sopenharmony_ci bh_lru_lock(); 133862306a36Sopenharmony_ci 133962306a36Sopenharmony_ci /* 134062306a36Sopenharmony_ci * the refcount of buffer_head in bh_lru prevents dropping the 134162306a36Sopenharmony_ci * attached page(i.e., try_to_free_buffers) so it could cause 134262306a36Sopenharmony_ci * failing page migration. 134362306a36Sopenharmony_ci * Skip putting upcoming bh into bh_lru until migration is done. 134462306a36Sopenharmony_ci */ 134562306a36Sopenharmony_ci if (lru_cache_disabled() || cpu_is_isolated(smp_processor_id())) { 134662306a36Sopenharmony_ci bh_lru_unlock(); 134762306a36Sopenharmony_ci return; 134862306a36Sopenharmony_ci } 134962306a36Sopenharmony_ci 135062306a36Sopenharmony_ci b = this_cpu_ptr(&bh_lrus); 135162306a36Sopenharmony_ci for (i = 0; i < BH_LRU_SIZE; i++) { 135262306a36Sopenharmony_ci swap(evictee, b->bhs[i]); 135362306a36Sopenharmony_ci if (evictee == bh) { 135462306a36Sopenharmony_ci bh_lru_unlock(); 135562306a36Sopenharmony_ci return; 135662306a36Sopenharmony_ci } 135762306a36Sopenharmony_ci } 135862306a36Sopenharmony_ci 135962306a36Sopenharmony_ci get_bh(bh); 136062306a36Sopenharmony_ci bh_lru_unlock(); 136162306a36Sopenharmony_ci brelse(evictee); 136262306a36Sopenharmony_ci} 136362306a36Sopenharmony_ci 136462306a36Sopenharmony_ci/* 136562306a36Sopenharmony_ci * Look up the bh in this cpu's LRU. If it's there, move it to the head. 
136662306a36Sopenharmony_ci */ 136762306a36Sopenharmony_cistatic struct buffer_head * 136862306a36Sopenharmony_cilookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) 136962306a36Sopenharmony_ci{ 137062306a36Sopenharmony_ci struct buffer_head *ret = NULL; 137162306a36Sopenharmony_ci unsigned int i; 137262306a36Sopenharmony_ci 137362306a36Sopenharmony_ci check_irqs_on(); 137462306a36Sopenharmony_ci bh_lru_lock(); 137562306a36Sopenharmony_ci if (cpu_is_isolated(smp_processor_id())) { 137662306a36Sopenharmony_ci bh_lru_unlock(); 137762306a36Sopenharmony_ci return NULL; 137862306a36Sopenharmony_ci } 137962306a36Sopenharmony_ci for (i = 0; i < BH_LRU_SIZE; i++) { 138062306a36Sopenharmony_ci struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]); 138162306a36Sopenharmony_ci 138262306a36Sopenharmony_ci if (bh && bh->b_blocknr == block && bh->b_bdev == bdev && 138362306a36Sopenharmony_ci bh->b_size == size) { 138462306a36Sopenharmony_ci if (i) { 138562306a36Sopenharmony_ci while (i) { 138662306a36Sopenharmony_ci __this_cpu_write(bh_lrus.bhs[i], 138762306a36Sopenharmony_ci __this_cpu_read(bh_lrus.bhs[i - 1])); 138862306a36Sopenharmony_ci i--; 138962306a36Sopenharmony_ci } 139062306a36Sopenharmony_ci __this_cpu_write(bh_lrus.bhs[0], bh); 139162306a36Sopenharmony_ci } 139262306a36Sopenharmony_ci get_bh(bh); 139362306a36Sopenharmony_ci ret = bh; 139462306a36Sopenharmony_ci break; 139562306a36Sopenharmony_ci } 139662306a36Sopenharmony_ci } 139762306a36Sopenharmony_ci bh_lru_unlock(); 139862306a36Sopenharmony_ci return ret; 139962306a36Sopenharmony_ci} 140062306a36Sopenharmony_ci 140162306a36Sopenharmony_ci/* 140262306a36Sopenharmony_ci * Perform a pagecache lookup for the matching buffer. If it's there, refresh 140362306a36Sopenharmony_ci * it in the LRU and mark it as accessed. 
If it is not present then return 140462306a36Sopenharmony_ci * NULL 140562306a36Sopenharmony_ci */ 140662306a36Sopenharmony_cistruct buffer_head * 140762306a36Sopenharmony_ci__find_get_block(struct block_device *bdev, sector_t block, unsigned size) 140862306a36Sopenharmony_ci{ 140962306a36Sopenharmony_ci struct buffer_head *bh = lookup_bh_lru(bdev, block, size); 141062306a36Sopenharmony_ci 141162306a36Sopenharmony_ci if (bh == NULL) { 141262306a36Sopenharmony_ci /* __find_get_block_slow will mark the page accessed */ 141362306a36Sopenharmony_ci bh = __find_get_block_slow(bdev, block); 141462306a36Sopenharmony_ci if (bh) 141562306a36Sopenharmony_ci bh_lru_install(bh); 141662306a36Sopenharmony_ci } else 141762306a36Sopenharmony_ci touch_buffer(bh); 141862306a36Sopenharmony_ci 141962306a36Sopenharmony_ci return bh; 142062306a36Sopenharmony_ci} 142162306a36Sopenharmony_ciEXPORT_SYMBOL(__find_get_block); 142262306a36Sopenharmony_ci 142362306a36Sopenharmony_ci/* 142462306a36Sopenharmony_ci * __getblk_gfp() will locate (and, if necessary, create) the buffer_head 142562306a36Sopenharmony_ci * which corresponds to the passed block_device, block and size. The 142662306a36Sopenharmony_ci * returned buffer has its reference count incremented. 142762306a36Sopenharmony_ci * 142862306a36Sopenharmony_ci * __getblk_gfp() will lock up the machine if grow_dev_page's 142962306a36Sopenharmony_ci * try_to_free_buffers() attempt is failing. FIXME, perhaps? 
143062306a36Sopenharmony_ci */ 143162306a36Sopenharmony_cistruct buffer_head * 143262306a36Sopenharmony_ci__getblk_gfp(struct block_device *bdev, sector_t block, 143362306a36Sopenharmony_ci unsigned size, gfp_t gfp) 143462306a36Sopenharmony_ci{ 143562306a36Sopenharmony_ci struct buffer_head *bh = __find_get_block(bdev, block, size); 143662306a36Sopenharmony_ci 143762306a36Sopenharmony_ci might_sleep(); 143862306a36Sopenharmony_ci if (bh == NULL) 143962306a36Sopenharmony_ci bh = __getblk_slow(bdev, block, size, gfp); 144062306a36Sopenharmony_ci return bh; 144162306a36Sopenharmony_ci} 144262306a36Sopenharmony_ciEXPORT_SYMBOL(__getblk_gfp); 144362306a36Sopenharmony_ci 144462306a36Sopenharmony_ci/* 144562306a36Sopenharmony_ci * Do async read-ahead on a buffer.. 144662306a36Sopenharmony_ci */ 144762306a36Sopenharmony_civoid __breadahead(struct block_device *bdev, sector_t block, unsigned size) 144862306a36Sopenharmony_ci{ 144962306a36Sopenharmony_ci struct buffer_head *bh = __getblk(bdev, block, size); 145062306a36Sopenharmony_ci if (likely(bh)) { 145162306a36Sopenharmony_ci bh_readahead(bh, REQ_RAHEAD); 145262306a36Sopenharmony_ci brelse(bh); 145362306a36Sopenharmony_ci } 145462306a36Sopenharmony_ci} 145562306a36Sopenharmony_ciEXPORT_SYMBOL(__breadahead); 145662306a36Sopenharmony_ci 145762306a36Sopenharmony_ci/** 145862306a36Sopenharmony_ci * __bread_gfp() - reads a specified block and returns the bh 145962306a36Sopenharmony_ci * @bdev: the block_device to read from 146062306a36Sopenharmony_ci * @block: number of block 146162306a36Sopenharmony_ci * @size: size (in bytes) to read 146262306a36Sopenharmony_ci * @gfp: page allocation flag 146362306a36Sopenharmony_ci * 146462306a36Sopenharmony_ci * Reads a specified block, and returns buffer head that contains it. 146562306a36Sopenharmony_ci * The page cache can be allocated from non-movable area 146662306a36Sopenharmony_ci * not to prevent page migration if you set gfp to zero. 
146762306a36Sopenharmony_ci * It returns NULL if the block was unreadable. 146862306a36Sopenharmony_ci */ 146962306a36Sopenharmony_cistruct buffer_head * 147062306a36Sopenharmony_ci__bread_gfp(struct block_device *bdev, sector_t block, 147162306a36Sopenharmony_ci unsigned size, gfp_t gfp) 147262306a36Sopenharmony_ci{ 147362306a36Sopenharmony_ci struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp); 147462306a36Sopenharmony_ci 147562306a36Sopenharmony_ci if (likely(bh) && !buffer_uptodate(bh)) 147662306a36Sopenharmony_ci bh = __bread_slow(bh); 147762306a36Sopenharmony_ci return bh; 147862306a36Sopenharmony_ci} 147962306a36Sopenharmony_ciEXPORT_SYMBOL(__bread_gfp); 148062306a36Sopenharmony_ci 148162306a36Sopenharmony_cistatic void __invalidate_bh_lrus(struct bh_lru *b) 148262306a36Sopenharmony_ci{ 148362306a36Sopenharmony_ci int i; 148462306a36Sopenharmony_ci 148562306a36Sopenharmony_ci for (i = 0; i < BH_LRU_SIZE; i++) { 148662306a36Sopenharmony_ci brelse(b->bhs[i]); 148762306a36Sopenharmony_ci b->bhs[i] = NULL; 148862306a36Sopenharmony_ci } 148962306a36Sopenharmony_ci} 149062306a36Sopenharmony_ci/* 149162306a36Sopenharmony_ci * invalidate_bh_lrus() is called rarely - but not only at unmount. 149262306a36Sopenharmony_ci * This doesn't race because it runs in each cpu either in irq 149362306a36Sopenharmony_ci * or with preempt disabled. 
149462306a36Sopenharmony_ci */ 149562306a36Sopenharmony_cistatic void invalidate_bh_lru(void *arg) 149662306a36Sopenharmony_ci{ 149762306a36Sopenharmony_ci struct bh_lru *b = &get_cpu_var(bh_lrus); 149862306a36Sopenharmony_ci 149962306a36Sopenharmony_ci __invalidate_bh_lrus(b); 150062306a36Sopenharmony_ci put_cpu_var(bh_lrus); 150162306a36Sopenharmony_ci} 150262306a36Sopenharmony_ci 150362306a36Sopenharmony_cibool has_bh_in_lru(int cpu, void *dummy) 150462306a36Sopenharmony_ci{ 150562306a36Sopenharmony_ci struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu); 150662306a36Sopenharmony_ci int i; 150762306a36Sopenharmony_ci 150862306a36Sopenharmony_ci for (i = 0; i < BH_LRU_SIZE; i++) { 150962306a36Sopenharmony_ci if (b->bhs[i]) 151062306a36Sopenharmony_ci return true; 151162306a36Sopenharmony_ci } 151262306a36Sopenharmony_ci 151362306a36Sopenharmony_ci return false; 151462306a36Sopenharmony_ci} 151562306a36Sopenharmony_ci 151662306a36Sopenharmony_civoid invalidate_bh_lrus(void) 151762306a36Sopenharmony_ci{ 151862306a36Sopenharmony_ci on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1); 151962306a36Sopenharmony_ci} 152062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(invalidate_bh_lrus); 152162306a36Sopenharmony_ci 152262306a36Sopenharmony_ci/* 152362306a36Sopenharmony_ci * It's called from workqueue context so we need a bh_lru_lock to close 152462306a36Sopenharmony_ci * the race with preemption/irq. 
152562306a36Sopenharmony_ci */ 152662306a36Sopenharmony_civoid invalidate_bh_lrus_cpu(void) 152762306a36Sopenharmony_ci{ 152862306a36Sopenharmony_ci struct bh_lru *b; 152962306a36Sopenharmony_ci 153062306a36Sopenharmony_ci bh_lru_lock(); 153162306a36Sopenharmony_ci b = this_cpu_ptr(&bh_lrus); 153262306a36Sopenharmony_ci __invalidate_bh_lrus(b); 153362306a36Sopenharmony_ci bh_lru_unlock(); 153462306a36Sopenharmony_ci} 153562306a36Sopenharmony_ci 153662306a36Sopenharmony_civoid folio_set_bh(struct buffer_head *bh, struct folio *folio, 153762306a36Sopenharmony_ci unsigned long offset) 153862306a36Sopenharmony_ci{ 153962306a36Sopenharmony_ci bh->b_folio = folio; 154062306a36Sopenharmony_ci BUG_ON(offset >= folio_size(folio)); 154162306a36Sopenharmony_ci if (folio_test_highmem(folio)) 154262306a36Sopenharmony_ci /* 154362306a36Sopenharmony_ci * This catches illegal uses and preserves the offset: 154462306a36Sopenharmony_ci */ 154562306a36Sopenharmony_ci bh->b_data = (char *)(0 + offset); 154662306a36Sopenharmony_ci else 154762306a36Sopenharmony_ci bh->b_data = folio_address(folio) + offset; 154862306a36Sopenharmony_ci} 154962306a36Sopenharmony_ciEXPORT_SYMBOL(folio_set_bh); 155062306a36Sopenharmony_ci 155162306a36Sopenharmony_ci/* 155262306a36Sopenharmony_ci * Called when truncating a buffer on a page completely. 
155362306a36Sopenharmony_ci */ 155462306a36Sopenharmony_ci 155562306a36Sopenharmony_ci/* Bits that are cleared during an invalidate */ 155662306a36Sopenharmony_ci#define BUFFER_FLAGS_DISCARD \ 155762306a36Sopenharmony_ci (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \ 155862306a36Sopenharmony_ci 1 << BH_Delay | 1 << BH_Unwritten) 155962306a36Sopenharmony_ci 156062306a36Sopenharmony_cistatic void discard_buffer(struct buffer_head * bh) 156162306a36Sopenharmony_ci{ 156262306a36Sopenharmony_ci unsigned long b_state; 156362306a36Sopenharmony_ci 156462306a36Sopenharmony_ci lock_buffer(bh); 156562306a36Sopenharmony_ci clear_buffer_dirty(bh); 156662306a36Sopenharmony_ci bh->b_bdev = NULL; 156762306a36Sopenharmony_ci b_state = READ_ONCE(bh->b_state); 156862306a36Sopenharmony_ci do { 156962306a36Sopenharmony_ci } while (!try_cmpxchg(&bh->b_state, &b_state, 157062306a36Sopenharmony_ci b_state & ~BUFFER_FLAGS_DISCARD)); 157162306a36Sopenharmony_ci unlock_buffer(bh); 157262306a36Sopenharmony_ci} 157362306a36Sopenharmony_ci 157462306a36Sopenharmony_ci/** 157562306a36Sopenharmony_ci * block_invalidate_folio - Invalidate part or all of a buffer-backed folio. 157662306a36Sopenharmony_ci * @folio: The folio which is affected. 157762306a36Sopenharmony_ci * @offset: start of the range to invalidate 157862306a36Sopenharmony_ci * @length: length of the range to invalidate 157962306a36Sopenharmony_ci * 158062306a36Sopenharmony_ci * block_invalidate_folio() is called when all or part of the folio has been 158162306a36Sopenharmony_ci * invalidated by a truncate operation. 158262306a36Sopenharmony_ci * 158362306a36Sopenharmony_ci * block_invalidate_folio() does not have to release all buffers, but it must 158462306a36Sopenharmony_ci * ensure that no dirty buffer is left outside @offset and that no I/O 158562306a36Sopenharmony_ci * is underway against any of the blocks which are outside the truncation 158662306a36Sopenharmony_ci * point. 
Because the caller is about to free (and possibly reuse) those 158762306a36Sopenharmony_ci * blocks on-disk. 158862306a36Sopenharmony_ci */ 158962306a36Sopenharmony_civoid block_invalidate_folio(struct folio *folio, size_t offset, size_t length) 159062306a36Sopenharmony_ci{ 159162306a36Sopenharmony_ci struct buffer_head *head, *bh, *next; 159262306a36Sopenharmony_ci size_t curr_off = 0; 159362306a36Sopenharmony_ci size_t stop = length + offset; 159462306a36Sopenharmony_ci 159562306a36Sopenharmony_ci BUG_ON(!folio_test_locked(folio)); 159662306a36Sopenharmony_ci 159762306a36Sopenharmony_ci /* 159862306a36Sopenharmony_ci * Check for overflow 159962306a36Sopenharmony_ci */ 160062306a36Sopenharmony_ci BUG_ON(stop > folio_size(folio) || stop < length); 160162306a36Sopenharmony_ci 160262306a36Sopenharmony_ci head = folio_buffers(folio); 160362306a36Sopenharmony_ci if (!head) 160462306a36Sopenharmony_ci return; 160562306a36Sopenharmony_ci 160662306a36Sopenharmony_ci bh = head; 160762306a36Sopenharmony_ci do { 160862306a36Sopenharmony_ci size_t next_off = curr_off + bh->b_size; 160962306a36Sopenharmony_ci next = bh->b_this_page; 161062306a36Sopenharmony_ci 161162306a36Sopenharmony_ci /* 161262306a36Sopenharmony_ci * Are we still fully in range ? 161362306a36Sopenharmony_ci */ 161462306a36Sopenharmony_ci if (next_off > stop) 161562306a36Sopenharmony_ci goto out; 161662306a36Sopenharmony_ci 161762306a36Sopenharmony_ci /* 161862306a36Sopenharmony_ci * is this block fully invalidated? 161962306a36Sopenharmony_ci */ 162062306a36Sopenharmony_ci if (offset <= curr_off) 162162306a36Sopenharmony_ci discard_buffer(bh); 162262306a36Sopenharmony_ci curr_off = next_off; 162362306a36Sopenharmony_ci bh = next; 162462306a36Sopenharmony_ci } while (bh != head); 162562306a36Sopenharmony_ci 162662306a36Sopenharmony_ci /* 162762306a36Sopenharmony_ci * We release buffers only if the entire folio is being invalidated. 
162862306a36Sopenharmony_ci * The get_block cached value has been unconditionally invalidated, 162962306a36Sopenharmony_ci * so real IO is not possible anymore. 163062306a36Sopenharmony_ci */ 163162306a36Sopenharmony_ci if (length == folio_size(folio)) 163262306a36Sopenharmony_ci filemap_release_folio(folio, 0); 163362306a36Sopenharmony_ciout: 163462306a36Sopenharmony_ci return; 163562306a36Sopenharmony_ci} 163662306a36Sopenharmony_ciEXPORT_SYMBOL(block_invalidate_folio); 163762306a36Sopenharmony_ci 163862306a36Sopenharmony_ci/* 163962306a36Sopenharmony_ci * We attach and possibly dirty the buffers atomically wrt 164062306a36Sopenharmony_ci * block_dirty_folio() via private_lock. try_to_free_buffers 164162306a36Sopenharmony_ci * is already excluded via the folio lock. 164262306a36Sopenharmony_ci */ 164362306a36Sopenharmony_civoid folio_create_empty_buffers(struct folio *folio, unsigned long blocksize, 164462306a36Sopenharmony_ci unsigned long b_state) 164562306a36Sopenharmony_ci{ 164662306a36Sopenharmony_ci struct buffer_head *bh, *head, *tail; 164762306a36Sopenharmony_ci 164862306a36Sopenharmony_ci head = folio_alloc_buffers(folio, blocksize, true); 164962306a36Sopenharmony_ci bh = head; 165062306a36Sopenharmony_ci do { 165162306a36Sopenharmony_ci bh->b_state |= b_state; 165262306a36Sopenharmony_ci tail = bh; 165362306a36Sopenharmony_ci bh = bh->b_this_page; 165462306a36Sopenharmony_ci } while (bh); 165562306a36Sopenharmony_ci tail->b_this_page = head; 165662306a36Sopenharmony_ci 165762306a36Sopenharmony_ci spin_lock(&folio->mapping->private_lock); 165862306a36Sopenharmony_ci if (folio_test_uptodate(folio) || folio_test_dirty(folio)) { 165962306a36Sopenharmony_ci bh = head; 166062306a36Sopenharmony_ci do { 166162306a36Sopenharmony_ci if (folio_test_dirty(folio)) 166262306a36Sopenharmony_ci set_buffer_dirty(bh); 166362306a36Sopenharmony_ci if (folio_test_uptodate(folio)) 166462306a36Sopenharmony_ci set_buffer_uptodate(bh); 166562306a36Sopenharmony_ci bh = 
bh->b_this_page; 166662306a36Sopenharmony_ci } while (bh != head); 166762306a36Sopenharmony_ci } 166862306a36Sopenharmony_ci folio_attach_private(folio, head); 166962306a36Sopenharmony_ci spin_unlock(&folio->mapping->private_lock); 167062306a36Sopenharmony_ci} 167162306a36Sopenharmony_ciEXPORT_SYMBOL(folio_create_empty_buffers); 167262306a36Sopenharmony_ci 167362306a36Sopenharmony_civoid create_empty_buffers(struct page *page, 167462306a36Sopenharmony_ci unsigned long blocksize, unsigned long b_state) 167562306a36Sopenharmony_ci{ 167662306a36Sopenharmony_ci folio_create_empty_buffers(page_folio(page), blocksize, b_state); 167762306a36Sopenharmony_ci} 167862306a36Sopenharmony_ciEXPORT_SYMBOL(create_empty_buffers); 167962306a36Sopenharmony_ci 168062306a36Sopenharmony_ci/** 168162306a36Sopenharmony_ci * clean_bdev_aliases: clean a range of buffers in block device 168262306a36Sopenharmony_ci * @bdev: Block device to clean buffers in 168362306a36Sopenharmony_ci * @block: Start of a range of blocks to clean 168462306a36Sopenharmony_ci * @len: Number of blocks to clean 168562306a36Sopenharmony_ci * 168662306a36Sopenharmony_ci * We are taking a range of blocks for data and we don't want writeback of any 168762306a36Sopenharmony_ci * buffer-cache aliases starting from return from this function and until the 168862306a36Sopenharmony_ci * moment when something will explicitly mark the buffer dirty (hopefully that 168962306a36Sopenharmony_ci * will not happen until we will free that block ;-) We don't even need to mark 169062306a36Sopenharmony_ci * it not-uptodate - nobody can expect anything from a newly allocated buffer 169162306a36Sopenharmony_ci * anyway. We used to use unmap_buffer() for such invalidation, but that was 169262306a36Sopenharmony_ci * wrong. We definitely don't want to mark the alias unmapped, for example - it 169362306a36Sopenharmony_ci * would confuse anyone who might pick it with bread() afterwards... 
169462306a36Sopenharmony_ci * 169562306a36Sopenharmony_ci * Also.. Note that bforget() doesn't lock the buffer. So there can be 169662306a36Sopenharmony_ci * writeout I/O going on against recently-freed buffers. We don't wait on that 169762306a36Sopenharmony_ci * I/O in bforget() - it's more efficient to wait on the I/O only if we really 169862306a36Sopenharmony_ci * need to. That happens here. 169962306a36Sopenharmony_ci */ 170062306a36Sopenharmony_civoid clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len) 170162306a36Sopenharmony_ci{ 170262306a36Sopenharmony_ci struct inode *bd_inode = bdev->bd_inode; 170362306a36Sopenharmony_ci struct address_space *bd_mapping = bd_inode->i_mapping; 170462306a36Sopenharmony_ci struct folio_batch fbatch; 170562306a36Sopenharmony_ci pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits); 170662306a36Sopenharmony_ci pgoff_t end; 170762306a36Sopenharmony_ci int i, count; 170862306a36Sopenharmony_ci struct buffer_head *bh; 170962306a36Sopenharmony_ci struct buffer_head *head; 171062306a36Sopenharmony_ci 171162306a36Sopenharmony_ci end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits); 171262306a36Sopenharmony_ci folio_batch_init(&fbatch); 171362306a36Sopenharmony_ci while (filemap_get_folios(bd_mapping, &index, end, &fbatch)) { 171462306a36Sopenharmony_ci count = folio_batch_count(&fbatch); 171562306a36Sopenharmony_ci for (i = 0; i < count; i++) { 171662306a36Sopenharmony_ci struct folio *folio = fbatch.folios[i]; 171762306a36Sopenharmony_ci 171862306a36Sopenharmony_ci if (!folio_buffers(folio)) 171962306a36Sopenharmony_ci continue; 172062306a36Sopenharmony_ci /* 172162306a36Sopenharmony_ci * We use folio lock instead of bd_mapping->private_lock 172262306a36Sopenharmony_ci * to pin buffers here since we can afford to sleep and 172362306a36Sopenharmony_ci * it scales better than a global spinlock lock. 
172462306a36Sopenharmony_ci */ 172562306a36Sopenharmony_ci folio_lock(folio); 172662306a36Sopenharmony_ci /* Recheck when the folio is locked which pins bhs */ 172762306a36Sopenharmony_ci head = folio_buffers(folio); 172862306a36Sopenharmony_ci if (!head) 172962306a36Sopenharmony_ci goto unlock_page; 173062306a36Sopenharmony_ci bh = head; 173162306a36Sopenharmony_ci do { 173262306a36Sopenharmony_ci if (!buffer_mapped(bh) || (bh->b_blocknr < block)) 173362306a36Sopenharmony_ci goto next; 173462306a36Sopenharmony_ci if (bh->b_blocknr >= block + len) 173562306a36Sopenharmony_ci break; 173662306a36Sopenharmony_ci clear_buffer_dirty(bh); 173762306a36Sopenharmony_ci wait_on_buffer(bh); 173862306a36Sopenharmony_ci clear_buffer_req(bh); 173962306a36Sopenharmony_cinext: 174062306a36Sopenharmony_ci bh = bh->b_this_page; 174162306a36Sopenharmony_ci } while (bh != head); 174262306a36Sopenharmony_ciunlock_page: 174362306a36Sopenharmony_ci folio_unlock(folio); 174462306a36Sopenharmony_ci } 174562306a36Sopenharmony_ci folio_batch_release(&fbatch); 174662306a36Sopenharmony_ci cond_resched(); 174762306a36Sopenharmony_ci /* End of range already reached? */ 174862306a36Sopenharmony_ci if (index > end || !index) 174962306a36Sopenharmony_ci break; 175062306a36Sopenharmony_ci } 175162306a36Sopenharmony_ci} 175262306a36Sopenharmony_ciEXPORT_SYMBOL(clean_bdev_aliases); 175362306a36Sopenharmony_ci 175462306a36Sopenharmony_ci/* 175562306a36Sopenharmony_ci * Size is a power-of-two in the range 512..PAGE_SIZE, 175662306a36Sopenharmony_ci * and the case we care about most is PAGE_SIZE. 
175762306a36Sopenharmony_ci * 175862306a36Sopenharmony_ci * So this *could* possibly be written with those 175962306a36Sopenharmony_ci * constraints in mind (relevant mostly if some 176062306a36Sopenharmony_ci * architecture has a slow bit-scan instruction) 176162306a36Sopenharmony_ci */ 176262306a36Sopenharmony_cistatic inline int block_size_bits(unsigned int blocksize) 176362306a36Sopenharmony_ci{ 176462306a36Sopenharmony_ci return ilog2(blocksize); 176562306a36Sopenharmony_ci} 176662306a36Sopenharmony_ci 176762306a36Sopenharmony_cistatic struct buffer_head *folio_create_buffers(struct folio *folio, 176862306a36Sopenharmony_ci struct inode *inode, 176962306a36Sopenharmony_ci unsigned int b_state) 177062306a36Sopenharmony_ci{ 177162306a36Sopenharmony_ci BUG_ON(!folio_test_locked(folio)); 177262306a36Sopenharmony_ci 177362306a36Sopenharmony_ci if (!folio_buffers(folio)) 177462306a36Sopenharmony_ci folio_create_empty_buffers(folio, 177562306a36Sopenharmony_ci 1 << READ_ONCE(inode->i_blkbits), 177662306a36Sopenharmony_ci b_state); 177762306a36Sopenharmony_ci return folio_buffers(folio); 177862306a36Sopenharmony_ci} 177962306a36Sopenharmony_ci 178062306a36Sopenharmony_ci/* 178162306a36Sopenharmony_ci * NOTE! All mapped/uptodate combinations are valid: 178262306a36Sopenharmony_ci * 178362306a36Sopenharmony_ci * Mapped Uptodate Meaning 178462306a36Sopenharmony_ci * 178562306a36Sopenharmony_ci * No No "unknown" - must do get_block() 178662306a36Sopenharmony_ci * No Yes "hole" - zero-filled 178762306a36Sopenharmony_ci * Yes No "allocated" - allocated on disk, not read in 178862306a36Sopenharmony_ci * Yes Yes "valid" - allocated and up-to-date in memory. 178962306a36Sopenharmony_ci * 179062306a36Sopenharmony_ci * "Dirty" is valid only with the last case (mapped+uptodate). 
 */

/*
 * While block_write_full_page is writing back the dirty buffers under
 * the page lock, whoever dirtied the buffers may decide to clean them
 * again at any time.  We handle that by only looking at the buffer
 * state inside lock_buffer().
 *
 * If block_write_full_page() is called for regular writeback
 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
 * locked buffer.  This only can happen if someone has written the buffer
 * directly, with submit_bh().  At the address_space level PageWriteback
 * prevents this contention from occurring.
 *
 * If block_write_full_page() is called with wbc->sync_mode ==
 * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this
 * causes the writes to be flagged as synchronous writes.
180862306a36Sopenharmony_ci */ 180962306a36Sopenharmony_ciint __block_write_full_folio(struct inode *inode, struct folio *folio, 181062306a36Sopenharmony_ci get_block_t *get_block, struct writeback_control *wbc, 181162306a36Sopenharmony_ci bh_end_io_t *handler) 181262306a36Sopenharmony_ci{ 181362306a36Sopenharmony_ci int err; 181462306a36Sopenharmony_ci sector_t block; 181562306a36Sopenharmony_ci sector_t last_block; 181662306a36Sopenharmony_ci struct buffer_head *bh, *head; 181762306a36Sopenharmony_ci unsigned int blocksize, bbits; 181862306a36Sopenharmony_ci int nr_underway = 0; 181962306a36Sopenharmony_ci blk_opf_t write_flags = wbc_to_write_flags(wbc); 182062306a36Sopenharmony_ci 182162306a36Sopenharmony_ci head = folio_create_buffers(folio, inode, 182262306a36Sopenharmony_ci (1 << BH_Dirty) | (1 << BH_Uptodate)); 182362306a36Sopenharmony_ci 182462306a36Sopenharmony_ci /* 182562306a36Sopenharmony_ci * Be very careful. We have no exclusion from block_dirty_folio 182662306a36Sopenharmony_ci * here, and the (potentially unmapped) buffers may become dirty at 182762306a36Sopenharmony_ci * any time. If a buffer becomes dirty here after we've inspected it 182862306a36Sopenharmony_ci * then we just miss that fact, and the folio stays dirty. 182962306a36Sopenharmony_ci * 183062306a36Sopenharmony_ci * Buffers outside i_size may be dirtied by block_dirty_folio; 183162306a36Sopenharmony_ci * handle that here by just cleaning them. 
183262306a36Sopenharmony_ci */ 183362306a36Sopenharmony_ci 183462306a36Sopenharmony_ci bh = head; 183562306a36Sopenharmony_ci blocksize = bh->b_size; 183662306a36Sopenharmony_ci bbits = block_size_bits(blocksize); 183762306a36Sopenharmony_ci 183862306a36Sopenharmony_ci block = (sector_t)folio->index << (PAGE_SHIFT - bbits); 183962306a36Sopenharmony_ci last_block = (i_size_read(inode) - 1) >> bbits; 184062306a36Sopenharmony_ci 184162306a36Sopenharmony_ci /* 184262306a36Sopenharmony_ci * Get all the dirty buffers mapped to disk addresses and 184362306a36Sopenharmony_ci * handle any aliases from the underlying blockdev's mapping. 184462306a36Sopenharmony_ci */ 184562306a36Sopenharmony_ci do { 184662306a36Sopenharmony_ci if (block > last_block) { 184762306a36Sopenharmony_ci /* 184862306a36Sopenharmony_ci * mapped buffers outside i_size will occur, because 184962306a36Sopenharmony_ci * this folio can be outside i_size when there is a 185062306a36Sopenharmony_ci * truncate in progress. 185162306a36Sopenharmony_ci */ 185262306a36Sopenharmony_ci /* 185362306a36Sopenharmony_ci * The buffer was zeroed by block_write_full_page() 185462306a36Sopenharmony_ci */ 185562306a36Sopenharmony_ci clear_buffer_dirty(bh); 185662306a36Sopenharmony_ci set_buffer_uptodate(bh); 185762306a36Sopenharmony_ci } else if ((!buffer_mapped(bh) || buffer_delay(bh)) && 185862306a36Sopenharmony_ci buffer_dirty(bh)) { 185962306a36Sopenharmony_ci WARN_ON(bh->b_size != blocksize); 186062306a36Sopenharmony_ci err = get_block(inode, block, bh, 1); 186162306a36Sopenharmony_ci if (err) 186262306a36Sopenharmony_ci goto recover; 186362306a36Sopenharmony_ci clear_buffer_delay(bh); 186462306a36Sopenharmony_ci if (buffer_new(bh)) { 186562306a36Sopenharmony_ci /* blockdev mappings never come here */ 186662306a36Sopenharmony_ci clear_buffer_new(bh); 186762306a36Sopenharmony_ci clean_bdev_bh_alias(bh); 186862306a36Sopenharmony_ci } 186962306a36Sopenharmony_ci } 187062306a36Sopenharmony_ci bh = bh->b_this_page; 
187162306a36Sopenharmony_ci block++; 187262306a36Sopenharmony_ci } while (bh != head); 187362306a36Sopenharmony_ci 187462306a36Sopenharmony_ci do { 187562306a36Sopenharmony_ci if (!buffer_mapped(bh)) 187662306a36Sopenharmony_ci continue; 187762306a36Sopenharmony_ci /* 187862306a36Sopenharmony_ci * If it's a fully non-blocking write attempt and we cannot 187962306a36Sopenharmony_ci * lock the buffer then redirty the folio. Note that this can 188062306a36Sopenharmony_ci * potentially cause a busy-wait loop from writeback threads 188162306a36Sopenharmony_ci * and kswapd activity, but those code paths have their own 188262306a36Sopenharmony_ci * higher-level throttling. 188362306a36Sopenharmony_ci */ 188462306a36Sopenharmony_ci if (wbc->sync_mode != WB_SYNC_NONE) { 188562306a36Sopenharmony_ci lock_buffer(bh); 188662306a36Sopenharmony_ci } else if (!trylock_buffer(bh)) { 188762306a36Sopenharmony_ci folio_redirty_for_writepage(wbc, folio); 188862306a36Sopenharmony_ci continue; 188962306a36Sopenharmony_ci } 189062306a36Sopenharmony_ci if (test_clear_buffer_dirty(bh)) { 189162306a36Sopenharmony_ci mark_buffer_async_write_endio(bh, handler); 189262306a36Sopenharmony_ci } else { 189362306a36Sopenharmony_ci unlock_buffer(bh); 189462306a36Sopenharmony_ci } 189562306a36Sopenharmony_ci } while ((bh = bh->b_this_page) != head); 189662306a36Sopenharmony_ci 189762306a36Sopenharmony_ci /* 189862306a36Sopenharmony_ci * The folio and its buffers are protected by the writeback flag, 189962306a36Sopenharmony_ci * so we can drop the bh refcounts early. 
190062306a36Sopenharmony_ci */ 190162306a36Sopenharmony_ci BUG_ON(folio_test_writeback(folio)); 190262306a36Sopenharmony_ci folio_start_writeback(folio); 190362306a36Sopenharmony_ci 190462306a36Sopenharmony_ci do { 190562306a36Sopenharmony_ci struct buffer_head *next = bh->b_this_page; 190662306a36Sopenharmony_ci if (buffer_async_write(bh)) { 190762306a36Sopenharmony_ci submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc); 190862306a36Sopenharmony_ci nr_underway++; 190962306a36Sopenharmony_ci } 191062306a36Sopenharmony_ci bh = next; 191162306a36Sopenharmony_ci } while (bh != head); 191262306a36Sopenharmony_ci folio_unlock(folio); 191362306a36Sopenharmony_ci 191462306a36Sopenharmony_ci err = 0; 191562306a36Sopenharmony_cidone: 191662306a36Sopenharmony_ci if (nr_underway == 0) { 191762306a36Sopenharmony_ci /* 191862306a36Sopenharmony_ci * The folio was marked dirty, but the buffers were 191962306a36Sopenharmony_ci * clean. Someone wrote them back by hand with 192062306a36Sopenharmony_ci * write_dirty_buffer/submit_bh. A rare case. 192162306a36Sopenharmony_ci */ 192262306a36Sopenharmony_ci folio_end_writeback(folio); 192362306a36Sopenharmony_ci 192462306a36Sopenharmony_ci /* 192562306a36Sopenharmony_ci * The folio and buffer_heads can be released at any time from 192662306a36Sopenharmony_ci * here on. 192762306a36Sopenharmony_ci */ 192862306a36Sopenharmony_ci } 192962306a36Sopenharmony_ci return err; 193062306a36Sopenharmony_ci 193162306a36Sopenharmony_cirecover: 193262306a36Sopenharmony_ci /* 193362306a36Sopenharmony_ci * ENOSPC, or some other error. We may already have added some 193462306a36Sopenharmony_ci * blocks to the file, so we need to write these out to avoid 193562306a36Sopenharmony_ci * exposing stale data. 
193662306a36Sopenharmony_ci * The folio is currently locked and not marked for writeback 193762306a36Sopenharmony_ci */ 193862306a36Sopenharmony_ci bh = head; 193962306a36Sopenharmony_ci /* Recovery: lock and submit the mapped buffers */ 194062306a36Sopenharmony_ci do { 194162306a36Sopenharmony_ci if (buffer_mapped(bh) && buffer_dirty(bh) && 194262306a36Sopenharmony_ci !buffer_delay(bh)) { 194362306a36Sopenharmony_ci lock_buffer(bh); 194462306a36Sopenharmony_ci mark_buffer_async_write_endio(bh, handler); 194562306a36Sopenharmony_ci } else { 194662306a36Sopenharmony_ci /* 194762306a36Sopenharmony_ci * The buffer may have been set dirty during 194862306a36Sopenharmony_ci * attachment to a dirty folio. 194962306a36Sopenharmony_ci */ 195062306a36Sopenharmony_ci clear_buffer_dirty(bh); 195162306a36Sopenharmony_ci } 195262306a36Sopenharmony_ci } while ((bh = bh->b_this_page) != head); 195362306a36Sopenharmony_ci folio_set_error(folio); 195462306a36Sopenharmony_ci BUG_ON(folio_test_writeback(folio)); 195562306a36Sopenharmony_ci mapping_set_error(folio->mapping, err); 195662306a36Sopenharmony_ci folio_start_writeback(folio); 195762306a36Sopenharmony_ci do { 195862306a36Sopenharmony_ci struct buffer_head *next = bh->b_this_page; 195962306a36Sopenharmony_ci if (buffer_async_write(bh)) { 196062306a36Sopenharmony_ci clear_buffer_dirty(bh); 196162306a36Sopenharmony_ci submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc); 196262306a36Sopenharmony_ci nr_underway++; 196362306a36Sopenharmony_ci } 196462306a36Sopenharmony_ci bh = next; 196562306a36Sopenharmony_ci } while (bh != head); 196662306a36Sopenharmony_ci folio_unlock(folio); 196762306a36Sopenharmony_ci goto done; 196862306a36Sopenharmony_ci} 196962306a36Sopenharmony_ciEXPORT_SYMBOL(__block_write_full_folio); 197062306a36Sopenharmony_ci 197162306a36Sopenharmony_ci/* 197262306a36Sopenharmony_ci * If a folio has any new buffers, zero them out here, and mark them uptodate 197362306a36Sopenharmony_ci * and dirty so they'll be 
written out (in order to prevent uninitialised 197462306a36Sopenharmony_ci * block data from leaking). And clear the new bit. 197562306a36Sopenharmony_ci */ 197662306a36Sopenharmony_civoid folio_zero_new_buffers(struct folio *folio, size_t from, size_t to) 197762306a36Sopenharmony_ci{ 197862306a36Sopenharmony_ci size_t block_start, block_end; 197962306a36Sopenharmony_ci struct buffer_head *head, *bh; 198062306a36Sopenharmony_ci 198162306a36Sopenharmony_ci BUG_ON(!folio_test_locked(folio)); 198262306a36Sopenharmony_ci head = folio_buffers(folio); 198362306a36Sopenharmony_ci if (!head) 198462306a36Sopenharmony_ci return; 198562306a36Sopenharmony_ci 198662306a36Sopenharmony_ci bh = head; 198762306a36Sopenharmony_ci block_start = 0; 198862306a36Sopenharmony_ci do { 198962306a36Sopenharmony_ci block_end = block_start + bh->b_size; 199062306a36Sopenharmony_ci 199162306a36Sopenharmony_ci if (buffer_new(bh)) { 199262306a36Sopenharmony_ci if (block_end > from && block_start < to) { 199362306a36Sopenharmony_ci if (!folio_test_uptodate(folio)) { 199462306a36Sopenharmony_ci size_t start, xend; 199562306a36Sopenharmony_ci 199662306a36Sopenharmony_ci start = max(from, block_start); 199762306a36Sopenharmony_ci xend = min(to, block_end); 199862306a36Sopenharmony_ci 199962306a36Sopenharmony_ci folio_zero_segment(folio, start, xend); 200062306a36Sopenharmony_ci set_buffer_uptodate(bh); 200162306a36Sopenharmony_ci } 200262306a36Sopenharmony_ci 200362306a36Sopenharmony_ci clear_buffer_new(bh); 200462306a36Sopenharmony_ci mark_buffer_dirty(bh); 200562306a36Sopenharmony_ci } 200662306a36Sopenharmony_ci } 200762306a36Sopenharmony_ci 200862306a36Sopenharmony_ci block_start = block_end; 200962306a36Sopenharmony_ci bh = bh->b_this_page; 201062306a36Sopenharmony_ci } while (bh != head); 201162306a36Sopenharmony_ci} 201262306a36Sopenharmony_ciEXPORT_SYMBOL(folio_zero_new_buffers); 201362306a36Sopenharmony_ci 201462306a36Sopenharmony_cistatic int 201562306a36Sopenharmony_ciiomap_to_bh(struct 
inode *inode, sector_t block, struct buffer_head *bh, 201662306a36Sopenharmony_ci const struct iomap *iomap) 201762306a36Sopenharmony_ci{ 201862306a36Sopenharmony_ci loff_t offset = block << inode->i_blkbits; 201962306a36Sopenharmony_ci 202062306a36Sopenharmony_ci bh->b_bdev = iomap->bdev; 202162306a36Sopenharmony_ci 202262306a36Sopenharmony_ci /* 202362306a36Sopenharmony_ci * Block points to offset in file we need to map, iomap contains 202462306a36Sopenharmony_ci * the offset at which the map starts. If the map ends before the 202562306a36Sopenharmony_ci * current block, then do not map the buffer and let the caller 202662306a36Sopenharmony_ci * handle it. 202762306a36Sopenharmony_ci */ 202862306a36Sopenharmony_ci if (offset >= iomap->offset + iomap->length) 202962306a36Sopenharmony_ci return -EIO; 203062306a36Sopenharmony_ci 203162306a36Sopenharmony_ci switch (iomap->type) { 203262306a36Sopenharmony_ci case IOMAP_HOLE: 203362306a36Sopenharmony_ci /* 203462306a36Sopenharmony_ci * If the buffer is not up to date or beyond the current EOF, 203562306a36Sopenharmony_ci * we need to mark it as new to ensure sub-block zeroing is 203662306a36Sopenharmony_ci * executed if necessary. 
203762306a36Sopenharmony_ci */ 203862306a36Sopenharmony_ci if (!buffer_uptodate(bh) || 203962306a36Sopenharmony_ci (offset >= i_size_read(inode))) 204062306a36Sopenharmony_ci set_buffer_new(bh); 204162306a36Sopenharmony_ci return 0; 204262306a36Sopenharmony_ci case IOMAP_DELALLOC: 204362306a36Sopenharmony_ci if (!buffer_uptodate(bh) || 204462306a36Sopenharmony_ci (offset >= i_size_read(inode))) 204562306a36Sopenharmony_ci set_buffer_new(bh); 204662306a36Sopenharmony_ci set_buffer_uptodate(bh); 204762306a36Sopenharmony_ci set_buffer_mapped(bh); 204862306a36Sopenharmony_ci set_buffer_delay(bh); 204962306a36Sopenharmony_ci return 0; 205062306a36Sopenharmony_ci case IOMAP_UNWRITTEN: 205162306a36Sopenharmony_ci /* 205262306a36Sopenharmony_ci * For unwritten regions, we always need to ensure that regions 205362306a36Sopenharmony_ci * in the block we are not writing to are zeroed. Mark the 205462306a36Sopenharmony_ci * buffer as new to ensure this. 205562306a36Sopenharmony_ci */ 205662306a36Sopenharmony_ci set_buffer_new(bh); 205762306a36Sopenharmony_ci set_buffer_unwritten(bh); 205862306a36Sopenharmony_ci fallthrough; 205962306a36Sopenharmony_ci case IOMAP_MAPPED: 206062306a36Sopenharmony_ci if ((iomap->flags & IOMAP_F_NEW) || 206162306a36Sopenharmony_ci offset >= i_size_read(inode)) { 206262306a36Sopenharmony_ci /* 206362306a36Sopenharmony_ci * This can happen if truncating the block device races 206462306a36Sopenharmony_ci * with the check in the caller as i_size updates on 206562306a36Sopenharmony_ci * block devices aren't synchronized by i_rwsem for 206662306a36Sopenharmony_ci * block devices. 
206762306a36Sopenharmony_ci */ 206862306a36Sopenharmony_ci if (S_ISBLK(inode->i_mode)) 206962306a36Sopenharmony_ci return -EIO; 207062306a36Sopenharmony_ci set_buffer_new(bh); 207162306a36Sopenharmony_ci } 207262306a36Sopenharmony_ci bh->b_blocknr = (iomap->addr + offset - iomap->offset) >> 207362306a36Sopenharmony_ci inode->i_blkbits; 207462306a36Sopenharmony_ci set_buffer_mapped(bh); 207562306a36Sopenharmony_ci return 0; 207662306a36Sopenharmony_ci default: 207762306a36Sopenharmony_ci WARN_ON_ONCE(1); 207862306a36Sopenharmony_ci return -EIO; 207962306a36Sopenharmony_ci } 208062306a36Sopenharmony_ci} 208162306a36Sopenharmony_ci 208262306a36Sopenharmony_ciint __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len, 208362306a36Sopenharmony_ci get_block_t *get_block, const struct iomap *iomap) 208462306a36Sopenharmony_ci{ 208562306a36Sopenharmony_ci unsigned from = pos & (PAGE_SIZE - 1); 208662306a36Sopenharmony_ci unsigned to = from + len; 208762306a36Sopenharmony_ci struct inode *inode = folio->mapping->host; 208862306a36Sopenharmony_ci unsigned block_start, block_end; 208962306a36Sopenharmony_ci sector_t block; 209062306a36Sopenharmony_ci int err = 0; 209162306a36Sopenharmony_ci unsigned blocksize, bbits; 209262306a36Sopenharmony_ci struct buffer_head *bh, *head, *wait[2], **wait_bh=wait; 209362306a36Sopenharmony_ci 209462306a36Sopenharmony_ci BUG_ON(!folio_test_locked(folio)); 209562306a36Sopenharmony_ci BUG_ON(from > PAGE_SIZE); 209662306a36Sopenharmony_ci BUG_ON(to > PAGE_SIZE); 209762306a36Sopenharmony_ci BUG_ON(from > to); 209862306a36Sopenharmony_ci 209962306a36Sopenharmony_ci head = folio_create_buffers(folio, inode, 0); 210062306a36Sopenharmony_ci blocksize = head->b_size; 210162306a36Sopenharmony_ci bbits = block_size_bits(blocksize); 210262306a36Sopenharmony_ci 210362306a36Sopenharmony_ci block = (sector_t)folio->index << (PAGE_SHIFT - bbits); 210462306a36Sopenharmony_ci 210562306a36Sopenharmony_ci for(bh = head, block_start = 0; bh != 
head || !block_start; 210662306a36Sopenharmony_ci block++, block_start=block_end, bh = bh->b_this_page) { 210762306a36Sopenharmony_ci block_end = block_start + blocksize; 210862306a36Sopenharmony_ci if (block_end <= from || block_start >= to) { 210962306a36Sopenharmony_ci if (folio_test_uptodate(folio)) { 211062306a36Sopenharmony_ci if (!buffer_uptodate(bh)) 211162306a36Sopenharmony_ci set_buffer_uptodate(bh); 211262306a36Sopenharmony_ci } 211362306a36Sopenharmony_ci continue; 211462306a36Sopenharmony_ci } 211562306a36Sopenharmony_ci if (buffer_new(bh)) 211662306a36Sopenharmony_ci clear_buffer_new(bh); 211762306a36Sopenharmony_ci if (!buffer_mapped(bh)) { 211862306a36Sopenharmony_ci WARN_ON(bh->b_size != blocksize); 211962306a36Sopenharmony_ci if (get_block) 212062306a36Sopenharmony_ci err = get_block(inode, block, bh, 1); 212162306a36Sopenharmony_ci else 212262306a36Sopenharmony_ci err = iomap_to_bh(inode, block, bh, iomap); 212362306a36Sopenharmony_ci if (err) 212462306a36Sopenharmony_ci break; 212562306a36Sopenharmony_ci 212662306a36Sopenharmony_ci if (buffer_new(bh)) { 212762306a36Sopenharmony_ci clean_bdev_bh_alias(bh); 212862306a36Sopenharmony_ci if (folio_test_uptodate(folio)) { 212962306a36Sopenharmony_ci clear_buffer_new(bh); 213062306a36Sopenharmony_ci set_buffer_uptodate(bh); 213162306a36Sopenharmony_ci mark_buffer_dirty(bh); 213262306a36Sopenharmony_ci continue; 213362306a36Sopenharmony_ci } 213462306a36Sopenharmony_ci if (block_end > to || block_start < from) 213562306a36Sopenharmony_ci folio_zero_segments(folio, 213662306a36Sopenharmony_ci to, block_end, 213762306a36Sopenharmony_ci block_start, from); 213862306a36Sopenharmony_ci continue; 213962306a36Sopenharmony_ci } 214062306a36Sopenharmony_ci } 214162306a36Sopenharmony_ci if (folio_test_uptodate(folio)) { 214262306a36Sopenharmony_ci if (!buffer_uptodate(bh)) 214362306a36Sopenharmony_ci set_buffer_uptodate(bh); 214462306a36Sopenharmony_ci continue; 214562306a36Sopenharmony_ci } 
214662306a36Sopenharmony_ci if (!buffer_uptodate(bh) && !buffer_delay(bh) && 214762306a36Sopenharmony_ci !buffer_unwritten(bh) && 214862306a36Sopenharmony_ci (block_start < from || block_end > to)) { 214962306a36Sopenharmony_ci bh_read_nowait(bh, 0); 215062306a36Sopenharmony_ci *wait_bh++=bh; 215162306a36Sopenharmony_ci } 215262306a36Sopenharmony_ci } 215362306a36Sopenharmony_ci /* 215462306a36Sopenharmony_ci * If we issued read requests - let them complete. 215562306a36Sopenharmony_ci */ 215662306a36Sopenharmony_ci while(wait_bh > wait) { 215762306a36Sopenharmony_ci wait_on_buffer(*--wait_bh); 215862306a36Sopenharmony_ci if (!buffer_uptodate(*wait_bh)) 215962306a36Sopenharmony_ci err = -EIO; 216062306a36Sopenharmony_ci } 216162306a36Sopenharmony_ci if (unlikely(err)) 216262306a36Sopenharmony_ci folio_zero_new_buffers(folio, from, to); 216362306a36Sopenharmony_ci return err; 216462306a36Sopenharmony_ci} 216562306a36Sopenharmony_ci 216662306a36Sopenharmony_ciint __block_write_begin(struct page *page, loff_t pos, unsigned len, 216762306a36Sopenharmony_ci get_block_t *get_block) 216862306a36Sopenharmony_ci{ 216962306a36Sopenharmony_ci return __block_write_begin_int(page_folio(page), pos, len, get_block, 217062306a36Sopenharmony_ci NULL); 217162306a36Sopenharmony_ci} 217262306a36Sopenharmony_ciEXPORT_SYMBOL(__block_write_begin); 217362306a36Sopenharmony_ci 217462306a36Sopenharmony_cistatic void __block_commit_write(struct folio *folio, size_t from, size_t to) 217562306a36Sopenharmony_ci{ 217662306a36Sopenharmony_ci size_t block_start, block_end; 217762306a36Sopenharmony_ci bool partial = false; 217862306a36Sopenharmony_ci unsigned blocksize; 217962306a36Sopenharmony_ci struct buffer_head *bh, *head; 218062306a36Sopenharmony_ci 218162306a36Sopenharmony_ci bh = head = folio_buffers(folio); 218262306a36Sopenharmony_ci blocksize = bh->b_size; 218362306a36Sopenharmony_ci 218462306a36Sopenharmony_ci block_start = 0; 218562306a36Sopenharmony_ci do { 
218662306a36Sopenharmony_ci block_end = block_start + blocksize; 218762306a36Sopenharmony_ci if (block_end <= from || block_start >= to) { 218862306a36Sopenharmony_ci if (!buffer_uptodate(bh)) 218962306a36Sopenharmony_ci partial = true; 219062306a36Sopenharmony_ci } else { 219162306a36Sopenharmony_ci set_buffer_uptodate(bh); 219262306a36Sopenharmony_ci mark_buffer_dirty(bh); 219362306a36Sopenharmony_ci } 219462306a36Sopenharmony_ci if (buffer_new(bh)) 219562306a36Sopenharmony_ci clear_buffer_new(bh); 219662306a36Sopenharmony_ci 219762306a36Sopenharmony_ci block_start = block_end; 219862306a36Sopenharmony_ci bh = bh->b_this_page; 219962306a36Sopenharmony_ci } while (bh != head); 220062306a36Sopenharmony_ci 220162306a36Sopenharmony_ci /* 220262306a36Sopenharmony_ci * If this is a partial write which happened to make all buffers 220362306a36Sopenharmony_ci * uptodate then we can optimize away a bogus read_folio() for 220462306a36Sopenharmony_ci * the next read(). Here we 'discover' whether the folio went 220562306a36Sopenharmony_ci * uptodate as a result of this (potentially partial) write. 220662306a36Sopenharmony_ci */ 220762306a36Sopenharmony_ci if (!partial) 220862306a36Sopenharmony_ci folio_mark_uptodate(folio); 220962306a36Sopenharmony_ci} 221062306a36Sopenharmony_ci 221162306a36Sopenharmony_ci/* 221262306a36Sopenharmony_ci * block_write_begin takes care of the basic task of block allocation and 221362306a36Sopenharmony_ci * bringing partial write blocks uptodate first. 221462306a36Sopenharmony_ci * 221562306a36Sopenharmony_ci * The filesystem needs to handle block truncation upon failure. 
221662306a36Sopenharmony_ci */ 221762306a36Sopenharmony_ciint block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, 221862306a36Sopenharmony_ci struct page **pagep, get_block_t *get_block) 221962306a36Sopenharmony_ci{ 222062306a36Sopenharmony_ci pgoff_t index = pos >> PAGE_SHIFT; 222162306a36Sopenharmony_ci struct page *page; 222262306a36Sopenharmony_ci int status; 222362306a36Sopenharmony_ci 222462306a36Sopenharmony_ci page = grab_cache_page_write_begin(mapping, index); 222562306a36Sopenharmony_ci if (!page) 222662306a36Sopenharmony_ci return -ENOMEM; 222762306a36Sopenharmony_ci 222862306a36Sopenharmony_ci status = __block_write_begin(page, pos, len, get_block); 222962306a36Sopenharmony_ci if (unlikely(status)) { 223062306a36Sopenharmony_ci unlock_page(page); 223162306a36Sopenharmony_ci put_page(page); 223262306a36Sopenharmony_ci page = NULL; 223362306a36Sopenharmony_ci } 223462306a36Sopenharmony_ci 223562306a36Sopenharmony_ci *pagep = page; 223662306a36Sopenharmony_ci return status; 223762306a36Sopenharmony_ci} 223862306a36Sopenharmony_ciEXPORT_SYMBOL(block_write_begin); 223962306a36Sopenharmony_ci 224062306a36Sopenharmony_ciint block_write_end(struct file *file, struct address_space *mapping, 224162306a36Sopenharmony_ci loff_t pos, unsigned len, unsigned copied, 224262306a36Sopenharmony_ci struct page *page, void *fsdata) 224362306a36Sopenharmony_ci{ 224462306a36Sopenharmony_ci struct folio *folio = page_folio(page); 224562306a36Sopenharmony_ci size_t start = pos - folio_pos(folio); 224662306a36Sopenharmony_ci 224762306a36Sopenharmony_ci if (unlikely(copied < len)) { 224862306a36Sopenharmony_ci /* 224962306a36Sopenharmony_ci * The buffers that were written will now be uptodate, so 225062306a36Sopenharmony_ci * we don't have to worry about a read_folio reading them 225162306a36Sopenharmony_ci * and overwriting a partial write. 
However if we have 225262306a36Sopenharmony_ci * encountered a short write and only partially written 225362306a36Sopenharmony_ci * into a buffer, it will not be marked uptodate, so a 225462306a36Sopenharmony_ci * read_folio might come in and destroy our partial write. 225562306a36Sopenharmony_ci * 225662306a36Sopenharmony_ci * Do the simplest thing, and just treat any short write to a 225762306a36Sopenharmony_ci * non uptodate folio as a zero-length write, and force the 225862306a36Sopenharmony_ci * caller to redo the whole thing. 225962306a36Sopenharmony_ci */ 226062306a36Sopenharmony_ci if (!folio_test_uptodate(folio)) 226162306a36Sopenharmony_ci copied = 0; 226262306a36Sopenharmony_ci 226362306a36Sopenharmony_ci folio_zero_new_buffers(folio, start+copied, start+len); 226462306a36Sopenharmony_ci } 226562306a36Sopenharmony_ci flush_dcache_folio(folio); 226662306a36Sopenharmony_ci 226762306a36Sopenharmony_ci /* This could be a short (even 0-length) commit */ 226862306a36Sopenharmony_ci __block_commit_write(folio, start, start + copied); 226962306a36Sopenharmony_ci 227062306a36Sopenharmony_ci return copied; 227162306a36Sopenharmony_ci} 227262306a36Sopenharmony_ciEXPORT_SYMBOL(block_write_end); 227362306a36Sopenharmony_ci 227462306a36Sopenharmony_ciint generic_write_end(struct file *file, struct address_space *mapping, 227562306a36Sopenharmony_ci loff_t pos, unsigned len, unsigned copied, 227662306a36Sopenharmony_ci struct page *page, void *fsdata) 227762306a36Sopenharmony_ci{ 227862306a36Sopenharmony_ci struct inode *inode = mapping->host; 227962306a36Sopenharmony_ci loff_t old_size = inode->i_size; 228062306a36Sopenharmony_ci bool i_size_changed = false; 228162306a36Sopenharmony_ci 228262306a36Sopenharmony_ci copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); 228362306a36Sopenharmony_ci 228462306a36Sopenharmony_ci /* 228562306a36Sopenharmony_ci * No need to use i_size_read() here, the i_size cannot change under us 228662306a36Sopenharmony_ci 
* because we hold i_rwsem. 228762306a36Sopenharmony_ci * 228862306a36Sopenharmony_ci * But it's important to update i_size while still holding page lock: 228962306a36Sopenharmony_ci * page writeout could otherwise come in and zero beyond i_size. 229062306a36Sopenharmony_ci */ 229162306a36Sopenharmony_ci if (pos + copied > inode->i_size) { 229262306a36Sopenharmony_ci i_size_write(inode, pos + copied); 229362306a36Sopenharmony_ci i_size_changed = true; 229462306a36Sopenharmony_ci } 229562306a36Sopenharmony_ci 229662306a36Sopenharmony_ci unlock_page(page); 229762306a36Sopenharmony_ci put_page(page); 229862306a36Sopenharmony_ci 229962306a36Sopenharmony_ci if (old_size < pos) 230062306a36Sopenharmony_ci pagecache_isize_extended(inode, old_size, pos); 230162306a36Sopenharmony_ci /* 230262306a36Sopenharmony_ci * Don't mark the inode dirty under page lock. First, it unnecessarily 230362306a36Sopenharmony_ci * makes the holding time of page lock longer. Second, it forces lock 230462306a36Sopenharmony_ci * ordering of page lock and transaction start for journaling 230562306a36Sopenharmony_ci * filesystems. 230662306a36Sopenharmony_ci */ 230762306a36Sopenharmony_ci if (i_size_changed) 230862306a36Sopenharmony_ci mark_inode_dirty(inode); 230962306a36Sopenharmony_ci return copied; 231062306a36Sopenharmony_ci} 231162306a36Sopenharmony_ciEXPORT_SYMBOL(generic_write_end); 231262306a36Sopenharmony_ci 231362306a36Sopenharmony_ci/* 231462306a36Sopenharmony_ci * block_is_partially_uptodate checks whether buffers within a folio are 231562306a36Sopenharmony_ci * uptodate or not. 231662306a36Sopenharmony_ci * 231762306a36Sopenharmony_ci * Returns true if all buffers which correspond to the specified part 231862306a36Sopenharmony_ci * of the folio are uptodate. 
231962306a36Sopenharmony_ci */ 232062306a36Sopenharmony_cibool block_is_partially_uptodate(struct folio *folio, size_t from, size_t count) 232162306a36Sopenharmony_ci{ 232262306a36Sopenharmony_ci unsigned block_start, block_end, blocksize; 232362306a36Sopenharmony_ci unsigned to; 232462306a36Sopenharmony_ci struct buffer_head *bh, *head; 232562306a36Sopenharmony_ci bool ret = true; 232662306a36Sopenharmony_ci 232762306a36Sopenharmony_ci head = folio_buffers(folio); 232862306a36Sopenharmony_ci if (!head) 232962306a36Sopenharmony_ci return false; 233062306a36Sopenharmony_ci blocksize = head->b_size; 233162306a36Sopenharmony_ci to = min_t(unsigned, folio_size(folio) - from, count); 233262306a36Sopenharmony_ci to = from + to; 233362306a36Sopenharmony_ci if (from < blocksize && to > folio_size(folio) - blocksize) 233462306a36Sopenharmony_ci return false; 233562306a36Sopenharmony_ci 233662306a36Sopenharmony_ci bh = head; 233762306a36Sopenharmony_ci block_start = 0; 233862306a36Sopenharmony_ci do { 233962306a36Sopenharmony_ci block_end = block_start + blocksize; 234062306a36Sopenharmony_ci if (block_end > from && block_start < to) { 234162306a36Sopenharmony_ci if (!buffer_uptodate(bh)) { 234262306a36Sopenharmony_ci ret = false; 234362306a36Sopenharmony_ci break; 234462306a36Sopenharmony_ci } 234562306a36Sopenharmony_ci if (block_end >= to) 234662306a36Sopenharmony_ci break; 234762306a36Sopenharmony_ci } 234862306a36Sopenharmony_ci block_start = block_end; 234962306a36Sopenharmony_ci bh = bh->b_this_page; 235062306a36Sopenharmony_ci } while (bh != head); 235162306a36Sopenharmony_ci 235262306a36Sopenharmony_ci return ret; 235362306a36Sopenharmony_ci} 235462306a36Sopenharmony_ciEXPORT_SYMBOL(block_is_partially_uptodate); 235562306a36Sopenharmony_ci 235662306a36Sopenharmony_ci/* 235762306a36Sopenharmony_ci * Generic "read_folio" function for block devices that have the normal 235862306a36Sopenharmony_ci * get_block functionality. This is most of the block device filesystems. 
235962306a36Sopenharmony_ci * Reads the folio asynchronously --- the unlock_buffer() and 236062306a36Sopenharmony_ci * set/clear_buffer_uptodate() functions propagate buffer state into the 236162306a36Sopenharmony_ci * folio once IO has completed. 236262306a36Sopenharmony_ci */ 236362306a36Sopenharmony_ciint block_read_full_folio(struct folio *folio, get_block_t *get_block) 236462306a36Sopenharmony_ci{ 236562306a36Sopenharmony_ci struct inode *inode = folio->mapping->host; 236662306a36Sopenharmony_ci sector_t iblock, lblock; 236762306a36Sopenharmony_ci struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; 236862306a36Sopenharmony_ci unsigned int blocksize, bbits; 236962306a36Sopenharmony_ci int nr, i; 237062306a36Sopenharmony_ci int fully_mapped = 1; 237162306a36Sopenharmony_ci bool page_error = false; 237262306a36Sopenharmony_ci loff_t limit = i_size_read(inode); 237362306a36Sopenharmony_ci 237462306a36Sopenharmony_ci /* This is needed for ext4. */ 237562306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode)) 237662306a36Sopenharmony_ci limit = inode->i_sb->s_maxbytes; 237762306a36Sopenharmony_ci 237862306a36Sopenharmony_ci VM_BUG_ON_FOLIO(folio_test_large(folio), folio); 237962306a36Sopenharmony_ci 238062306a36Sopenharmony_ci head = folio_create_buffers(folio, inode, 0); 238162306a36Sopenharmony_ci blocksize = head->b_size; 238262306a36Sopenharmony_ci bbits = block_size_bits(blocksize); 238362306a36Sopenharmony_ci 238462306a36Sopenharmony_ci iblock = (sector_t)folio->index << (PAGE_SHIFT - bbits); 238562306a36Sopenharmony_ci lblock = (limit+blocksize-1) >> bbits; 238662306a36Sopenharmony_ci bh = head; 238762306a36Sopenharmony_ci nr = 0; 238862306a36Sopenharmony_ci i = 0; 238962306a36Sopenharmony_ci 239062306a36Sopenharmony_ci do { 239162306a36Sopenharmony_ci if (buffer_uptodate(bh)) 239262306a36Sopenharmony_ci continue; 239362306a36Sopenharmony_ci 239462306a36Sopenharmony_ci if (!buffer_mapped(bh)) { 239562306a36Sopenharmony_ci int err = 0; 
239662306a36Sopenharmony_ci 239762306a36Sopenharmony_ci fully_mapped = 0; 239862306a36Sopenharmony_ci if (iblock < lblock) { 239962306a36Sopenharmony_ci WARN_ON(bh->b_size != blocksize); 240062306a36Sopenharmony_ci err = get_block(inode, iblock, bh, 0); 240162306a36Sopenharmony_ci if (err) { 240262306a36Sopenharmony_ci folio_set_error(folio); 240362306a36Sopenharmony_ci page_error = true; 240462306a36Sopenharmony_ci } 240562306a36Sopenharmony_ci } 240662306a36Sopenharmony_ci if (!buffer_mapped(bh)) { 240762306a36Sopenharmony_ci folio_zero_range(folio, i * blocksize, 240862306a36Sopenharmony_ci blocksize); 240962306a36Sopenharmony_ci if (!err) 241062306a36Sopenharmony_ci set_buffer_uptodate(bh); 241162306a36Sopenharmony_ci continue; 241262306a36Sopenharmony_ci } 241362306a36Sopenharmony_ci /* 241462306a36Sopenharmony_ci * get_block() might have updated the buffer 241562306a36Sopenharmony_ci * synchronously 241662306a36Sopenharmony_ci */ 241762306a36Sopenharmony_ci if (buffer_uptodate(bh)) 241862306a36Sopenharmony_ci continue; 241962306a36Sopenharmony_ci } 242062306a36Sopenharmony_ci arr[nr++] = bh; 242162306a36Sopenharmony_ci } while (i++, iblock++, (bh = bh->b_this_page) != head); 242262306a36Sopenharmony_ci 242362306a36Sopenharmony_ci if (fully_mapped) 242462306a36Sopenharmony_ci folio_set_mappedtodisk(folio); 242562306a36Sopenharmony_ci 242662306a36Sopenharmony_ci if (!nr) { 242762306a36Sopenharmony_ci /* 242862306a36Sopenharmony_ci * All buffers are uptodate - we can set the folio uptodate 242962306a36Sopenharmony_ci * as well. But not if get_block() returned an error. 
243062306a36Sopenharmony_ci */ 243162306a36Sopenharmony_ci if (!page_error) 243262306a36Sopenharmony_ci folio_mark_uptodate(folio); 243362306a36Sopenharmony_ci folio_unlock(folio); 243462306a36Sopenharmony_ci return 0; 243562306a36Sopenharmony_ci } 243662306a36Sopenharmony_ci 243762306a36Sopenharmony_ci /* Stage two: lock the buffers */ 243862306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 243962306a36Sopenharmony_ci bh = arr[i]; 244062306a36Sopenharmony_ci lock_buffer(bh); 244162306a36Sopenharmony_ci mark_buffer_async_read(bh); 244262306a36Sopenharmony_ci } 244362306a36Sopenharmony_ci 244462306a36Sopenharmony_ci /* 244562306a36Sopenharmony_ci * Stage 3: start the IO. Check for uptodateness 244662306a36Sopenharmony_ci * inside the buffer lock in case another process reading 244762306a36Sopenharmony_ci * the underlying blockdev brought it uptodate (the sct fix). 244862306a36Sopenharmony_ci */ 244962306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 245062306a36Sopenharmony_ci bh = arr[i]; 245162306a36Sopenharmony_ci if (buffer_uptodate(bh)) 245262306a36Sopenharmony_ci end_buffer_async_read(bh, 1); 245362306a36Sopenharmony_ci else 245462306a36Sopenharmony_ci submit_bh(REQ_OP_READ, bh); 245562306a36Sopenharmony_ci } 245662306a36Sopenharmony_ci return 0; 245762306a36Sopenharmony_ci} 245862306a36Sopenharmony_ciEXPORT_SYMBOL(block_read_full_folio); 245962306a36Sopenharmony_ci 246062306a36Sopenharmony_ci/* utility function for filesystems that need to do work on expanding 246162306a36Sopenharmony_ci * truncates. Uses filesystem pagecache writes to allow the filesystem to 246262306a36Sopenharmony_ci * deal with the hole. 
246362306a36Sopenharmony_ci */ 246462306a36Sopenharmony_ciint generic_cont_expand_simple(struct inode *inode, loff_t size) 246562306a36Sopenharmony_ci{ 246662306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 246762306a36Sopenharmony_ci const struct address_space_operations *aops = mapping->a_ops; 246862306a36Sopenharmony_ci struct page *page; 246962306a36Sopenharmony_ci void *fsdata = NULL; 247062306a36Sopenharmony_ci int err; 247162306a36Sopenharmony_ci 247262306a36Sopenharmony_ci err = inode_newsize_ok(inode, size); 247362306a36Sopenharmony_ci if (err) 247462306a36Sopenharmony_ci goto out; 247562306a36Sopenharmony_ci 247662306a36Sopenharmony_ci err = aops->write_begin(NULL, mapping, size, 0, &page, &fsdata); 247762306a36Sopenharmony_ci if (err) 247862306a36Sopenharmony_ci goto out; 247962306a36Sopenharmony_ci 248062306a36Sopenharmony_ci err = aops->write_end(NULL, mapping, size, 0, 0, page, fsdata); 248162306a36Sopenharmony_ci BUG_ON(err > 0); 248262306a36Sopenharmony_ci 248362306a36Sopenharmony_ciout: 248462306a36Sopenharmony_ci return err; 248562306a36Sopenharmony_ci} 248662306a36Sopenharmony_ciEXPORT_SYMBOL(generic_cont_expand_simple); 248762306a36Sopenharmony_ci 248862306a36Sopenharmony_cistatic int cont_expand_zero(struct file *file, struct address_space *mapping, 248962306a36Sopenharmony_ci loff_t pos, loff_t *bytes) 249062306a36Sopenharmony_ci{ 249162306a36Sopenharmony_ci struct inode *inode = mapping->host; 249262306a36Sopenharmony_ci const struct address_space_operations *aops = mapping->a_ops; 249362306a36Sopenharmony_ci unsigned int blocksize = i_blocksize(inode); 249462306a36Sopenharmony_ci struct page *page; 249562306a36Sopenharmony_ci void *fsdata = NULL; 249662306a36Sopenharmony_ci pgoff_t index, curidx; 249762306a36Sopenharmony_ci loff_t curpos; 249862306a36Sopenharmony_ci unsigned zerofrom, offset, len; 249962306a36Sopenharmony_ci int err = 0; 250062306a36Sopenharmony_ci 250162306a36Sopenharmony_ci index = pos >> PAGE_SHIFT; 
250262306a36Sopenharmony_ci offset = pos & ~PAGE_MASK; 250362306a36Sopenharmony_ci 250462306a36Sopenharmony_ci while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) { 250562306a36Sopenharmony_ci zerofrom = curpos & ~PAGE_MASK; 250662306a36Sopenharmony_ci if (zerofrom & (blocksize-1)) { 250762306a36Sopenharmony_ci *bytes |= (blocksize-1); 250862306a36Sopenharmony_ci (*bytes)++; 250962306a36Sopenharmony_ci } 251062306a36Sopenharmony_ci len = PAGE_SIZE - zerofrom; 251162306a36Sopenharmony_ci 251262306a36Sopenharmony_ci err = aops->write_begin(file, mapping, curpos, len, 251362306a36Sopenharmony_ci &page, &fsdata); 251462306a36Sopenharmony_ci if (err) 251562306a36Sopenharmony_ci goto out; 251662306a36Sopenharmony_ci zero_user(page, zerofrom, len); 251762306a36Sopenharmony_ci err = aops->write_end(file, mapping, curpos, len, len, 251862306a36Sopenharmony_ci page, fsdata); 251962306a36Sopenharmony_ci if (err < 0) 252062306a36Sopenharmony_ci goto out; 252162306a36Sopenharmony_ci BUG_ON(err != len); 252262306a36Sopenharmony_ci err = 0; 252362306a36Sopenharmony_ci 252462306a36Sopenharmony_ci balance_dirty_pages_ratelimited(mapping); 252562306a36Sopenharmony_ci 252662306a36Sopenharmony_ci if (fatal_signal_pending(current)) { 252762306a36Sopenharmony_ci err = -EINTR; 252862306a36Sopenharmony_ci goto out; 252962306a36Sopenharmony_ci } 253062306a36Sopenharmony_ci } 253162306a36Sopenharmony_ci 253262306a36Sopenharmony_ci /* page covers the boundary, find the boundary offset */ 253362306a36Sopenharmony_ci if (index == curidx) { 253462306a36Sopenharmony_ci zerofrom = curpos & ~PAGE_MASK; 253562306a36Sopenharmony_ci /* if we will expand the thing last block will be filled */ 253662306a36Sopenharmony_ci if (offset <= zerofrom) { 253762306a36Sopenharmony_ci goto out; 253862306a36Sopenharmony_ci } 253962306a36Sopenharmony_ci if (zerofrom & (blocksize-1)) { 254062306a36Sopenharmony_ci *bytes |= (blocksize-1); 254162306a36Sopenharmony_ci (*bytes)++; 254262306a36Sopenharmony_ci } 
254362306a36Sopenharmony_ci len = offset - zerofrom; 254462306a36Sopenharmony_ci 254562306a36Sopenharmony_ci err = aops->write_begin(file, mapping, curpos, len, 254662306a36Sopenharmony_ci &page, &fsdata); 254762306a36Sopenharmony_ci if (err) 254862306a36Sopenharmony_ci goto out; 254962306a36Sopenharmony_ci zero_user(page, zerofrom, len); 255062306a36Sopenharmony_ci err = aops->write_end(file, mapping, curpos, len, len, 255162306a36Sopenharmony_ci page, fsdata); 255262306a36Sopenharmony_ci if (err < 0) 255362306a36Sopenharmony_ci goto out; 255462306a36Sopenharmony_ci BUG_ON(err != len); 255562306a36Sopenharmony_ci err = 0; 255662306a36Sopenharmony_ci } 255762306a36Sopenharmony_ciout: 255862306a36Sopenharmony_ci return err; 255962306a36Sopenharmony_ci} 256062306a36Sopenharmony_ci 256162306a36Sopenharmony_ci/* 256262306a36Sopenharmony_ci * For moronic filesystems that do not allow holes in file. 256362306a36Sopenharmony_ci * We may have to extend the file. 256462306a36Sopenharmony_ci */ 256562306a36Sopenharmony_ciint cont_write_begin(struct file *file, struct address_space *mapping, 256662306a36Sopenharmony_ci loff_t pos, unsigned len, 256762306a36Sopenharmony_ci struct page **pagep, void **fsdata, 256862306a36Sopenharmony_ci get_block_t *get_block, loff_t *bytes) 256962306a36Sopenharmony_ci{ 257062306a36Sopenharmony_ci struct inode *inode = mapping->host; 257162306a36Sopenharmony_ci unsigned int blocksize = i_blocksize(inode); 257262306a36Sopenharmony_ci unsigned int zerofrom; 257362306a36Sopenharmony_ci int err; 257462306a36Sopenharmony_ci 257562306a36Sopenharmony_ci err = cont_expand_zero(file, mapping, pos, bytes); 257662306a36Sopenharmony_ci if (err) 257762306a36Sopenharmony_ci return err; 257862306a36Sopenharmony_ci 257962306a36Sopenharmony_ci zerofrom = *bytes & ~PAGE_MASK; 258062306a36Sopenharmony_ci if (pos+len > *bytes && zerofrom & (blocksize-1)) { 258162306a36Sopenharmony_ci *bytes |= (blocksize-1); 258262306a36Sopenharmony_ci (*bytes)++; 
258362306a36Sopenharmony_ci } 258462306a36Sopenharmony_ci 258562306a36Sopenharmony_ci return block_write_begin(mapping, pos, len, pagep, get_block); 258662306a36Sopenharmony_ci} 258762306a36Sopenharmony_ciEXPORT_SYMBOL(cont_write_begin); 258862306a36Sopenharmony_ci 258962306a36Sopenharmony_civoid block_commit_write(struct page *page, unsigned from, unsigned to) 259062306a36Sopenharmony_ci{ 259162306a36Sopenharmony_ci struct folio *folio = page_folio(page); 259262306a36Sopenharmony_ci __block_commit_write(folio, from, to); 259362306a36Sopenharmony_ci} 259462306a36Sopenharmony_ciEXPORT_SYMBOL(block_commit_write); 259562306a36Sopenharmony_ci 259662306a36Sopenharmony_ci/* 259762306a36Sopenharmony_ci * block_page_mkwrite() is not allowed to change the file size as it gets 259862306a36Sopenharmony_ci * called from a page fault handler when a page is first dirtied. Hence we must 259962306a36Sopenharmony_ci * be careful to check for EOF conditions here. We set the page up correctly 260062306a36Sopenharmony_ci * for a written page which means we get ENOSPC checking when writing into 260162306a36Sopenharmony_ci * holes and correct delalloc and unwritten extent mapping on filesystems that 260262306a36Sopenharmony_ci * support these features. 260362306a36Sopenharmony_ci * 260462306a36Sopenharmony_ci * We are not allowed to take the i_mutex here so we have to play games to 260562306a36Sopenharmony_ci * protect against truncate races as the page could now be beyond EOF. Because 260662306a36Sopenharmony_ci * truncate writes the inode size before removing pages, once we have the 260762306a36Sopenharmony_ci * page lock we can determine safely if the page is beyond EOF. If it is not 260862306a36Sopenharmony_ci * beyond EOF, then the page is guaranteed safe against truncation until we 260962306a36Sopenharmony_ci * unlock the page. 
261062306a36Sopenharmony_ci * 261162306a36Sopenharmony_ci * Direct callers of this function should protect against filesystem freezing 261262306a36Sopenharmony_ci * using sb_start_pagefault() - sb_end_pagefault() functions. 261362306a36Sopenharmony_ci */ 261462306a36Sopenharmony_ciint block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, 261562306a36Sopenharmony_ci get_block_t get_block) 261662306a36Sopenharmony_ci{ 261762306a36Sopenharmony_ci struct folio *folio = page_folio(vmf->page); 261862306a36Sopenharmony_ci struct inode *inode = file_inode(vma->vm_file); 261962306a36Sopenharmony_ci unsigned long end; 262062306a36Sopenharmony_ci loff_t size; 262162306a36Sopenharmony_ci int ret; 262262306a36Sopenharmony_ci 262362306a36Sopenharmony_ci folio_lock(folio); 262462306a36Sopenharmony_ci size = i_size_read(inode); 262562306a36Sopenharmony_ci if ((folio->mapping != inode->i_mapping) || 262662306a36Sopenharmony_ci (folio_pos(folio) >= size)) { 262762306a36Sopenharmony_ci /* We overload EFAULT to mean page got truncated */ 262862306a36Sopenharmony_ci ret = -EFAULT; 262962306a36Sopenharmony_ci goto out_unlock; 263062306a36Sopenharmony_ci } 263162306a36Sopenharmony_ci 263262306a36Sopenharmony_ci end = folio_size(folio); 263362306a36Sopenharmony_ci /* folio is wholly or partially inside EOF */ 263462306a36Sopenharmony_ci if (folio_pos(folio) + end > size) 263562306a36Sopenharmony_ci end = size - folio_pos(folio); 263662306a36Sopenharmony_ci 263762306a36Sopenharmony_ci ret = __block_write_begin_int(folio, 0, end, get_block, NULL); 263862306a36Sopenharmony_ci if (unlikely(ret)) 263962306a36Sopenharmony_ci goto out_unlock; 264062306a36Sopenharmony_ci 264162306a36Sopenharmony_ci __block_commit_write(folio, 0, end); 264262306a36Sopenharmony_ci 264362306a36Sopenharmony_ci folio_mark_dirty(folio); 264462306a36Sopenharmony_ci folio_wait_stable(folio); 264562306a36Sopenharmony_ci return 0; 264662306a36Sopenharmony_ciout_unlock: 264762306a36Sopenharmony_ci 
folio_unlock(folio); 264862306a36Sopenharmony_ci return ret; 264962306a36Sopenharmony_ci} 265062306a36Sopenharmony_ciEXPORT_SYMBOL(block_page_mkwrite); 265162306a36Sopenharmony_ci 265262306a36Sopenharmony_ciint block_truncate_page(struct address_space *mapping, 265362306a36Sopenharmony_ci loff_t from, get_block_t *get_block) 265462306a36Sopenharmony_ci{ 265562306a36Sopenharmony_ci pgoff_t index = from >> PAGE_SHIFT; 265662306a36Sopenharmony_ci unsigned blocksize; 265762306a36Sopenharmony_ci sector_t iblock; 265862306a36Sopenharmony_ci size_t offset, length, pos; 265962306a36Sopenharmony_ci struct inode *inode = mapping->host; 266062306a36Sopenharmony_ci struct folio *folio; 266162306a36Sopenharmony_ci struct buffer_head *bh; 266262306a36Sopenharmony_ci int err = 0; 266362306a36Sopenharmony_ci 266462306a36Sopenharmony_ci blocksize = i_blocksize(inode); 266562306a36Sopenharmony_ci length = from & (blocksize - 1); 266662306a36Sopenharmony_ci 266762306a36Sopenharmony_ci /* Block boundary? Nothing to do */ 266862306a36Sopenharmony_ci if (!length) 266962306a36Sopenharmony_ci return 0; 267062306a36Sopenharmony_ci 267162306a36Sopenharmony_ci length = blocksize - length; 267262306a36Sopenharmony_ci iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits); 267362306a36Sopenharmony_ci 267462306a36Sopenharmony_ci folio = filemap_grab_folio(mapping, index); 267562306a36Sopenharmony_ci if (IS_ERR(folio)) 267662306a36Sopenharmony_ci return PTR_ERR(folio); 267762306a36Sopenharmony_ci 267862306a36Sopenharmony_ci bh = folio_buffers(folio); 267962306a36Sopenharmony_ci if (!bh) { 268062306a36Sopenharmony_ci folio_create_empty_buffers(folio, blocksize, 0); 268162306a36Sopenharmony_ci bh = folio_buffers(folio); 268262306a36Sopenharmony_ci } 268362306a36Sopenharmony_ci 268462306a36Sopenharmony_ci /* Find the buffer that contains "offset" */ 268562306a36Sopenharmony_ci offset = offset_in_folio(folio, from); 268662306a36Sopenharmony_ci pos = blocksize; 268762306a36Sopenharmony_ci while 
(offset >= pos) { 268862306a36Sopenharmony_ci bh = bh->b_this_page; 268962306a36Sopenharmony_ci iblock++; 269062306a36Sopenharmony_ci pos += blocksize; 269162306a36Sopenharmony_ci } 269262306a36Sopenharmony_ci 269362306a36Sopenharmony_ci if (!buffer_mapped(bh)) { 269462306a36Sopenharmony_ci WARN_ON(bh->b_size != blocksize); 269562306a36Sopenharmony_ci err = get_block(inode, iblock, bh, 0); 269662306a36Sopenharmony_ci if (err) 269762306a36Sopenharmony_ci goto unlock; 269862306a36Sopenharmony_ci /* unmapped? It's a hole - nothing to do */ 269962306a36Sopenharmony_ci if (!buffer_mapped(bh)) 270062306a36Sopenharmony_ci goto unlock; 270162306a36Sopenharmony_ci } 270262306a36Sopenharmony_ci 270362306a36Sopenharmony_ci /* Ok, it's mapped. Make sure it's up-to-date */ 270462306a36Sopenharmony_ci if (folio_test_uptodate(folio)) 270562306a36Sopenharmony_ci set_buffer_uptodate(bh); 270662306a36Sopenharmony_ci 270762306a36Sopenharmony_ci if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) { 270862306a36Sopenharmony_ci err = bh_read(bh, 0); 270962306a36Sopenharmony_ci /* Uhhuh. Read error. Complain and punt. 
*/ 271062306a36Sopenharmony_ci if (err < 0) 271162306a36Sopenharmony_ci goto unlock; 271262306a36Sopenharmony_ci } 271362306a36Sopenharmony_ci 271462306a36Sopenharmony_ci folio_zero_range(folio, offset, length); 271562306a36Sopenharmony_ci mark_buffer_dirty(bh); 271662306a36Sopenharmony_ci 271762306a36Sopenharmony_ciunlock: 271862306a36Sopenharmony_ci folio_unlock(folio); 271962306a36Sopenharmony_ci folio_put(folio); 272062306a36Sopenharmony_ci 272162306a36Sopenharmony_ci return err; 272262306a36Sopenharmony_ci} 272362306a36Sopenharmony_ciEXPORT_SYMBOL(block_truncate_page); 272462306a36Sopenharmony_ci 272562306a36Sopenharmony_ci/* 272662306a36Sopenharmony_ci * The generic ->writepage function for buffer-backed address_spaces 272762306a36Sopenharmony_ci */ 272862306a36Sopenharmony_ciint block_write_full_page(struct page *page, get_block_t *get_block, 272962306a36Sopenharmony_ci struct writeback_control *wbc) 273062306a36Sopenharmony_ci{ 273162306a36Sopenharmony_ci struct folio *folio = page_folio(page); 273262306a36Sopenharmony_ci struct inode * const inode = folio->mapping->host; 273362306a36Sopenharmony_ci loff_t i_size = i_size_read(inode); 273462306a36Sopenharmony_ci 273562306a36Sopenharmony_ci /* Is the folio fully inside i_size? */ 273662306a36Sopenharmony_ci if (folio_pos(folio) + folio_size(folio) <= i_size) 273762306a36Sopenharmony_ci return __block_write_full_folio(inode, folio, get_block, wbc, 273862306a36Sopenharmony_ci end_buffer_async_write); 273962306a36Sopenharmony_ci 274062306a36Sopenharmony_ci /* Is the folio fully outside i_size? (truncate in progress) */ 274162306a36Sopenharmony_ci if (folio_pos(folio) >= i_size) { 274262306a36Sopenharmony_ci folio_unlock(folio); 274362306a36Sopenharmony_ci return 0; /* don't care */ 274462306a36Sopenharmony_ci } 274562306a36Sopenharmony_ci 274662306a36Sopenharmony_ci /* 274762306a36Sopenharmony_ci * The folio straddles i_size. 
It must be zeroed out on each and every 274862306a36Sopenharmony_ci * writepage invocation because it may be mmapped. "A file is mapped 274962306a36Sopenharmony_ci * in multiples of the page size. For a file that is not a multiple of 275062306a36Sopenharmony_ci * the page size, the remaining memory is zeroed when mapped, and 275162306a36Sopenharmony_ci * writes to that region are not written out to the file." 275262306a36Sopenharmony_ci */ 275362306a36Sopenharmony_ci folio_zero_segment(folio, offset_in_folio(folio, i_size), 275462306a36Sopenharmony_ci folio_size(folio)); 275562306a36Sopenharmony_ci return __block_write_full_folio(inode, folio, get_block, wbc, 275662306a36Sopenharmony_ci end_buffer_async_write); 275762306a36Sopenharmony_ci} 275862306a36Sopenharmony_ciEXPORT_SYMBOL(block_write_full_page); 275962306a36Sopenharmony_ci 276062306a36Sopenharmony_cisector_t generic_block_bmap(struct address_space *mapping, sector_t block, 276162306a36Sopenharmony_ci get_block_t *get_block) 276262306a36Sopenharmony_ci{ 276362306a36Sopenharmony_ci struct inode *inode = mapping->host; 276462306a36Sopenharmony_ci struct buffer_head tmp = { 276562306a36Sopenharmony_ci .b_size = i_blocksize(inode), 276662306a36Sopenharmony_ci }; 276762306a36Sopenharmony_ci 276862306a36Sopenharmony_ci get_block(inode, block, &tmp, 0); 276962306a36Sopenharmony_ci return tmp.b_blocknr; 277062306a36Sopenharmony_ci} 277162306a36Sopenharmony_ciEXPORT_SYMBOL(generic_block_bmap); 277262306a36Sopenharmony_ci 277362306a36Sopenharmony_cistatic void end_bio_bh_io_sync(struct bio *bio) 277462306a36Sopenharmony_ci{ 277562306a36Sopenharmony_ci struct buffer_head *bh = bio->bi_private; 277662306a36Sopenharmony_ci 277762306a36Sopenharmony_ci if (unlikely(bio_flagged(bio, BIO_QUIET))) 277862306a36Sopenharmony_ci set_bit(BH_Quiet, &bh->b_state); 277962306a36Sopenharmony_ci 278062306a36Sopenharmony_ci bh->b_end_io(bh, !bio->bi_status); 278162306a36Sopenharmony_ci bio_put(bio); 278262306a36Sopenharmony_ci} 
278362306a36Sopenharmony_ci 278462306a36Sopenharmony_cistatic void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, 278562306a36Sopenharmony_ci struct writeback_control *wbc) 278662306a36Sopenharmony_ci{ 278762306a36Sopenharmony_ci const enum req_op op = opf & REQ_OP_MASK; 278862306a36Sopenharmony_ci struct bio *bio; 278962306a36Sopenharmony_ci 279062306a36Sopenharmony_ci BUG_ON(!buffer_locked(bh)); 279162306a36Sopenharmony_ci BUG_ON(!buffer_mapped(bh)); 279262306a36Sopenharmony_ci BUG_ON(!bh->b_end_io); 279362306a36Sopenharmony_ci BUG_ON(buffer_delay(bh)); 279462306a36Sopenharmony_ci BUG_ON(buffer_unwritten(bh)); 279562306a36Sopenharmony_ci 279662306a36Sopenharmony_ci /* 279762306a36Sopenharmony_ci * Only clear out a write error when rewriting 279862306a36Sopenharmony_ci */ 279962306a36Sopenharmony_ci if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE)) 280062306a36Sopenharmony_ci clear_buffer_write_io_error(bh); 280162306a36Sopenharmony_ci 280262306a36Sopenharmony_ci if (buffer_meta(bh)) 280362306a36Sopenharmony_ci opf |= REQ_META; 280462306a36Sopenharmony_ci if (buffer_prio(bh)) 280562306a36Sopenharmony_ci opf |= REQ_PRIO; 280662306a36Sopenharmony_ci 280762306a36Sopenharmony_ci bio = bio_alloc(bh->b_bdev, 1, opf, GFP_NOIO); 280862306a36Sopenharmony_ci 280962306a36Sopenharmony_ci fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); 281062306a36Sopenharmony_ci 281162306a36Sopenharmony_ci bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); 281262306a36Sopenharmony_ci 281362306a36Sopenharmony_ci __bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); 281462306a36Sopenharmony_ci 281562306a36Sopenharmony_ci bio->bi_end_io = end_bio_bh_io_sync; 281662306a36Sopenharmony_ci bio->bi_private = bh; 281762306a36Sopenharmony_ci 281862306a36Sopenharmony_ci /* Take care of bh's that straddle the end of the device */ 281962306a36Sopenharmony_ci guard_bio_eod(bio); 282062306a36Sopenharmony_ci 282162306a36Sopenharmony_ci if (wbc) { 282262306a36Sopenharmony_ci 
wbc_init_bio(wbc, bio); 282362306a36Sopenharmony_ci wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size); 282462306a36Sopenharmony_ci } 282562306a36Sopenharmony_ci 282662306a36Sopenharmony_ci submit_bio(bio); 282762306a36Sopenharmony_ci} 282862306a36Sopenharmony_ci 282962306a36Sopenharmony_civoid submit_bh(blk_opf_t opf, struct buffer_head *bh) 283062306a36Sopenharmony_ci{ 283162306a36Sopenharmony_ci submit_bh_wbc(opf, bh, NULL); 283262306a36Sopenharmony_ci} 283362306a36Sopenharmony_ciEXPORT_SYMBOL(submit_bh); 283462306a36Sopenharmony_ci 283562306a36Sopenharmony_civoid write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags) 283662306a36Sopenharmony_ci{ 283762306a36Sopenharmony_ci lock_buffer(bh); 283862306a36Sopenharmony_ci if (!test_clear_buffer_dirty(bh)) { 283962306a36Sopenharmony_ci unlock_buffer(bh); 284062306a36Sopenharmony_ci return; 284162306a36Sopenharmony_ci } 284262306a36Sopenharmony_ci bh->b_end_io = end_buffer_write_sync; 284362306a36Sopenharmony_ci get_bh(bh); 284462306a36Sopenharmony_ci submit_bh(REQ_OP_WRITE | op_flags, bh); 284562306a36Sopenharmony_ci} 284662306a36Sopenharmony_ciEXPORT_SYMBOL(write_dirty_buffer); 284762306a36Sopenharmony_ci 284862306a36Sopenharmony_ci/* 284962306a36Sopenharmony_ci * For a data-integrity writeout, we need to wait upon any in-progress I/O 285062306a36Sopenharmony_ci * and then start new I/O and then wait upon it. The caller must have a ref on 285162306a36Sopenharmony_ci * the buffer_head. 285262306a36Sopenharmony_ci */ 285362306a36Sopenharmony_ciint __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags) 285462306a36Sopenharmony_ci{ 285562306a36Sopenharmony_ci WARN_ON(atomic_read(&bh->b_count) < 1); 285662306a36Sopenharmony_ci lock_buffer(bh); 285762306a36Sopenharmony_ci if (test_clear_buffer_dirty(bh)) { 285862306a36Sopenharmony_ci /* 285962306a36Sopenharmony_ci * The bh should be mapped, but it might not be if the 286062306a36Sopenharmony_ci * device was hot-removed. 
Not much we can do but fail the I/O. 286162306a36Sopenharmony_ci */ 286262306a36Sopenharmony_ci if (!buffer_mapped(bh)) { 286362306a36Sopenharmony_ci unlock_buffer(bh); 286462306a36Sopenharmony_ci return -EIO; 286562306a36Sopenharmony_ci } 286662306a36Sopenharmony_ci 286762306a36Sopenharmony_ci get_bh(bh); 286862306a36Sopenharmony_ci bh->b_end_io = end_buffer_write_sync; 286962306a36Sopenharmony_ci submit_bh(REQ_OP_WRITE | op_flags, bh); 287062306a36Sopenharmony_ci wait_on_buffer(bh); 287162306a36Sopenharmony_ci if (!buffer_uptodate(bh)) 287262306a36Sopenharmony_ci return -EIO; 287362306a36Sopenharmony_ci } else { 287462306a36Sopenharmony_ci unlock_buffer(bh); 287562306a36Sopenharmony_ci } 287662306a36Sopenharmony_ci return 0; 287762306a36Sopenharmony_ci} 287862306a36Sopenharmony_ciEXPORT_SYMBOL(__sync_dirty_buffer); 287962306a36Sopenharmony_ci 288062306a36Sopenharmony_ciint sync_dirty_buffer(struct buffer_head *bh) 288162306a36Sopenharmony_ci{ 288262306a36Sopenharmony_ci return __sync_dirty_buffer(bh, REQ_SYNC); 288362306a36Sopenharmony_ci} 288462306a36Sopenharmony_ciEXPORT_SYMBOL(sync_dirty_buffer); 288562306a36Sopenharmony_ci 288662306a36Sopenharmony_ci/* 288762306a36Sopenharmony_ci * try_to_free_buffers() checks if all the buffers on this particular folio 288862306a36Sopenharmony_ci * are unused, and releases them if so. 288962306a36Sopenharmony_ci * 289062306a36Sopenharmony_ci * Exclusion against try_to_free_buffers may be obtained by either 289162306a36Sopenharmony_ci * locking the folio or by holding its mapping's private_lock. 289262306a36Sopenharmony_ci * 289362306a36Sopenharmony_ci * If the folio is dirty but all the buffers are clean then we need to 289462306a36Sopenharmony_ci * be sure to mark the folio clean as well. This is because the folio 289562306a36Sopenharmony_ci * may be against a block device, and a later reattachment of buffers 289662306a36Sopenharmony_ci * to a dirty folio will set *all* buffers dirty. 
Which would corrupt 289762306a36Sopenharmony_ci * filesystem data on the same device. 289862306a36Sopenharmony_ci * 289962306a36Sopenharmony_ci * The same applies to regular filesystem folios: if all the buffers are 290062306a36Sopenharmony_ci * clean then we set the folio clean and proceed. To do that, we require 290162306a36Sopenharmony_ci * total exclusion from block_dirty_folio(). That is obtained with 290262306a36Sopenharmony_ci * private_lock. 290362306a36Sopenharmony_ci * 290462306a36Sopenharmony_ci * try_to_free_buffers() is non-blocking. 290562306a36Sopenharmony_ci */ 290662306a36Sopenharmony_cistatic inline int buffer_busy(struct buffer_head *bh) 290762306a36Sopenharmony_ci{ 290862306a36Sopenharmony_ci return atomic_read(&bh->b_count) | 290962306a36Sopenharmony_ci (bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock))); 291062306a36Sopenharmony_ci} 291162306a36Sopenharmony_ci 291262306a36Sopenharmony_cistatic bool 291362306a36Sopenharmony_cidrop_buffers(struct folio *folio, struct buffer_head **buffers_to_free) 291462306a36Sopenharmony_ci{ 291562306a36Sopenharmony_ci struct buffer_head *head = folio_buffers(folio); 291662306a36Sopenharmony_ci struct buffer_head *bh; 291762306a36Sopenharmony_ci 291862306a36Sopenharmony_ci bh = head; 291962306a36Sopenharmony_ci do { 292062306a36Sopenharmony_ci if (buffer_busy(bh)) 292162306a36Sopenharmony_ci goto failed; 292262306a36Sopenharmony_ci bh = bh->b_this_page; 292362306a36Sopenharmony_ci } while (bh != head); 292462306a36Sopenharmony_ci 292562306a36Sopenharmony_ci do { 292662306a36Sopenharmony_ci struct buffer_head *next = bh->b_this_page; 292762306a36Sopenharmony_ci 292862306a36Sopenharmony_ci if (bh->b_assoc_map) 292962306a36Sopenharmony_ci __remove_assoc_queue(bh); 293062306a36Sopenharmony_ci bh = next; 293162306a36Sopenharmony_ci } while (bh != head); 293262306a36Sopenharmony_ci *buffers_to_free = head; 293362306a36Sopenharmony_ci folio_detach_private(folio); 293462306a36Sopenharmony_ci return true; 
293562306a36Sopenharmony_cifailed: 293662306a36Sopenharmony_ci return false; 293762306a36Sopenharmony_ci} 293862306a36Sopenharmony_ci 293962306a36Sopenharmony_cibool try_to_free_buffers(struct folio *folio) 294062306a36Sopenharmony_ci{ 294162306a36Sopenharmony_ci struct address_space * const mapping = folio->mapping; 294262306a36Sopenharmony_ci struct buffer_head *buffers_to_free = NULL; 294362306a36Sopenharmony_ci bool ret = 0; 294462306a36Sopenharmony_ci 294562306a36Sopenharmony_ci BUG_ON(!folio_test_locked(folio)); 294662306a36Sopenharmony_ci if (folio_test_writeback(folio)) 294762306a36Sopenharmony_ci return false; 294862306a36Sopenharmony_ci 294962306a36Sopenharmony_ci if (mapping == NULL) { /* can this still happen? */ 295062306a36Sopenharmony_ci ret = drop_buffers(folio, &buffers_to_free); 295162306a36Sopenharmony_ci goto out; 295262306a36Sopenharmony_ci } 295362306a36Sopenharmony_ci 295462306a36Sopenharmony_ci spin_lock(&mapping->private_lock); 295562306a36Sopenharmony_ci ret = drop_buffers(folio, &buffers_to_free); 295662306a36Sopenharmony_ci 295762306a36Sopenharmony_ci /* 295862306a36Sopenharmony_ci * If the filesystem writes its buffers by hand (eg ext3) 295962306a36Sopenharmony_ci * then we can have clean buffers against a dirty folio. We 296062306a36Sopenharmony_ci * clean the folio here; otherwise the VM will never notice 296162306a36Sopenharmony_ci * that the filesystem did any IO at all. 296262306a36Sopenharmony_ci * 296362306a36Sopenharmony_ci * Also, during truncate, discard_buffer will have marked all 296462306a36Sopenharmony_ci * the folio's buffers clean. We discover that here and clean 296562306a36Sopenharmony_ci * the folio also. 296662306a36Sopenharmony_ci * 296762306a36Sopenharmony_ci * private_lock must be held over this entire operation in order 296862306a36Sopenharmony_ci * to synchronise against block_dirty_folio and prevent the 296962306a36Sopenharmony_ci * dirty bit from being lost. 
297062306a36Sopenharmony_ci */ 297162306a36Sopenharmony_ci if (ret) 297262306a36Sopenharmony_ci folio_cancel_dirty(folio); 297362306a36Sopenharmony_ci spin_unlock(&mapping->private_lock); 297462306a36Sopenharmony_ciout: 297562306a36Sopenharmony_ci if (buffers_to_free) { 297662306a36Sopenharmony_ci struct buffer_head *bh = buffers_to_free; 297762306a36Sopenharmony_ci 297862306a36Sopenharmony_ci do { 297962306a36Sopenharmony_ci struct buffer_head *next = bh->b_this_page; 298062306a36Sopenharmony_ci free_buffer_head(bh); 298162306a36Sopenharmony_ci bh = next; 298262306a36Sopenharmony_ci } while (bh != buffers_to_free); 298362306a36Sopenharmony_ci } 298462306a36Sopenharmony_ci return ret; 298562306a36Sopenharmony_ci} 298662306a36Sopenharmony_ciEXPORT_SYMBOL(try_to_free_buffers); 298762306a36Sopenharmony_ci 298862306a36Sopenharmony_ci/* 298962306a36Sopenharmony_ci * Buffer-head allocation 299062306a36Sopenharmony_ci */ 299162306a36Sopenharmony_cistatic struct kmem_cache *bh_cachep __read_mostly; 299262306a36Sopenharmony_ci 299362306a36Sopenharmony_ci/* 299462306a36Sopenharmony_ci * Once the number of bh's in the machine exceeds this level, we start 299562306a36Sopenharmony_ci * stripping them in writeback. 
299662306a36Sopenharmony_ci */ 299762306a36Sopenharmony_cistatic unsigned long max_buffer_heads; 299862306a36Sopenharmony_ci 299962306a36Sopenharmony_ciint buffer_heads_over_limit; 300062306a36Sopenharmony_ci 300162306a36Sopenharmony_cistruct bh_accounting { 300262306a36Sopenharmony_ci int nr; /* Number of live bh's */ 300362306a36Sopenharmony_ci int ratelimit; /* Limit cacheline bouncing */ 300462306a36Sopenharmony_ci}; 300562306a36Sopenharmony_ci 300662306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct bh_accounting, bh_accounting) = {0, 0}; 300762306a36Sopenharmony_ci 300862306a36Sopenharmony_cistatic void recalc_bh_state(void) 300962306a36Sopenharmony_ci{ 301062306a36Sopenharmony_ci int i; 301162306a36Sopenharmony_ci int tot = 0; 301262306a36Sopenharmony_ci 301362306a36Sopenharmony_ci if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096) 301462306a36Sopenharmony_ci return; 301562306a36Sopenharmony_ci __this_cpu_write(bh_accounting.ratelimit, 0); 301662306a36Sopenharmony_ci for_each_online_cpu(i) 301762306a36Sopenharmony_ci tot += per_cpu(bh_accounting, i).nr; 301862306a36Sopenharmony_ci buffer_heads_over_limit = (tot > max_buffer_heads); 301962306a36Sopenharmony_ci} 302062306a36Sopenharmony_ci 302162306a36Sopenharmony_cistruct buffer_head *alloc_buffer_head(gfp_t gfp_flags) 302262306a36Sopenharmony_ci{ 302362306a36Sopenharmony_ci struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); 302462306a36Sopenharmony_ci if (ret) { 302562306a36Sopenharmony_ci INIT_LIST_HEAD(&ret->b_assoc_buffers); 302662306a36Sopenharmony_ci spin_lock_init(&ret->b_uptodate_lock); 302762306a36Sopenharmony_ci preempt_disable(); 302862306a36Sopenharmony_ci __this_cpu_inc(bh_accounting.nr); 302962306a36Sopenharmony_ci recalc_bh_state(); 303062306a36Sopenharmony_ci preempt_enable(); 303162306a36Sopenharmony_ci } 303262306a36Sopenharmony_ci return ret; 303362306a36Sopenharmony_ci} 303462306a36Sopenharmony_ciEXPORT_SYMBOL(alloc_buffer_head); 303562306a36Sopenharmony_ci 
303662306a36Sopenharmony_civoid free_buffer_head(struct buffer_head *bh) 303762306a36Sopenharmony_ci{ 303862306a36Sopenharmony_ci BUG_ON(!list_empty(&bh->b_assoc_buffers)); 303962306a36Sopenharmony_ci kmem_cache_free(bh_cachep, bh); 304062306a36Sopenharmony_ci preempt_disable(); 304162306a36Sopenharmony_ci __this_cpu_dec(bh_accounting.nr); 304262306a36Sopenharmony_ci recalc_bh_state(); 304362306a36Sopenharmony_ci preempt_enable(); 304462306a36Sopenharmony_ci} 304562306a36Sopenharmony_ciEXPORT_SYMBOL(free_buffer_head); 304662306a36Sopenharmony_ci 304762306a36Sopenharmony_cistatic int buffer_exit_cpu_dead(unsigned int cpu) 304862306a36Sopenharmony_ci{ 304962306a36Sopenharmony_ci int i; 305062306a36Sopenharmony_ci struct bh_lru *b = &per_cpu(bh_lrus, cpu); 305162306a36Sopenharmony_ci 305262306a36Sopenharmony_ci for (i = 0; i < BH_LRU_SIZE; i++) { 305362306a36Sopenharmony_ci brelse(b->bhs[i]); 305462306a36Sopenharmony_ci b->bhs[i] = NULL; 305562306a36Sopenharmony_ci } 305662306a36Sopenharmony_ci this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr); 305762306a36Sopenharmony_ci per_cpu(bh_accounting, cpu).nr = 0; 305862306a36Sopenharmony_ci return 0; 305962306a36Sopenharmony_ci} 306062306a36Sopenharmony_ci 306162306a36Sopenharmony_ci/** 306262306a36Sopenharmony_ci * bh_uptodate_or_lock - Test whether the buffer is uptodate 306362306a36Sopenharmony_ci * @bh: struct buffer_head 306462306a36Sopenharmony_ci * 306562306a36Sopenharmony_ci * Return true if the buffer is up-to-date and false, 306662306a36Sopenharmony_ci * with the buffer locked, if not. 
306762306a36Sopenharmony_ci */ 306862306a36Sopenharmony_ciint bh_uptodate_or_lock(struct buffer_head *bh) 306962306a36Sopenharmony_ci{ 307062306a36Sopenharmony_ci if (!buffer_uptodate(bh)) { 307162306a36Sopenharmony_ci lock_buffer(bh); 307262306a36Sopenharmony_ci if (!buffer_uptodate(bh)) 307362306a36Sopenharmony_ci return 0; 307462306a36Sopenharmony_ci unlock_buffer(bh); 307562306a36Sopenharmony_ci } 307662306a36Sopenharmony_ci return 1; 307762306a36Sopenharmony_ci} 307862306a36Sopenharmony_ciEXPORT_SYMBOL(bh_uptodate_or_lock); 307962306a36Sopenharmony_ci 308062306a36Sopenharmony_ci/** 308162306a36Sopenharmony_ci * __bh_read - Submit read for a locked buffer 308262306a36Sopenharmony_ci * @bh: struct buffer_head 308362306a36Sopenharmony_ci * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ 308462306a36Sopenharmony_ci * @wait: wait until reading finish 308562306a36Sopenharmony_ci * 308662306a36Sopenharmony_ci * Returns zero on success or don't wait, and -EIO on error. 308762306a36Sopenharmony_ci */ 308862306a36Sopenharmony_ciint __bh_read(struct buffer_head *bh, blk_opf_t op_flags, bool wait) 308962306a36Sopenharmony_ci{ 309062306a36Sopenharmony_ci int ret = 0; 309162306a36Sopenharmony_ci 309262306a36Sopenharmony_ci BUG_ON(!buffer_locked(bh)); 309362306a36Sopenharmony_ci 309462306a36Sopenharmony_ci get_bh(bh); 309562306a36Sopenharmony_ci bh->b_end_io = end_buffer_read_sync; 309662306a36Sopenharmony_ci submit_bh(REQ_OP_READ | op_flags, bh); 309762306a36Sopenharmony_ci if (wait) { 309862306a36Sopenharmony_ci wait_on_buffer(bh); 309962306a36Sopenharmony_ci if (!buffer_uptodate(bh)) 310062306a36Sopenharmony_ci ret = -EIO; 310162306a36Sopenharmony_ci } 310262306a36Sopenharmony_ci return ret; 310362306a36Sopenharmony_ci} 310462306a36Sopenharmony_ciEXPORT_SYMBOL(__bh_read); 310562306a36Sopenharmony_ci 310662306a36Sopenharmony_ci/** 310762306a36Sopenharmony_ci * __bh_read_batch - Submit read for a batch of unlocked buffers 310862306a36Sopenharmony_ci * @nr: entry 
number of the buffer batch 310962306a36Sopenharmony_ci * @bhs: a batch of struct buffer_head 311062306a36Sopenharmony_ci * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ 311162306a36Sopenharmony_ci * @force_lock: force to get a lock on the buffer if set, otherwise drops any 311262306a36Sopenharmony_ci * buffer that cannot lock. 311362306a36Sopenharmony_ci * 311462306a36Sopenharmony_ci * Returns zero on success or don't wait, and -EIO on error. 311562306a36Sopenharmony_ci */ 311662306a36Sopenharmony_civoid __bh_read_batch(int nr, struct buffer_head *bhs[], 311762306a36Sopenharmony_ci blk_opf_t op_flags, bool force_lock) 311862306a36Sopenharmony_ci{ 311962306a36Sopenharmony_ci int i; 312062306a36Sopenharmony_ci 312162306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 312262306a36Sopenharmony_ci struct buffer_head *bh = bhs[i]; 312362306a36Sopenharmony_ci 312462306a36Sopenharmony_ci if (buffer_uptodate(bh)) 312562306a36Sopenharmony_ci continue; 312662306a36Sopenharmony_ci 312762306a36Sopenharmony_ci if (force_lock) 312862306a36Sopenharmony_ci lock_buffer(bh); 312962306a36Sopenharmony_ci else 313062306a36Sopenharmony_ci if (!trylock_buffer(bh)) 313162306a36Sopenharmony_ci continue; 313262306a36Sopenharmony_ci 313362306a36Sopenharmony_ci if (buffer_uptodate(bh)) { 313462306a36Sopenharmony_ci unlock_buffer(bh); 313562306a36Sopenharmony_ci continue; 313662306a36Sopenharmony_ci } 313762306a36Sopenharmony_ci 313862306a36Sopenharmony_ci bh->b_end_io = end_buffer_read_sync; 313962306a36Sopenharmony_ci get_bh(bh); 314062306a36Sopenharmony_ci submit_bh(REQ_OP_READ | op_flags, bh); 314162306a36Sopenharmony_ci } 314262306a36Sopenharmony_ci} 314362306a36Sopenharmony_ciEXPORT_SYMBOL(__bh_read_batch); 314462306a36Sopenharmony_ci 314562306a36Sopenharmony_civoid __init buffer_init(void) 314662306a36Sopenharmony_ci{ 314762306a36Sopenharmony_ci unsigned long nrpages; 314862306a36Sopenharmony_ci int ret; 314962306a36Sopenharmony_ci 315062306a36Sopenharmony_ci bh_cachep = 
kmem_cache_create("buffer_head", 315162306a36Sopenharmony_ci sizeof(struct buffer_head), 0, 315262306a36Sopenharmony_ci (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| 315362306a36Sopenharmony_ci SLAB_MEM_SPREAD), 315462306a36Sopenharmony_ci NULL); 315562306a36Sopenharmony_ci 315662306a36Sopenharmony_ci /* 315762306a36Sopenharmony_ci * Limit the bh occupancy to 10% of ZONE_NORMAL 315862306a36Sopenharmony_ci */ 315962306a36Sopenharmony_ci nrpages = (nr_free_buffer_pages() * 10) / 100; 316062306a36Sopenharmony_ci max_buffer_heads = nrpages * (PAGE_SIZE / sizeof(struct buffer_head)); 316162306a36Sopenharmony_ci ret = cpuhp_setup_state_nocalls(CPUHP_FS_BUFF_DEAD, "fs/buffer:dead", 316262306a36Sopenharmony_ci NULL, buffer_exit_cpu_dead); 316362306a36Sopenharmony_ci WARN_ON(ret < 0); 316462306a36Sopenharmony_ci} 3165