162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * fs/mpage.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (C) 2002, Linus Torvalds. 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * Contains functions related to preparing and submitting BIOs which contain 862306a36Sopenharmony_ci * multiple pagecache pages. 962306a36Sopenharmony_ci * 1062306a36Sopenharmony_ci * 15May2002 Andrew Morton 1162306a36Sopenharmony_ci * Initial version 1262306a36Sopenharmony_ci * 27Jun2002 axboe@suse.de 1362306a36Sopenharmony_ci * use bio_add_page() to build bio's just the right size 1462306a36Sopenharmony_ci */ 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_ci#include <linux/kernel.h> 1762306a36Sopenharmony_ci#include <linux/export.h> 1862306a36Sopenharmony_ci#include <linux/mm.h> 1962306a36Sopenharmony_ci#include <linux/kdev_t.h> 2062306a36Sopenharmony_ci#include <linux/gfp.h> 2162306a36Sopenharmony_ci#include <linux/bio.h> 2262306a36Sopenharmony_ci#include <linux/fs.h> 2362306a36Sopenharmony_ci#include <linux/buffer_head.h> 2462306a36Sopenharmony_ci#include <linux/blkdev.h> 2562306a36Sopenharmony_ci#include <linux/highmem.h> 2662306a36Sopenharmony_ci#include <linux/prefetch.h> 2762306a36Sopenharmony_ci#include <linux/mpage.h> 2862306a36Sopenharmony_ci#include <linux/mm_inline.h> 2962306a36Sopenharmony_ci#include <linux/writeback.h> 3062306a36Sopenharmony_ci#include <linux/backing-dev.h> 3162306a36Sopenharmony_ci#include <linux/pagevec.h> 3262306a36Sopenharmony_ci#include "internal.h" 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci/* 3562306a36Sopenharmony_ci * I/O completion handler for multipage BIOs. 3662306a36Sopenharmony_ci * 3762306a36Sopenharmony_ci * The mpage code never puts partial pages into a BIO (except for end-of-file). 3862306a36Sopenharmony_ci * If a page does not map to a contiguous run of blocks then it simply falls 3962306a36Sopenharmony_ci * back to block_read_full_folio(). 
4062306a36Sopenharmony_ci * 4162306a36Sopenharmony_ci * Why is this? If a page's completion depends on a number of different BIOs 4262306a36Sopenharmony_ci * which can complete in any order (or at the same time) then determining the 4362306a36Sopenharmony_ci * status of that page is hard. See end_buffer_async_read() for the details. 4462306a36Sopenharmony_ci * There is no point in duplicating all that complexity. 4562306a36Sopenharmony_ci */ 4662306a36Sopenharmony_cistatic void mpage_read_end_io(struct bio *bio) 4762306a36Sopenharmony_ci{ 4862306a36Sopenharmony_ci struct folio_iter fi; 4962306a36Sopenharmony_ci int err = blk_status_to_errno(bio->bi_status); 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci bio_for_each_folio_all(fi, bio) { 5262306a36Sopenharmony_ci if (err) 5362306a36Sopenharmony_ci folio_set_error(fi.folio); 5462306a36Sopenharmony_ci else 5562306a36Sopenharmony_ci folio_mark_uptodate(fi.folio); 5662306a36Sopenharmony_ci folio_unlock(fi.folio); 5762306a36Sopenharmony_ci } 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci bio_put(bio); 6062306a36Sopenharmony_ci} 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_cistatic void mpage_write_end_io(struct bio *bio) 6362306a36Sopenharmony_ci{ 6462306a36Sopenharmony_ci struct folio_iter fi; 6562306a36Sopenharmony_ci int err = blk_status_to_errno(bio->bi_status); 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci bio_for_each_folio_all(fi, bio) { 6862306a36Sopenharmony_ci if (err) { 6962306a36Sopenharmony_ci folio_set_error(fi.folio); 7062306a36Sopenharmony_ci mapping_set_error(fi.folio->mapping, err); 7162306a36Sopenharmony_ci } 7262306a36Sopenharmony_ci folio_end_writeback(fi.folio); 7362306a36Sopenharmony_ci } 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci bio_put(bio); 7662306a36Sopenharmony_ci} 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_cistatic struct bio *mpage_bio_submit_read(struct bio *bio) 7962306a36Sopenharmony_ci{ 8062306a36Sopenharmony_ci bio->bi_end_io = mpage_read_end_io; 
8162306a36Sopenharmony_ci guard_bio_eod(bio); 8262306a36Sopenharmony_ci submit_bio(bio); 8362306a36Sopenharmony_ci return NULL; 8462306a36Sopenharmony_ci} 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_cistatic struct bio *mpage_bio_submit_write(struct bio *bio) 8762306a36Sopenharmony_ci{ 8862306a36Sopenharmony_ci bio->bi_end_io = mpage_write_end_io; 8962306a36Sopenharmony_ci guard_bio_eod(bio); 9062306a36Sopenharmony_ci submit_bio(bio); 9162306a36Sopenharmony_ci return NULL; 9262306a36Sopenharmony_ci} 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci/* 9562306a36Sopenharmony_ci * support function for mpage_readahead. The fs supplied get_block might 9662306a36Sopenharmony_ci * return an up to date buffer. This is used to map that buffer into 9762306a36Sopenharmony_ci * the page, which allows read_folio to avoid triggering a duplicate call 9862306a36Sopenharmony_ci * to get_block. 9962306a36Sopenharmony_ci * 10062306a36Sopenharmony_ci * The idea is to avoid adding buffers to pages that don't already have 10162306a36Sopenharmony_ci * them. So when the buffer is up to date and the page size == block size, 10262306a36Sopenharmony_ci * this marks the page up to date instead of adding new buffers. 
10362306a36Sopenharmony_ci */ 10462306a36Sopenharmony_cistatic void map_buffer_to_folio(struct folio *folio, struct buffer_head *bh, 10562306a36Sopenharmony_ci int page_block) 10662306a36Sopenharmony_ci{ 10762306a36Sopenharmony_ci struct inode *inode = folio->mapping->host; 10862306a36Sopenharmony_ci struct buffer_head *page_bh, *head; 10962306a36Sopenharmony_ci int block = 0; 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_ci head = folio_buffers(folio); 11262306a36Sopenharmony_ci if (!head) { 11362306a36Sopenharmony_ci /* 11462306a36Sopenharmony_ci * don't make any buffers if there is only one buffer on 11562306a36Sopenharmony_ci * the folio and the folio just needs to be set up to date 11662306a36Sopenharmony_ci */ 11762306a36Sopenharmony_ci if (inode->i_blkbits == PAGE_SHIFT && 11862306a36Sopenharmony_ci buffer_uptodate(bh)) { 11962306a36Sopenharmony_ci folio_mark_uptodate(folio); 12062306a36Sopenharmony_ci return; 12162306a36Sopenharmony_ci } 12262306a36Sopenharmony_ci create_empty_buffers(&folio->page, i_blocksize(inode), 0); 12362306a36Sopenharmony_ci head = folio_buffers(folio); 12462306a36Sopenharmony_ci } 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_ci page_bh = head; 12762306a36Sopenharmony_ci do { 12862306a36Sopenharmony_ci if (block == page_block) { 12962306a36Sopenharmony_ci page_bh->b_state = bh->b_state; 13062306a36Sopenharmony_ci page_bh->b_bdev = bh->b_bdev; 13162306a36Sopenharmony_ci page_bh->b_blocknr = bh->b_blocknr; 13262306a36Sopenharmony_ci break; 13362306a36Sopenharmony_ci } 13462306a36Sopenharmony_ci page_bh = page_bh->b_this_page; 13562306a36Sopenharmony_ci block++; 13662306a36Sopenharmony_ci } while (page_bh != head); 13762306a36Sopenharmony_ci} 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_cistruct mpage_readpage_args { 14062306a36Sopenharmony_ci struct bio *bio; 14162306a36Sopenharmony_ci struct folio *folio; 14262306a36Sopenharmony_ci unsigned int nr_pages; 14362306a36Sopenharmony_ci bool is_readahead; 
	sector_t last_block_in_bio;	/* last block added to args->bio */
	struct buffer_head map_bh;	/* carries the last get_block() result */
	unsigned long first_logical_block; /* file block that map_bh starts at */
	get_block_t *get_block;
};

/*
 * This is the worker routine which does all the work of mapping the disk
 * blocks and constructs largest possible bios, submits them for IO if the
 * blocks are not contiguous on the disk.
 *
 * We pass a buffer_head back and forth and use its buffer_mapped() flag to
 * represent the validity of its disk mapping and to decide when to do the next
 * get_block() call.
 */
static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
{
	struct folio *folio = args->folio;
	struct inode *inode = folio->mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
	const unsigned blocksize = 1 << blkbits;
	struct buffer_head *map_bh = &args->map_bh;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t blocks[MAX_BUF_PER_PAGE];	/* disk block for each page block */
	unsigned page_block;
	/* first_hole == blocks_per_page means "no hole seen yet" */
	unsigned first_hole = blocks_per_page;
	struct block_device *bdev = NULL;
	int length;
	int fully_mapped = 1;
	blk_opf_t opf = REQ_OP_READ;
	unsigned nblocks;
	unsigned relative_block;
	gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL);

	/* blocks[] is sized MAX_BUF_PER_PAGE, so only single-page folios fit */
	VM_BUG_ON_FOLIO(folio_test_large(folio), folio);

	if (args->is_readahead) {
		/* readahead is best-effort: tag the IO and allow quiet alloc failure */
		opf |= REQ_RAHEAD;
		gfp |= __GFP_NORETRY | __GFP_NOWARN;
	}

	/* A folio that already has buffers is handled by the slow path. */
	if (folio_buffers(folio))
		goto confused;

	block_in_file = (sector_t)folio->index << (PAGE_SHIFT - blkbits);
	last_block = block_in_file + args->nr_pages * blocks_per_page;
	last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
	if (last_block > last_block_in_file)
		last_block = last_block_in_file;
	page_block = 0;

	/*
	 * Map blocks using the result from the previous get_blocks call first.
	 * map_bh may still describe an extent (left over from the previous
	 * folio) that covers the start of this one.
	 */
	nblocks = map_bh->b_size >> blkbits;
	if (buffer_mapped(map_bh) &&
			block_in_file > args->first_logical_block &&
			block_in_file < (args->first_logical_block + nblocks)) {
		unsigned map_offset = block_in_file - args->first_logical_block;
		unsigned last = nblocks - map_offset;

		for (relative_block = 0; ; relative_block++) {
			if (relative_block == last) {
				/* cached extent fully consumed */
				clear_buffer_mapped(map_bh);
				break;
			}
			if (page_block == blocks_per_page)
				break;
			blocks[page_block] = map_bh->b_blocknr + map_offset +
				relative_block;
			page_block++;
			block_in_file++;
		}
		bdev = map_bh->b_bdev;
	}

	/*
	 * Then do more get_blocks calls until we are done with this folio.
	 */
	map_bh->b_folio = folio;
	while (page_block < blocks_per_page) {
		map_bh->b_state = 0;
		map_bh->b_size = 0;

		if (block_in_file < last_block) {
			map_bh->b_size = (last_block-block_in_file) << blkbits;
			if (args->get_block(inode, block_in_file, map_bh, 0))
				goto confused;
			args->first_logical_block = block_in_file;
		}

		if (!buffer_mapped(map_bh)) {
			/* a hole: remember where it starts, keep scanning */
			fully_mapped = 0;
			if (first_hole == blocks_per_page)
				first_hole = page_block;
			page_block++;
			block_in_file++;
			continue;
		}

		/* some filesystems will copy data into the page during
		 * the get_block call, in which case we don't want to
		 * read it again.  map_buffer_to_folio copies the data
		 * we just collected from get_block into the folio's buffers
		 * so read_folio doesn't have to repeat the get_block call
		 */
		if (buffer_uptodate(map_bh)) {
			map_buffer_to_folio(folio, map_bh, page_block);
			goto confused;
		}

		if (first_hole != blocks_per_page)
			goto confused;		/* hole -> non-hole */

		/* Contiguous blocks?  A discontiguity means separate bios. */
		if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1)
			goto confused;
		/* consume as much of the new extent as fits in this page */
		nblocks = map_bh->b_size >> blkbits;
		for (relative_block = 0; ; relative_block++) {
			if (relative_block == nblocks) {
				clear_buffer_mapped(map_bh);
				break;
			} else if (page_block == blocks_per_page)
				break;
			blocks[page_block] = map_bh->b_blocknr+relative_block;
			page_block++;
			block_in_file++;
		}
		bdev = map_bh->b_bdev;
	}

	if (first_hole != blocks_per_page) {
		/* trailing hole (common at EOF): zero it now, no IO needed */
		folio_zero_segment(folio, first_hole << blkbits, PAGE_SIZE);
		if (first_hole == 0) {
			/* whole page is a hole - done without any IO */
			folio_mark_uptodate(folio);
			folio_unlock(folio);
			goto out;
		}
	} else if (fully_mapped) {
		folio_set_mappedtodisk(folio);
	}

	/*
	 * This folio will go to BIO.  Do we need to send this BIO off first?
	 * (It must be submitted if this folio is not disk-contiguous with it.)
	 */
	if (args->bio && (args->last_block_in_bio != blocks[0] - 1))
		args->bio = mpage_bio_submit_read(args->bio);

alloc_new:
	if (args->bio == NULL) {
		args->bio = bio_alloc(bdev, bio_max_segs(args->nr_pages), opf,
				      gfp);
		if (args->bio == NULL)
			goto confused;
		args->bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
	}

	/* only read up to the first hole; the rest was zeroed above */
	length = first_hole << blkbits;
	if (!bio_add_folio(args->bio, folio, length, 0)) {
		/* bio is full: submit it and retry with a fresh one */
		args->bio = mpage_bio_submit_read(args->bio);
		goto alloc_new;
	}

	/*
	 * Submit now if we just crossed a BH_Boundary block (the next mapping
	 * likely needs IO near here) or if the folio ends in a hole.
	 */
	relative_block = block_in_file - args->first_logical_block;
	nblocks = map_bh->b_size >> blkbits;
	if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
	    (first_hole != blocks_per_page))
		args->bio = mpage_bio_submit_read(args->bio);
	else
		args->last_block_in_bio = blocks[blocks_per_page - 1];
out:
	return args->bio;

confused:
	/* fall back to the buffer_head-based path for this folio */
	if (args->bio)
		args->bio = mpage_bio_submit_read(args->bio);
	if (!folio_test_uptodate(folio))
		block_read_full_folio(folio, args->get_block);
	else
		folio_unlock(folio);
	goto out;
}
33062306a36Sopenharmony_ci 33162306a36Sopenharmony_ci/** 33262306a36Sopenharmony_ci * mpage_readahead - start reads against pages 33362306a36Sopenharmony_ci * @rac: Describes which pages to read. 33462306a36Sopenharmony_ci * @get_block: The filesystem's block mapper function. 33562306a36Sopenharmony_ci * 33662306a36Sopenharmony_ci * This function walks the pages and the blocks within each page, building and 33762306a36Sopenharmony_ci * emitting large BIOs. 33862306a36Sopenharmony_ci * 33962306a36Sopenharmony_ci * If anything unusual happens, such as: 34062306a36Sopenharmony_ci * 34162306a36Sopenharmony_ci * - encountering a page which has buffers 34262306a36Sopenharmony_ci * - encountering a page which has a non-hole after a hole 34362306a36Sopenharmony_ci * - encountering a page with non-contiguous blocks 34462306a36Sopenharmony_ci * 34562306a36Sopenharmony_ci * then this code just gives up and calls the buffer_head-based read function. 34662306a36Sopenharmony_ci * It does handle a page which has holes at the end - that is a common case: 34762306a36Sopenharmony_ci * the end-of-file on blocksize < PAGE_SIZE setups. 34862306a36Sopenharmony_ci * 34962306a36Sopenharmony_ci * BH_Boundary explanation: 35062306a36Sopenharmony_ci * 35162306a36Sopenharmony_ci * There is a problem. The mpage read code assembles several pages, gets all 35262306a36Sopenharmony_ci * their disk mappings, and then submits them all. That's fine, but obtaining 35362306a36Sopenharmony_ci * the disk mappings may require I/O. Reads of indirect blocks, for example. 
35462306a36Sopenharmony_ci * 35562306a36Sopenharmony_ci * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be 35662306a36Sopenharmony_ci * submitted in the following order: 35762306a36Sopenharmony_ci * 35862306a36Sopenharmony_ci * 12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16 35962306a36Sopenharmony_ci * 36062306a36Sopenharmony_ci * because the indirect block has to be read to get the mappings of blocks 36162306a36Sopenharmony_ci * 13,14,15,16. Obviously, this impacts performance. 36262306a36Sopenharmony_ci * 36362306a36Sopenharmony_ci * So what we do it to allow the filesystem's get_block() function to set 36462306a36Sopenharmony_ci * BH_Boundary when it maps block 11. BH_Boundary says: mapping of the block 36562306a36Sopenharmony_ci * after this one will require I/O against a block which is probably close to 36662306a36Sopenharmony_ci * this one. So you should push what I/O you have currently accumulated. 36762306a36Sopenharmony_ci * 36862306a36Sopenharmony_ci * This all causes the disk requests to be issued in the correct order. 
36962306a36Sopenharmony_ci */ 37062306a36Sopenharmony_civoid mpage_readahead(struct readahead_control *rac, get_block_t get_block) 37162306a36Sopenharmony_ci{ 37262306a36Sopenharmony_ci struct folio *folio; 37362306a36Sopenharmony_ci struct mpage_readpage_args args = { 37462306a36Sopenharmony_ci .get_block = get_block, 37562306a36Sopenharmony_ci .is_readahead = true, 37662306a36Sopenharmony_ci }; 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci while ((folio = readahead_folio(rac))) { 37962306a36Sopenharmony_ci prefetchw(&folio->flags); 38062306a36Sopenharmony_ci args.folio = folio; 38162306a36Sopenharmony_ci args.nr_pages = readahead_count(rac); 38262306a36Sopenharmony_ci args.bio = do_mpage_readpage(&args); 38362306a36Sopenharmony_ci } 38462306a36Sopenharmony_ci if (args.bio) 38562306a36Sopenharmony_ci mpage_bio_submit_read(args.bio); 38662306a36Sopenharmony_ci} 38762306a36Sopenharmony_ciEXPORT_SYMBOL(mpage_readahead); 38862306a36Sopenharmony_ci 38962306a36Sopenharmony_ci/* 39062306a36Sopenharmony_ci * This isn't called much at all 39162306a36Sopenharmony_ci */ 39262306a36Sopenharmony_ciint mpage_read_folio(struct folio *folio, get_block_t get_block) 39362306a36Sopenharmony_ci{ 39462306a36Sopenharmony_ci struct mpage_readpage_args args = { 39562306a36Sopenharmony_ci .folio = folio, 39662306a36Sopenharmony_ci .nr_pages = 1, 39762306a36Sopenharmony_ci .get_block = get_block, 39862306a36Sopenharmony_ci }; 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci args.bio = do_mpage_readpage(&args); 40162306a36Sopenharmony_ci if (args.bio) 40262306a36Sopenharmony_ci mpage_bio_submit_read(args.bio); 40362306a36Sopenharmony_ci return 0; 40462306a36Sopenharmony_ci} 40562306a36Sopenharmony_ciEXPORT_SYMBOL(mpage_read_folio); 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_ci/* 40862306a36Sopenharmony_ci * Writing is not so simple. 
40962306a36Sopenharmony_ci * 41062306a36Sopenharmony_ci * If the page has buffers then they will be used for obtaining the disk 41162306a36Sopenharmony_ci * mapping. We only support pages which are fully mapped-and-dirty, with a 41262306a36Sopenharmony_ci * special case for pages which are unmapped at the end: end-of-file. 41362306a36Sopenharmony_ci * 41462306a36Sopenharmony_ci * If the page has no buffers (preferred) then the page is mapped here. 41562306a36Sopenharmony_ci * 41662306a36Sopenharmony_ci * If all blocks are found to be contiguous then the page can go into the 41762306a36Sopenharmony_ci * BIO. Otherwise fall back to the mapping's writepage(). 41862306a36Sopenharmony_ci * 41962306a36Sopenharmony_ci * FIXME: This code wants an estimate of how many pages are still to be 42062306a36Sopenharmony_ci * written, so it can intelligently allocate a suitably-sized BIO. For now, 42162306a36Sopenharmony_ci * just allocate full-size (16-page) BIOs. 42262306a36Sopenharmony_ci */ 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_cistruct mpage_data { 42562306a36Sopenharmony_ci struct bio *bio; 42662306a36Sopenharmony_ci sector_t last_block_in_bio; 42762306a36Sopenharmony_ci get_block_t *get_block; 42862306a36Sopenharmony_ci}; 42962306a36Sopenharmony_ci 43062306a36Sopenharmony_ci/* 43162306a36Sopenharmony_ci * We have our BIO, so we can now mark the buffers clean. Make 43262306a36Sopenharmony_ci * sure to only clean buffers which we know we'll be writing. 
43362306a36Sopenharmony_ci */ 43462306a36Sopenharmony_cistatic void clean_buffers(struct page *page, unsigned first_unmapped) 43562306a36Sopenharmony_ci{ 43662306a36Sopenharmony_ci unsigned buffer_counter = 0; 43762306a36Sopenharmony_ci struct buffer_head *bh, *head; 43862306a36Sopenharmony_ci if (!page_has_buffers(page)) 43962306a36Sopenharmony_ci return; 44062306a36Sopenharmony_ci head = page_buffers(page); 44162306a36Sopenharmony_ci bh = head; 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci do { 44462306a36Sopenharmony_ci if (buffer_counter++ == first_unmapped) 44562306a36Sopenharmony_ci break; 44662306a36Sopenharmony_ci clear_buffer_dirty(bh); 44762306a36Sopenharmony_ci bh = bh->b_this_page; 44862306a36Sopenharmony_ci } while (bh != head); 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci /* 45162306a36Sopenharmony_ci * we cannot drop the bh if the page is not uptodate or a concurrent 45262306a36Sopenharmony_ci * read_folio would fail to serialize with the bh and it would read from 45362306a36Sopenharmony_ci * disk before we reach the platter. 45462306a36Sopenharmony_ci */ 45562306a36Sopenharmony_ci if (buffer_heads_over_limit && PageUptodate(page)) 45662306a36Sopenharmony_ci try_to_free_buffers(page_folio(page)); 45762306a36Sopenharmony_ci} 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_ci/* 46062306a36Sopenharmony_ci * For situations where we want to clean all buffers attached to a page. 46162306a36Sopenharmony_ci * We don't need to calculate how many buffers are attached to the page, 46262306a36Sopenharmony_ci * we just need to specify a number larger than the maximum number of buffers. 
46362306a36Sopenharmony_ci */ 46462306a36Sopenharmony_civoid clean_page_buffers(struct page *page) 46562306a36Sopenharmony_ci{ 46662306a36Sopenharmony_ci clean_buffers(page, ~0U); 46762306a36Sopenharmony_ci} 46862306a36Sopenharmony_ci 46962306a36Sopenharmony_cistatic int __mpage_writepage(struct folio *folio, struct writeback_control *wbc, 47062306a36Sopenharmony_ci void *data) 47162306a36Sopenharmony_ci{ 47262306a36Sopenharmony_ci struct mpage_data *mpd = data; 47362306a36Sopenharmony_ci struct bio *bio = mpd->bio; 47462306a36Sopenharmony_ci struct address_space *mapping = folio->mapping; 47562306a36Sopenharmony_ci struct inode *inode = mapping->host; 47662306a36Sopenharmony_ci const unsigned blkbits = inode->i_blkbits; 47762306a36Sopenharmony_ci const unsigned blocks_per_page = PAGE_SIZE >> blkbits; 47862306a36Sopenharmony_ci sector_t last_block; 47962306a36Sopenharmony_ci sector_t block_in_file; 48062306a36Sopenharmony_ci sector_t blocks[MAX_BUF_PER_PAGE]; 48162306a36Sopenharmony_ci unsigned page_block; 48262306a36Sopenharmony_ci unsigned first_unmapped = blocks_per_page; 48362306a36Sopenharmony_ci struct block_device *bdev = NULL; 48462306a36Sopenharmony_ci int boundary = 0; 48562306a36Sopenharmony_ci sector_t boundary_block = 0; 48662306a36Sopenharmony_ci struct block_device *boundary_bdev = NULL; 48762306a36Sopenharmony_ci size_t length; 48862306a36Sopenharmony_ci struct buffer_head map_bh; 48962306a36Sopenharmony_ci loff_t i_size = i_size_read(inode); 49062306a36Sopenharmony_ci int ret = 0; 49162306a36Sopenharmony_ci struct buffer_head *head = folio_buffers(folio); 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_ci if (head) { 49462306a36Sopenharmony_ci struct buffer_head *bh = head; 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_ci /* If they're all mapped and dirty, do it */ 49762306a36Sopenharmony_ci page_block = 0; 49862306a36Sopenharmony_ci do { 49962306a36Sopenharmony_ci BUG_ON(buffer_locked(bh)); 50062306a36Sopenharmony_ci if 
(!buffer_mapped(bh)) { 50162306a36Sopenharmony_ci /* 50262306a36Sopenharmony_ci * unmapped dirty buffers are created by 50362306a36Sopenharmony_ci * block_dirty_folio -> mmapped data 50462306a36Sopenharmony_ci */ 50562306a36Sopenharmony_ci if (buffer_dirty(bh)) 50662306a36Sopenharmony_ci goto confused; 50762306a36Sopenharmony_ci if (first_unmapped == blocks_per_page) 50862306a36Sopenharmony_ci first_unmapped = page_block; 50962306a36Sopenharmony_ci continue; 51062306a36Sopenharmony_ci } 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci if (first_unmapped != blocks_per_page) 51362306a36Sopenharmony_ci goto confused; /* hole -> non-hole */ 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci if (!buffer_dirty(bh) || !buffer_uptodate(bh)) 51662306a36Sopenharmony_ci goto confused; 51762306a36Sopenharmony_ci if (page_block) { 51862306a36Sopenharmony_ci if (bh->b_blocknr != blocks[page_block-1] + 1) 51962306a36Sopenharmony_ci goto confused; 52062306a36Sopenharmony_ci } 52162306a36Sopenharmony_ci blocks[page_block++] = bh->b_blocknr; 52262306a36Sopenharmony_ci boundary = buffer_boundary(bh); 52362306a36Sopenharmony_ci if (boundary) { 52462306a36Sopenharmony_ci boundary_block = bh->b_blocknr; 52562306a36Sopenharmony_ci boundary_bdev = bh->b_bdev; 52662306a36Sopenharmony_ci } 52762306a36Sopenharmony_ci bdev = bh->b_bdev; 52862306a36Sopenharmony_ci } while ((bh = bh->b_this_page) != head); 52962306a36Sopenharmony_ci 53062306a36Sopenharmony_ci if (first_unmapped) 53162306a36Sopenharmony_ci goto page_is_mapped; 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci /* 53462306a36Sopenharmony_ci * Page has buffers, but they are all unmapped. The page was 53562306a36Sopenharmony_ci * created by pagein or read over a hole which was handled by 53662306a36Sopenharmony_ci * block_read_full_folio(). If this address_space is also 53762306a36Sopenharmony_ci * using mpage_readahead then this can rarely happen. 
53862306a36Sopenharmony_ci */ 53962306a36Sopenharmony_ci goto confused; 54062306a36Sopenharmony_ci } 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci /* 54362306a36Sopenharmony_ci * The page has no buffers: map it to disk 54462306a36Sopenharmony_ci */ 54562306a36Sopenharmony_ci BUG_ON(!folio_test_uptodate(folio)); 54662306a36Sopenharmony_ci block_in_file = (sector_t)folio->index << (PAGE_SHIFT - blkbits); 54762306a36Sopenharmony_ci /* 54862306a36Sopenharmony_ci * Whole page beyond EOF? Skip allocating blocks to avoid leaking 54962306a36Sopenharmony_ci * space. 55062306a36Sopenharmony_ci */ 55162306a36Sopenharmony_ci if (block_in_file >= (i_size + (1 << blkbits) - 1) >> blkbits) 55262306a36Sopenharmony_ci goto page_is_mapped; 55362306a36Sopenharmony_ci last_block = (i_size - 1) >> blkbits; 55462306a36Sopenharmony_ci map_bh.b_folio = folio; 55562306a36Sopenharmony_ci for (page_block = 0; page_block < blocks_per_page; ) { 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci map_bh.b_state = 0; 55862306a36Sopenharmony_ci map_bh.b_size = 1 << blkbits; 55962306a36Sopenharmony_ci if (mpd->get_block(inode, block_in_file, &map_bh, 1)) 56062306a36Sopenharmony_ci goto confused; 56162306a36Sopenharmony_ci if (!buffer_mapped(&map_bh)) 56262306a36Sopenharmony_ci goto confused; 56362306a36Sopenharmony_ci if (buffer_new(&map_bh)) 56462306a36Sopenharmony_ci clean_bdev_bh_alias(&map_bh); 56562306a36Sopenharmony_ci if (buffer_boundary(&map_bh)) { 56662306a36Sopenharmony_ci boundary_block = map_bh.b_blocknr; 56762306a36Sopenharmony_ci boundary_bdev = map_bh.b_bdev; 56862306a36Sopenharmony_ci } 56962306a36Sopenharmony_ci if (page_block) { 57062306a36Sopenharmony_ci if (map_bh.b_blocknr != blocks[page_block-1] + 1) 57162306a36Sopenharmony_ci goto confused; 57262306a36Sopenharmony_ci } 57362306a36Sopenharmony_ci blocks[page_block++] = map_bh.b_blocknr; 57462306a36Sopenharmony_ci boundary = buffer_boundary(&map_bh); 57562306a36Sopenharmony_ci bdev = map_bh.b_bdev; 
57662306a36Sopenharmony_ci if (block_in_file == last_block) 57762306a36Sopenharmony_ci break; 57862306a36Sopenharmony_ci block_in_file++; 57962306a36Sopenharmony_ci } 58062306a36Sopenharmony_ci BUG_ON(page_block == 0); 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_ci first_unmapped = page_block; 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_cipage_is_mapped: 58562306a36Sopenharmony_ci /* Don't bother writing beyond EOF, truncate will discard the folio */ 58662306a36Sopenharmony_ci if (folio_pos(folio) >= i_size) 58762306a36Sopenharmony_ci goto confused; 58862306a36Sopenharmony_ci length = folio_size(folio); 58962306a36Sopenharmony_ci if (folio_pos(folio) + length > i_size) { 59062306a36Sopenharmony_ci /* 59162306a36Sopenharmony_ci * The page straddles i_size. It must be zeroed out on each 59262306a36Sopenharmony_ci * and every writepage invocation because it may be mmapped. 59362306a36Sopenharmony_ci * "A file is mapped in multiples of the page size. For a file 59462306a36Sopenharmony_ci * that is not a multiple of the page size, the remaining memory 59562306a36Sopenharmony_ci * is zeroed when mapped, and writes to that region are not 59662306a36Sopenharmony_ci * written out to the file." 59762306a36Sopenharmony_ci */ 59862306a36Sopenharmony_ci length = i_size - folio_pos(folio); 59962306a36Sopenharmony_ci folio_zero_segment(folio, length, folio_size(folio)); 60062306a36Sopenharmony_ci } 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_ci /* 60362306a36Sopenharmony_ci * This page will go to BIO. Do we need to send this BIO off first? 
60462306a36Sopenharmony_ci */ 60562306a36Sopenharmony_ci if (bio && mpd->last_block_in_bio != blocks[0] - 1) 60662306a36Sopenharmony_ci bio = mpage_bio_submit_write(bio); 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_cialloc_new: 60962306a36Sopenharmony_ci if (bio == NULL) { 61062306a36Sopenharmony_ci bio = bio_alloc(bdev, BIO_MAX_VECS, 61162306a36Sopenharmony_ci REQ_OP_WRITE | wbc_to_write_flags(wbc), 61262306a36Sopenharmony_ci GFP_NOFS); 61362306a36Sopenharmony_ci bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); 61462306a36Sopenharmony_ci wbc_init_bio(wbc, bio); 61562306a36Sopenharmony_ci } 61662306a36Sopenharmony_ci 61762306a36Sopenharmony_ci /* 61862306a36Sopenharmony_ci * Must try to add the page before marking the buffer clean or 61962306a36Sopenharmony_ci * the confused fail path above (OOM) will be very confused when 62062306a36Sopenharmony_ci * it finds all bh marked clean (i.e. it will not write anything) 62162306a36Sopenharmony_ci */ 62262306a36Sopenharmony_ci wbc_account_cgroup_owner(wbc, &folio->page, folio_size(folio)); 62362306a36Sopenharmony_ci length = first_unmapped << blkbits; 62462306a36Sopenharmony_ci if (!bio_add_folio(bio, folio, length, 0)) { 62562306a36Sopenharmony_ci bio = mpage_bio_submit_write(bio); 62662306a36Sopenharmony_ci goto alloc_new; 62762306a36Sopenharmony_ci } 62862306a36Sopenharmony_ci 62962306a36Sopenharmony_ci clean_buffers(&folio->page, first_unmapped); 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_ci BUG_ON(folio_test_writeback(folio)); 63262306a36Sopenharmony_ci folio_start_writeback(folio); 63362306a36Sopenharmony_ci folio_unlock(folio); 63462306a36Sopenharmony_ci if (boundary || (first_unmapped != blocks_per_page)) { 63562306a36Sopenharmony_ci bio = mpage_bio_submit_write(bio); 63662306a36Sopenharmony_ci if (boundary_block) { 63762306a36Sopenharmony_ci write_boundary_block(boundary_bdev, 63862306a36Sopenharmony_ci boundary_block, 1 << blkbits); 63962306a36Sopenharmony_ci } 64062306a36Sopenharmony_ci } else 
{ 64162306a36Sopenharmony_ci mpd->last_block_in_bio = blocks[blocks_per_page - 1]; 64262306a36Sopenharmony_ci } 64362306a36Sopenharmony_ci goto out; 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ciconfused: 64662306a36Sopenharmony_ci if (bio) 64762306a36Sopenharmony_ci bio = mpage_bio_submit_write(bio); 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_ci /* 65062306a36Sopenharmony_ci * The caller has a ref on the inode, so *mapping is stable 65162306a36Sopenharmony_ci */ 65262306a36Sopenharmony_ci ret = block_write_full_page(&folio->page, mpd->get_block, wbc); 65362306a36Sopenharmony_ci mapping_set_error(mapping, ret); 65462306a36Sopenharmony_ciout: 65562306a36Sopenharmony_ci mpd->bio = bio; 65662306a36Sopenharmony_ci return ret; 65762306a36Sopenharmony_ci} 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci/** 66062306a36Sopenharmony_ci * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them 66162306a36Sopenharmony_ci * @mapping: address space structure to write 66262306a36Sopenharmony_ci * @wbc: subtract the number of written pages from *@wbc->nr_to_write 66362306a36Sopenharmony_ci * @get_block: the filesystem's block mapper function. 66462306a36Sopenharmony_ci * 66562306a36Sopenharmony_ci * This is a library function, which implements the writepages() 66662306a36Sopenharmony_ci * address_space_operation. 
66762306a36Sopenharmony_ci */ 66862306a36Sopenharmony_ciint 66962306a36Sopenharmony_cimpage_writepages(struct address_space *mapping, 67062306a36Sopenharmony_ci struct writeback_control *wbc, get_block_t get_block) 67162306a36Sopenharmony_ci{ 67262306a36Sopenharmony_ci struct mpage_data mpd = { 67362306a36Sopenharmony_ci .get_block = get_block, 67462306a36Sopenharmony_ci }; 67562306a36Sopenharmony_ci struct blk_plug plug; 67662306a36Sopenharmony_ci int ret; 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci blk_start_plug(&plug); 67962306a36Sopenharmony_ci ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd); 68062306a36Sopenharmony_ci if (mpd.bio) 68162306a36Sopenharmony_ci mpage_bio_submit_write(mpd.bio); 68262306a36Sopenharmony_ci blk_finish_plug(&plug); 68362306a36Sopenharmony_ci return ret; 68462306a36Sopenharmony_ci} 68562306a36Sopenharmony_ciEXPORT_SYMBOL(mpage_writepages); 686