162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * linux/fs/ext4/page-io.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * This contains the new page_io functions for ext4 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * Written by Theodore Ts'o, 2010. 862306a36Sopenharmony_ci */ 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include <linux/fs.h> 1162306a36Sopenharmony_ci#include <linux/time.h> 1262306a36Sopenharmony_ci#include <linux/highuid.h> 1362306a36Sopenharmony_ci#include <linux/pagemap.h> 1462306a36Sopenharmony_ci#include <linux/quotaops.h> 1562306a36Sopenharmony_ci#include <linux/string.h> 1662306a36Sopenharmony_ci#include <linux/buffer_head.h> 1762306a36Sopenharmony_ci#include <linux/writeback.h> 1862306a36Sopenharmony_ci#include <linux/pagevec.h> 1962306a36Sopenharmony_ci#include <linux/mpage.h> 2062306a36Sopenharmony_ci#include <linux/namei.h> 2162306a36Sopenharmony_ci#include <linux/uio.h> 2262306a36Sopenharmony_ci#include <linux/bio.h> 2362306a36Sopenharmony_ci#include <linux/workqueue.h> 2462306a36Sopenharmony_ci#include <linux/kernel.h> 2562306a36Sopenharmony_ci#include <linux/slab.h> 2662306a36Sopenharmony_ci#include <linux/mm.h> 2762306a36Sopenharmony_ci#include <linux/sched/mm.h> 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci#include "ext4_jbd2.h" 3062306a36Sopenharmony_ci#include "xattr.h" 3162306a36Sopenharmony_ci#include "acl.h" 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_cistatic struct kmem_cache *io_end_cachep; 3462306a36Sopenharmony_cistatic struct kmem_cache *io_end_vec_cachep; 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_ciint __init ext4_init_pageio(void) 3762306a36Sopenharmony_ci{ 3862306a36Sopenharmony_ci io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT); 3962306a36Sopenharmony_ci if (io_end_cachep == NULL) 4062306a36Sopenharmony_ci return -ENOMEM; 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci io_end_vec_cachep = KMEM_CACHE(ext4_io_end_vec, 0); 4362306a36Sopenharmony_ci if (io_end_vec_cachep == NULL) { 4462306a36Sopenharmony_ci kmem_cache_destroy(io_end_cachep); 4562306a36Sopenharmony_ci return -ENOMEM; 4662306a36Sopenharmony_ci } 4762306a36Sopenharmony_ci return 0; 4862306a36Sopenharmony_ci} 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_civoid ext4_exit_pageio(void) 5162306a36Sopenharmony_ci{ 5262306a36Sopenharmony_ci kmem_cache_destroy(io_end_cachep); 5362306a36Sopenharmony_ci kmem_cache_destroy(io_end_vec_cachep); 5462306a36Sopenharmony_ci} 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_cistruct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end) 5762306a36Sopenharmony_ci{ 5862306a36Sopenharmony_ci struct ext4_io_end_vec *io_end_vec; 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci io_end_vec = kmem_cache_zalloc(io_end_vec_cachep, GFP_NOFS); 6162306a36Sopenharmony_ci if (!io_end_vec) 6262306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 6362306a36Sopenharmony_ci INIT_LIST_HEAD(&io_end_vec->list); 6462306a36Sopenharmony_ci list_add_tail(&io_end_vec->list, &io_end->list_vec); 6562306a36Sopenharmony_ci return io_end_vec; 6662306a36Sopenharmony_ci} 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_cistatic void ext4_free_io_end_vec(ext4_io_end_t *io_end) 6962306a36Sopenharmony_ci{ 7062306a36Sopenharmony_ci struct ext4_io_end_vec *io_end_vec, *tmp; 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci if (list_empty(&io_end->list_vec)) 7362306a36Sopenharmony_ci return; 7462306a36Sopenharmony_ci list_for_each_entry_safe(io_end_vec, tmp, &io_end->list_vec, list) { 7562306a36Sopenharmony_ci list_del(&io_end_vec->list); 7662306a36Sopenharmony_ci kmem_cache_free(io_end_vec_cachep, io_end_vec); 7762306a36Sopenharmony_ci } 7862306a36Sopenharmony_ci} 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_cistruct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end) 8162306a36Sopenharmony_ci{ 8262306a36Sopenharmony_ci BUG_ON(list_empty(&io_end->list_vec)); 8362306a36Sopenharmony_ci return list_last_entry(&io_end->list_vec, struct ext4_io_end_vec, list); 8462306a36Sopenharmony_ci} 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_ci/* 8762306a36Sopenharmony_ci * Print an buffer I/O error compatible with the fs/buffer.c. This 8862306a36Sopenharmony_ci * provides compatibility with dmesg scrapers that look for a specific 8962306a36Sopenharmony_ci * buffer I/O error message. We really need a unified error reporting 9062306a36Sopenharmony_ci * structure to userspace ala Digital Unix's uerf system, but it's 9162306a36Sopenharmony_ci * probably not going to happen in my lifetime, due to LKML politics... 9262306a36Sopenharmony_ci */ 9362306a36Sopenharmony_cistatic void buffer_io_error(struct buffer_head *bh) 9462306a36Sopenharmony_ci{ 9562306a36Sopenharmony_ci printk_ratelimited(KERN_ERR "Buffer I/O error on device %pg, logical block %llu\n", 9662306a36Sopenharmony_ci bh->b_bdev, 9762306a36Sopenharmony_ci (unsigned long long)bh->b_blocknr); 9862306a36Sopenharmony_ci} 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_cistatic void ext4_finish_bio(struct bio *bio) 10162306a36Sopenharmony_ci{ 10262306a36Sopenharmony_ci struct folio_iter fi; 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ci bio_for_each_folio_all(fi, bio) { 10562306a36Sopenharmony_ci struct folio *folio = fi.folio; 10662306a36Sopenharmony_ci struct folio *io_folio = NULL; 10762306a36Sopenharmony_ci struct buffer_head *bh, *head; 10862306a36Sopenharmony_ci size_t bio_start = fi.offset; 10962306a36Sopenharmony_ci size_t bio_end = bio_start + fi.length; 11062306a36Sopenharmony_ci unsigned under_io = 0; 11162306a36Sopenharmony_ci unsigned long flags; 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ci if (fscrypt_is_bounce_folio(folio)) { 11462306a36Sopenharmony_ci io_folio = folio; 11562306a36Sopenharmony_ci folio = fscrypt_pagecache_folio(folio); 11662306a36Sopenharmony_ci } 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_ci if (bio->bi_status) { 11962306a36Sopenharmony_ci int err = blk_status_to_errno(bio->bi_status); 12062306a36Sopenharmony_ci folio_set_error(folio); 12162306a36Sopenharmony_ci mapping_set_error(folio->mapping, err); 12262306a36Sopenharmony_ci } 12362306a36Sopenharmony_ci bh = head = folio_buffers(folio); 12462306a36Sopenharmony_ci /* 12562306a36Sopenharmony_ci * We check all buffers in the folio under b_uptodate_lock 12662306a36Sopenharmony_ci * to avoid races with other end io clearing async_write flags 12762306a36Sopenharmony_ci */ 12862306a36Sopenharmony_ci spin_lock_irqsave(&head->b_uptodate_lock, flags); 12962306a36Sopenharmony_ci do { 13062306a36Sopenharmony_ci if (bh_offset(bh) < bio_start || 13162306a36Sopenharmony_ci bh_offset(bh) + bh->b_size > bio_end) { 13262306a36Sopenharmony_ci if (buffer_async_write(bh)) 13362306a36Sopenharmony_ci under_io++; 13462306a36Sopenharmony_ci continue; 13562306a36Sopenharmony_ci } 13662306a36Sopenharmony_ci clear_buffer_async_write(bh); 13762306a36Sopenharmony_ci if (bio->bi_status) { 13862306a36Sopenharmony_ci set_buffer_write_io_error(bh); 13962306a36Sopenharmony_ci buffer_io_error(bh); 14062306a36Sopenharmony_ci } 14162306a36Sopenharmony_ci } while ((bh = bh->b_this_page) != head); 14262306a36Sopenharmony_ci spin_unlock_irqrestore(&head->b_uptodate_lock, flags); 14362306a36Sopenharmony_ci if (!under_io) { 14462306a36Sopenharmony_ci fscrypt_free_bounce_page(&io_folio->page); 14562306a36Sopenharmony_ci folio_end_writeback(folio); 14662306a36Sopenharmony_ci } 14762306a36Sopenharmony_ci } 14862306a36Sopenharmony_ci} 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_cistatic void ext4_release_io_end(ext4_io_end_t *io_end) 15162306a36Sopenharmony_ci{ 15262306a36Sopenharmony_ci struct bio *bio, *next_bio; 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci BUG_ON(!list_empty(&io_end->list)); 15562306a36Sopenharmony_ci BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); 15662306a36Sopenharmony_ci WARN_ON(io_end->handle); 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci for (bio = io_end->bio; bio; bio = next_bio) { 15962306a36Sopenharmony_ci next_bio = bio->bi_private; 16062306a36Sopenharmony_ci ext4_finish_bio(bio); 16162306a36Sopenharmony_ci bio_put(bio); 16262306a36Sopenharmony_ci } 16362306a36Sopenharmony_ci ext4_free_io_end_vec(io_end); 16462306a36Sopenharmony_ci kmem_cache_free(io_end_cachep, io_end); 16562306a36Sopenharmony_ci} 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_ci/* 16862306a36Sopenharmony_ci * Check a range of space and convert unwritten extents to written. Note that 16962306a36Sopenharmony_ci * we are protected from truncate touching same part of extent tree by the 17062306a36Sopenharmony_ci * fact that truncate code waits for all DIO to finish (thus exclusion from 17162306a36Sopenharmony_ci * direct IO is achieved) and also waits for PageWriteback bits. Thus we 17262306a36Sopenharmony_ci * cannot get to ext4_ext_truncate() before all IOs overlapping that range are 17362306a36Sopenharmony_ci * completed (happens from ext4_free_ioend()). 17462306a36Sopenharmony_ci */ 17562306a36Sopenharmony_cistatic int ext4_end_io_end(ext4_io_end_t *io_end) 17662306a36Sopenharmony_ci{ 17762306a36Sopenharmony_ci struct inode *inode = io_end->inode; 17862306a36Sopenharmony_ci handle_t *handle = io_end->handle; 17962306a36Sopenharmony_ci int ret = 0; 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci ext4_debug("ext4_end_io_nolock: io_end 0x%p from inode %lu,list->next 0x%p," 18262306a36Sopenharmony_ci "list->prev 0x%p\n", 18362306a36Sopenharmony_ci io_end, inode->i_ino, io_end->list.next, io_end->list.prev); 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci io_end->handle = NULL; /* Following call will use up the handle */ 18662306a36Sopenharmony_ci ret = ext4_convert_unwritten_io_end_vec(handle, io_end); 18762306a36Sopenharmony_ci if (ret < 0 && !ext4_forced_shutdown(inode->i_sb)) { 18862306a36Sopenharmony_ci ext4_msg(inode->i_sb, KERN_EMERG, 18962306a36Sopenharmony_ci "failed to convert unwritten extents to written " 19062306a36Sopenharmony_ci "extents -- potential data loss! " 19162306a36Sopenharmony_ci "(inode %lu, error %d)", inode->i_ino, ret); 19262306a36Sopenharmony_ci } 19362306a36Sopenharmony_ci ext4_clear_io_unwritten_flag(io_end); 19462306a36Sopenharmony_ci ext4_release_io_end(io_end); 19562306a36Sopenharmony_ci return ret; 19662306a36Sopenharmony_ci} 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_cistatic void dump_completed_IO(struct inode *inode, struct list_head *head) 19962306a36Sopenharmony_ci{ 20062306a36Sopenharmony_ci#ifdef EXT4FS_DEBUG 20162306a36Sopenharmony_ci struct list_head *cur, *before, *after; 20262306a36Sopenharmony_ci ext4_io_end_t *io_end, *io_end0, *io_end1; 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci if (list_empty(head)) 20562306a36Sopenharmony_ci return; 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_ci ext4_debug("Dump inode %lu completed io list\n", inode->i_ino); 20862306a36Sopenharmony_ci list_for_each_entry(io_end, head, list) { 20962306a36Sopenharmony_ci cur = &io_end->list; 21062306a36Sopenharmony_ci before = cur->prev; 21162306a36Sopenharmony_ci io_end0 = container_of(before, ext4_io_end_t, list); 21262306a36Sopenharmony_ci after = cur->next; 21362306a36Sopenharmony_ci io_end1 = container_of(after, ext4_io_end_t, list); 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", 21662306a36Sopenharmony_ci io_end, inode->i_ino, io_end0, io_end1); 21762306a36Sopenharmony_ci } 21862306a36Sopenharmony_ci#endif 21962306a36Sopenharmony_ci} 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci/* Add the io_end to per-inode completed end_io list. */ 22262306a36Sopenharmony_cistatic void ext4_add_complete_io(ext4_io_end_t *io_end) 22362306a36Sopenharmony_ci{ 22462306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(io_end->inode); 22562306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(io_end->inode->i_sb); 22662306a36Sopenharmony_ci struct workqueue_struct *wq; 22762306a36Sopenharmony_ci unsigned long flags; 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_ci /* Only reserved conversions from writeback should enter here */ 23062306a36Sopenharmony_ci WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); 23162306a36Sopenharmony_ci WARN_ON(!io_end->handle && sbi->s_journal); 23262306a36Sopenharmony_ci spin_lock_irqsave(&ei->i_completed_io_lock, flags); 23362306a36Sopenharmony_ci wq = sbi->rsv_conversion_wq; 23462306a36Sopenharmony_ci if (list_empty(&ei->i_rsv_conversion_list)) 23562306a36Sopenharmony_ci queue_work(wq, &ei->i_rsv_conversion_work); 23662306a36Sopenharmony_ci list_add_tail(&io_end->list, &ei->i_rsv_conversion_list); 23762306a36Sopenharmony_ci spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 23862306a36Sopenharmony_ci} 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_cistatic int ext4_do_flush_completed_IO(struct inode *inode, 24162306a36Sopenharmony_ci struct list_head *head) 24262306a36Sopenharmony_ci{ 24362306a36Sopenharmony_ci ext4_io_end_t *io_end; 24462306a36Sopenharmony_ci struct list_head unwritten; 24562306a36Sopenharmony_ci unsigned long flags; 24662306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 24762306a36Sopenharmony_ci int err, ret = 0; 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci spin_lock_irqsave(&ei->i_completed_io_lock, flags); 25062306a36Sopenharmony_ci dump_completed_IO(inode, head); 25162306a36Sopenharmony_ci list_replace_init(head, &unwritten); 25262306a36Sopenharmony_ci spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci while (!list_empty(&unwritten)) { 25562306a36Sopenharmony_ci io_end = list_entry(unwritten.next, ext4_io_end_t, list); 25662306a36Sopenharmony_ci BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); 25762306a36Sopenharmony_ci list_del_init(&io_end->list); 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci err = ext4_end_io_end(io_end); 26062306a36Sopenharmony_ci if (unlikely(!ret && err)) 26162306a36Sopenharmony_ci ret = err; 26262306a36Sopenharmony_ci } 26362306a36Sopenharmony_ci return ret; 26462306a36Sopenharmony_ci} 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci/* 26762306a36Sopenharmony_ci * work on completed IO, to convert unwritten extents to extents 26862306a36Sopenharmony_ci */ 26962306a36Sopenharmony_civoid ext4_end_io_rsv_work(struct work_struct *work) 27062306a36Sopenharmony_ci{ 27162306a36Sopenharmony_ci struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info, 27262306a36Sopenharmony_ci i_rsv_conversion_work); 27362306a36Sopenharmony_ci ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_rsv_conversion_list); 27462306a36Sopenharmony_ci} 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ciext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) 27762306a36Sopenharmony_ci{ 27862306a36Sopenharmony_ci ext4_io_end_t *io_end = kmem_cache_zalloc(io_end_cachep, flags); 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci if (io_end) { 28162306a36Sopenharmony_ci io_end->inode = inode; 28262306a36Sopenharmony_ci INIT_LIST_HEAD(&io_end->list); 28362306a36Sopenharmony_ci INIT_LIST_HEAD(&io_end->list_vec); 28462306a36Sopenharmony_ci refcount_set(&io_end->count, 1); 28562306a36Sopenharmony_ci } 28662306a36Sopenharmony_ci return io_end; 28762306a36Sopenharmony_ci} 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_civoid ext4_put_io_end_defer(ext4_io_end_t *io_end) 29062306a36Sopenharmony_ci{ 29162306a36Sopenharmony_ci if (refcount_dec_and_test(&io_end->count)) { 29262306a36Sopenharmony_ci if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || 29362306a36Sopenharmony_ci list_empty(&io_end->list_vec)) { 29462306a36Sopenharmony_ci ext4_release_io_end(io_end); 29562306a36Sopenharmony_ci return; 29662306a36Sopenharmony_ci } 29762306a36Sopenharmony_ci ext4_add_complete_io(io_end); 29862306a36Sopenharmony_ci } 29962306a36Sopenharmony_ci} 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ciint ext4_put_io_end(ext4_io_end_t *io_end) 30262306a36Sopenharmony_ci{ 30362306a36Sopenharmony_ci int err = 0; 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci if (refcount_dec_and_test(&io_end->count)) { 30662306a36Sopenharmony_ci if (io_end->flag & EXT4_IO_END_UNWRITTEN) { 30762306a36Sopenharmony_ci err = ext4_convert_unwritten_io_end_vec(io_end->handle, 30862306a36Sopenharmony_ci io_end); 30962306a36Sopenharmony_ci io_end->handle = NULL; 31062306a36Sopenharmony_ci ext4_clear_io_unwritten_flag(io_end); 31162306a36Sopenharmony_ci } 31262306a36Sopenharmony_ci ext4_release_io_end(io_end); 31362306a36Sopenharmony_ci } 31462306a36Sopenharmony_ci return err; 31562306a36Sopenharmony_ci} 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ciext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end) 31862306a36Sopenharmony_ci{ 31962306a36Sopenharmony_ci refcount_inc(&io_end->count); 32062306a36Sopenharmony_ci return io_end; 32162306a36Sopenharmony_ci} 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci/* BIO completion function for page writeback */ 32462306a36Sopenharmony_cistatic void ext4_end_bio(struct bio *bio) 32562306a36Sopenharmony_ci{ 32662306a36Sopenharmony_ci ext4_io_end_t *io_end = bio->bi_private; 32762306a36Sopenharmony_ci sector_t bi_sector = bio->bi_iter.bi_sector; 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_ci if (WARN_ONCE(!io_end, "io_end is NULL: %pg: sector %Lu len %u err %d\n", 33062306a36Sopenharmony_ci bio->bi_bdev, 33162306a36Sopenharmony_ci (long long) bio->bi_iter.bi_sector, 33262306a36Sopenharmony_ci (unsigned) bio_sectors(bio), 33362306a36Sopenharmony_ci bio->bi_status)) { 33462306a36Sopenharmony_ci ext4_finish_bio(bio); 33562306a36Sopenharmony_ci bio_put(bio); 33662306a36Sopenharmony_ci return; 33762306a36Sopenharmony_ci } 33862306a36Sopenharmony_ci bio->bi_end_io = NULL; 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_ci if (bio->bi_status) { 34162306a36Sopenharmony_ci struct inode *inode = io_end->inode; 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_ci ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu " 34462306a36Sopenharmony_ci "starting block %llu)", 34562306a36Sopenharmony_ci bio->bi_status, inode->i_ino, 34662306a36Sopenharmony_ci (unsigned long long) 34762306a36Sopenharmony_ci bi_sector >> (inode->i_blkbits - 9)); 34862306a36Sopenharmony_ci mapping_set_error(inode->i_mapping, 34962306a36Sopenharmony_ci blk_status_to_errno(bio->bi_status)); 35062306a36Sopenharmony_ci } 35162306a36Sopenharmony_ci 35262306a36Sopenharmony_ci if (io_end->flag & EXT4_IO_END_UNWRITTEN) { 35362306a36Sopenharmony_ci /* 35462306a36Sopenharmony_ci * Link bio into list hanging from io_end. We have to do it 35562306a36Sopenharmony_ci * atomically as bio completions can be racing against each 35662306a36Sopenharmony_ci * other. 35762306a36Sopenharmony_ci */ 35862306a36Sopenharmony_ci bio->bi_private = xchg(&io_end->bio, bio); 35962306a36Sopenharmony_ci ext4_put_io_end_defer(io_end); 36062306a36Sopenharmony_ci } else { 36162306a36Sopenharmony_ci /* 36262306a36Sopenharmony_ci * Drop io_end reference early. Inode can get freed once 36362306a36Sopenharmony_ci * we finish the bio. 36462306a36Sopenharmony_ci */ 36562306a36Sopenharmony_ci ext4_put_io_end_defer(io_end); 36662306a36Sopenharmony_ci ext4_finish_bio(bio); 36762306a36Sopenharmony_ci bio_put(bio); 36862306a36Sopenharmony_ci } 36962306a36Sopenharmony_ci} 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_civoid ext4_io_submit(struct ext4_io_submit *io) 37262306a36Sopenharmony_ci{ 37362306a36Sopenharmony_ci struct bio *bio = io->io_bio; 37462306a36Sopenharmony_ci 37562306a36Sopenharmony_ci if (bio) { 37662306a36Sopenharmony_ci if (io->io_wbc->sync_mode == WB_SYNC_ALL) 37762306a36Sopenharmony_ci io->io_bio->bi_opf |= REQ_SYNC; 37862306a36Sopenharmony_ci submit_bio(io->io_bio); 37962306a36Sopenharmony_ci } 38062306a36Sopenharmony_ci io->io_bio = NULL; 38162306a36Sopenharmony_ci} 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_civoid ext4_io_submit_init(struct ext4_io_submit *io, 38462306a36Sopenharmony_ci struct writeback_control *wbc) 38562306a36Sopenharmony_ci{ 38662306a36Sopenharmony_ci io->io_wbc = wbc; 38762306a36Sopenharmony_ci io->io_bio = NULL; 38862306a36Sopenharmony_ci io->io_end = NULL; 38962306a36Sopenharmony_ci} 39062306a36Sopenharmony_ci 39162306a36Sopenharmony_cistatic void io_submit_init_bio(struct ext4_io_submit *io, 39262306a36Sopenharmony_ci struct buffer_head *bh) 39362306a36Sopenharmony_ci{ 39462306a36Sopenharmony_ci struct bio *bio; 39562306a36Sopenharmony_ci 39662306a36Sopenharmony_ci /* 39762306a36Sopenharmony_ci * bio_alloc will _always_ be able to allocate a bio if 39862306a36Sopenharmony_ci * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset(). 39962306a36Sopenharmony_ci */ 40062306a36Sopenharmony_ci bio = bio_alloc(bh->b_bdev, BIO_MAX_VECS, REQ_OP_WRITE, GFP_NOIO); 40162306a36Sopenharmony_ci fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); 40262306a36Sopenharmony_ci bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); 40362306a36Sopenharmony_ci bio->bi_end_io = ext4_end_bio; 40462306a36Sopenharmony_ci bio->bi_private = ext4_get_io_end(io->io_end); 40562306a36Sopenharmony_ci io->io_bio = bio; 40662306a36Sopenharmony_ci io->io_next_block = bh->b_blocknr; 40762306a36Sopenharmony_ci wbc_init_bio(io->io_wbc, bio); 40862306a36Sopenharmony_ci} 40962306a36Sopenharmony_ci 41062306a36Sopenharmony_cistatic void io_submit_add_bh(struct ext4_io_submit *io, 41162306a36Sopenharmony_ci struct inode *inode, 41262306a36Sopenharmony_ci struct folio *folio, 41362306a36Sopenharmony_ci struct folio *io_folio, 41462306a36Sopenharmony_ci struct buffer_head *bh) 41562306a36Sopenharmony_ci{ 41662306a36Sopenharmony_ci if (io->io_bio && (bh->b_blocknr != io->io_next_block || 41762306a36Sopenharmony_ci !fscrypt_mergeable_bio_bh(io->io_bio, bh))) { 41862306a36Sopenharmony_cisubmit_and_retry: 41962306a36Sopenharmony_ci ext4_io_submit(io); 42062306a36Sopenharmony_ci } 42162306a36Sopenharmony_ci if (io->io_bio == NULL) 42262306a36Sopenharmony_ci io_submit_init_bio(io, bh); 42362306a36Sopenharmony_ci if (!bio_add_folio(io->io_bio, io_folio, bh->b_size, bh_offset(bh))) 42462306a36Sopenharmony_ci goto submit_and_retry; 42562306a36Sopenharmony_ci wbc_account_cgroup_owner(io->io_wbc, &folio->page, bh->b_size); 42662306a36Sopenharmony_ci io->io_next_block++; 42762306a36Sopenharmony_ci} 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ciint ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio, 43062306a36Sopenharmony_ci size_t len) 43162306a36Sopenharmony_ci{ 43262306a36Sopenharmony_ci struct folio *io_folio = folio; 43362306a36Sopenharmony_ci struct inode *inode = folio->mapping->host; 43462306a36Sopenharmony_ci unsigned block_start; 43562306a36Sopenharmony_ci struct buffer_head *bh, *head; 43662306a36Sopenharmony_ci int ret = 0; 43762306a36Sopenharmony_ci int nr_to_submit = 0; 43862306a36Sopenharmony_ci struct writeback_control *wbc = io->io_wbc; 43962306a36Sopenharmony_ci bool keep_towrite = false; 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci BUG_ON(!folio_test_locked(folio)); 44262306a36Sopenharmony_ci BUG_ON(folio_test_writeback(folio)); 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_ci folio_clear_error(folio); 44562306a36Sopenharmony_ci 44662306a36Sopenharmony_ci /* 44762306a36Sopenharmony_ci * Comments copied from block_write_full_page: 44862306a36Sopenharmony_ci * 44962306a36Sopenharmony_ci * The folio straddles i_size. It must be zeroed out on each and every 45062306a36Sopenharmony_ci * writepage invocation because it may be mmapped. "A file is mapped 45162306a36Sopenharmony_ci * in multiples of the page size. For a file that is not a multiple of 45262306a36Sopenharmony_ci * the page size, the remaining memory is zeroed when mapped, and 45362306a36Sopenharmony_ci * writes to that region are not written out to the file." 45462306a36Sopenharmony_ci */ 45562306a36Sopenharmony_ci if (len < folio_size(folio)) 45662306a36Sopenharmony_ci folio_zero_segment(folio, len, folio_size(folio)); 45762306a36Sopenharmony_ci /* 45862306a36Sopenharmony_ci * In the first loop we prepare and mark buffers to submit. We have to 45962306a36Sopenharmony_ci * mark all buffers in the folio before submitting so that 46062306a36Sopenharmony_ci * folio_end_writeback() cannot be called from ext4_end_bio() when IO 46162306a36Sopenharmony_ci * on the first buffer finishes and we are still working on submitting 46262306a36Sopenharmony_ci * the second buffer. 46362306a36Sopenharmony_ci */ 46462306a36Sopenharmony_ci bh = head = folio_buffers(folio); 46562306a36Sopenharmony_ci do { 46662306a36Sopenharmony_ci block_start = bh_offset(bh); 46762306a36Sopenharmony_ci if (block_start >= len) { 46862306a36Sopenharmony_ci clear_buffer_dirty(bh); 46962306a36Sopenharmony_ci set_buffer_uptodate(bh); 47062306a36Sopenharmony_ci continue; 47162306a36Sopenharmony_ci } 47262306a36Sopenharmony_ci if (!buffer_dirty(bh) || buffer_delay(bh) || 47362306a36Sopenharmony_ci !buffer_mapped(bh) || buffer_unwritten(bh)) { 47462306a36Sopenharmony_ci /* A hole? We can safely clear the dirty bit */ 47562306a36Sopenharmony_ci if (!buffer_mapped(bh)) 47662306a36Sopenharmony_ci clear_buffer_dirty(bh); 47762306a36Sopenharmony_ci /* 47862306a36Sopenharmony_ci * Keeping dirty some buffer we cannot write? Make sure 47962306a36Sopenharmony_ci * to redirty the folio and keep TOWRITE tag so that 48062306a36Sopenharmony_ci * racing WB_SYNC_ALL writeback does not skip the folio. 48162306a36Sopenharmony_ci * This happens e.g. when doing writeout for 48262306a36Sopenharmony_ci * transaction commit or when journalled data is not 48362306a36Sopenharmony_ci * yet committed. 48462306a36Sopenharmony_ci */ 48562306a36Sopenharmony_ci if (buffer_dirty(bh) || 48662306a36Sopenharmony_ci (buffer_jbd(bh) && buffer_jbddirty(bh))) { 48762306a36Sopenharmony_ci if (!folio_test_dirty(folio)) 48862306a36Sopenharmony_ci folio_redirty_for_writepage(wbc, folio); 48962306a36Sopenharmony_ci keep_towrite = true; 49062306a36Sopenharmony_ci } 49162306a36Sopenharmony_ci continue; 49262306a36Sopenharmony_ci } 49362306a36Sopenharmony_ci if (buffer_new(bh)) 49462306a36Sopenharmony_ci clear_buffer_new(bh); 49562306a36Sopenharmony_ci set_buffer_async_write(bh); 49662306a36Sopenharmony_ci clear_buffer_dirty(bh); 49762306a36Sopenharmony_ci nr_to_submit++; 49862306a36Sopenharmony_ci } while ((bh = bh->b_this_page) != head); 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_ci /* Nothing to submit? Just unlock the folio... */ 50162306a36Sopenharmony_ci if (!nr_to_submit) 50262306a36Sopenharmony_ci return 0; 50362306a36Sopenharmony_ci 50462306a36Sopenharmony_ci bh = head = folio_buffers(folio); 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_ci /* 50762306a36Sopenharmony_ci * If any blocks are being written to an encrypted file, encrypt them 50862306a36Sopenharmony_ci * into a bounce page. For simplicity, just encrypt until the last 50962306a36Sopenharmony_ci * block which might be needed. This may cause some unneeded blocks 51062306a36Sopenharmony_ci * (e.g. holes) to be unnecessarily encrypted, but this is rare and 51162306a36Sopenharmony_ci * can't happen in the common case of blocksize == PAGE_SIZE. 51262306a36Sopenharmony_ci */ 51362306a36Sopenharmony_ci if (fscrypt_inode_uses_fs_layer_crypto(inode)) { 51462306a36Sopenharmony_ci gfp_t gfp_flags = GFP_NOFS; 51562306a36Sopenharmony_ci unsigned int enc_bytes = round_up(len, i_blocksize(inode)); 51662306a36Sopenharmony_ci struct page *bounce_page; 51762306a36Sopenharmony_ci 51862306a36Sopenharmony_ci /* 51962306a36Sopenharmony_ci * Since bounce page allocation uses a mempool, we can only use 52062306a36Sopenharmony_ci * a waiting mask (i.e. request guaranteed allocation) on the 52162306a36Sopenharmony_ci * first page of the bio. Otherwise it can deadlock. 52262306a36Sopenharmony_ci */ 52362306a36Sopenharmony_ci if (io->io_bio) 52462306a36Sopenharmony_ci gfp_flags = GFP_NOWAIT | __GFP_NOWARN; 52562306a36Sopenharmony_ci retry_encrypt: 52662306a36Sopenharmony_ci bounce_page = fscrypt_encrypt_pagecache_blocks(&folio->page, 52762306a36Sopenharmony_ci enc_bytes, 0, gfp_flags); 52862306a36Sopenharmony_ci if (IS_ERR(bounce_page)) { 52962306a36Sopenharmony_ci ret = PTR_ERR(bounce_page); 53062306a36Sopenharmony_ci if (ret == -ENOMEM && 53162306a36Sopenharmony_ci (io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) { 53262306a36Sopenharmony_ci gfp_t new_gfp_flags = GFP_NOFS; 53362306a36Sopenharmony_ci if (io->io_bio) 53462306a36Sopenharmony_ci ext4_io_submit(io); 53562306a36Sopenharmony_ci else 53662306a36Sopenharmony_ci new_gfp_flags |= __GFP_NOFAIL; 53762306a36Sopenharmony_ci memalloc_retry_wait(gfp_flags); 53862306a36Sopenharmony_ci gfp_flags = new_gfp_flags; 53962306a36Sopenharmony_ci goto retry_encrypt; 54062306a36Sopenharmony_ci } 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret); 54362306a36Sopenharmony_ci folio_redirty_for_writepage(wbc, folio); 54462306a36Sopenharmony_ci do { 54562306a36Sopenharmony_ci if (buffer_async_write(bh)) { 54662306a36Sopenharmony_ci clear_buffer_async_write(bh); 54762306a36Sopenharmony_ci set_buffer_dirty(bh); 54862306a36Sopenharmony_ci } 54962306a36Sopenharmony_ci bh = bh->b_this_page; 55062306a36Sopenharmony_ci } while (bh != head); 55162306a36Sopenharmony_ci 55262306a36Sopenharmony_ci return ret; 55362306a36Sopenharmony_ci } 55462306a36Sopenharmony_ci io_folio = page_folio(bounce_page); 55562306a36Sopenharmony_ci } 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci __folio_start_writeback(folio, keep_towrite); 55862306a36Sopenharmony_ci 55962306a36Sopenharmony_ci /* Now submit buffers to write */ 56062306a36Sopenharmony_ci do { 56162306a36Sopenharmony_ci if (!buffer_async_write(bh)) 56262306a36Sopenharmony_ci continue; 56362306a36Sopenharmony_ci io_submit_add_bh(io, inode, folio, io_folio, bh); 56462306a36Sopenharmony_ci } while ((bh = bh->b_this_page) != head); 56562306a36Sopenharmony_ci 56662306a36Sopenharmony_ci return 0; 56762306a36Sopenharmony_ci} 568