162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 1991, 1992 Linus Torvalds 462306a36Sopenharmony_ci * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE 562306a36Sopenharmony_ci * Copyright (C) 2016 - 2020 Christoph Hellwig 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci#include <linux/init.h> 862306a36Sopenharmony_ci#include <linux/mm.h> 962306a36Sopenharmony_ci#include <linux/blkdev.h> 1062306a36Sopenharmony_ci#include <linux/buffer_head.h> 1162306a36Sopenharmony_ci#include <linux/mpage.h> 1262306a36Sopenharmony_ci#include <linux/uio.h> 1362306a36Sopenharmony_ci#include <linux/namei.h> 1462306a36Sopenharmony_ci#include <linux/task_io_accounting_ops.h> 1562306a36Sopenharmony_ci#include <linux/falloc.h> 1662306a36Sopenharmony_ci#include <linux/suspend.h> 1762306a36Sopenharmony_ci#include <linux/fs.h> 1862306a36Sopenharmony_ci#include <linux/iomap.h> 1962306a36Sopenharmony_ci#include <linux/module.h> 2062306a36Sopenharmony_ci#include "blk.h" 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_cistatic inline struct inode *bdev_file_inode(struct file *file) 2362306a36Sopenharmony_ci{ 2462306a36Sopenharmony_ci return file->f_mapping->host; 2562306a36Sopenharmony_ci} 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_cistatic blk_opf_t dio_bio_write_op(struct kiocb *iocb) 2862306a36Sopenharmony_ci{ 2962306a36Sopenharmony_ci blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci /* avoid the need for a I/O completion work item */ 3262306a36Sopenharmony_ci if (iocb_is_dsync(iocb)) 3362306a36Sopenharmony_ci opf |= REQ_FUA; 3462306a36Sopenharmony_ci return opf; 3562306a36Sopenharmony_ci} 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_cistatic bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos, 3862306a36Sopenharmony_ci struct iov_iter *iter) 3962306a36Sopenharmony_ci{ 4062306a36Sopenharmony_ci return pos & (bdev_logical_block_size(bdev) - 1) || 4162306a36Sopenharmony_ci !bdev_iter_is_aligned(bdev, iter); 4262306a36Sopenharmony_ci} 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci#define DIO_INLINE_BIO_VECS 4 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_cistatic ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, 4762306a36Sopenharmony_ci struct iov_iter *iter, unsigned int nr_pages) 4862306a36Sopenharmony_ci{ 4962306a36Sopenharmony_ci struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); 5062306a36Sopenharmony_ci struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs; 5162306a36Sopenharmony_ci loff_t pos = iocb->ki_pos; 5262306a36Sopenharmony_ci bool should_dirty = false; 5362306a36Sopenharmony_ci struct bio bio; 5462306a36Sopenharmony_ci ssize_t ret; 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci if (blkdev_dio_unaligned(bdev, pos, iter)) 5762306a36Sopenharmony_ci return -EINVAL; 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci if (nr_pages <= DIO_INLINE_BIO_VECS) 6062306a36Sopenharmony_ci vecs = inline_vecs; 6162306a36Sopenharmony_ci else { 6262306a36Sopenharmony_ci vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec), 6362306a36Sopenharmony_ci GFP_KERNEL); 6462306a36Sopenharmony_ci if (!vecs) 6562306a36Sopenharmony_ci return -ENOMEM; 6662306a36Sopenharmony_ci } 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci if (iov_iter_rw(iter) == READ) { 6962306a36Sopenharmony_ci bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ); 7062306a36Sopenharmony_ci if (user_backed_iter(iter)) 7162306a36Sopenharmony_ci should_dirty = true; 7262306a36Sopenharmony_ci } else { 7362306a36Sopenharmony_ci bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb)); 7462306a36Sopenharmony_ci } 7562306a36Sopenharmony_ci bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT; 7662306a36Sopenharmony_ci bio.bi_ioprio = iocb->ki_ioprio; 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci ret = bio_iov_iter_get_pages(&bio, iter); 7962306a36Sopenharmony_ci if (unlikely(ret)) 8062306a36Sopenharmony_ci goto out; 8162306a36Sopenharmony_ci ret = bio.bi_iter.bi_size; 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci if (iov_iter_rw(iter) == WRITE) 8462306a36Sopenharmony_ci task_io_account_write(ret); 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_ci if (iocb->ki_flags & IOCB_NOWAIT) 8762306a36Sopenharmony_ci bio.bi_opf |= REQ_NOWAIT; 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci submit_bio_wait(&bio); 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci bio_release_pages(&bio, should_dirty); 9262306a36Sopenharmony_ci if (unlikely(bio.bi_status)) 9362306a36Sopenharmony_ci ret = blk_status_to_errno(bio.bi_status); 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_ciout: 9662306a36Sopenharmony_ci if (vecs != inline_vecs) 9762306a36Sopenharmony_ci kfree(vecs); 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci bio_uninit(&bio); 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci return ret; 10262306a36Sopenharmony_ci} 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_cienum { 10562306a36Sopenharmony_ci DIO_SHOULD_DIRTY = 1, 10662306a36Sopenharmony_ci DIO_IS_SYNC = 2, 10762306a36Sopenharmony_ci}; 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_cistruct blkdev_dio { 11062306a36Sopenharmony_ci union { 11162306a36Sopenharmony_ci struct kiocb *iocb; 11262306a36Sopenharmony_ci struct task_struct *waiter; 11362306a36Sopenharmony_ci }; 11462306a36Sopenharmony_ci size_t size; 11562306a36Sopenharmony_ci atomic_t ref; 11662306a36Sopenharmony_ci unsigned int flags; 11762306a36Sopenharmony_ci struct bio bio ____cacheline_aligned_in_smp; 11862306a36Sopenharmony_ci}; 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_cistatic struct bio_set blkdev_dio_pool; 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_cistatic void blkdev_bio_end_io(struct bio *bio) 12362306a36Sopenharmony_ci{ 12462306a36Sopenharmony_ci struct blkdev_dio *dio = bio->bi_private; 12562306a36Sopenharmony_ci bool should_dirty = dio->flags & DIO_SHOULD_DIRTY; 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci if (bio->bi_status && !dio->bio.bi_status) 12862306a36Sopenharmony_ci dio->bio.bi_status = bio->bi_status; 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci if (atomic_dec_and_test(&dio->ref)) { 13162306a36Sopenharmony_ci if (!(dio->flags & DIO_IS_SYNC)) { 13262306a36Sopenharmony_ci struct kiocb *iocb = dio->iocb; 13362306a36Sopenharmony_ci ssize_t ret; 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci WRITE_ONCE(iocb->private, NULL); 13662306a36Sopenharmony_ci 13762306a36Sopenharmony_ci if (likely(!dio->bio.bi_status)) { 13862306a36Sopenharmony_ci ret = dio->size; 13962306a36Sopenharmony_ci iocb->ki_pos += ret; 14062306a36Sopenharmony_ci } else { 14162306a36Sopenharmony_ci ret = blk_status_to_errno(dio->bio.bi_status); 14262306a36Sopenharmony_ci } 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci dio->iocb->ki_complete(iocb, ret); 14562306a36Sopenharmony_ci bio_put(&dio->bio); 14662306a36Sopenharmony_ci } else { 14762306a36Sopenharmony_ci struct task_struct *waiter = dio->waiter; 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_ci WRITE_ONCE(dio->waiter, NULL); 15062306a36Sopenharmony_ci blk_wake_io_task(waiter); 15162306a36Sopenharmony_ci } 15262306a36Sopenharmony_ci } 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci if (should_dirty) { 15562306a36Sopenharmony_ci bio_check_pages_dirty(bio); 15662306a36Sopenharmony_ci } else { 15762306a36Sopenharmony_ci bio_release_pages(bio, false); 15862306a36Sopenharmony_ci bio_put(bio); 15962306a36Sopenharmony_ci } 16062306a36Sopenharmony_ci} 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_cistatic ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, 16362306a36Sopenharmony_ci unsigned int nr_pages) 16462306a36Sopenharmony_ci{ 16562306a36Sopenharmony_ci struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); 16662306a36Sopenharmony_ci struct blk_plug plug; 16762306a36Sopenharmony_ci struct blkdev_dio *dio; 16862306a36Sopenharmony_ci struct bio *bio; 16962306a36Sopenharmony_ci bool is_read = (iov_iter_rw(iter) == READ), is_sync; 17062306a36Sopenharmony_ci blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb); 17162306a36Sopenharmony_ci loff_t pos = iocb->ki_pos; 17262306a36Sopenharmony_ci int ret = 0; 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci if (blkdev_dio_unaligned(bdev, pos, iter)) 17562306a36Sopenharmony_ci return -EINVAL; 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci if (iocb->ki_flags & IOCB_ALLOC_CACHE) 17862306a36Sopenharmony_ci opf |= REQ_ALLOC_CACHE; 17962306a36Sopenharmony_ci bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL, 18062306a36Sopenharmony_ci &blkdev_dio_pool); 18162306a36Sopenharmony_ci dio = container_of(bio, struct blkdev_dio, bio); 18262306a36Sopenharmony_ci atomic_set(&dio->ref, 1); 18362306a36Sopenharmony_ci /* 18462306a36Sopenharmony_ci * Grab an extra reference to ensure the dio structure which is embedded 18562306a36Sopenharmony_ci * into the first bio stays around. 18662306a36Sopenharmony_ci */ 18762306a36Sopenharmony_ci bio_get(bio); 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci is_sync = is_sync_kiocb(iocb); 19062306a36Sopenharmony_ci if (is_sync) { 19162306a36Sopenharmony_ci dio->flags = DIO_IS_SYNC; 19262306a36Sopenharmony_ci dio->waiter = current; 19362306a36Sopenharmony_ci } else { 19462306a36Sopenharmony_ci dio->flags = 0; 19562306a36Sopenharmony_ci dio->iocb = iocb; 19662306a36Sopenharmony_ci } 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci dio->size = 0; 19962306a36Sopenharmony_ci if (is_read && user_backed_iter(iter)) 20062306a36Sopenharmony_ci dio->flags |= DIO_SHOULD_DIRTY; 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_ci blk_start_plug(&plug); 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci for (;;) { 20562306a36Sopenharmony_ci bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT; 20662306a36Sopenharmony_ci bio->bi_private = dio; 20762306a36Sopenharmony_ci bio->bi_end_io = blkdev_bio_end_io; 20862306a36Sopenharmony_ci bio->bi_ioprio = iocb->ki_ioprio; 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci ret = bio_iov_iter_get_pages(bio, iter); 21162306a36Sopenharmony_ci if (unlikely(ret)) { 21262306a36Sopenharmony_ci bio->bi_status = BLK_STS_IOERR; 21362306a36Sopenharmony_ci bio_endio(bio); 21462306a36Sopenharmony_ci break; 21562306a36Sopenharmony_ci } 21662306a36Sopenharmony_ci if (iocb->ki_flags & IOCB_NOWAIT) { 21762306a36Sopenharmony_ci /* 21862306a36Sopenharmony_ci * This is nonblocking IO, and we need to allocate 21962306a36Sopenharmony_ci * another bio if we have data left to map. As we 22062306a36Sopenharmony_ci * cannot guarantee that one of the sub bios will not 22162306a36Sopenharmony_ci * fail getting issued FOR NOWAIT and as error results 22262306a36Sopenharmony_ci * are coalesced across all of them, be safe and ask for 22362306a36Sopenharmony_ci * a retry of this from blocking context. 22462306a36Sopenharmony_ci */ 22562306a36Sopenharmony_ci if (unlikely(iov_iter_count(iter))) { 22662306a36Sopenharmony_ci bio_release_pages(bio, false); 22762306a36Sopenharmony_ci bio_clear_flag(bio, BIO_REFFED); 22862306a36Sopenharmony_ci bio_put(bio); 22962306a36Sopenharmony_ci blk_finish_plug(&plug); 23062306a36Sopenharmony_ci return -EAGAIN; 23162306a36Sopenharmony_ci } 23262306a36Sopenharmony_ci bio->bi_opf |= REQ_NOWAIT; 23362306a36Sopenharmony_ci } 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci if (is_read) { 23662306a36Sopenharmony_ci if (dio->flags & DIO_SHOULD_DIRTY) 23762306a36Sopenharmony_ci bio_set_pages_dirty(bio); 23862306a36Sopenharmony_ci } else { 23962306a36Sopenharmony_ci task_io_account_write(bio->bi_iter.bi_size); 24062306a36Sopenharmony_ci } 24162306a36Sopenharmony_ci dio->size += bio->bi_iter.bi_size; 24262306a36Sopenharmony_ci pos += bio->bi_iter.bi_size; 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS); 24562306a36Sopenharmony_ci if (!nr_pages) { 24662306a36Sopenharmony_ci submit_bio(bio); 24762306a36Sopenharmony_ci break; 24862306a36Sopenharmony_ci } 24962306a36Sopenharmony_ci atomic_inc(&dio->ref); 25062306a36Sopenharmony_ci submit_bio(bio); 25162306a36Sopenharmony_ci bio = bio_alloc(bdev, nr_pages, opf, GFP_KERNEL); 25262306a36Sopenharmony_ci } 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci blk_finish_plug(&plug); 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci if (!is_sync) 25762306a36Sopenharmony_ci return -EIOCBQUEUED; 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci for (;;) { 26062306a36Sopenharmony_ci set_current_state(TASK_UNINTERRUPTIBLE); 26162306a36Sopenharmony_ci if (!READ_ONCE(dio->waiter)) 26262306a36Sopenharmony_ci break; 26362306a36Sopenharmony_ci blk_io_schedule(); 26462306a36Sopenharmony_ci } 26562306a36Sopenharmony_ci __set_current_state(TASK_RUNNING); 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci if (!ret) 26862306a36Sopenharmony_ci ret = blk_status_to_errno(dio->bio.bi_status); 26962306a36Sopenharmony_ci if (likely(!ret)) 27062306a36Sopenharmony_ci ret = dio->size; 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci bio_put(&dio->bio); 27362306a36Sopenharmony_ci return ret; 27462306a36Sopenharmony_ci} 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_cistatic void blkdev_bio_end_io_async(struct bio *bio) 27762306a36Sopenharmony_ci{ 27862306a36Sopenharmony_ci struct blkdev_dio *dio = container_of(bio, struct blkdev_dio, bio); 27962306a36Sopenharmony_ci struct kiocb *iocb = dio->iocb; 28062306a36Sopenharmony_ci ssize_t ret; 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci WRITE_ONCE(iocb->private, NULL); 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci if (likely(!bio->bi_status)) { 28562306a36Sopenharmony_ci ret = dio->size; 28662306a36Sopenharmony_ci iocb->ki_pos += ret; 28762306a36Sopenharmony_ci } else { 28862306a36Sopenharmony_ci ret = blk_status_to_errno(bio->bi_status); 28962306a36Sopenharmony_ci } 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci iocb->ki_complete(iocb, ret); 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci if (dio->flags & DIO_SHOULD_DIRTY) { 29462306a36Sopenharmony_ci bio_check_pages_dirty(bio); 29562306a36Sopenharmony_ci } else { 29662306a36Sopenharmony_ci bio_release_pages(bio, false); 29762306a36Sopenharmony_ci bio_put(bio); 29862306a36Sopenharmony_ci } 29962306a36Sopenharmony_ci} 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_cistatic ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, 30262306a36Sopenharmony_ci struct iov_iter *iter, 30362306a36Sopenharmony_ci unsigned int nr_pages) 30462306a36Sopenharmony_ci{ 30562306a36Sopenharmony_ci struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); 30662306a36Sopenharmony_ci bool is_read = iov_iter_rw(iter) == READ; 30762306a36Sopenharmony_ci blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb); 30862306a36Sopenharmony_ci struct blkdev_dio *dio; 30962306a36Sopenharmony_ci struct bio *bio; 31062306a36Sopenharmony_ci loff_t pos = iocb->ki_pos; 31162306a36Sopenharmony_ci int ret = 0; 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci if (blkdev_dio_unaligned(bdev, pos, iter)) 31462306a36Sopenharmony_ci return -EINVAL; 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci if (iocb->ki_flags & IOCB_ALLOC_CACHE) 31762306a36Sopenharmony_ci opf |= REQ_ALLOC_CACHE; 31862306a36Sopenharmony_ci bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL, 31962306a36Sopenharmony_ci &blkdev_dio_pool); 32062306a36Sopenharmony_ci dio = container_of(bio, struct blkdev_dio, bio); 32162306a36Sopenharmony_ci dio->flags = 0; 32262306a36Sopenharmony_ci dio->iocb = iocb; 32362306a36Sopenharmony_ci bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT; 32462306a36Sopenharmony_ci bio->bi_end_io = blkdev_bio_end_io_async; 32562306a36Sopenharmony_ci bio->bi_ioprio = iocb->ki_ioprio; 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_ci if (iov_iter_is_bvec(iter)) { 32862306a36Sopenharmony_ci /* 32962306a36Sopenharmony_ci * Users don't rely on the iterator being in any particular 33062306a36Sopenharmony_ci * state for async I/O returning -EIOCBQUEUED, hence we can 33162306a36Sopenharmony_ci * avoid expensive iov_iter_advance(). Bypass 33262306a36Sopenharmony_ci * bio_iov_iter_get_pages() and set the bvec directly. 33362306a36Sopenharmony_ci */ 33462306a36Sopenharmony_ci bio_iov_bvec_set(bio, iter); 33562306a36Sopenharmony_ci } else { 33662306a36Sopenharmony_ci ret = bio_iov_iter_get_pages(bio, iter); 33762306a36Sopenharmony_ci if (unlikely(ret)) { 33862306a36Sopenharmony_ci bio_put(bio); 33962306a36Sopenharmony_ci return ret; 34062306a36Sopenharmony_ci } 34162306a36Sopenharmony_ci } 34262306a36Sopenharmony_ci dio->size = bio->bi_iter.bi_size; 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci if (is_read) { 34562306a36Sopenharmony_ci if (user_backed_iter(iter)) { 34662306a36Sopenharmony_ci dio->flags |= DIO_SHOULD_DIRTY; 34762306a36Sopenharmony_ci bio_set_pages_dirty(bio); 34862306a36Sopenharmony_ci } 34962306a36Sopenharmony_ci } else { 35062306a36Sopenharmony_ci task_io_account_write(bio->bi_iter.bi_size); 35162306a36Sopenharmony_ci } 35262306a36Sopenharmony_ci 35362306a36Sopenharmony_ci if (iocb->ki_flags & IOCB_NOWAIT) 35462306a36Sopenharmony_ci bio->bi_opf |= REQ_NOWAIT; 35562306a36Sopenharmony_ci 35662306a36Sopenharmony_ci if (iocb->ki_flags & IOCB_HIPRI) { 35762306a36Sopenharmony_ci bio->bi_opf |= REQ_POLLED; 35862306a36Sopenharmony_ci submit_bio(bio); 35962306a36Sopenharmony_ci WRITE_ONCE(iocb->private, bio); 36062306a36Sopenharmony_ci } else { 36162306a36Sopenharmony_ci submit_bio(bio); 36262306a36Sopenharmony_ci } 36362306a36Sopenharmony_ci return -EIOCBQUEUED; 36462306a36Sopenharmony_ci} 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_cistatic ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) 36762306a36Sopenharmony_ci{ 36862306a36Sopenharmony_ci unsigned int nr_pages; 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_ci if (!iov_iter_count(iter)) 37162306a36Sopenharmony_ci return 0; 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1); 37462306a36Sopenharmony_ci if (likely(nr_pages <= BIO_MAX_VECS)) { 37562306a36Sopenharmony_ci if (is_sync_kiocb(iocb)) 37662306a36Sopenharmony_ci return __blkdev_direct_IO_simple(iocb, iter, nr_pages); 37762306a36Sopenharmony_ci return __blkdev_direct_IO_async(iocb, iter, nr_pages); 37862306a36Sopenharmony_ci } 37962306a36Sopenharmony_ci return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages)); 38062306a36Sopenharmony_ci} 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_cistatic int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length, 38362306a36Sopenharmony_ci unsigned int flags, struct iomap *iomap, struct iomap *srcmap) 38462306a36Sopenharmony_ci{ 38562306a36Sopenharmony_ci struct block_device *bdev = I_BDEV(inode); 38662306a36Sopenharmony_ci loff_t isize = i_size_read(inode); 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_ci iomap->bdev = bdev; 38962306a36Sopenharmony_ci iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev)); 39062306a36Sopenharmony_ci if (iomap->offset >= isize) 39162306a36Sopenharmony_ci return -EIO; 39262306a36Sopenharmony_ci iomap->type = IOMAP_MAPPED; 39362306a36Sopenharmony_ci iomap->addr = iomap->offset; 39462306a36Sopenharmony_ci iomap->length = isize - iomap->offset; 39562306a36Sopenharmony_ci iomap->flags |= IOMAP_F_BUFFER_HEAD; /* noop for !CONFIG_BUFFER_HEAD */ 39662306a36Sopenharmony_ci return 0; 39762306a36Sopenharmony_ci} 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_cistatic const struct iomap_ops blkdev_iomap_ops = { 40062306a36Sopenharmony_ci .iomap_begin = blkdev_iomap_begin, 40162306a36Sopenharmony_ci}; 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci#ifdef CONFIG_BUFFER_HEAD 40462306a36Sopenharmony_cistatic int blkdev_get_block(struct inode *inode, sector_t iblock, 40562306a36Sopenharmony_ci struct buffer_head *bh, int create) 40662306a36Sopenharmony_ci{ 40762306a36Sopenharmony_ci bh->b_bdev = I_BDEV(inode); 40862306a36Sopenharmony_ci bh->b_blocknr = iblock; 40962306a36Sopenharmony_ci set_buffer_mapped(bh); 41062306a36Sopenharmony_ci return 0; 41162306a36Sopenharmony_ci} 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_cistatic int blkdev_writepage(struct page *page, struct writeback_control *wbc) 41462306a36Sopenharmony_ci{ 41562306a36Sopenharmony_ci return block_write_full_page(page, blkdev_get_block, wbc); 41662306a36Sopenharmony_ci} 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_cistatic int blkdev_read_folio(struct file *file, struct folio *folio) 41962306a36Sopenharmony_ci{ 42062306a36Sopenharmony_ci return block_read_full_folio(folio, blkdev_get_block); 42162306a36Sopenharmony_ci} 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_cistatic void blkdev_readahead(struct readahead_control *rac) 42462306a36Sopenharmony_ci{ 42562306a36Sopenharmony_ci mpage_readahead(rac, blkdev_get_block); 42662306a36Sopenharmony_ci} 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_cistatic int blkdev_write_begin(struct file *file, struct address_space *mapping, 42962306a36Sopenharmony_ci loff_t pos, unsigned len, struct page **pagep, void **fsdata) 43062306a36Sopenharmony_ci{ 43162306a36Sopenharmony_ci return block_write_begin(mapping, pos, len, pagep, blkdev_get_block); 43262306a36Sopenharmony_ci} 43362306a36Sopenharmony_ci 43462306a36Sopenharmony_cistatic int blkdev_write_end(struct file *file, struct address_space *mapping, 43562306a36Sopenharmony_ci loff_t pos, unsigned len, unsigned copied, struct page *page, 43662306a36Sopenharmony_ci void *fsdata) 43762306a36Sopenharmony_ci{ 43862306a36Sopenharmony_ci int ret; 43962306a36Sopenharmony_ci ret = block_write_end(file, mapping, pos, len, copied, page, fsdata); 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci unlock_page(page); 44262306a36Sopenharmony_ci put_page(page); 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_ci return ret; 44562306a36Sopenharmony_ci} 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ciconst struct address_space_operations def_blk_aops = { 44862306a36Sopenharmony_ci .dirty_folio = block_dirty_folio, 44962306a36Sopenharmony_ci .invalidate_folio = block_invalidate_folio, 45062306a36Sopenharmony_ci .read_folio = blkdev_read_folio, 45162306a36Sopenharmony_ci .readahead = blkdev_readahead, 45262306a36Sopenharmony_ci .writepage = blkdev_writepage, 45362306a36Sopenharmony_ci .write_begin = blkdev_write_begin, 45462306a36Sopenharmony_ci .write_end = blkdev_write_end, 45562306a36Sopenharmony_ci .migrate_folio = buffer_migrate_folio_norefs, 45662306a36Sopenharmony_ci .is_dirty_writeback = buffer_check_dirty_writeback, 45762306a36Sopenharmony_ci}; 45862306a36Sopenharmony_ci#else /* CONFIG_BUFFER_HEAD */ 45962306a36Sopenharmony_cistatic int blkdev_read_folio(struct file *file, struct folio *folio) 46062306a36Sopenharmony_ci{ 46162306a36Sopenharmony_ci return iomap_read_folio(folio, &blkdev_iomap_ops); 46262306a36Sopenharmony_ci} 46362306a36Sopenharmony_ci 46462306a36Sopenharmony_cistatic void blkdev_readahead(struct readahead_control *rac) 46562306a36Sopenharmony_ci{ 46662306a36Sopenharmony_ci iomap_readahead(rac, &blkdev_iomap_ops); 46762306a36Sopenharmony_ci} 46862306a36Sopenharmony_ci 46962306a36Sopenharmony_cistatic int blkdev_map_blocks(struct iomap_writepage_ctx *wpc, 47062306a36Sopenharmony_ci struct inode *inode, loff_t offset) 47162306a36Sopenharmony_ci{ 47262306a36Sopenharmony_ci loff_t isize = i_size_read(inode); 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_ci if (WARN_ON_ONCE(offset >= isize)) 47562306a36Sopenharmony_ci return -EIO; 47662306a36Sopenharmony_ci if (offset >= wpc->iomap.offset && 47762306a36Sopenharmony_ci offset < wpc->iomap.offset + wpc->iomap.length) 47862306a36Sopenharmony_ci return 0; 47962306a36Sopenharmony_ci return blkdev_iomap_begin(inode, offset, isize - offset, 48062306a36Sopenharmony_ci IOMAP_WRITE, &wpc->iomap, NULL); 48162306a36Sopenharmony_ci} 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_cistatic const struct iomap_writeback_ops blkdev_writeback_ops = { 48462306a36Sopenharmony_ci .map_blocks = blkdev_map_blocks, 48562306a36Sopenharmony_ci}; 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_cistatic int blkdev_writepages(struct address_space *mapping, 48862306a36Sopenharmony_ci struct writeback_control *wbc) 48962306a36Sopenharmony_ci{ 49062306a36Sopenharmony_ci struct iomap_writepage_ctx wpc = { }; 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci return iomap_writepages(mapping, wbc, &wpc, &blkdev_writeback_ops); 49362306a36Sopenharmony_ci} 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_ciconst struct address_space_operations def_blk_aops = { 49662306a36Sopenharmony_ci .dirty_folio = filemap_dirty_folio, 49762306a36Sopenharmony_ci .release_folio = iomap_release_folio, 49862306a36Sopenharmony_ci .invalidate_folio = iomap_invalidate_folio, 49962306a36Sopenharmony_ci .read_folio = blkdev_read_folio, 50062306a36Sopenharmony_ci .readahead = blkdev_readahead, 50162306a36Sopenharmony_ci .writepages = blkdev_writepages, 50262306a36Sopenharmony_ci .is_partially_uptodate = iomap_is_partially_uptodate, 50362306a36Sopenharmony_ci .error_remove_page = generic_error_remove_page, 50462306a36Sopenharmony_ci .migrate_folio = filemap_migrate_folio, 50562306a36Sopenharmony_ci}; 50662306a36Sopenharmony_ci#endif /* CONFIG_BUFFER_HEAD */ 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_ci/* 50962306a36Sopenharmony_ci * for a block special file file_inode(file)->i_size is zero 51062306a36Sopenharmony_ci * so we compute the size by hand (just as in block_read/write above) 51162306a36Sopenharmony_ci */ 51262306a36Sopenharmony_cistatic loff_t blkdev_llseek(struct file *file, loff_t offset, int whence) 51362306a36Sopenharmony_ci{ 51462306a36Sopenharmony_ci struct inode *bd_inode = bdev_file_inode(file); 51562306a36Sopenharmony_ci loff_t retval; 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci inode_lock(bd_inode); 51862306a36Sopenharmony_ci retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode)); 51962306a36Sopenharmony_ci inode_unlock(bd_inode); 52062306a36Sopenharmony_ci return retval; 52162306a36Sopenharmony_ci} 52262306a36Sopenharmony_ci 52362306a36Sopenharmony_cistatic int blkdev_fsync(struct file *filp, loff_t start, loff_t end, 52462306a36Sopenharmony_ci int datasync) 52562306a36Sopenharmony_ci{ 52662306a36Sopenharmony_ci struct block_device *bdev = I_BDEV(filp->f_mapping->host); 52762306a36Sopenharmony_ci int error; 52862306a36Sopenharmony_ci 52962306a36Sopenharmony_ci error = file_write_and_wait_range(filp, start, end); 53062306a36Sopenharmony_ci if (error) 53162306a36Sopenharmony_ci return error; 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci /* 53462306a36Sopenharmony_ci * There is no need to serialise calls to blkdev_issue_flush with 53562306a36Sopenharmony_ci * i_mutex and doing so causes performance issues with concurrent 53662306a36Sopenharmony_ci * O_SYNC writers to a block device. 53762306a36Sopenharmony_ci */ 53862306a36Sopenharmony_ci error = blkdev_issue_flush(bdev); 53962306a36Sopenharmony_ci if (error == -EOPNOTSUPP) 54062306a36Sopenharmony_ci error = 0; 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci return error; 54362306a36Sopenharmony_ci} 54462306a36Sopenharmony_ci 54562306a36Sopenharmony_ciblk_mode_t file_to_blk_mode(struct file *file) 54662306a36Sopenharmony_ci{ 54762306a36Sopenharmony_ci blk_mode_t mode = 0; 54862306a36Sopenharmony_ci 54962306a36Sopenharmony_ci if (file->f_mode & FMODE_READ) 55062306a36Sopenharmony_ci mode |= BLK_OPEN_READ; 55162306a36Sopenharmony_ci if (file->f_mode & FMODE_WRITE) 55262306a36Sopenharmony_ci mode |= BLK_OPEN_WRITE; 55362306a36Sopenharmony_ci if (file->private_data) 55462306a36Sopenharmony_ci mode |= BLK_OPEN_EXCL; 55562306a36Sopenharmony_ci if (file->f_flags & O_NDELAY) 55662306a36Sopenharmony_ci mode |= BLK_OPEN_NDELAY; 55762306a36Sopenharmony_ci 55862306a36Sopenharmony_ci /* 55962306a36Sopenharmony_ci * If all bits in O_ACCMODE set (aka O_RDWR | O_WRONLY), the floppy 56062306a36Sopenharmony_ci * driver has historically allowed ioctls as if the file was opened for 56162306a36Sopenharmony_ci * writing, but does not allow and actual reads or writes. 56262306a36Sopenharmony_ci */ 56362306a36Sopenharmony_ci if ((file->f_flags & O_ACCMODE) == (O_RDWR | O_WRONLY)) 56462306a36Sopenharmony_ci mode |= BLK_OPEN_WRITE_IOCTL; 56562306a36Sopenharmony_ci 56662306a36Sopenharmony_ci return mode; 56762306a36Sopenharmony_ci} 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_cistatic int blkdev_open(struct inode *inode, struct file *filp) 57062306a36Sopenharmony_ci{ 57162306a36Sopenharmony_ci struct block_device *bdev; 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci /* 57462306a36Sopenharmony_ci * Preserve backwards compatibility and allow large file access 57562306a36Sopenharmony_ci * even if userspace doesn't ask for it explicitly. Some mkfs 57662306a36Sopenharmony_ci * binary needs it. We might want to drop this workaround 57762306a36Sopenharmony_ci * during an unstable branch. 57862306a36Sopenharmony_ci */ 57962306a36Sopenharmony_ci filp->f_flags |= O_LARGEFILE; 58062306a36Sopenharmony_ci filp->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT; 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_ci /* 58362306a36Sopenharmony_ci * Use the file private data to store the holder for exclusive openes. 58462306a36Sopenharmony_ci * file_to_blk_mode relies on it being present to set BLK_OPEN_EXCL. 58562306a36Sopenharmony_ci */ 58662306a36Sopenharmony_ci if (filp->f_flags & O_EXCL) 58762306a36Sopenharmony_ci filp->private_data = filp; 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci bdev = blkdev_get_by_dev(inode->i_rdev, file_to_blk_mode(filp), 59062306a36Sopenharmony_ci filp->private_data, NULL); 59162306a36Sopenharmony_ci if (IS_ERR(bdev)) 59262306a36Sopenharmony_ci return PTR_ERR(bdev); 59362306a36Sopenharmony_ci 59462306a36Sopenharmony_ci if (bdev_nowait(bdev)) 59562306a36Sopenharmony_ci filp->f_mode |= FMODE_NOWAIT; 59662306a36Sopenharmony_ci 59762306a36Sopenharmony_ci filp->f_mapping = bdev->bd_inode->i_mapping; 59862306a36Sopenharmony_ci filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); 59962306a36Sopenharmony_ci return 0; 60062306a36Sopenharmony_ci} 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_cistatic int blkdev_release(struct inode *inode, struct file *filp) 60362306a36Sopenharmony_ci{ 60462306a36Sopenharmony_ci blkdev_put(I_BDEV(filp->f_mapping->host), filp->private_data); 60562306a36Sopenharmony_ci return 0; 60662306a36Sopenharmony_ci} 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_cistatic ssize_t 60962306a36Sopenharmony_ciblkdev_direct_write(struct kiocb *iocb, struct iov_iter *from) 61062306a36Sopenharmony_ci{ 61162306a36Sopenharmony_ci size_t count = iov_iter_count(from); 61262306a36Sopenharmony_ci ssize_t written; 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci written = kiocb_invalidate_pages(iocb, count); 61562306a36Sopenharmony_ci if (written) { 61662306a36Sopenharmony_ci if (written == -EBUSY) 61762306a36Sopenharmony_ci return 0; 61862306a36Sopenharmony_ci return written; 61962306a36Sopenharmony_ci } 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci written = blkdev_direct_IO(iocb, from); 62262306a36Sopenharmony_ci if (written > 0) { 62362306a36Sopenharmony_ci kiocb_invalidate_post_direct_write(iocb, count); 62462306a36Sopenharmony_ci iocb->ki_pos += written; 62562306a36Sopenharmony_ci count -= written; 62662306a36Sopenharmony_ci } 62762306a36Sopenharmony_ci if (written != -EIOCBQUEUED) 62862306a36Sopenharmony_ci iov_iter_revert(from, count - iov_iter_count(from)); 62962306a36Sopenharmony_ci return written; 63062306a36Sopenharmony_ci} 63162306a36Sopenharmony_ci 63262306a36Sopenharmony_cistatic ssize_t blkdev_buffered_write(struct kiocb *iocb, struct iov_iter *from) 63362306a36Sopenharmony_ci{ 63462306a36Sopenharmony_ci return iomap_file_buffered_write(iocb, from, &blkdev_iomap_ops); 63562306a36Sopenharmony_ci} 63662306a36Sopenharmony_ci 63762306a36Sopenharmony_ci/* 63862306a36Sopenharmony_ci * Write data to the block device. Only intended for the block device itself 63962306a36Sopenharmony_ci * and the raw driver which basically is a fake block device. 64062306a36Sopenharmony_ci * 64162306a36Sopenharmony_ci * Does not take i_mutex for the write and thus is not for general purpose 64262306a36Sopenharmony_ci * use. 64362306a36Sopenharmony_ci */ 64462306a36Sopenharmony_cistatic ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) 64562306a36Sopenharmony_ci{ 64662306a36Sopenharmony_ci struct file *file = iocb->ki_filp; 64762306a36Sopenharmony_ci struct block_device *bdev = I_BDEV(file->f_mapping->host); 64862306a36Sopenharmony_ci struct inode *bd_inode = bdev->bd_inode; 64962306a36Sopenharmony_ci loff_t size = bdev_nr_bytes(bdev); 65062306a36Sopenharmony_ci size_t shorted = 0; 65162306a36Sopenharmony_ci ssize_t ret; 65262306a36Sopenharmony_ci 65362306a36Sopenharmony_ci if (bdev_read_only(bdev)) 65462306a36Sopenharmony_ci return -EPERM; 65562306a36Sopenharmony_ci 65662306a36Sopenharmony_ci if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev)) 65762306a36Sopenharmony_ci return -ETXTBSY; 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci if (!iov_iter_count(from)) 66062306a36Sopenharmony_ci return 0; 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_ci if (iocb->ki_pos >= size) 66362306a36Sopenharmony_ci return -ENOSPC; 66462306a36Sopenharmony_ci 66562306a36Sopenharmony_ci if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT) 66662306a36Sopenharmony_ci return -EOPNOTSUPP; 66762306a36Sopenharmony_ci 66862306a36Sopenharmony_ci size -= iocb->ki_pos; 66962306a36Sopenharmony_ci if (iov_iter_count(from) > size) { 67062306a36Sopenharmony_ci shorted = iov_iter_count(from) - size; 67162306a36Sopenharmony_ci iov_iter_truncate(from, size); 67262306a36Sopenharmony_ci } 67362306a36Sopenharmony_ci 67462306a36Sopenharmony_ci ret = file_update_time(file); 67562306a36Sopenharmony_ci if (ret) 67662306a36Sopenharmony_ci return ret; 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci if (iocb->ki_flags & IOCB_DIRECT) { 67962306a36Sopenharmony_ci ret = blkdev_direct_write(iocb, from); 68062306a36Sopenharmony_ci if (ret >= 0 && iov_iter_count(from)) 68162306a36Sopenharmony_ci ret = direct_write_fallback(iocb, from, ret, 68262306a36Sopenharmony_ci blkdev_buffered_write(iocb, from)); 68362306a36Sopenharmony_ci } else { 68462306a36Sopenharmony_ci ret = blkdev_buffered_write(iocb, from); 68562306a36Sopenharmony_ci } 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_ci if (ret > 0) 68862306a36Sopenharmony_ci ret = generic_write_sync(iocb, ret); 68962306a36Sopenharmony_ci iov_iter_reexpand(from, iov_iter_count(from) + shorted); 69062306a36Sopenharmony_ci return ret; 69162306a36Sopenharmony_ci} 69262306a36Sopenharmony_ci 69362306a36Sopenharmony_cistatic ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) 69462306a36Sopenharmony_ci{ 69562306a36Sopenharmony_ci struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); 69662306a36Sopenharmony_ci loff_t size = bdev_nr_bytes(bdev); 69762306a36Sopenharmony_ci loff_t pos = iocb->ki_pos; 69862306a36Sopenharmony_ci size_t shorted = 0; 69962306a36Sopenharmony_ci ssize_t ret = 0; 70062306a36Sopenharmony_ci size_t count; 70162306a36Sopenharmony_ci 70262306a36Sopenharmony_ci if (unlikely(pos + iov_iter_count(to) > size)) { 70362306a36Sopenharmony_ci if (pos >= size) 70462306a36Sopenharmony_ci return 0; 70562306a36Sopenharmony_ci size -= pos; 70662306a36Sopenharmony_ci shorted = iov_iter_count(to) - size; 70762306a36Sopenharmony_ci iov_iter_truncate(to, size); 70862306a36Sopenharmony_ci } 70962306a36Sopenharmony_ci 71062306a36Sopenharmony_ci count = iov_iter_count(to); 71162306a36Sopenharmony_ci if (!count) 71262306a36Sopenharmony_ci goto reexpand; /* skip atime */ 71362306a36Sopenharmony_ci 71462306a36Sopenharmony_ci if (iocb->ki_flags & IOCB_DIRECT) { 71562306a36Sopenharmony_ci ret = kiocb_write_and_wait(iocb, count); 71662306a36Sopenharmony_ci if (ret < 0) 71762306a36Sopenharmony_ci goto reexpand; 71862306a36Sopenharmony_ci file_accessed(iocb->ki_filp); 71962306a36Sopenharmony_ci 72062306a36Sopenharmony_ci ret = blkdev_direct_IO(iocb, to); 72162306a36Sopenharmony_ci if (ret >= 0) { 72262306a36Sopenharmony_ci iocb->ki_pos += ret; 72362306a36Sopenharmony_ci count -= ret; 72462306a36Sopenharmony_ci } 72562306a36Sopenharmony_ci iov_iter_revert(to, count - iov_iter_count(to)); 72662306a36Sopenharmony_ci if (ret < 0 || !count) 72762306a36Sopenharmony_ci goto reexpand; 72862306a36Sopenharmony_ci } 72962306a36Sopenharmony_ci 73062306a36Sopenharmony_ci ret = filemap_read(iocb, to, ret); 73162306a36Sopenharmony_ci 73262306a36Sopenharmony_cireexpand: 73362306a36Sopenharmony_ci if (unlikely(shorted)) 73462306a36Sopenharmony_ci iov_iter_reexpand(to, iov_iter_count(to) + shorted); 73562306a36Sopenharmony_ci return ret; 73662306a36Sopenharmony_ci} 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_ci#define BLKDEV_FALLOC_FL_SUPPORTED \ 73962306a36Sopenharmony_ci (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ 74062306a36Sopenharmony_ci FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE) 74162306a36Sopenharmony_ci 74262306a36Sopenharmony_cistatic long blkdev_fallocate(struct file *file, int mode, loff_t start, 74362306a36Sopenharmony_ci loff_t len) 74462306a36Sopenharmony_ci{ 74562306a36Sopenharmony_ci struct inode *inode = bdev_file_inode(file); 74662306a36Sopenharmony_ci struct block_device *bdev = I_BDEV(inode); 74762306a36Sopenharmony_ci loff_t end = start + len - 1; 74862306a36Sopenharmony_ci loff_t isize; 74962306a36Sopenharmony_ci int error; 75062306a36Sopenharmony_ci 75162306a36Sopenharmony_ci /* Fail if we don't recognize the flags. */ 75262306a36Sopenharmony_ci if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED) 75362306a36Sopenharmony_ci return -EOPNOTSUPP; 75462306a36Sopenharmony_ci 75562306a36Sopenharmony_ci /* Don't go off the end of the device. */ 75662306a36Sopenharmony_ci isize = bdev_nr_bytes(bdev); 75762306a36Sopenharmony_ci if (start >= isize) 75862306a36Sopenharmony_ci return -EINVAL; 75962306a36Sopenharmony_ci if (end >= isize) { 76062306a36Sopenharmony_ci if (mode & FALLOC_FL_KEEP_SIZE) { 76162306a36Sopenharmony_ci len = isize - start; 76262306a36Sopenharmony_ci end = start + len - 1; 76362306a36Sopenharmony_ci } else 76462306a36Sopenharmony_ci return -EINVAL; 76562306a36Sopenharmony_ci } 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_ci /* 76862306a36Sopenharmony_ci * Don't allow IO that isn't aligned to logical block size. 76962306a36Sopenharmony_ci */ 77062306a36Sopenharmony_ci if ((start | len) & (bdev_logical_block_size(bdev) - 1)) 77162306a36Sopenharmony_ci return -EINVAL; 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_ci filemap_invalidate_lock(inode->i_mapping); 77462306a36Sopenharmony_ci 77562306a36Sopenharmony_ci /* 77662306a36Sopenharmony_ci * Invalidate the page cache, including dirty pages, for valid 77762306a36Sopenharmony_ci * de-allocate mode calls to fallocate(). 77862306a36Sopenharmony_ci */ 77962306a36Sopenharmony_ci switch (mode) { 78062306a36Sopenharmony_ci case FALLOC_FL_ZERO_RANGE: 78162306a36Sopenharmony_ci case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE: 78262306a36Sopenharmony_ci error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end); 78362306a36Sopenharmony_ci if (error) 78462306a36Sopenharmony_ci goto fail; 78562306a36Sopenharmony_ci 78662306a36Sopenharmony_ci error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT, 78762306a36Sopenharmony_ci len >> SECTOR_SHIFT, GFP_KERNEL, 78862306a36Sopenharmony_ci BLKDEV_ZERO_NOUNMAP); 78962306a36Sopenharmony_ci break; 79062306a36Sopenharmony_ci case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE: 79162306a36Sopenharmony_ci error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end); 79262306a36Sopenharmony_ci if (error) 79362306a36Sopenharmony_ci goto fail; 79462306a36Sopenharmony_ci 79562306a36Sopenharmony_ci error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT, 79662306a36Sopenharmony_ci len >> SECTOR_SHIFT, GFP_KERNEL, 79762306a36Sopenharmony_ci BLKDEV_ZERO_NOFALLBACK); 79862306a36Sopenharmony_ci break; 79962306a36Sopenharmony_ci case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE: 80062306a36Sopenharmony_ci error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end); 80162306a36Sopenharmony_ci if (error) 80262306a36Sopenharmony_ci goto fail; 80362306a36Sopenharmony_ci 80462306a36Sopenharmony_ci error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT, 80562306a36Sopenharmony_ci len >> SECTOR_SHIFT, GFP_KERNEL); 80662306a36Sopenharmony_ci break; 80762306a36Sopenharmony_ci default: 80862306a36Sopenharmony_ci error = -EOPNOTSUPP; 80962306a36Sopenharmony_ci } 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci fail: 81262306a36Sopenharmony_ci filemap_invalidate_unlock(inode->i_mapping); 81362306a36Sopenharmony_ci return error; 81462306a36Sopenharmony_ci} 81562306a36Sopenharmony_ci 81662306a36Sopenharmony_cistatic int blkdev_mmap(struct file *file, struct vm_area_struct *vma) 81762306a36Sopenharmony_ci{ 81862306a36Sopenharmony_ci struct inode *bd_inode = bdev_file_inode(file); 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci if (bdev_read_only(I_BDEV(bd_inode))) 82162306a36Sopenharmony_ci return generic_file_readonly_mmap(file, vma); 82262306a36Sopenharmony_ci 82362306a36Sopenharmony_ci return generic_file_mmap(file, vma); 82462306a36Sopenharmony_ci} 82562306a36Sopenharmony_ci 82662306a36Sopenharmony_ciconst struct file_operations def_blk_fops = { 82762306a36Sopenharmony_ci .open = blkdev_open, 82862306a36Sopenharmony_ci .release = blkdev_release, 82962306a36Sopenharmony_ci .llseek = blkdev_llseek, 83062306a36Sopenharmony_ci .read_iter = blkdev_read_iter, 83162306a36Sopenharmony_ci .write_iter = blkdev_write_iter, 83262306a36Sopenharmony_ci .iopoll = iocb_bio_iopoll, 83362306a36Sopenharmony_ci .mmap = blkdev_mmap, 83462306a36Sopenharmony_ci .fsync = blkdev_fsync, 83562306a36Sopenharmony_ci .unlocked_ioctl = blkdev_ioctl, 83662306a36Sopenharmony_ci#ifdef CONFIG_COMPAT 83762306a36Sopenharmony_ci .compat_ioctl = compat_blkdev_ioctl, 83862306a36Sopenharmony_ci#endif 83962306a36Sopenharmony_ci .splice_read = filemap_splice_read, 84062306a36Sopenharmony_ci .splice_write = iter_file_splice_write, 84162306a36Sopenharmony_ci .fallocate = blkdev_fallocate, 84262306a36Sopenharmony_ci}; 84362306a36Sopenharmony_ci 84462306a36Sopenharmony_cistatic __init int blkdev_init(void) 84562306a36Sopenharmony_ci{ 84662306a36Sopenharmony_ci return bioset_init(&blkdev_dio_pool, 4, 84762306a36Sopenharmony_ci offsetof(struct blkdev_dio, bio), 84862306a36Sopenharmony_ci BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE); 84962306a36Sopenharmony_ci} 85062306a36Sopenharmony_cimodule_init(blkdev_init); 851