162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 1991, 1992  Linus Torvalds
462306a36Sopenharmony_ci * Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
562306a36Sopenharmony_ci * Copyright (C) 2016 - 2020 Christoph Hellwig
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci#include <linux/init.h>
862306a36Sopenharmony_ci#include <linux/mm.h>
962306a36Sopenharmony_ci#include <linux/blkdev.h>
1062306a36Sopenharmony_ci#include <linux/buffer_head.h>
1162306a36Sopenharmony_ci#include <linux/mpage.h>
1262306a36Sopenharmony_ci#include <linux/uio.h>
1362306a36Sopenharmony_ci#include <linux/namei.h>
1462306a36Sopenharmony_ci#include <linux/task_io_accounting_ops.h>
1562306a36Sopenharmony_ci#include <linux/falloc.h>
1662306a36Sopenharmony_ci#include <linux/suspend.h>
1762306a36Sopenharmony_ci#include <linux/fs.h>
1862306a36Sopenharmony_ci#include <linux/iomap.h>
1962306a36Sopenharmony_ci#include <linux/module.h>
2062306a36Sopenharmony_ci#include "blk.h"
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_cistatic inline struct inode *bdev_file_inode(struct file *file)
2362306a36Sopenharmony_ci{
2462306a36Sopenharmony_ci	return file->f_mapping->host;
2562306a36Sopenharmony_ci}
2662306a36Sopenharmony_ci
2762306a36Sopenharmony_cistatic blk_opf_t dio_bio_write_op(struct kiocb *iocb)
2862306a36Sopenharmony_ci{
2962306a36Sopenharmony_ci	blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_ci	/* avoid the need for a I/O completion work item */
3262306a36Sopenharmony_ci	if (iocb_is_dsync(iocb))
3362306a36Sopenharmony_ci		opf |= REQ_FUA;
3462306a36Sopenharmony_ci	return opf;
3562306a36Sopenharmony_ci}
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_cistatic bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos,
3862306a36Sopenharmony_ci			      struct iov_iter *iter)
3962306a36Sopenharmony_ci{
4062306a36Sopenharmony_ci	return pos & (bdev_logical_block_size(bdev) - 1) ||
4162306a36Sopenharmony_ci		!bdev_iter_is_aligned(bdev, iter);
4262306a36Sopenharmony_ci}
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci#define DIO_INLINE_BIO_VECS 4
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_cistatic ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
4762306a36Sopenharmony_ci		struct iov_iter *iter, unsigned int nr_pages)
4862306a36Sopenharmony_ci{
4962306a36Sopenharmony_ci	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
5062306a36Sopenharmony_ci	struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
5162306a36Sopenharmony_ci	loff_t pos = iocb->ki_pos;
5262306a36Sopenharmony_ci	bool should_dirty = false;
5362306a36Sopenharmony_ci	struct bio bio;
5462306a36Sopenharmony_ci	ssize_t ret;
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci	if (blkdev_dio_unaligned(bdev, pos, iter))
5762306a36Sopenharmony_ci		return -EINVAL;
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_ci	if (nr_pages <= DIO_INLINE_BIO_VECS)
6062306a36Sopenharmony_ci		vecs = inline_vecs;
6162306a36Sopenharmony_ci	else {
6262306a36Sopenharmony_ci		vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
6362306a36Sopenharmony_ci				     GFP_KERNEL);
6462306a36Sopenharmony_ci		if (!vecs)
6562306a36Sopenharmony_ci			return -ENOMEM;
6662306a36Sopenharmony_ci	}
6762306a36Sopenharmony_ci
6862306a36Sopenharmony_ci	if (iov_iter_rw(iter) == READ) {
6962306a36Sopenharmony_ci		bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ);
7062306a36Sopenharmony_ci		if (user_backed_iter(iter))
7162306a36Sopenharmony_ci			should_dirty = true;
7262306a36Sopenharmony_ci	} else {
7362306a36Sopenharmony_ci		bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
7462306a36Sopenharmony_ci	}
7562306a36Sopenharmony_ci	bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
7662306a36Sopenharmony_ci	bio.bi_ioprio = iocb->ki_ioprio;
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci	ret = bio_iov_iter_get_pages(&bio, iter);
7962306a36Sopenharmony_ci	if (unlikely(ret))
8062306a36Sopenharmony_ci		goto out;
8162306a36Sopenharmony_ci	ret = bio.bi_iter.bi_size;
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci	if (iov_iter_rw(iter) == WRITE)
8462306a36Sopenharmony_ci		task_io_account_write(ret);
8562306a36Sopenharmony_ci
8662306a36Sopenharmony_ci	if (iocb->ki_flags & IOCB_NOWAIT)
8762306a36Sopenharmony_ci		bio.bi_opf |= REQ_NOWAIT;
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci	submit_bio_wait(&bio);
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci	bio_release_pages(&bio, should_dirty);
9262306a36Sopenharmony_ci	if (unlikely(bio.bi_status))
9362306a36Sopenharmony_ci		ret = blk_status_to_errno(bio.bi_status);
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ciout:
9662306a36Sopenharmony_ci	if (vecs != inline_vecs)
9762306a36Sopenharmony_ci		kfree(vecs);
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	bio_uninit(&bio);
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci	return ret;
10262306a36Sopenharmony_ci}
10362306a36Sopenharmony_ci
/* Bits kept in blkdev_dio::flags. */
enum {
	/* read into a user-backed iterator: pages get dirtied */
	DIO_SHOULD_DIRTY	= 1 << 0,
	/* submitted through a synchronous kiocb: the submitter waits */
	DIO_IS_SYNC		= 1 << 1,
};
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_cistruct blkdev_dio {
11062306a36Sopenharmony_ci	union {
11162306a36Sopenharmony_ci		struct kiocb		*iocb;
11262306a36Sopenharmony_ci		struct task_struct	*waiter;
11362306a36Sopenharmony_ci	};
11462306a36Sopenharmony_ci	size_t			size;
11562306a36Sopenharmony_ci	atomic_t		ref;
11662306a36Sopenharmony_ci	unsigned int		flags;
11762306a36Sopenharmony_ci	struct bio		bio ____cacheline_aligned_in_smp;
11862306a36Sopenharmony_ci};
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_cistatic struct bio_set blkdev_dio_pool;
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_cistatic void blkdev_bio_end_io(struct bio *bio)
12362306a36Sopenharmony_ci{
12462306a36Sopenharmony_ci	struct blkdev_dio *dio = bio->bi_private;
12562306a36Sopenharmony_ci	bool should_dirty = dio->flags & DIO_SHOULD_DIRTY;
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci	if (bio->bi_status && !dio->bio.bi_status)
12862306a36Sopenharmony_ci		dio->bio.bi_status = bio->bi_status;
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	if (atomic_dec_and_test(&dio->ref)) {
13162306a36Sopenharmony_ci		if (!(dio->flags & DIO_IS_SYNC)) {
13262306a36Sopenharmony_ci			struct kiocb *iocb = dio->iocb;
13362306a36Sopenharmony_ci			ssize_t ret;
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci			WRITE_ONCE(iocb->private, NULL);
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_ci			if (likely(!dio->bio.bi_status)) {
13862306a36Sopenharmony_ci				ret = dio->size;
13962306a36Sopenharmony_ci				iocb->ki_pos += ret;
14062306a36Sopenharmony_ci			} else {
14162306a36Sopenharmony_ci				ret = blk_status_to_errno(dio->bio.bi_status);
14262306a36Sopenharmony_ci			}
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci			dio->iocb->ki_complete(iocb, ret);
14562306a36Sopenharmony_ci			bio_put(&dio->bio);
14662306a36Sopenharmony_ci		} else {
14762306a36Sopenharmony_ci			struct task_struct *waiter = dio->waiter;
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_ci			WRITE_ONCE(dio->waiter, NULL);
15062306a36Sopenharmony_ci			blk_wake_io_task(waiter);
15162306a36Sopenharmony_ci		}
15262306a36Sopenharmony_ci	}
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci	if (should_dirty) {
15562306a36Sopenharmony_ci		bio_check_pages_dirty(bio);
15662306a36Sopenharmony_ci	} else {
15762306a36Sopenharmony_ci		bio_release_pages(bio, false);
15862306a36Sopenharmony_ci		bio_put(bio);
15962306a36Sopenharmony_ci	}
16062306a36Sopenharmony_ci}
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_cistatic ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
16362306a36Sopenharmony_ci		unsigned int nr_pages)
16462306a36Sopenharmony_ci{
16562306a36Sopenharmony_ci	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
16662306a36Sopenharmony_ci	struct blk_plug plug;
16762306a36Sopenharmony_ci	struct blkdev_dio *dio;
16862306a36Sopenharmony_ci	struct bio *bio;
16962306a36Sopenharmony_ci	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
17062306a36Sopenharmony_ci	blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
17162306a36Sopenharmony_ci	loff_t pos = iocb->ki_pos;
17262306a36Sopenharmony_ci	int ret = 0;
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_ci	if (blkdev_dio_unaligned(bdev, pos, iter))
17562306a36Sopenharmony_ci		return -EINVAL;
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_ci	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
17862306a36Sopenharmony_ci		opf |= REQ_ALLOC_CACHE;
17962306a36Sopenharmony_ci	bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
18062306a36Sopenharmony_ci			       &blkdev_dio_pool);
18162306a36Sopenharmony_ci	dio = container_of(bio, struct blkdev_dio, bio);
18262306a36Sopenharmony_ci	atomic_set(&dio->ref, 1);
18362306a36Sopenharmony_ci	/*
18462306a36Sopenharmony_ci	 * Grab an extra reference to ensure the dio structure which is embedded
18562306a36Sopenharmony_ci	 * into the first bio stays around.
18662306a36Sopenharmony_ci	 */
18762306a36Sopenharmony_ci	bio_get(bio);
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci	is_sync = is_sync_kiocb(iocb);
19062306a36Sopenharmony_ci	if (is_sync) {
19162306a36Sopenharmony_ci		dio->flags = DIO_IS_SYNC;
19262306a36Sopenharmony_ci		dio->waiter = current;
19362306a36Sopenharmony_ci	} else {
19462306a36Sopenharmony_ci		dio->flags = 0;
19562306a36Sopenharmony_ci		dio->iocb = iocb;
19662306a36Sopenharmony_ci	}
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	dio->size = 0;
19962306a36Sopenharmony_ci	if (is_read && user_backed_iter(iter))
20062306a36Sopenharmony_ci		dio->flags |= DIO_SHOULD_DIRTY;
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci	blk_start_plug(&plug);
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci	for (;;) {
20562306a36Sopenharmony_ci		bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
20662306a36Sopenharmony_ci		bio->bi_private = dio;
20762306a36Sopenharmony_ci		bio->bi_end_io = blkdev_bio_end_io;
20862306a36Sopenharmony_ci		bio->bi_ioprio = iocb->ki_ioprio;
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_ci		ret = bio_iov_iter_get_pages(bio, iter);
21162306a36Sopenharmony_ci		if (unlikely(ret)) {
21262306a36Sopenharmony_ci			bio->bi_status = BLK_STS_IOERR;
21362306a36Sopenharmony_ci			bio_endio(bio);
21462306a36Sopenharmony_ci			break;
21562306a36Sopenharmony_ci		}
21662306a36Sopenharmony_ci		if (iocb->ki_flags & IOCB_NOWAIT) {
21762306a36Sopenharmony_ci			/*
21862306a36Sopenharmony_ci			 * This is nonblocking IO, and we need to allocate
21962306a36Sopenharmony_ci			 * another bio if we have data left to map. As we
22062306a36Sopenharmony_ci			 * cannot guarantee that one of the sub bios will not
22162306a36Sopenharmony_ci			 * fail getting issued FOR NOWAIT and as error results
22262306a36Sopenharmony_ci			 * are coalesced across all of them, be safe and ask for
22362306a36Sopenharmony_ci			 * a retry of this from blocking context.
22462306a36Sopenharmony_ci			 */
22562306a36Sopenharmony_ci			if (unlikely(iov_iter_count(iter))) {
22662306a36Sopenharmony_ci				bio_release_pages(bio, false);
22762306a36Sopenharmony_ci				bio_clear_flag(bio, BIO_REFFED);
22862306a36Sopenharmony_ci				bio_put(bio);
22962306a36Sopenharmony_ci				blk_finish_plug(&plug);
23062306a36Sopenharmony_ci				return -EAGAIN;
23162306a36Sopenharmony_ci			}
23262306a36Sopenharmony_ci			bio->bi_opf |= REQ_NOWAIT;
23362306a36Sopenharmony_ci		}
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci		if (is_read) {
23662306a36Sopenharmony_ci			if (dio->flags & DIO_SHOULD_DIRTY)
23762306a36Sopenharmony_ci				bio_set_pages_dirty(bio);
23862306a36Sopenharmony_ci		} else {
23962306a36Sopenharmony_ci			task_io_account_write(bio->bi_iter.bi_size);
24062306a36Sopenharmony_ci		}
24162306a36Sopenharmony_ci		dio->size += bio->bi_iter.bi_size;
24262306a36Sopenharmony_ci		pos += bio->bi_iter.bi_size;
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_ci		nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
24562306a36Sopenharmony_ci		if (!nr_pages) {
24662306a36Sopenharmony_ci			submit_bio(bio);
24762306a36Sopenharmony_ci			break;
24862306a36Sopenharmony_ci		}
24962306a36Sopenharmony_ci		atomic_inc(&dio->ref);
25062306a36Sopenharmony_ci		submit_bio(bio);
25162306a36Sopenharmony_ci		bio = bio_alloc(bdev, nr_pages, opf, GFP_KERNEL);
25262306a36Sopenharmony_ci	}
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	blk_finish_plug(&plug);
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci	if (!is_sync)
25762306a36Sopenharmony_ci		return -EIOCBQUEUED;
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_ci	for (;;) {
26062306a36Sopenharmony_ci		set_current_state(TASK_UNINTERRUPTIBLE);
26162306a36Sopenharmony_ci		if (!READ_ONCE(dio->waiter))
26262306a36Sopenharmony_ci			break;
26362306a36Sopenharmony_ci		blk_io_schedule();
26462306a36Sopenharmony_ci	}
26562306a36Sopenharmony_ci	__set_current_state(TASK_RUNNING);
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci	if (!ret)
26862306a36Sopenharmony_ci		ret = blk_status_to_errno(dio->bio.bi_status);
26962306a36Sopenharmony_ci	if (likely(!ret))
27062306a36Sopenharmony_ci		ret = dio->size;
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ci	bio_put(&dio->bio);
27362306a36Sopenharmony_ci	return ret;
27462306a36Sopenharmony_ci}
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_cistatic void blkdev_bio_end_io_async(struct bio *bio)
27762306a36Sopenharmony_ci{
27862306a36Sopenharmony_ci	struct blkdev_dio *dio = container_of(bio, struct blkdev_dio, bio);
27962306a36Sopenharmony_ci	struct kiocb *iocb = dio->iocb;
28062306a36Sopenharmony_ci	ssize_t ret;
28162306a36Sopenharmony_ci
28262306a36Sopenharmony_ci	WRITE_ONCE(iocb->private, NULL);
28362306a36Sopenharmony_ci
28462306a36Sopenharmony_ci	if (likely(!bio->bi_status)) {
28562306a36Sopenharmony_ci		ret = dio->size;
28662306a36Sopenharmony_ci		iocb->ki_pos += ret;
28762306a36Sopenharmony_ci	} else {
28862306a36Sopenharmony_ci		ret = blk_status_to_errno(bio->bi_status);
28962306a36Sopenharmony_ci	}
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci	iocb->ki_complete(iocb, ret);
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_ci	if (dio->flags & DIO_SHOULD_DIRTY) {
29462306a36Sopenharmony_ci		bio_check_pages_dirty(bio);
29562306a36Sopenharmony_ci	} else {
29662306a36Sopenharmony_ci		bio_release_pages(bio, false);
29762306a36Sopenharmony_ci		bio_put(bio);
29862306a36Sopenharmony_ci	}
29962306a36Sopenharmony_ci}
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_cistatic ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
30262306a36Sopenharmony_ci					struct iov_iter *iter,
30362306a36Sopenharmony_ci					unsigned int nr_pages)
30462306a36Sopenharmony_ci{
30562306a36Sopenharmony_ci	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
30662306a36Sopenharmony_ci	bool is_read = iov_iter_rw(iter) == READ;
30762306a36Sopenharmony_ci	blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
30862306a36Sopenharmony_ci	struct blkdev_dio *dio;
30962306a36Sopenharmony_ci	struct bio *bio;
31062306a36Sopenharmony_ci	loff_t pos = iocb->ki_pos;
31162306a36Sopenharmony_ci	int ret = 0;
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci	if (blkdev_dio_unaligned(bdev, pos, iter))
31462306a36Sopenharmony_ci		return -EINVAL;
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
31762306a36Sopenharmony_ci		opf |= REQ_ALLOC_CACHE;
31862306a36Sopenharmony_ci	bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
31962306a36Sopenharmony_ci			       &blkdev_dio_pool);
32062306a36Sopenharmony_ci	dio = container_of(bio, struct blkdev_dio, bio);
32162306a36Sopenharmony_ci	dio->flags = 0;
32262306a36Sopenharmony_ci	dio->iocb = iocb;
32362306a36Sopenharmony_ci	bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
32462306a36Sopenharmony_ci	bio->bi_end_io = blkdev_bio_end_io_async;
32562306a36Sopenharmony_ci	bio->bi_ioprio = iocb->ki_ioprio;
32662306a36Sopenharmony_ci
32762306a36Sopenharmony_ci	if (iov_iter_is_bvec(iter)) {
32862306a36Sopenharmony_ci		/*
32962306a36Sopenharmony_ci		 * Users don't rely on the iterator being in any particular
33062306a36Sopenharmony_ci		 * state for async I/O returning -EIOCBQUEUED, hence we can
33162306a36Sopenharmony_ci		 * avoid expensive iov_iter_advance(). Bypass
33262306a36Sopenharmony_ci		 * bio_iov_iter_get_pages() and set the bvec directly.
33362306a36Sopenharmony_ci		 */
33462306a36Sopenharmony_ci		bio_iov_bvec_set(bio, iter);
33562306a36Sopenharmony_ci	} else {
33662306a36Sopenharmony_ci		ret = bio_iov_iter_get_pages(bio, iter);
33762306a36Sopenharmony_ci		if (unlikely(ret)) {
33862306a36Sopenharmony_ci			bio_put(bio);
33962306a36Sopenharmony_ci			return ret;
34062306a36Sopenharmony_ci		}
34162306a36Sopenharmony_ci	}
34262306a36Sopenharmony_ci	dio->size = bio->bi_iter.bi_size;
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ci	if (is_read) {
34562306a36Sopenharmony_ci		if (user_backed_iter(iter)) {
34662306a36Sopenharmony_ci			dio->flags |= DIO_SHOULD_DIRTY;
34762306a36Sopenharmony_ci			bio_set_pages_dirty(bio);
34862306a36Sopenharmony_ci		}
34962306a36Sopenharmony_ci	} else {
35062306a36Sopenharmony_ci		task_io_account_write(bio->bi_iter.bi_size);
35162306a36Sopenharmony_ci	}
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_ci	if (iocb->ki_flags & IOCB_NOWAIT)
35462306a36Sopenharmony_ci		bio->bi_opf |= REQ_NOWAIT;
35562306a36Sopenharmony_ci
35662306a36Sopenharmony_ci	if (iocb->ki_flags & IOCB_HIPRI) {
35762306a36Sopenharmony_ci		bio->bi_opf |= REQ_POLLED;
35862306a36Sopenharmony_ci		submit_bio(bio);
35962306a36Sopenharmony_ci		WRITE_ONCE(iocb->private, bio);
36062306a36Sopenharmony_ci	} else {
36162306a36Sopenharmony_ci		submit_bio(bio);
36262306a36Sopenharmony_ci	}
36362306a36Sopenharmony_ci	return -EIOCBQUEUED;
36462306a36Sopenharmony_ci}
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_cistatic ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
36762306a36Sopenharmony_ci{
36862306a36Sopenharmony_ci	unsigned int nr_pages;
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci	if (!iov_iter_count(iter))
37162306a36Sopenharmony_ci		return 0;
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_ci	nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
37462306a36Sopenharmony_ci	if (likely(nr_pages <= BIO_MAX_VECS)) {
37562306a36Sopenharmony_ci		if (is_sync_kiocb(iocb))
37662306a36Sopenharmony_ci			return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
37762306a36Sopenharmony_ci		return __blkdev_direct_IO_async(iocb, iter, nr_pages);
37862306a36Sopenharmony_ci	}
37962306a36Sopenharmony_ci	return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
38062306a36Sopenharmony_ci}
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_cistatic int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
38362306a36Sopenharmony_ci		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
38462306a36Sopenharmony_ci{
38562306a36Sopenharmony_ci	struct block_device *bdev = I_BDEV(inode);
38662306a36Sopenharmony_ci	loff_t isize = i_size_read(inode);
38762306a36Sopenharmony_ci
38862306a36Sopenharmony_ci	iomap->bdev = bdev;
38962306a36Sopenharmony_ci	iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev));
39062306a36Sopenharmony_ci	if (iomap->offset >= isize)
39162306a36Sopenharmony_ci		return -EIO;
39262306a36Sopenharmony_ci	iomap->type = IOMAP_MAPPED;
39362306a36Sopenharmony_ci	iomap->addr = iomap->offset;
39462306a36Sopenharmony_ci	iomap->length = isize - iomap->offset;
39562306a36Sopenharmony_ci	iomap->flags |= IOMAP_F_BUFFER_HEAD; /* noop for !CONFIG_BUFFER_HEAD */
39662306a36Sopenharmony_ci	return 0;
39762306a36Sopenharmony_ci}
39862306a36Sopenharmony_ci
39962306a36Sopenharmony_cistatic const struct iomap_ops blkdev_iomap_ops = {
40062306a36Sopenharmony_ci	.iomap_begin		= blkdev_iomap_begin,
40162306a36Sopenharmony_ci};
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci#ifdef CONFIG_BUFFER_HEAD
40462306a36Sopenharmony_cistatic int blkdev_get_block(struct inode *inode, sector_t iblock,
40562306a36Sopenharmony_ci		struct buffer_head *bh, int create)
40662306a36Sopenharmony_ci{
40762306a36Sopenharmony_ci	bh->b_bdev = I_BDEV(inode);
40862306a36Sopenharmony_ci	bh->b_blocknr = iblock;
40962306a36Sopenharmony_ci	set_buffer_mapped(bh);
41062306a36Sopenharmony_ci	return 0;
41162306a36Sopenharmony_ci}
41262306a36Sopenharmony_ci
41362306a36Sopenharmony_cistatic int blkdev_writepage(struct page *page, struct writeback_control *wbc)
41462306a36Sopenharmony_ci{
41562306a36Sopenharmony_ci	return block_write_full_page(page, blkdev_get_block, wbc);
41662306a36Sopenharmony_ci}
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_cistatic int blkdev_read_folio(struct file *file, struct folio *folio)
41962306a36Sopenharmony_ci{
42062306a36Sopenharmony_ci	return block_read_full_folio(folio, blkdev_get_block);
42162306a36Sopenharmony_ci}
42262306a36Sopenharmony_ci
42362306a36Sopenharmony_cistatic void blkdev_readahead(struct readahead_control *rac)
42462306a36Sopenharmony_ci{
42562306a36Sopenharmony_ci	mpage_readahead(rac, blkdev_get_block);
42662306a36Sopenharmony_ci}
42762306a36Sopenharmony_ci
42862306a36Sopenharmony_cistatic int blkdev_write_begin(struct file *file, struct address_space *mapping,
42962306a36Sopenharmony_ci		loff_t pos, unsigned len, struct page **pagep, void **fsdata)
43062306a36Sopenharmony_ci{
43162306a36Sopenharmony_ci	return block_write_begin(mapping, pos, len, pagep, blkdev_get_block);
43262306a36Sopenharmony_ci}
43362306a36Sopenharmony_ci
43462306a36Sopenharmony_cistatic int blkdev_write_end(struct file *file, struct address_space *mapping,
43562306a36Sopenharmony_ci		loff_t pos, unsigned len, unsigned copied, struct page *page,
43662306a36Sopenharmony_ci		void *fsdata)
43762306a36Sopenharmony_ci{
43862306a36Sopenharmony_ci	int ret;
43962306a36Sopenharmony_ci	ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
44062306a36Sopenharmony_ci
44162306a36Sopenharmony_ci	unlock_page(page);
44262306a36Sopenharmony_ci	put_page(page);
44362306a36Sopenharmony_ci
44462306a36Sopenharmony_ci	return ret;
44562306a36Sopenharmony_ci}
44662306a36Sopenharmony_ci
44762306a36Sopenharmony_ciconst struct address_space_operations def_blk_aops = {
44862306a36Sopenharmony_ci	.dirty_folio	= block_dirty_folio,
44962306a36Sopenharmony_ci	.invalidate_folio = block_invalidate_folio,
45062306a36Sopenharmony_ci	.read_folio	= blkdev_read_folio,
45162306a36Sopenharmony_ci	.readahead	= blkdev_readahead,
45262306a36Sopenharmony_ci	.writepage	= blkdev_writepage,
45362306a36Sopenharmony_ci	.write_begin	= blkdev_write_begin,
45462306a36Sopenharmony_ci	.write_end	= blkdev_write_end,
45562306a36Sopenharmony_ci	.migrate_folio	= buffer_migrate_folio_norefs,
45662306a36Sopenharmony_ci	.is_dirty_writeback = buffer_check_dirty_writeback,
45762306a36Sopenharmony_ci};
45862306a36Sopenharmony_ci#else /* CONFIG_BUFFER_HEAD */
45962306a36Sopenharmony_cistatic int blkdev_read_folio(struct file *file, struct folio *folio)
46062306a36Sopenharmony_ci{
46162306a36Sopenharmony_ci	return iomap_read_folio(folio, &blkdev_iomap_ops);
46262306a36Sopenharmony_ci}
46362306a36Sopenharmony_ci
46462306a36Sopenharmony_cistatic void blkdev_readahead(struct readahead_control *rac)
46562306a36Sopenharmony_ci{
46662306a36Sopenharmony_ci	iomap_readahead(rac, &blkdev_iomap_ops);
46762306a36Sopenharmony_ci}
46862306a36Sopenharmony_ci
46962306a36Sopenharmony_cistatic int blkdev_map_blocks(struct iomap_writepage_ctx *wpc,
47062306a36Sopenharmony_ci		struct inode *inode, loff_t offset)
47162306a36Sopenharmony_ci{
47262306a36Sopenharmony_ci	loff_t isize = i_size_read(inode);
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_ci	if (WARN_ON_ONCE(offset >= isize))
47562306a36Sopenharmony_ci		return -EIO;
47662306a36Sopenharmony_ci	if (offset >= wpc->iomap.offset &&
47762306a36Sopenharmony_ci	    offset < wpc->iomap.offset + wpc->iomap.length)
47862306a36Sopenharmony_ci		return 0;
47962306a36Sopenharmony_ci	return blkdev_iomap_begin(inode, offset, isize - offset,
48062306a36Sopenharmony_ci				  IOMAP_WRITE, &wpc->iomap, NULL);
48162306a36Sopenharmony_ci}
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_cistatic const struct iomap_writeback_ops blkdev_writeback_ops = {
48462306a36Sopenharmony_ci	.map_blocks		= blkdev_map_blocks,
48562306a36Sopenharmony_ci};
48662306a36Sopenharmony_ci
48762306a36Sopenharmony_cistatic int blkdev_writepages(struct address_space *mapping,
48862306a36Sopenharmony_ci		struct writeback_control *wbc)
48962306a36Sopenharmony_ci{
49062306a36Sopenharmony_ci	struct iomap_writepage_ctx wpc = { };
49162306a36Sopenharmony_ci
49262306a36Sopenharmony_ci	return iomap_writepages(mapping, wbc, &wpc, &blkdev_writeback_ops);
49362306a36Sopenharmony_ci}
49462306a36Sopenharmony_ci
49562306a36Sopenharmony_ciconst struct address_space_operations def_blk_aops = {
49662306a36Sopenharmony_ci	.dirty_folio	= filemap_dirty_folio,
49762306a36Sopenharmony_ci	.release_folio		= iomap_release_folio,
49862306a36Sopenharmony_ci	.invalidate_folio	= iomap_invalidate_folio,
49962306a36Sopenharmony_ci	.read_folio		= blkdev_read_folio,
50062306a36Sopenharmony_ci	.readahead		= blkdev_readahead,
50162306a36Sopenharmony_ci	.writepages		= blkdev_writepages,
50262306a36Sopenharmony_ci	.is_partially_uptodate  = iomap_is_partially_uptodate,
50362306a36Sopenharmony_ci	.error_remove_page	= generic_error_remove_page,
50462306a36Sopenharmony_ci	.migrate_folio		= filemap_migrate_folio,
50562306a36Sopenharmony_ci};
50662306a36Sopenharmony_ci#endif /* CONFIG_BUFFER_HEAD */
50762306a36Sopenharmony_ci
50862306a36Sopenharmony_ci/*
50962306a36Sopenharmony_ci * for a block special file file_inode(file)->i_size is zero
51062306a36Sopenharmony_ci * so we compute the size by hand (just as in block_read/write above)
51162306a36Sopenharmony_ci */
51262306a36Sopenharmony_cistatic loff_t blkdev_llseek(struct file *file, loff_t offset, int whence)
51362306a36Sopenharmony_ci{
51462306a36Sopenharmony_ci	struct inode *bd_inode = bdev_file_inode(file);
51562306a36Sopenharmony_ci	loff_t retval;
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci	inode_lock(bd_inode);
51862306a36Sopenharmony_ci	retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
51962306a36Sopenharmony_ci	inode_unlock(bd_inode);
52062306a36Sopenharmony_ci	return retval;
52162306a36Sopenharmony_ci}
52262306a36Sopenharmony_ci
52362306a36Sopenharmony_cistatic int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
52462306a36Sopenharmony_ci		int datasync)
52562306a36Sopenharmony_ci{
52662306a36Sopenharmony_ci	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
52762306a36Sopenharmony_ci	int error;
52862306a36Sopenharmony_ci
52962306a36Sopenharmony_ci	error = file_write_and_wait_range(filp, start, end);
53062306a36Sopenharmony_ci	if (error)
53162306a36Sopenharmony_ci		return error;
53262306a36Sopenharmony_ci
53362306a36Sopenharmony_ci	/*
53462306a36Sopenharmony_ci	 * There is no need to serialise calls to blkdev_issue_flush with
53562306a36Sopenharmony_ci	 * i_mutex and doing so causes performance issues with concurrent
53662306a36Sopenharmony_ci	 * O_SYNC writers to a block device.
53762306a36Sopenharmony_ci	 */
53862306a36Sopenharmony_ci	error = blkdev_issue_flush(bdev);
53962306a36Sopenharmony_ci	if (error == -EOPNOTSUPP)
54062306a36Sopenharmony_ci		error = 0;
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_ci	return error;
54362306a36Sopenharmony_ci}
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ciblk_mode_t file_to_blk_mode(struct file *file)
54662306a36Sopenharmony_ci{
54762306a36Sopenharmony_ci	blk_mode_t mode = 0;
54862306a36Sopenharmony_ci
54962306a36Sopenharmony_ci	if (file->f_mode & FMODE_READ)
55062306a36Sopenharmony_ci		mode |= BLK_OPEN_READ;
55162306a36Sopenharmony_ci	if (file->f_mode & FMODE_WRITE)
55262306a36Sopenharmony_ci		mode |= BLK_OPEN_WRITE;
55362306a36Sopenharmony_ci	if (file->private_data)
55462306a36Sopenharmony_ci		mode |= BLK_OPEN_EXCL;
55562306a36Sopenharmony_ci	if (file->f_flags & O_NDELAY)
55662306a36Sopenharmony_ci		mode |= BLK_OPEN_NDELAY;
55762306a36Sopenharmony_ci
55862306a36Sopenharmony_ci	/*
55962306a36Sopenharmony_ci	 * If all bits in O_ACCMODE set (aka O_RDWR | O_WRONLY), the floppy
56062306a36Sopenharmony_ci	 * driver has historically allowed ioctls as if the file was opened for
56162306a36Sopenharmony_ci	 * writing, but does not allow and actual reads or writes.
56262306a36Sopenharmony_ci	 */
56362306a36Sopenharmony_ci	if ((file->f_flags & O_ACCMODE) == (O_RDWR | O_WRONLY))
56462306a36Sopenharmony_ci		mode |= BLK_OPEN_WRITE_IOCTL;
56562306a36Sopenharmony_ci
56662306a36Sopenharmony_ci	return mode;
56762306a36Sopenharmony_ci}
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_cistatic int blkdev_open(struct inode *inode, struct file *filp)
57062306a36Sopenharmony_ci{
57162306a36Sopenharmony_ci	struct block_device *bdev;
57262306a36Sopenharmony_ci
57362306a36Sopenharmony_ci	/*
57462306a36Sopenharmony_ci	 * Preserve backwards compatibility and allow large file access
57562306a36Sopenharmony_ci	 * even if userspace doesn't ask for it explicitly. Some mkfs
57662306a36Sopenharmony_ci	 * binary needs it. We might want to drop this workaround
57762306a36Sopenharmony_ci	 * during an unstable branch.
57862306a36Sopenharmony_ci	 */
57962306a36Sopenharmony_ci	filp->f_flags |= O_LARGEFILE;
58062306a36Sopenharmony_ci	filp->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
58162306a36Sopenharmony_ci
58262306a36Sopenharmony_ci	/*
58362306a36Sopenharmony_ci	 * Use the file private data to store the holder for exclusive openes.
58462306a36Sopenharmony_ci	 * file_to_blk_mode relies on it being present to set BLK_OPEN_EXCL.
58562306a36Sopenharmony_ci	 */
58662306a36Sopenharmony_ci	if (filp->f_flags & O_EXCL)
58762306a36Sopenharmony_ci		filp->private_data = filp;
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ci	bdev = blkdev_get_by_dev(inode->i_rdev, file_to_blk_mode(filp),
59062306a36Sopenharmony_ci				 filp->private_data, NULL);
59162306a36Sopenharmony_ci	if (IS_ERR(bdev))
59262306a36Sopenharmony_ci		return PTR_ERR(bdev);
59362306a36Sopenharmony_ci
59462306a36Sopenharmony_ci	if (bdev_nowait(bdev))
59562306a36Sopenharmony_ci		filp->f_mode |= FMODE_NOWAIT;
59662306a36Sopenharmony_ci
59762306a36Sopenharmony_ci	filp->f_mapping = bdev->bd_inode->i_mapping;
59862306a36Sopenharmony_ci	filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
59962306a36Sopenharmony_ci	return 0;
60062306a36Sopenharmony_ci}
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_cistatic int blkdev_release(struct inode *inode, struct file *filp)
60362306a36Sopenharmony_ci{
60462306a36Sopenharmony_ci	blkdev_put(I_BDEV(filp->f_mapping->host), filp->private_data);
60562306a36Sopenharmony_ci	return 0;
60662306a36Sopenharmony_ci}
60762306a36Sopenharmony_ci
60862306a36Sopenharmony_cistatic ssize_t
60962306a36Sopenharmony_ciblkdev_direct_write(struct kiocb *iocb, struct iov_iter *from)
61062306a36Sopenharmony_ci{
61162306a36Sopenharmony_ci	size_t count = iov_iter_count(from);
61262306a36Sopenharmony_ci	ssize_t written;
61362306a36Sopenharmony_ci
61462306a36Sopenharmony_ci	written = kiocb_invalidate_pages(iocb, count);
61562306a36Sopenharmony_ci	if (written) {
61662306a36Sopenharmony_ci		if (written == -EBUSY)
61762306a36Sopenharmony_ci			return 0;
61862306a36Sopenharmony_ci		return written;
61962306a36Sopenharmony_ci	}
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci	written = blkdev_direct_IO(iocb, from);
62262306a36Sopenharmony_ci	if (written > 0) {
62362306a36Sopenharmony_ci		kiocb_invalidate_post_direct_write(iocb, count);
62462306a36Sopenharmony_ci		iocb->ki_pos += written;
62562306a36Sopenharmony_ci		count -= written;
62662306a36Sopenharmony_ci	}
62762306a36Sopenharmony_ci	if (written != -EIOCBQUEUED)
62862306a36Sopenharmony_ci		iov_iter_revert(from, count - iov_iter_count(from));
62962306a36Sopenharmony_ci	return written;
63062306a36Sopenharmony_ci}
63162306a36Sopenharmony_ci
63262306a36Sopenharmony_cistatic ssize_t blkdev_buffered_write(struct kiocb *iocb, struct iov_iter *from)
63362306a36Sopenharmony_ci{
63462306a36Sopenharmony_ci	return iomap_file_buffered_write(iocb, from, &blkdev_iomap_ops);
63562306a36Sopenharmony_ci}
63662306a36Sopenharmony_ci
63762306a36Sopenharmony_ci/*
63862306a36Sopenharmony_ci * Write data to the block device.  Only intended for the block device itself
63962306a36Sopenharmony_ci * and the raw driver which basically is a fake block device.
64062306a36Sopenharmony_ci *
64162306a36Sopenharmony_ci * Does not take i_mutex for the write and thus is not for general purpose
64262306a36Sopenharmony_ci * use.
64362306a36Sopenharmony_ci */
64462306a36Sopenharmony_cistatic ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
64562306a36Sopenharmony_ci{
64662306a36Sopenharmony_ci	struct file *file = iocb->ki_filp;
64762306a36Sopenharmony_ci	struct block_device *bdev = I_BDEV(file->f_mapping->host);
64862306a36Sopenharmony_ci	struct inode *bd_inode = bdev->bd_inode;
64962306a36Sopenharmony_ci	loff_t size = bdev_nr_bytes(bdev);
65062306a36Sopenharmony_ci	size_t shorted = 0;
65162306a36Sopenharmony_ci	ssize_t ret;
65262306a36Sopenharmony_ci
65362306a36Sopenharmony_ci	if (bdev_read_only(bdev))
65462306a36Sopenharmony_ci		return -EPERM;
65562306a36Sopenharmony_ci
65662306a36Sopenharmony_ci	if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
65762306a36Sopenharmony_ci		return -ETXTBSY;
65862306a36Sopenharmony_ci
65962306a36Sopenharmony_ci	if (!iov_iter_count(from))
66062306a36Sopenharmony_ci		return 0;
66162306a36Sopenharmony_ci
66262306a36Sopenharmony_ci	if (iocb->ki_pos >= size)
66362306a36Sopenharmony_ci		return -ENOSPC;
66462306a36Sopenharmony_ci
66562306a36Sopenharmony_ci	if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
66662306a36Sopenharmony_ci		return -EOPNOTSUPP;
66762306a36Sopenharmony_ci
66862306a36Sopenharmony_ci	size -= iocb->ki_pos;
66962306a36Sopenharmony_ci	if (iov_iter_count(from) > size) {
67062306a36Sopenharmony_ci		shorted = iov_iter_count(from) - size;
67162306a36Sopenharmony_ci		iov_iter_truncate(from, size);
67262306a36Sopenharmony_ci	}
67362306a36Sopenharmony_ci
67462306a36Sopenharmony_ci	ret = file_update_time(file);
67562306a36Sopenharmony_ci	if (ret)
67662306a36Sopenharmony_ci		return ret;
67762306a36Sopenharmony_ci
67862306a36Sopenharmony_ci	if (iocb->ki_flags & IOCB_DIRECT) {
67962306a36Sopenharmony_ci		ret = blkdev_direct_write(iocb, from);
68062306a36Sopenharmony_ci		if (ret >= 0 && iov_iter_count(from))
68162306a36Sopenharmony_ci			ret = direct_write_fallback(iocb, from, ret,
68262306a36Sopenharmony_ci					blkdev_buffered_write(iocb, from));
68362306a36Sopenharmony_ci	} else {
68462306a36Sopenharmony_ci		ret = blkdev_buffered_write(iocb, from);
68562306a36Sopenharmony_ci	}
68662306a36Sopenharmony_ci
68762306a36Sopenharmony_ci	if (ret > 0)
68862306a36Sopenharmony_ci		ret = generic_write_sync(iocb, ret);
68962306a36Sopenharmony_ci	iov_iter_reexpand(from, iov_iter_count(from) + shorted);
69062306a36Sopenharmony_ci	return ret;
69162306a36Sopenharmony_ci}
69262306a36Sopenharmony_ci
69362306a36Sopenharmony_cistatic ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
69462306a36Sopenharmony_ci{
69562306a36Sopenharmony_ci	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
69662306a36Sopenharmony_ci	loff_t size = bdev_nr_bytes(bdev);
69762306a36Sopenharmony_ci	loff_t pos = iocb->ki_pos;
69862306a36Sopenharmony_ci	size_t shorted = 0;
69962306a36Sopenharmony_ci	ssize_t ret = 0;
70062306a36Sopenharmony_ci	size_t count;
70162306a36Sopenharmony_ci
70262306a36Sopenharmony_ci	if (unlikely(pos + iov_iter_count(to) > size)) {
70362306a36Sopenharmony_ci		if (pos >= size)
70462306a36Sopenharmony_ci			return 0;
70562306a36Sopenharmony_ci		size -= pos;
70662306a36Sopenharmony_ci		shorted = iov_iter_count(to) - size;
70762306a36Sopenharmony_ci		iov_iter_truncate(to, size);
70862306a36Sopenharmony_ci	}
70962306a36Sopenharmony_ci
71062306a36Sopenharmony_ci	count = iov_iter_count(to);
71162306a36Sopenharmony_ci	if (!count)
71262306a36Sopenharmony_ci		goto reexpand; /* skip atime */
71362306a36Sopenharmony_ci
71462306a36Sopenharmony_ci	if (iocb->ki_flags & IOCB_DIRECT) {
71562306a36Sopenharmony_ci		ret = kiocb_write_and_wait(iocb, count);
71662306a36Sopenharmony_ci		if (ret < 0)
71762306a36Sopenharmony_ci			goto reexpand;
71862306a36Sopenharmony_ci		file_accessed(iocb->ki_filp);
71962306a36Sopenharmony_ci
72062306a36Sopenharmony_ci		ret = blkdev_direct_IO(iocb, to);
72162306a36Sopenharmony_ci		if (ret >= 0) {
72262306a36Sopenharmony_ci			iocb->ki_pos += ret;
72362306a36Sopenharmony_ci			count -= ret;
72462306a36Sopenharmony_ci		}
72562306a36Sopenharmony_ci		iov_iter_revert(to, count - iov_iter_count(to));
72662306a36Sopenharmony_ci		if (ret < 0 || !count)
72762306a36Sopenharmony_ci			goto reexpand;
72862306a36Sopenharmony_ci	}
72962306a36Sopenharmony_ci
73062306a36Sopenharmony_ci	ret = filemap_read(iocb, to, ret);
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_cireexpand:
73362306a36Sopenharmony_ci	if (unlikely(shorted))
73462306a36Sopenharmony_ci		iov_iter_reexpand(to, iov_iter_count(to) + shorted);
73562306a36Sopenharmony_ci	return ret;
73662306a36Sopenharmony_ci}
73762306a36Sopenharmony_ci
73862306a36Sopenharmony_ci#define	BLKDEV_FALLOC_FL_SUPPORTED					\
73962306a36Sopenharmony_ci		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
74062306a36Sopenharmony_ci		 FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
74162306a36Sopenharmony_ci
74262306a36Sopenharmony_cistatic long blkdev_fallocate(struct file *file, int mode, loff_t start,
74362306a36Sopenharmony_ci			     loff_t len)
74462306a36Sopenharmony_ci{
74562306a36Sopenharmony_ci	struct inode *inode = bdev_file_inode(file);
74662306a36Sopenharmony_ci	struct block_device *bdev = I_BDEV(inode);
74762306a36Sopenharmony_ci	loff_t end = start + len - 1;
74862306a36Sopenharmony_ci	loff_t isize;
74962306a36Sopenharmony_ci	int error;
75062306a36Sopenharmony_ci
75162306a36Sopenharmony_ci	/* Fail if we don't recognize the flags. */
75262306a36Sopenharmony_ci	if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
75362306a36Sopenharmony_ci		return -EOPNOTSUPP;
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_ci	/* Don't go off the end of the device. */
75662306a36Sopenharmony_ci	isize = bdev_nr_bytes(bdev);
75762306a36Sopenharmony_ci	if (start >= isize)
75862306a36Sopenharmony_ci		return -EINVAL;
75962306a36Sopenharmony_ci	if (end >= isize) {
76062306a36Sopenharmony_ci		if (mode & FALLOC_FL_KEEP_SIZE) {
76162306a36Sopenharmony_ci			len = isize - start;
76262306a36Sopenharmony_ci			end = start + len - 1;
76362306a36Sopenharmony_ci		} else
76462306a36Sopenharmony_ci			return -EINVAL;
76562306a36Sopenharmony_ci	}
76662306a36Sopenharmony_ci
76762306a36Sopenharmony_ci	/*
76862306a36Sopenharmony_ci	 * Don't allow IO that isn't aligned to logical block size.
76962306a36Sopenharmony_ci	 */
77062306a36Sopenharmony_ci	if ((start | len) & (bdev_logical_block_size(bdev) - 1))
77162306a36Sopenharmony_ci		return -EINVAL;
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_ci	filemap_invalidate_lock(inode->i_mapping);
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_ci	/*
77662306a36Sopenharmony_ci	 * Invalidate the page cache, including dirty pages, for valid
77762306a36Sopenharmony_ci	 * de-allocate mode calls to fallocate().
77862306a36Sopenharmony_ci	 */
77962306a36Sopenharmony_ci	switch (mode) {
78062306a36Sopenharmony_ci	case FALLOC_FL_ZERO_RANGE:
78162306a36Sopenharmony_ci	case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
78262306a36Sopenharmony_ci		error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end);
78362306a36Sopenharmony_ci		if (error)
78462306a36Sopenharmony_ci			goto fail;
78562306a36Sopenharmony_ci
78662306a36Sopenharmony_ci		error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT,
78762306a36Sopenharmony_ci					     len >> SECTOR_SHIFT, GFP_KERNEL,
78862306a36Sopenharmony_ci					     BLKDEV_ZERO_NOUNMAP);
78962306a36Sopenharmony_ci		break;
79062306a36Sopenharmony_ci	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
79162306a36Sopenharmony_ci		error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end);
79262306a36Sopenharmony_ci		if (error)
79362306a36Sopenharmony_ci			goto fail;
79462306a36Sopenharmony_ci
79562306a36Sopenharmony_ci		error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT,
79662306a36Sopenharmony_ci					     len >> SECTOR_SHIFT, GFP_KERNEL,
79762306a36Sopenharmony_ci					     BLKDEV_ZERO_NOFALLBACK);
79862306a36Sopenharmony_ci		break;
79962306a36Sopenharmony_ci	case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
80062306a36Sopenharmony_ci		error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end);
80162306a36Sopenharmony_ci		if (error)
80262306a36Sopenharmony_ci			goto fail;
80362306a36Sopenharmony_ci
80462306a36Sopenharmony_ci		error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
80562306a36Sopenharmony_ci					     len >> SECTOR_SHIFT, GFP_KERNEL);
80662306a36Sopenharmony_ci		break;
80762306a36Sopenharmony_ci	default:
80862306a36Sopenharmony_ci		error = -EOPNOTSUPP;
80962306a36Sopenharmony_ci	}
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_ci fail:
81262306a36Sopenharmony_ci	filemap_invalidate_unlock(inode->i_mapping);
81362306a36Sopenharmony_ci	return error;
81462306a36Sopenharmony_ci}
81562306a36Sopenharmony_ci
/*
 * ->mmap() for block device files.
 *
 * A read-only device is mapped through generic_file_readonly_mmap(); any
 * other device uses generic_file_mmap().  Returns the result of whichever
 * helper was called.
 */
static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct block_device *bdev = I_BDEV(bdev_file_inode(file));

	return bdev_read_only(bdev) ?
		generic_file_readonly_mmap(file, vma) :
		generic_file_mmap(file, vma);
}
82562306a36Sopenharmony_ci
82662306a36Sopenharmony_ciconst struct file_operations def_blk_fops = {
82762306a36Sopenharmony_ci	.open		= blkdev_open,
82862306a36Sopenharmony_ci	.release	= blkdev_release,
82962306a36Sopenharmony_ci	.llseek		= blkdev_llseek,
83062306a36Sopenharmony_ci	.read_iter	= blkdev_read_iter,
83162306a36Sopenharmony_ci	.write_iter	= blkdev_write_iter,
83262306a36Sopenharmony_ci	.iopoll		= iocb_bio_iopoll,
83362306a36Sopenharmony_ci	.mmap		= blkdev_mmap,
83462306a36Sopenharmony_ci	.fsync		= blkdev_fsync,
83562306a36Sopenharmony_ci	.unlocked_ioctl	= blkdev_ioctl,
83662306a36Sopenharmony_ci#ifdef CONFIG_COMPAT
83762306a36Sopenharmony_ci	.compat_ioctl	= compat_blkdev_ioctl,
83862306a36Sopenharmony_ci#endif
83962306a36Sopenharmony_ci	.splice_read	= filemap_splice_read,
84062306a36Sopenharmony_ci	.splice_write	= iter_file_splice_write,
84162306a36Sopenharmony_ci	.fallocate	= blkdev_fallocate,
84262306a36Sopenharmony_ci};
84362306a36Sopenharmony_ci
84462306a36Sopenharmony_cistatic __init int blkdev_init(void)
84562306a36Sopenharmony_ci{
84662306a36Sopenharmony_ci	return bioset_init(&blkdev_dio_pool, 4,
84762306a36Sopenharmony_ci				offsetof(struct blkdev_dio, bio),
84862306a36Sopenharmony_ci				BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE);
84962306a36Sopenharmony_ci}
85062306a36Sopenharmony_cimodule_init(blkdev_init);
851