/*
 * Copyright (C) 2003 Sistina Software
 * Copyright (C) 2006 Red Hat GmbH
 *
 * This file is released under the GPL.
 */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include "dm-core.h"
98c2ecf20Sopenharmony_ci
108c2ecf20Sopenharmony_ci#include <linux/device-mapper.h>
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci#include <linux/bio.h>
138c2ecf20Sopenharmony_ci#include <linux/completion.h>
148c2ecf20Sopenharmony_ci#include <linux/mempool.h>
158c2ecf20Sopenharmony_ci#include <linux/module.h>
168c2ecf20Sopenharmony_ci#include <linux/sched.h>
178c2ecf20Sopenharmony_ci#include <linux/slab.h>
188c2ecf20Sopenharmony_ci#include <linux/dm-io.h>
198c2ecf20Sopenharmony_ci
/* Message prefix for this file; presumably consumed by the DM* logging macros. */
#define DM_MSG_PREFIX "io"

/*
 * Upper bound on the number of regions a single request may target.
 * One error bit per region is kept in an unsigned long, and the same
 * value is used as the alignment of 'struct io' so that the region
 * index fits in the low bits of its address.
 */
#define DM_IO_MAX_REGIONS	BITS_PER_LONG
238c2ecf20Sopenharmony_ci
/*
 * Per-client allocation resources, set up by dm_io_client_create() and
 * torn down by dm_io_client_destroy().
 */
struct dm_io_client {
	mempool_t pool;		/* 'struct io' objects, backed by _dm_io_cache */
	struct bio_set bios;	/* bios issued by do_region() come from here */
};
288c2ecf20Sopenharmony_ci
/*
 * Tracks one dm_io request across all the bios dispatched for it.
 *
 * Aligning 'struct io' reduces the number of bits required to store
 * its address.  Refer to store_io_and_region_in_bio() below.
 */
struct io {
	unsigned long error_bits;	/* bit N set => a bio for region N failed */
	atomic_t count;			/* outstanding references; 0 => complete_io() */
	struct dm_io_client *client;	/* owner of the mempool this io came from */
	io_notify_fn callback;		/* invoked once by complete_io() */
	void *context;			/* opaque argument handed to callback */
	void *vma_invalidate_address;	/* range passed to invalidate_kernel_vmap_range() */
	unsigned long vma_invalidate_size;	/* 0 => no invalidation on completion */
} __attribute__((aligned(DM_IO_MAX_REGIONS)));
428c2ecf20Sopenharmony_ci
/* Slab cache for 'struct io'; created in dm_io_init(), destroyed in dm_io_exit(). */
static struct kmem_cache *_dm_io_cache;
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci/*
468c2ecf20Sopenharmony_ci * Create a client with mempool and bioset.
478c2ecf20Sopenharmony_ci */
488c2ecf20Sopenharmony_cistruct dm_io_client *dm_io_client_create(void)
498c2ecf20Sopenharmony_ci{
508c2ecf20Sopenharmony_ci	struct dm_io_client *client;
518c2ecf20Sopenharmony_ci	unsigned min_ios = dm_get_reserved_bio_based_ios();
528c2ecf20Sopenharmony_ci	int ret;
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ci	client = kzalloc(sizeof(*client), GFP_KERNEL);
558c2ecf20Sopenharmony_ci	if (!client)
568c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_ci	ret = mempool_init_slab_pool(&client->pool, min_ios, _dm_io_cache);
598c2ecf20Sopenharmony_ci	if (ret)
608c2ecf20Sopenharmony_ci		goto bad;
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_ci	ret = bioset_init(&client->bios, min_ios, 0, BIOSET_NEED_BVECS);
638c2ecf20Sopenharmony_ci	if (ret)
648c2ecf20Sopenharmony_ci		goto bad;
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci	return client;
678c2ecf20Sopenharmony_ci
688c2ecf20Sopenharmony_ci   bad:
698c2ecf20Sopenharmony_ci	mempool_exit(&client->pool);
708c2ecf20Sopenharmony_ci	kfree(client);
718c2ecf20Sopenharmony_ci	return ERR_PTR(ret);
728c2ecf20Sopenharmony_ci}
738c2ecf20Sopenharmony_ciEXPORT_SYMBOL(dm_io_client_create);
748c2ecf20Sopenharmony_ci
758c2ecf20Sopenharmony_civoid dm_io_client_destroy(struct dm_io_client *client)
768c2ecf20Sopenharmony_ci{
778c2ecf20Sopenharmony_ci	mempool_exit(&client->pool);
788c2ecf20Sopenharmony_ci	bioset_exit(&client->bios);
798c2ecf20Sopenharmony_ci	kfree(client);
808c2ecf20Sopenharmony_ci}
818c2ecf20Sopenharmony_ciEXPORT_SYMBOL(dm_io_client_destroy);
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_ci/*-----------------------------------------------------------------
848c2ecf20Sopenharmony_ci * We need to keep track of which region a bio is doing io for.
858c2ecf20Sopenharmony_ci * To avoid a memory allocation to store just 5 or 6 bits, we
868c2ecf20Sopenharmony_ci * ensure the 'struct io' pointer is aligned so enough low bits are
878c2ecf20Sopenharmony_ci * always zero and then combine it with the region number directly in
888c2ecf20Sopenharmony_ci * bi_private.
898c2ecf20Sopenharmony_ci *---------------------------------------------------------------*/
908c2ecf20Sopenharmony_cistatic void store_io_and_region_in_bio(struct bio *bio, struct io *io,
918c2ecf20Sopenharmony_ci				       unsigned region)
928c2ecf20Sopenharmony_ci{
938c2ecf20Sopenharmony_ci	if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) {
948c2ecf20Sopenharmony_ci		DMCRIT("Unaligned struct io pointer %p", io);
958c2ecf20Sopenharmony_ci		BUG();
968c2ecf20Sopenharmony_ci	}
978c2ecf20Sopenharmony_ci
988c2ecf20Sopenharmony_ci	bio->bi_private = (void *)((unsigned long)io | region);
998c2ecf20Sopenharmony_ci}
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_cistatic void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
1028c2ecf20Sopenharmony_ci				       unsigned *region)
1038c2ecf20Sopenharmony_ci{
1048c2ecf20Sopenharmony_ci	unsigned long val = (unsigned long)bio->bi_private;
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_ci	*io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS);
1078c2ecf20Sopenharmony_ci	*region = val & (DM_IO_MAX_REGIONS - 1);
1088c2ecf20Sopenharmony_ci}
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_ci/*-----------------------------------------------------------------
1118c2ecf20Sopenharmony_ci * We need an io object to keep track of the number of bios that
1128c2ecf20Sopenharmony_ci * have been dispatched for a particular io.
1138c2ecf20Sopenharmony_ci *---------------------------------------------------------------*/
1148c2ecf20Sopenharmony_cistatic void complete_io(struct io *io)
1158c2ecf20Sopenharmony_ci{
1168c2ecf20Sopenharmony_ci	unsigned long error_bits = io->error_bits;
1178c2ecf20Sopenharmony_ci	io_notify_fn fn = io->callback;
1188c2ecf20Sopenharmony_ci	void *context = io->context;
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci	if (io->vma_invalidate_size)
1218c2ecf20Sopenharmony_ci		invalidate_kernel_vmap_range(io->vma_invalidate_address,
1228c2ecf20Sopenharmony_ci					     io->vma_invalidate_size);
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci	mempool_free(io, &io->client->pool);
1258c2ecf20Sopenharmony_ci	fn(error_bits, context);
1268c2ecf20Sopenharmony_ci}
1278c2ecf20Sopenharmony_ci
1288c2ecf20Sopenharmony_cistatic void dec_count(struct io *io, unsigned int region, blk_status_t error)
1298c2ecf20Sopenharmony_ci{
1308c2ecf20Sopenharmony_ci	if (error)
1318c2ecf20Sopenharmony_ci		set_bit(region, &io->error_bits);
1328c2ecf20Sopenharmony_ci
1338c2ecf20Sopenharmony_ci	if (atomic_dec_and_test(&io->count))
1348c2ecf20Sopenharmony_ci		complete_io(io);
1358c2ecf20Sopenharmony_ci}
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_cistatic void endio(struct bio *bio)
1388c2ecf20Sopenharmony_ci{
1398c2ecf20Sopenharmony_ci	struct io *io;
1408c2ecf20Sopenharmony_ci	unsigned region;
1418c2ecf20Sopenharmony_ci	blk_status_t error;
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci	if (bio->bi_status && bio_data_dir(bio) == READ)
1448c2ecf20Sopenharmony_ci		zero_fill_bio(bio);
1458c2ecf20Sopenharmony_ci
1468c2ecf20Sopenharmony_ci	/*
1478c2ecf20Sopenharmony_ci	 * The bio destructor in bio_put() may use the io object.
1488c2ecf20Sopenharmony_ci	 */
1498c2ecf20Sopenharmony_ci	retrieve_io_and_region_from_bio(bio, &io, &region);
1508c2ecf20Sopenharmony_ci
1518c2ecf20Sopenharmony_ci	error = bio->bi_status;
1528c2ecf20Sopenharmony_ci	bio_put(bio);
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_ci	dec_count(io, region, error);
1558c2ecf20Sopenharmony_ci}
1568c2ecf20Sopenharmony_ci
/*-----------------------------------------------------------------
 * These little objects provide an abstraction for getting a new
 * destination page for io.
 *---------------------------------------------------------------*/
struct dpages {
	/* Report the current page, the usable length in it and the offset. */
	void (*get_page)(struct dpages *dp,
			 struct page **p, unsigned long *len, unsigned *offset);
	/* Advance the cursor to the next page. */
	void (*next_page)(struct dpages *dp);

	/* Cursor state; which member is used depends on the backing type. */
	union {
		unsigned context_u;		/* offset within current page (list/vm/km) */
		struct bvec_iter context_bi;	/* iterator state (bio backing) */
	};
	void *context_ptr;	/* page_list node, bvec table or address */

	/* Copied into 'struct io' so complete_io() can invalidate the range. */
	void *vma_invalidate_address;
	unsigned long vma_invalidate_size;
};
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci/*
1778c2ecf20Sopenharmony_ci * Functions for getting the pages from a list.
1788c2ecf20Sopenharmony_ci */
1798c2ecf20Sopenharmony_cistatic void list_get_page(struct dpages *dp,
1808c2ecf20Sopenharmony_ci		  struct page **p, unsigned long *len, unsigned *offset)
1818c2ecf20Sopenharmony_ci{
1828c2ecf20Sopenharmony_ci	unsigned o = dp->context_u;
1838c2ecf20Sopenharmony_ci	struct page_list *pl = (struct page_list *) dp->context_ptr;
1848c2ecf20Sopenharmony_ci
1858c2ecf20Sopenharmony_ci	*p = pl->page;
1868c2ecf20Sopenharmony_ci	*len = PAGE_SIZE - o;
1878c2ecf20Sopenharmony_ci	*offset = o;
1888c2ecf20Sopenharmony_ci}
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_cistatic void list_next_page(struct dpages *dp)
1918c2ecf20Sopenharmony_ci{
1928c2ecf20Sopenharmony_ci	struct page_list *pl = (struct page_list *) dp->context_ptr;
1938c2ecf20Sopenharmony_ci	dp->context_ptr = pl->next;
1948c2ecf20Sopenharmony_ci	dp->context_u = 0;
1958c2ecf20Sopenharmony_ci}
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_cistatic void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned offset)
1988c2ecf20Sopenharmony_ci{
1998c2ecf20Sopenharmony_ci	dp->get_page = list_get_page;
2008c2ecf20Sopenharmony_ci	dp->next_page = list_next_page;
2018c2ecf20Sopenharmony_ci	dp->context_u = offset;
2028c2ecf20Sopenharmony_ci	dp->context_ptr = pl;
2038c2ecf20Sopenharmony_ci}
2048c2ecf20Sopenharmony_ci
2058c2ecf20Sopenharmony_ci/*
2068c2ecf20Sopenharmony_ci * Functions for getting the pages from a bvec.
2078c2ecf20Sopenharmony_ci */
2088c2ecf20Sopenharmony_cistatic void bio_get_page(struct dpages *dp, struct page **p,
2098c2ecf20Sopenharmony_ci			 unsigned long *len, unsigned *offset)
2108c2ecf20Sopenharmony_ci{
2118c2ecf20Sopenharmony_ci	struct bio_vec bvec = bvec_iter_bvec((struct bio_vec *)dp->context_ptr,
2128c2ecf20Sopenharmony_ci					     dp->context_bi);
2138c2ecf20Sopenharmony_ci
2148c2ecf20Sopenharmony_ci	*p = bvec.bv_page;
2158c2ecf20Sopenharmony_ci	*len = bvec.bv_len;
2168c2ecf20Sopenharmony_ci	*offset = bvec.bv_offset;
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci	/* avoid figuring it out again in bio_next_page() */
2198c2ecf20Sopenharmony_ci	dp->context_bi.bi_sector = (sector_t)bvec.bv_len;
2208c2ecf20Sopenharmony_ci}
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_cistatic void bio_next_page(struct dpages *dp)
2238c2ecf20Sopenharmony_ci{
2248c2ecf20Sopenharmony_ci	unsigned int len = (unsigned int)dp->context_bi.bi_sector;
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci	bvec_iter_advance((struct bio_vec *)dp->context_ptr,
2278c2ecf20Sopenharmony_ci			  &dp->context_bi, len);
2288c2ecf20Sopenharmony_ci}
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_cistatic void bio_dp_init(struct dpages *dp, struct bio *bio)
2318c2ecf20Sopenharmony_ci{
2328c2ecf20Sopenharmony_ci	dp->get_page = bio_get_page;
2338c2ecf20Sopenharmony_ci	dp->next_page = bio_next_page;
2348c2ecf20Sopenharmony_ci
2358c2ecf20Sopenharmony_ci	/*
2368c2ecf20Sopenharmony_ci	 * We just use bvec iterator to retrieve pages, so it is ok to
2378c2ecf20Sopenharmony_ci	 * access the bvec table directly here
2388c2ecf20Sopenharmony_ci	 */
2398c2ecf20Sopenharmony_ci	dp->context_ptr = bio->bi_io_vec;
2408c2ecf20Sopenharmony_ci	dp->context_bi = bio->bi_iter;
2418c2ecf20Sopenharmony_ci}
2428c2ecf20Sopenharmony_ci
2438c2ecf20Sopenharmony_ci/*
2448c2ecf20Sopenharmony_ci * Functions for getting the pages from a VMA.
2458c2ecf20Sopenharmony_ci */
2468c2ecf20Sopenharmony_cistatic void vm_get_page(struct dpages *dp,
2478c2ecf20Sopenharmony_ci		 struct page **p, unsigned long *len, unsigned *offset)
2488c2ecf20Sopenharmony_ci{
2498c2ecf20Sopenharmony_ci	*p = vmalloc_to_page(dp->context_ptr);
2508c2ecf20Sopenharmony_ci	*offset = dp->context_u;
2518c2ecf20Sopenharmony_ci	*len = PAGE_SIZE - dp->context_u;
2528c2ecf20Sopenharmony_ci}
2538c2ecf20Sopenharmony_ci
2548c2ecf20Sopenharmony_cistatic void vm_next_page(struct dpages *dp)
2558c2ecf20Sopenharmony_ci{
2568c2ecf20Sopenharmony_ci	dp->context_ptr += PAGE_SIZE - dp->context_u;
2578c2ecf20Sopenharmony_ci	dp->context_u = 0;
2588c2ecf20Sopenharmony_ci}
2598c2ecf20Sopenharmony_ci
2608c2ecf20Sopenharmony_cistatic void vm_dp_init(struct dpages *dp, void *data)
2618c2ecf20Sopenharmony_ci{
2628c2ecf20Sopenharmony_ci	dp->get_page = vm_get_page;
2638c2ecf20Sopenharmony_ci	dp->next_page = vm_next_page;
2648c2ecf20Sopenharmony_ci	dp->context_u = offset_in_page(data);
2658c2ecf20Sopenharmony_ci	dp->context_ptr = data;
2668c2ecf20Sopenharmony_ci}
2678c2ecf20Sopenharmony_ci
2688c2ecf20Sopenharmony_ci/*
2698c2ecf20Sopenharmony_ci * Functions for getting the pages from kernel memory.
2708c2ecf20Sopenharmony_ci */
2718c2ecf20Sopenharmony_cistatic void km_get_page(struct dpages *dp, struct page **p, unsigned long *len,
2728c2ecf20Sopenharmony_ci			unsigned *offset)
2738c2ecf20Sopenharmony_ci{
2748c2ecf20Sopenharmony_ci	*p = virt_to_page(dp->context_ptr);
2758c2ecf20Sopenharmony_ci	*offset = dp->context_u;
2768c2ecf20Sopenharmony_ci	*len = PAGE_SIZE - dp->context_u;
2778c2ecf20Sopenharmony_ci}
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_cistatic void km_next_page(struct dpages *dp)
2808c2ecf20Sopenharmony_ci{
2818c2ecf20Sopenharmony_ci	dp->context_ptr += PAGE_SIZE - dp->context_u;
2828c2ecf20Sopenharmony_ci	dp->context_u = 0;
2838c2ecf20Sopenharmony_ci}
2848c2ecf20Sopenharmony_ci
2858c2ecf20Sopenharmony_cistatic void km_dp_init(struct dpages *dp, void *data)
2868c2ecf20Sopenharmony_ci{
2878c2ecf20Sopenharmony_ci	dp->get_page = km_get_page;
2888c2ecf20Sopenharmony_ci	dp->next_page = km_next_page;
2898c2ecf20Sopenharmony_ci	dp->context_u = offset_in_page(data);
2908c2ecf20Sopenharmony_ci	dp->context_ptr = data;
2918c2ecf20Sopenharmony_ci}
2928c2ecf20Sopenharmony_ci
2938c2ecf20Sopenharmony_ci/*-----------------------------------------------------------------
2948c2ecf20Sopenharmony_ci * IO routines that accept a list of pages.
2958c2ecf20Sopenharmony_ci *---------------------------------------------------------------*/
2968c2ecf20Sopenharmony_cistatic void do_region(int op, int op_flags, unsigned region,
2978c2ecf20Sopenharmony_ci		      struct dm_io_region *where, struct dpages *dp,
2988c2ecf20Sopenharmony_ci		      struct io *io)
2998c2ecf20Sopenharmony_ci{
3008c2ecf20Sopenharmony_ci	struct bio *bio;
3018c2ecf20Sopenharmony_ci	struct page *page;
3028c2ecf20Sopenharmony_ci	unsigned long len;
3038c2ecf20Sopenharmony_ci	unsigned offset;
3048c2ecf20Sopenharmony_ci	unsigned num_bvecs;
3058c2ecf20Sopenharmony_ci	sector_t remaining = where->count;
3068c2ecf20Sopenharmony_ci	struct request_queue *q = bdev_get_queue(where->bdev);
3078c2ecf20Sopenharmony_ci	unsigned short logical_block_size = queue_logical_block_size(q);
3088c2ecf20Sopenharmony_ci	sector_t num_sectors;
3098c2ecf20Sopenharmony_ci	unsigned int special_cmd_max_sectors;
3108c2ecf20Sopenharmony_ci
3118c2ecf20Sopenharmony_ci	/*
3128c2ecf20Sopenharmony_ci	 * Reject unsupported discard and write same requests.
3138c2ecf20Sopenharmony_ci	 */
3148c2ecf20Sopenharmony_ci	if (op == REQ_OP_DISCARD)
3158c2ecf20Sopenharmony_ci		special_cmd_max_sectors = q->limits.max_discard_sectors;
3168c2ecf20Sopenharmony_ci	else if (op == REQ_OP_WRITE_ZEROES)
3178c2ecf20Sopenharmony_ci		special_cmd_max_sectors = q->limits.max_write_zeroes_sectors;
3188c2ecf20Sopenharmony_ci	else if (op == REQ_OP_WRITE_SAME)
3198c2ecf20Sopenharmony_ci		special_cmd_max_sectors = q->limits.max_write_same_sectors;
3208c2ecf20Sopenharmony_ci	if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES ||
3218c2ecf20Sopenharmony_ci	     op == REQ_OP_WRITE_SAME) && special_cmd_max_sectors == 0) {
3228c2ecf20Sopenharmony_ci		atomic_inc(&io->count);
3238c2ecf20Sopenharmony_ci		dec_count(io, region, BLK_STS_NOTSUPP);
3248c2ecf20Sopenharmony_ci		return;
3258c2ecf20Sopenharmony_ci	}
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_ci	/*
3288c2ecf20Sopenharmony_ci	 * where->count may be zero if op holds a flush and we need to
3298c2ecf20Sopenharmony_ci	 * send a zero-sized flush.
3308c2ecf20Sopenharmony_ci	 */
3318c2ecf20Sopenharmony_ci	do {
3328c2ecf20Sopenharmony_ci		/*
3338c2ecf20Sopenharmony_ci		 * Allocate a suitably sized-bio.
3348c2ecf20Sopenharmony_ci		 */
3358c2ecf20Sopenharmony_ci		switch (op) {
3368c2ecf20Sopenharmony_ci		case REQ_OP_DISCARD:
3378c2ecf20Sopenharmony_ci		case REQ_OP_WRITE_ZEROES:
3388c2ecf20Sopenharmony_ci			num_bvecs = 0;
3398c2ecf20Sopenharmony_ci			break;
3408c2ecf20Sopenharmony_ci		case REQ_OP_WRITE_SAME:
3418c2ecf20Sopenharmony_ci			num_bvecs = 1;
3428c2ecf20Sopenharmony_ci			break;
3438c2ecf20Sopenharmony_ci		default:
3448c2ecf20Sopenharmony_ci			num_bvecs = min_t(int, BIO_MAX_PAGES,
3458c2ecf20Sopenharmony_ci					  dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));
3468c2ecf20Sopenharmony_ci		}
3478c2ecf20Sopenharmony_ci
3488c2ecf20Sopenharmony_ci		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, &io->client->bios);
3498c2ecf20Sopenharmony_ci		bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
3508c2ecf20Sopenharmony_ci		bio_set_dev(bio, where->bdev);
3518c2ecf20Sopenharmony_ci		bio->bi_end_io = endio;
3528c2ecf20Sopenharmony_ci		bio_set_op_attrs(bio, op, op_flags);
3538c2ecf20Sopenharmony_ci		store_io_and_region_in_bio(bio, io, region);
3548c2ecf20Sopenharmony_ci
3558c2ecf20Sopenharmony_ci		if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) {
3568c2ecf20Sopenharmony_ci			num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining);
3578c2ecf20Sopenharmony_ci			bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
3588c2ecf20Sopenharmony_ci			remaining -= num_sectors;
3598c2ecf20Sopenharmony_ci		} else if (op == REQ_OP_WRITE_SAME) {
3608c2ecf20Sopenharmony_ci			/*
3618c2ecf20Sopenharmony_ci			 * WRITE SAME only uses a single page.
3628c2ecf20Sopenharmony_ci			 */
3638c2ecf20Sopenharmony_ci			dp->get_page(dp, &page, &len, &offset);
3648c2ecf20Sopenharmony_ci			bio_add_page(bio, page, logical_block_size, offset);
3658c2ecf20Sopenharmony_ci			num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining);
3668c2ecf20Sopenharmony_ci			bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
3678c2ecf20Sopenharmony_ci
3688c2ecf20Sopenharmony_ci			offset = 0;
3698c2ecf20Sopenharmony_ci			remaining -= num_sectors;
3708c2ecf20Sopenharmony_ci			dp->next_page(dp);
3718c2ecf20Sopenharmony_ci		} else while (remaining) {
3728c2ecf20Sopenharmony_ci			/*
3738c2ecf20Sopenharmony_ci			 * Try and add as many pages as possible.
3748c2ecf20Sopenharmony_ci			 */
3758c2ecf20Sopenharmony_ci			dp->get_page(dp, &page, &len, &offset);
3768c2ecf20Sopenharmony_ci			len = min(len, to_bytes(remaining));
3778c2ecf20Sopenharmony_ci			if (!bio_add_page(bio, page, len, offset))
3788c2ecf20Sopenharmony_ci				break;
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_ci			offset = 0;
3818c2ecf20Sopenharmony_ci			remaining -= to_sector(len);
3828c2ecf20Sopenharmony_ci			dp->next_page(dp);
3838c2ecf20Sopenharmony_ci		}
3848c2ecf20Sopenharmony_ci
3858c2ecf20Sopenharmony_ci		atomic_inc(&io->count);
3868c2ecf20Sopenharmony_ci		submit_bio(bio);
3878c2ecf20Sopenharmony_ci	} while (remaining);
3888c2ecf20Sopenharmony_ci}
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_cistatic void dispatch_io(int op, int op_flags, unsigned int num_regions,
3918c2ecf20Sopenharmony_ci			struct dm_io_region *where, struct dpages *dp,
3928c2ecf20Sopenharmony_ci			struct io *io, int sync)
3938c2ecf20Sopenharmony_ci{
3948c2ecf20Sopenharmony_ci	int i;
3958c2ecf20Sopenharmony_ci	struct dpages old_pages = *dp;
3968c2ecf20Sopenharmony_ci
3978c2ecf20Sopenharmony_ci	BUG_ON(num_regions > DM_IO_MAX_REGIONS);
3988c2ecf20Sopenharmony_ci
3998c2ecf20Sopenharmony_ci	if (sync)
4008c2ecf20Sopenharmony_ci		op_flags |= REQ_SYNC;
4018c2ecf20Sopenharmony_ci
4028c2ecf20Sopenharmony_ci	/*
4038c2ecf20Sopenharmony_ci	 * For multiple regions we need to be careful to rewind
4048c2ecf20Sopenharmony_ci	 * the dp object for each call to do_region.
4058c2ecf20Sopenharmony_ci	 */
4068c2ecf20Sopenharmony_ci	for (i = 0; i < num_regions; i++) {
4078c2ecf20Sopenharmony_ci		*dp = old_pages;
4088c2ecf20Sopenharmony_ci		if (where[i].count || (op_flags & REQ_PREFLUSH))
4098c2ecf20Sopenharmony_ci			do_region(op, op_flags, i, where + i, dp, io);
4108c2ecf20Sopenharmony_ci	}
4118c2ecf20Sopenharmony_ci
4128c2ecf20Sopenharmony_ci	/*
4138c2ecf20Sopenharmony_ci	 * Drop the extra reference that we were holding to avoid
4148c2ecf20Sopenharmony_ci	 * the io being completed too early.
4158c2ecf20Sopenharmony_ci	 */
4168c2ecf20Sopenharmony_ci	dec_count(io, 0, 0);
4178c2ecf20Sopenharmony_ci}
4188c2ecf20Sopenharmony_ci
/* On-stack state shared between sync_io() and sync_io_complete(). */
struct sync_io {
	unsigned long error_bits;	/* filled in by sync_io_complete() */
	struct completion wait;		/* signalled when the io has finished */
};
4238c2ecf20Sopenharmony_ci
4248c2ecf20Sopenharmony_cistatic void sync_io_complete(unsigned long error, void *context)
4258c2ecf20Sopenharmony_ci{
4268c2ecf20Sopenharmony_ci	struct sync_io *sio = context;
4278c2ecf20Sopenharmony_ci
4288c2ecf20Sopenharmony_ci	sio->error_bits = error;
4298c2ecf20Sopenharmony_ci	complete(&sio->wait);
4308c2ecf20Sopenharmony_ci}
4318c2ecf20Sopenharmony_ci
4328c2ecf20Sopenharmony_cistatic int sync_io(struct dm_io_client *client, unsigned int num_regions,
4338c2ecf20Sopenharmony_ci		   struct dm_io_region *where, int op, int op_flags,
4348c2ecf20Sopenharmony_ci		   struct dpages *dp, unsigned long *error_bits)
4358c2ecf20Sopenharmony_ci{
4368c2ecf20Sopenharmony_ci	struct io *io;
4378c2ecf20Sopenharmony_ci	struct sync_io sio;
4388c2ecf20Sopenharmony_ci
4398c2ecf20Sopenharmony_ci	if (num_regions > 1 && !op_is_write(op)) {
4408c2ecf20Sopenharmony_ci		WARN_ON(1);
4418c2ecf20Sopenharmony_ci		return -EIO;
4428c2ecf20Sopenharmony_ci	}
4438c2ecf20Sopenharmony_ci
4448c2ecf20Sopenharmony_ci	init_completion(&sio.wait);
4458c2ecf20Sopenharmony_ci
4468c2ecf20Sopenharmony_ci	io = mempool_alloc(&client->pool, GFP_NOIO);
4478c2ecf20Sopenharmony_ci	io->error_bits = 0;
4488c2ecf20Sopenharmony_ci	atomic_set(&io->count, 1); /* see dispatch_io() */
4498c2ecf20Sopenharmony_ci	io->client = client;
4508c2ecf20Sopenharmony_ci	io->callback = sync_io_complete;
4518c2ecf20Sopenharmony_ci	io->context = &sio;
4528c2ecf20Sopenharmony_ci
4538c2ecf20Sopenharmony_ci	io->vma_invalidate_address = dp->vma_invalidate_address;
4548c2ecf20Sopenharmony_ci	io->vma_invalidate_size = dp->vma_invalidate_size;
4558c2ecf20Sopenharmony_ci
4568c2ecf20Sopenharmony_ci	dispatch_io(op, op_flags, num_regions, where, dp, io, 1);
4578c2ecf20Sopenharmony_ci
4588c2ecf20Sopenharmony_ci	wait_for_completion_io(&sio.wait);
4598c2ecf20Sopenharmony_ci
4608c2ecf20Sopenharmony_ci	if (error_bits)
4618c2ecf20Sopenharmony_ci		*error_bits = sio.error_bits;
4628c2ecf20Sopenharmony_ci
4638c2ecf20Sopenharmony_ci	return sio.error_bits ? -EIO : 0;
4648c2ecf20Sopenharmony_ci}
4658c2ecf20Sopenharmony_ci
4668c2ecf20Sopenharmony_cistatic int async_io(struct dm_io_client *client, unsigned int num_regions,
4678c2ecf20Sopenharmony_ci		    struct dm_io_region *where, int op, int op_flags,
4688c2ecf20Sopenharmony_ci		    struct dpages *dp, io_notify_fn fn, void *context)
4698c2ecf20Sopenharmony_ci{
4708c2ecf20Sopenharmony_ci	struct io *io;
4718c2ecf20Sopenharmony_ci
4728c2ecf20Sopenharmony_ci	if (num_regions > 1 && !op_is_write(op)) {
4738c2ecf20Sopenharmony_ci		WARN_ON(1);
4748c2ecf20Sopenharmony_ci		fn(1, context);
4758c2ecf20Sopenharmony_ci		return -EIO;
4768c2ecf20Sopenharmony_ci	}
4778c2ecf20Sopenharmony_ci
4788c2ecf20Sopenharmony_ci	io = mempool_alloc(&client->pool, GFP_NOIO);
4798c2ecf20Sopenharmony_ci	io->error_bits = 0;
4808c2ecf20Sopenharmony_ci	atomic_set(&io->count, 1); /* see dispatch_io() */
4818c2ecf20Sopenharmony_ci	io->client = client;
4828c2ecf20Sopenharmony_ci	io->callback = fn;
4838c2ecf20Sopenharmony_ci	io->context = context;
4848c2ecf20Sopenharmony_ci
4858c2ecf20Sopenharmony_ci	io->vma_invalidate_address = dp->vma_invalidate_address;
4868c2ecf20Sopenharmony_ci	io->vma_invalidate_size = dp->vma_invalidate_size;
4878c2ecf20Sopenharmony_ci
4888c2ecf20Sopenharmony_ci	dispatch_io(op, op_flags, num_regions, where, dp, io, 0);
4898c2ecf20Sopenharmony_ci	return 0;
4908c2ecf20Sopenharmony_ci}
4918c2ecf20Sopenharmony_ci
4928c2ecf20Sopenharmony_cistatic int dp_init(struct dm_io_request *io_req, struct dpages *dp,
4938c2ecf20Sopenharmony_ci		   unsigned long size)
4948c2ecf20Sopenharmony_ci{
4958c2ecf20Sopenharmony_ci	/* Set up dpages based on memory type */
4968c2ecf20Sopenharmony_ci
4978c2ecf20Sopenharmony_ci	dp->vma_invalidate_address = NULL;
4988c2ecf20Sopenharmony_ci	dp->vma_invalidate_size = 0;
4998c2ecf20Sopenharmony_ci
5008c2ecf20Sopenharmony_ci	switch (io_req->mem.type) {
5018c2ecf20Sopenharmony_ci	case DM_IO_PAGE_LIST:
5028c2ecf20Sopenharmony_ci		list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
5038c2ecf20Sopenharmony_ci		break;
5048c2ecf20Sopenharmony_ci
5058c2ecf20Sopenharmony_ci	case DM_IO_BIO:
5068c2ecf20Sopenharmony_ci		bio_dp_init(dp, io_req->mem.ptr.bio);
5078c2ecf20Sopenharmony_ci		break;
5088c2ecf20Sopenharmony_ci
5098c2ecf20Sopenharmony_ci	case DM_IO_VMA:
5108c2ecf20Sopenharmony_ci		flush_kernel_vmap_range(io_req->mem.ptr.vma, size);
5118c2ecf20Sopenharmony_ci		if (io_req->bi_op == REQ_OP_READ) {
5128c2ecf20Sopenharmony_ci			dp->vma_invalidate_address = io_req->mem.ptr.vma;
5138c2ecf20Sopenharmony_ci			dp->vma_invalidate_size = size;
5148c2ecf20Sopenharmony_ci		}
5158c2ecf20Sopenharmony_ci		vm_dp_init(dp, io_req->mem.ptr.vma);
5168c2ecf20Sopenharmony_ci		break;
5178c2ecf20Sopenharmony_ci
5188c2ecf20Sopenharmony_ci	case DM_IO_KMEM:
5198c2ecf20Sopenharmony_ci		km_dp_init(dp, io_req->mem.ptr.addr);
5208c2ecf20Sopenharmony_ci		break;
5218c2ecf20Sopenharmony_ci
5228c2ecf20Sopenharmony_ci	default:
5238c2ecf20Sopenharmony_ci		return -EINVAL;
5248c2ecf20Sopenharmony_ci	}
5258c2ecf20Sopenharmony_ci
5268c2ecf20Sopenharmony_ci	return 0;
5278c2ecf20Sopenharmony_ci}
5288c2ecf20Sopenharmony_ci
5298c2ecf20Sopenharmony_ci/*
5308c2ecf20Sopenharmony_ci * New collapsed (a)synchronous interface.
5318c2ecf20Sopenharmony_ci *
5328c2ecf20Sopenharmony_ci * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug
5338c2ecf20Sopenharmony_ci * the queue with blk_unplug() some time later or set REQ_SYNC in
5348c2ecf20Sopenharmony_ci * io_req->bi_opf. If you fail to do one of these, the IO will be submitted to
5358c2ecf20Sopenharmony_ci * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c.
5368c2ecf20Sopenharmony_ci */
5378c2ecf20Sopenharmony_ciint dm_io(struct dm_io_request *io_req, unsigned num_regions,
5388c2ecf20Sopenharmony_ci	  struct dm_io_region *where, unsigned long *sync_error_bits)
5398c2ecf20Sopenharmony_ci{
5408c2ecf20Sopenharmony_ci	int r;
5418c2ecf20Sopenharmony_ci	struct dpages dp;
5428c2ecf20Sopenharmony_ci
5438c2ecf20Sopenharmony_ci	r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT);
5448c2ecf20Sopenharmony_ci	if (r)
5458c2ecf20Sopenharmony_ci		return r;
5468c2ecf20Sopenharmony_ci
5478c2ecf20Sopenharmony_ci	if (!io_req->notify.fn)
5488c2ecf20Sopenharmony_ci		return sync_io(io_req->client, num_regions, where,
5498c2ecf20Sopenharmony_ci			       io_req->bi_op, io_req->bi_op_flags, &dp,
5508c2ecf20Sopenharmony_ci			       sync_error_bits);
5518c2ecf20Sopenharmony_ci
5528c2ecf20Sopenharmony_ci	return async_io(io_req->client, num_regions, where, io_req->bi_op,
5538c2ecf20Sopenharmony_ci			io_req->bi_op_flags, &dp, io_req->notify.fn,
5548c2ecf20Sopenharmony_ci			io_req->notify.context);
5558c2ecf20Sopenharmony_ci}
5568c2ecf20Sopenharmony_ciEXPORT_SYMBOL(dm_io);
5578c2ecf20Sopenharmony_ci
5588c2ecf20Sopenharmony_ciint __init dm_io_init(void)
5598c2ecf20Sopenharmony_ci{
5608c2ecf20Sopenharmony_ci	_dm_io_cache = KMEM_CACHE(io, 0);
5618c2ecf20Sopenharmony_ci	if (!_dm_io_cache)
5628c2ecf20Sopenharmony_ci		return -ENOMEM;
5638c2ecf20Sopenharmony_ci
5648c2ecf20Sopenharmony_ci	return 0;
5658c2ecf20Sopenharmony_ci}
5668c2ecf20Sopenharmony_ci
5678c2ecf20Sopenharmony_civoid dm_io_exit(void)
5688c2ecf20Sopenharmony_ci{
5698c2ecf20Sopenharmony_ci	kmem_cache_destroy(_dm_io_cache);
5708c2ecf20Sopenharmony_ci	_dm_io_cache = NULL;
5718c2ecf20Sopenharmony_ci}
572