// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2003 Sistina Software
 * Copyright (C) 2006 Red Hat GmbH
 *
 * This file is released under the GPL.
 */
862306a36Sopenharmony_ci
962306a36Sopenharmony_ci#include "dm-core.h"
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci#include <linux/device-mapper.h>
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci#include <linux/bio.h>
1462306a36Sopenharmony_ci#include <linux/completion.h>
1562306a36Sopenharmony_ci#include <linux/mempool.h>
1662306a36Sopenharmony_ci#include <linux/module.h>
1762306a36Sopenharmony_ci#include <linux/sched.h>
1862306a36Sopenharmony_ci#include <linux/slab.h>
1962306a36Sopenharmony_ci#include <linux/dm-io.h>
2062306a36Sopenharmony_ci
/* NOTE(review): presumably the tag the DM logging macros prepend - confirm. */
#define DM_MSG_PREFIX "io"

/*
 * Upper bound on the number of regions in one request.  Errors are
 * reported as one bit per region in an unsigned long, and the same
 * value is used as the alignment of 'struct io'.
 */
#define DM_IO_MAX_REGIONS	BITS_PER_LONG
2462306a36Sopenharmony_ci
2562306a36Sopenharmony_cistruct dm_io_client {
2662306a36Sopenharmony_ci	mempool_t pool;
2762306a36Sopenharmony_ci	struct bio_set bios;
2862306a36Sopenharmony_ci};
2962306a36Sopenharmony_ci
3062306a36Sopenharmony_ci/*
3162306a36Sopenharmony_ci * Aligning 'struct io' reduces the number of bits required to store
3262306a36Sopenharmony_ci * its address.  Refer to store_io_and_region_in_bio() below.
3362306a36Sopenharmony_ci */
3462306a36Sopenharmony_cistruct io {
3562306a36Sopenharmony_ci	unsigned long error_bits;
3662306a36Sopenharmony_ci	atomic_t count;
3762306a36Sopenharmony_ci	struct dm_io_client *client;
3862306a36Sopenharmony_ci	io_notify_fn callback;
3962306a36Sopenharmony_ci	void *context;
4062306a36Sopenharmony_ci	void *vma_invalidate_address;
4162306a36Sopenharmony_ci	unsigned long vma_invalidate_size;
4262306a36Sopenharmony_ci} __aligned(DM_IO_MAX_REGIONS);
4362306a36Sopenharmony_ci
/*
 * Slab cache for 'struct io'; set up in dm_io_init(), torn down in
 * dm_io_exit() and used as the backing store of every client mempool.
 */
static struct kmem_cache *_dm_io_cache;
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_ci/*
4762306a36Sopenharmony_ci * Create a client with mempool and bioset.
4862306a36Sopenharmony_ci */
4962306a36Sopenharmony_cistruct dm_io_client *dm_io_client_create(void)
5062306a36Sopenharmony_ci{
5162306a36Sopenharmony_ci	struct dm_io_client *client;
5262306a36Sopenharmony_ci	unsigned int min_ios = dm_get_reserved_bio_based_ios();
5362306a36Sopenharmony_ci	int ret;
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_ci	client = kzalloc(sizeof(*client), GFP_KERNEL);
5662306a36Sopenharmony_ci	if (!client)
5762306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_ci	ret = mempool_init_slab_pool(&client->pool, min_ios, _dm_io_cache);
6062306a36Sopenharmony_ci	if (ret)
6162306a36Sopenharmony_ci		goto bad;
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_ci	ret = bioset_init(&client->bios, min_ios, 0, BIOSET_NEED_BVECS);
6462306a36Sopenharmony_ci	if (ret)
6562306a36Sopenharmony_ci		goto bad;
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci	return client;
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_cibad:
7062306a36Sopenharmony_ci	mempool_exit(&client->pool);
7162306a36Sopenharmony_ci	kfree(client);
7262306a36Sopenharmony_ci	return ERR_PTR(ret);
7362306a36Sopenharmony_ci}
7462306a36Sopenharmony_ciEXPORT_SYMBOL(dm_io_client_create);
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_civoid dm_io_client_destroy(struct dm_io_client *client)
7762306a36Sopenharmony_ci{
7862306a36Sopenharmony_ci	mempool_exit(&client->pool);
7962306a36Sopenharmony_ci	bioset_exit(&client->bios);
8062306a36Sopenharmony_ci	kfree(client);
8162306a36Sopenharmony_ci}
8262306a36Sopenharmony_ciEXPORT_SYMBOL(dm_io_client_destroy);
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci/*
8562306a36Sopenharmony_ci *-------------------------------------------------------------------
8662306a36Sopenharmony_ci * We need to keep track of which region a bio is doing io for.
8762306a36Sopenharmony_ci * To avoid a memory allocation to store just 5 or 6 bits, we
8862306a36Sopenharmony_ci * ensure the 'struct io' pointer is aligned so enough low bits are
8962306a36Sopenharmony_ci * always zero and then combine it with the region number directly in
9062306a36Sopenharmony_ci * bi_private.
9162306a36Sopenharmony_ci *-------------------------------------------------------------------
9262306a36Sopenharmony_ci */
9362306a36Sopenharmony_cistatic void store_io_and_region_in_bio(struct bio *bio, struct io *io,
9462306a36Sopenharmony_ci				       unsigned int region)
9562306a36Sopenharmony_ci{
9662306a36Sopenharmony_ci	if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) {
9762306a36Sopenharmony_ci		DMCRIT("Unaligned struct io pointer %p", io);
9862306a36Sopenharmony_ci		BUG();
9962306a36Sopenharmony_ci	}
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci	bio->bi_private = (void *)((unsigned long)io | region);
10262306a36Sopenharmony_ci}
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_cistatic void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
10562306a36Sopenharmony_ci				       unsigned int *region)
10662306a36Sopenharmony_ci{
10762306a36Sopenharmony_ci	unsigned long val = (unsigned long)bio->bi_private;
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_ci	*io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS);
11062306a36Sopenharmony_ci	*region = val & (DM_IO_MAX_REGIONS - 1);
11162306a36Sopenharmony_ci}
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci/*
11462306a36Sopenharmony_ci *--------------------------------------------------------------
11562306a36Sopenharmony_ci * We need an io object to keep track of the number of bios that
11662306a36Sopenharmony_ci * have been dispatched for a particular io.
11762306a36Sopenharmony_ci *--------------------------------------------------------------
11862306a36Sopenharmony_ci */
11962306a36Sopenharmony_cistatic void complete_io(struct io *io)
12062306a36Sopenharmony_ci{
12162306a36Sopenharmony_ci	unsigned long error_bits = io->error_bits;
12262306a36Sopenharmony_ci	io_notify_fn fn = io->callback;
12362306a36Sopenharmony_ci	void *context = io->context;
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	if (io->vma_invalidate_size)
12662306a36Sopenharmony_ci		invalidate_kernel_vmap_range(io->vma_invalidate_address,
12762306a36Sopenharmony_ci					     io->vma_invalidate_size);
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci	mempool_free(io, &io->client->pool);
13062306a36Sopenharmony_ci	fn(error_bits, context);
13162306a36Sopenharmony_ci}
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_cistatic void dec_count(struct io *io, unsigned int region, blk_status_t error)
13462306a36Sopenharmony_ci{
13562306a36Sopenharmony_ci	if (error)
13662306a36Sopenharmony_ci		set_bit(region, &io->error_bits);
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	if (atomic_dec_and_test(&io->count))
13962306a36Sopenharmony_ci		complete_io(io);
14062306a36Sopenharmony_ci}
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_cistatic void endio(struct bio *bio)
14362306a36Sopenharmony_ci{
14462306a36Sopenharmony_ci	struct io *io;
14562306a36Sopenharmony_ci	unsigned int region;
14662306a36Sopenharmony_ci	blk_status_t error;
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	if (bio->bi_status && bio_data_dir(bio) == READ)
14962306a36Sopenharmony_ci		zero_fill_bio(bio);
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	/*
15262306a36Sopenharmony_ci	 * The bio destructor in bio_put() may use the io object.
15362306a36Sopenharmony_ci	 */
15462306a36Sopenharmony_ci	retrieve_io_and_region_from_bio(bio, &io, &region);
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci	error = bio->bi_status;
15762306a36Sopenharmony_ci	bio_put(bio);
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci	dec_count(io, region, error);
16062306a36Sopenharmony_ci}
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci/*
16362306a36Sopenharmony_ci *--------------------------------------------------------------
16462306a36Sopenharmony_ci * These little objects provide an abstraction for getting a new
16562306a36Sopenharmony_ci * destination page for io.
16662306a36Sopenharmony_ci *--------------------------------------------------------------
16762306a36Sopenharmony_ci */
16862306a36Sopenharmony_cistruct dpages {
16962306a36Sopenharmony_ci	void (*get_page)(struct dpages *dp,
17062306a36Sopenharmony_ci			 struct page **p, unsigned long *len, unsigned int *offset);
17162306a36Sopenharmony_ci	void (*next_page)(struct dpages *dp);
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	union {
17462306a36Sopenharmony_ci		unsigned int context_u;
17562306a36Sopenharmony_ci		struct bvec_iter context_bi;
17662306a36Sopenharmony_ci	};
17762306a36Sopenharmony_ci	void *context_ptr;
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci	void *vma_invalidate_address;
18062306a36Sopenharmony_ci	unsigned long vma_invalidate_size;
18162306a36Sopenharmony_ci};
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci/*
18462306a36Sopenharmony_ci * Functions for getting the pages from a list.
18562306a36Sopenharmony_ci */
18662306a36Sopenharmony_cistatic void list_get_page(struct dpages *dp,
18762306a36Sopenharmony_ci		  struct page **p, unsigned long *len, unsigned int *offset)
18862306a36Sopenharmony_ci{
18962306a36Sopenharmony_ci	unsigned int o = dp->context_u;
19062306a36Sopenharmony_ci	struct page_list *pl = dp->context_ptr;
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci	*p = pl->page;
19362306a36Sopenharmony_ci	*len = PAGE_SIZE - o;
19462306a36Sopenharmony_ci	*offset = o;
19562306a36Sopenharmony_ci}
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_cistatic void list_next_page(struct dpages *dp)
19862306a36Sopenharmony_ci{
19962306a36Sopenharmony_ci	struct page_list *pl = dp->context_ptr;
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci	dp->context_ptr = pl->next;
20262306a36Sopenharmony_ci	dp->context_u = 0;
20362306a36Sopenharmony_ci}
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_cistatic void list_dp_init(struct dpages *dp, struct page_list *pl, unsigned int offset)
20662306a36Sopenharmony_ci{
20762306a36Sopenharmony_ci	dp->get_page = list_get_page;
20862306a36Sopenharmony_ci	dp->next_page = list_next_page;
20962306a36Sopenharmony_ci	dp->context_u = offset;
21062306a36Sopenharmony_ci	dp->context_ptr = pl;
21162306a36Sopenharmony_ci}
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci/*
21462306a36Sopenharmony_ci * Functions for getting the pages from a bvec.
21562306a36Sopenharmony_ci */
21662306a36Sopenharmony_cistatic void bio_get_page(struct dpages *dp, struct page **p,
21762306a36Sopenharmony_ci			 unsigned long *len, unsigned int *offset)
21862306a36Sopenharmony_ci{
21962306a36Sopenharmony_ci	struct bio_vec bvec = bvec_iter_bvec((struct bio_vec *)dp->context_ptr,
22062306a36Sopenharmony_ci					     dp->context_bi);
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci	*p = bvec.bv_page;
22362306a36Sopenharmony_ci	*len = bvec.bv_len;
22462306a36Sopenharmony_ci	*offset = bvec.bv_offset;
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ci	/* avoid figuring it out again in bio_next_page() */
22762306a36Sopenharmony_ci	dp->context_bi.bi_sector = (sector_t)bvec.bv_len;
22862306a36Sopenharmony_ci}
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_cistatic void bio_next_page(struct dpages *dp)
23162306a36Sopenharmony_ci{
23262306a36Sopenharmony_ci	unsigned int len = (unsigned int)dp->context_bi.bi_sector;
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci	bvec_iter_advance((struct bio_vec *)dp->context_ptr,
23562306a36Sopenharmony_ci			  &dp->context_bi, len);
23662306a36Sopenharmony_ci}
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_cistatic void bio_dp_init(struct dpages *dp, struct bio *bio)
23962306a36Sopenharmony_ci{
24062306a36Sopenharmony_ci	dp->get_page = bio_get_page;
24162306a36Sopenharmony_ci	dp->next_page = bio_next_page;
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_ci	/*
24462306a36Sopenharmony_ci	 * We just use bvec iterator to retrieve pages, so it is ok to
24562306a36Sopenharmony_ci	 * access the bvec table directly here
24662306a36Sopenharmony_ci	 */
24762306a36Sopenharmony_ci	dp->context_ptr = bio->bi_io_vec;
24862306a36Sopenharmony_ci	dp->context_bi = bio->bi_iter;
24962306a36Sopenharmony_ci}
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci/*
25262306a36Sopenharmony_ci * Functions for getting the pages from a VMA.
25362306a36Sopenharmony_ci */
25462306a36Sopenharmony_cistatic void vm_get_page(struct dpages *dp,
25562306a36Sopenharmony_ci		 struct page **p, unsigned long *len, unsigned int *offset)
25662306a36Sopenharmony_ci{
25762306a36Sopenharmony_ci	*p = vmalloc_to_page(dp->context_ptr);
25862306a36Sopenharmony_ci	*offset = dp->context_u;
25962306a36Sopenharmony_ci	*len = PAGE_SIZE - dp->context_u;
26062306a36Sopenharmony_ci}
26162306a36Sopenharmony_ci
26262306a36Sopenharmony_cistatic void vm_next_page(struct dpages *dp)
26362306a36Sopenharmony_ci{
26462306a36Sopenharmony_ci	dp->context_ptr += PAGE_SIZE - dp->context_u;
26562306a36Sopenharmony_ci	dp->context_u = 0;
26662306a36Sopenharmony_ci}
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_cistatic void vm_dp_init(struct dpages *dp, void *data)
26962306a36Sopenharmony_ci{
27062306a36Sopenharmony_ci	dp->get_page = vm_get_page;
27162306a36Sopenharmony_ci	dp->next_page = vm_next_page;
27262306a36Sopenharmony_ci	dp->context_u = offset_in_page(data);
27362306a36Sopenharmony_ci	dp->context_ptr = data;
27462306a36Sopenharmony_ci}
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_ci/*
27762306a36Sopenharmony_ci * Functions for getting the pages from kernel memory.
27862306a36Sopenharmony_ci */
27962306a36Sopenharmony_cistatic void km_get_page(struct dpages *dp, struct page **p, unsigned long *len,
28062306a36Sopenharmony_ci			unsigned int *offset)
28162306a36Sopenharmony_ci{
28262306a36Sopenharmony_ci	*p = virt_to_page(dp->context_ptr);
28362306a36Sopenharmony_ci	*offset = dp->context_u;
28462306a36Sopenharmony_ci	*len = PAGE_SIZE - dp->context_u;
28562306a36Sopenharmony_ci}
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_cistatic void km_next_page(struct dpages *dp)
28862306a36Sopenharmony_ci{
28962306a36Sopenharmony_ci	dp->context_ptr += PAGE_SIZE - dp->context_u;
29062306a36Sopenharmony_ci	dp->context_u = 0;
29162306a36Sopenharmony_ci}
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_cistatic void km_dp_init(struct dpages *dp, void *data)
29462306a36Sopenharmony_ci{
29562306a36Sopenharmony_ci	dp->get_page = km_get_page;
29662306a36Sopenharmony_ci	dp->next_page = km_next_page;
29762306a36Sopenharmony_ci	dp->context_u = offset_in_page(data);
29862306a36Sopenharmony_ci	dp->context_ptr = data;
29962306a36Sopenharmony_ci}
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci/*
30262306a36Sopenharmony_ci *---------------------------------------------------------------
30362306a36Sopenharmony_ci * IO routines that accept a list of pages.
30462306a36Sopenharmony_ci *---------------------------------------------------------------
30562306a36Sopenharmony_ci */
30662306a36Sopenharmony_cistatic void do_region(const blk_opf_t opf, unsigned int region,
30762306a36Sopenharmony_ci		      struct dm_io_region *where, struct dpages *dp,
30862306a36Sopenharmony_ci		      struct io *io, unsigned short ioprio)
30962306a36Sopenharmony_ci{
31062306a36Sopenharmony_ci	struct bio *bio;
31162306a36Sopenharmony_ci	struct page *page;
31262306a36Sopenharmony_ci	unsigned long len;
31362306a36Sopenharmony_ci	unsigned int offset;
31462306a36Sopenharmony_ci	unsigned int num_bvecs;
31562306a36Sopenharmony_ci	sector_t remaining = where->count;
31662306a36Sopenharmony_ci	struct request_queue *q = bdev_get_queue(where->bdev);
31762306a36Sopenharmony_ci	sector_t num_sectors;
31862306a36Sopenharmony_ci	unsigned int special_cmd_max_sectors;
31962306a36Sopenharmony_ci	const enum req_op op = opf & REQ_OP_MASK;
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci	/*
32262306a36Sopenharmony_ci	 * Reject unsupported discard and write same requests.
32362306a36Sopenharmony_ci	 */
32462306a36Sopenharmony_ci	if (op == REQ_OP_DISCARD)
32562306a36Sopenharmony_ci		special_cmd_max_sectors = bdev_max_discard_sectors(where->bdev);
32662306a36Sopenharmony_ci	else if (op == REQ_OP_WRITE_ZEROES)
32762306a36Sopenharmony_ci		special_cmd_max_sectors = q->limits.max_write_zeroes_sectors;
32862306a36Sopenharmony_ci	if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) &&
32962306a36Sopenharmony_ci	    special_cmd_max_sectors == 0) {
33062306a36Sopenharmony_ci		atomic_inc(&io->count);
33162306a36Sopenharmony_ci		dec_count(io, region, BLK_STS_NOTSUPP);
33262306a36Sopenharmony_ci		return;
33362306a36Sopenharmony_ci	}
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci	/*
33662306a36Sopenharmony_ci	 * where->count may be zero if op holds a flush and we need to
33762306a36Sopenharmony_ci	 * send a zero-sized flush.
33862306a36Sopenharmony_ci	 */
33962306a36Sopenharmony_ci	do {
34062306a36Sopenharmony_ci		/*
34162306a36Sopenharmony_ci		 * Allocate a suitably sized-bio.
34262306a36Sopenharmony_ci		 */
34362306a36Sopenharmony_ci		switch (op) {
34462306a36Sopenharmony_ci		case REQ_OP_DISCARD:
34562306a36Sopenharmony_ci		case REQ_OP_WRITE_ZEROES:
34662306a36Sopenharmony_ci			num_bvecs = 0;
34762306a36Sopenharmony_ci			break;
34862306a36Sopenharmony_ci		default:
34962306a36Sopenharmony_ci			num_bvecs = bio_max_segs(dm_sector_div_up(remaining,
35062306a36Sopenharmony_ci						(PAGE_SIZE >> SECTOR_SHIFT)));
35162306a36Sopenharmony_ci		}
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_ci		bio = bio_alloc_bioset(where->bdev, num_bvecs, opf, GFP_NOIO,
35462306a36Sopenharmony_ci				       &io->client->bios);
35562306a36Sopenharmony_ci		bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
35662306a36Sopenharmony_ci		bio->bi_end_io = endio;
35762306a36Sopenharmony_ci		bio->bi_ioprio = ioprio;
35862306a36Sopenharmony_ci		store_io_and_region_in_bio(bio, io, region);
35962306a36Sopenharmony_ci
36062306a36Sopenharmony_ci		if (op == REQ_OP_DISCARD || op == REQ_OP_WRITE_ZEROES) {
36162306a36Sopenharmony_ci			num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining);
36262306a36Sopenharmony_ci			bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
36362306a36Sopenharmony_ci			remaining -= num_sectors;
36462306a36Sopenharmony_ci		} else {
36562306a36Sopenharmony_ci			while (remaining) {
36662306a36Sopenharmony_ci				/*
36762306a36Sopenharmony_ci				 * Try and add as many pages as possible.
36862306a36Sopenharmony_ci				 */
36962306a36Sopenharmony_ci				dp->get_page(dp, &page, &len, &offset);
37062306a36Sopenharmony_ci				len = min(len, to_bytes(remaining));
37162306a36Sopenharmony_ci				if (!bio_add_page(bio, page, len, offset))
37262306a36Sopenharmony_ci					break;
37362306a36Sopenharmony_ci
37462306a36Sopenharmony_ci				offset = 0;
37562306a36Sopenharmony_ci				remaining -= to_sector(len);
37662306a36Sopenharmony_ci				dp->next_page(dp);
37762306a36Sopenharmony_ci			}
37862306a36Sopenharmony_ci		}
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_ci		atomic_inc(&io->count);
38162306a36Sopenharmony_ci		submit_bio(bio);
38262306a36Sopenharmony_ci	} while (remaining);
38362306a36Sopenharmony_ci}
38462306a36Sopenharmony_ci
38562306a36Sopenharmony_cistatic void dispatch_io(blk_opf_t opf, unsigned int num_regions,
38662306a36Sopenharmony_ci			struct dm_io_region *where, struct dpages *dp,
38762306a36Sopenharmony_ci			struct io *io, int sync, unsigned short ioprio)
38862306a36Sopenharmony_ci{
38962306a36Sopenharmony_ci	int i;
39062306a36Sopenharmony_ci	struct dpages old_pages = *dp;
39162306a36Sopenharmony_ci
39262306a36Sopenharmony_ci	BUG_ON(num_regions > DM_IO_MAX_REGIONS);
39362306a36Sopenharmony_ci
39462306a36Sopenharmony_ci	if (sync)
39562306a36Sopenharmony_ci		opf |= REQ_SYNC;
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_ci	/*
39862306a36Sopenharmony_ci	 * For multiple regions we need to be careful to rewind
39962306a36Sopenharmony_ci	 * the dp object for each call to do_region.
40062306a36Sopenharmony_ci	 */
40162306a36Sopenharmony_ci	for (i = 0; i < num_regions; i++) {
40262306a36Sopenharmony_ci		*dp = old_pages;
40362306a36Sopenharmony_ci		if (where[i].count || (opf & REQ_PREFLUSH))
40462306a36Sopenharmony_ci			do_region(opf, i, where + i, dp, io, ioprio);
40562306a36Sopenharmony_ci	}
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_ci	/*
40862306a36Sopenharmony_ci	 * Drop the extra reference that we were holding to avoid
40962306a36Sopenharmony_ci	 * the io being completed too early.
41062306a36Sopenharmony_ci	 */
41162306a36Sopenharmony_ci	dec_count(io, 0, 0);
41262306a36Sopenharmony_ci}
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_cistruct sync_io {
41562306a36Sopenharmony_ci	unsigned long error_bits;
41662306a36Sopenharmony_ci	struct completion wait;
41762306a36Sopenharmony_ci};
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_cistatic void sync_io_complete(unsigned long error, void *context)
42062306a36Sopenharmony_ci{
42162306a36Sopenharmony_ci	struct sync_io *sio = context;
42262306a36Sopenharmony_ci
42362306a36Sopenharmony_ci	sio->error_bits = error;
42462306a36Sopenharmony_ci	complete(&sio->wait);
42562306a36Sopenharmony_ci}
42662306a36Sopenharmony_ci
42762306a36Sopenharmony_cistatic int sync_io(struct dm_io_client *client, unsigned int num_regions,
42862306a36Sopenharmony_ci		   struct dm_io_region *where, blk_opf_t opf, struct dpages *dp,
42962306a36Sopenharmony_ci		   unsigned long *error_bits, unsigned short ioprio)
43062306a36Sopenharmony_ci{
43162306a36Sopenharmony_ci	struct io *io;
43262306a36Sopenharmony_ci	struct sync_io sio;
43362306a36Sopenharmony_ci
43462306a36Sopenharmony_ci	if (num_regions > 1 && !op_is_write(opf)) {
43562306a36Sopenharmony_ci		WARN_ON(1);
43662306a36Sopenharmony_ci		return -EIO;
43762306a36Sopenharmony_ci	}
43862306a36Sopenharmony_ci
43962306a36Sopenharmony_ci	init_completion(&sio.wait);
44062306a36Sopenharmony_ci
44162306a36Sopenharmony_ci	io = mempool_alloc(&client->pool, GFP_NOIO);
44262306a36Sopenharmony_ci	io->error_bits = 0;
44362306a36Sopenharmony_ci	atomic_set(&io->count, 1); /* see dispatch_io() */
44462306a36Sopenharmony_ci	io->client = client;
44562306a36Sopenharmony_ci	io->callback = sync_io_complete;
44662306a36Sopenharmony_ci	io->context = &sio;
44762306a36Sopenharmony_ci
44862306a36Sopenharmony_ci	io->vma_invalidate_address = dp->vma_invalidate_address;
44962306a36Sopenharmony_ci	io->vma_invalidate_size = dp->vma_invalidate_size;
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_ci	dispatch_io(opf, num_regions, where, dp, io, 1, ioprio);
45262306a36Sopenharmony_ci
45362306a36Sopenharmony_ci	wait_for_completion_io(&sio.wait);
45462306a36Sopenharmony_ci
45562306a36Sopenharmony_ci	if (error_bits)
45662306a36Sopenharmony_ci		*error_bits = sio.error_bits;
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_ci	return sio.error_bits ? -EIO : 0;
45962306a36Sopenharmony_ci}
46062306a36Sopenharmony_ci
46162306a36Sopenharmony_cistatic int async_io(struct dm_io_client *client, unsigned int num_regions,
46262306a36Sopenharmony_ci		    struct dm_io_region *where, blk_opf_t opf,
46362306a36Sopenharmony_ci		    struct dpages *dp, io_notify_fn fn, void *context,
46462306a36Sopenharmony_ci		    unsigned short ioprio)
46562306a36Sopenharmony_ci{
46662306a36Sopenharmony_ci	struct io *io;
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci	if (num_regions > 1 && !op_is_write(opf)) {
46962306a36Sopenharmony_ci		WARN_ON(1);
47062306a36Sopenharmony_ci		fn(1, context);
47162306a36Sopenharmony_ci		return -EIO;
47262306a36Sopenharmony_ci	}
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_ci	io = mempool_alloc(&client->pool, GFP_NOIO);
47562306a36Sopenharmony_ci	io->error_bits = 0;
47662306a36Sopenharmony_ci	atomic_set(&io->count, 1); /* see dispatch_io() */
47762306a36Sopenharmony_ci	io->client = client;
47862306a36Sopenharmony_ci	io->callback = fn;
47962306a36Sopenharmony_ci	io->context = context;
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ci	io->vma_invalidate_address = dp->vma_invalidate_address;
48262306a36Sopenharmony_ci	io->vma_invalidate_size = dp->vma_invalidate_size;
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_ci	dispatch_io(opf, num_regions, where, dp, io, 0, ioprio);
48562306a36Sopenharmony_ci	return 0;
48662306a36Sopenharmony_ci}
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_cistatic int dp_init(struct dm_io_request *io_req, struct dpages *dp,
48962306a36Sopenharmony_ci		   unsigned long size)
49062306a36Sopenharmony_ci{
49162306a36Sopenharmony_ci	/* Set up dpages based on memory type */
49262306a36Sopenharmony_ci
49362306a36Sopenharmony_ci	dp->vma_invalidate_address = NULL;
49462306a36Sopenharmony_ci	dp->vma_invalidate_size = 0;
49562306a36Sopenharmony_ci
49662306a36Sopenharmony_ci	switch (io_req->mem.type) {
49762306a36Sopenharmony_ci	case DM_IO_PAGE_LIST:
49862306a36Sopenharmony_ci		list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
49962306a36Sopenharmony_ci		break;
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci	case DM_IO_BIO:
50262306a36Sopenharmony_ci		bio_dp_init(dp, io_req->mem.ptr.bio);
50362306a36Sopenharmony_ci		break;
50462306a36Sopenharmony_ci
50562306a36Sopenharmony_ci	case DM_IO_VMA:
50662306a36Sopenharmony_ci		flush_kernel_vmap_range(io_req->mem.ptr.vma, size);
50762306a36Sopenharmony_ci		if ((io_req->bi_opf & REQ_OP_MASK) == REQ_OP_READ) {
50862306a36Sopenharmony_ci			dp->vma_invalidate_address = io_req->mem.ptr.vma;
50962306a36Sopenharmony_ci			dp->vma_invalidate_size = size;
51062306a36Sopenharmony_ci		}
51162306a36Sopenharmony_ci		vm_dp_init(dp, io_req->mem.ptr.vma);
51262306a36Sopenharmony_ci		break;
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_ci	case DM_IO_KMEM:
51562306a36Sopenharmony_ci		km_dp_init(dp, io_req->mem.ptr.addr);
51662306a36Sopenharmony_ci		break;
51762306a36Sopenharmony_ci
51862306a36Sopenharmony_ci	default:
51962306a36Sopenharmony_ci		return -EINVAL;
52062306a36Sopenharmony_ci	}
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci	return 0;
52362306a36Sopenharmony_ci}
52462306a36Sopenharmony_ci
52562306a36Sopenharmony_ciint dm_io(struct dm_io_request *io_req, unsigned int num_regions,
52662306a36Sopenharmony_ci	  struct dm_io_region *where, unsigned long *sync_error_bits,
52762306a36Sopenharmony_ci	  unsigned short ioprio)
52862306a36Sopenharmony_ci{
52962306a36Sopenharmony_ci	int r;
53062306a36Sopenharmony_ci	struct dpages dp;
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_ci	r = dp_init(io_req, &dp, (unsigned long)where->count << SECTOR_SHIFT);
53362306a36Sopenharmony_ci	if (r)
53462306a36Sopenharmony_ci		return r;
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_ci	if (!io_req->notify.fn)
53762306a36Sopenharmony_ci		return sync_io(io_req->client, num_regions, where,
53862306a36Sopenharmony_ci			       io_req->bi_opf, &dp, sync_error_bits, ioprio);
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci	return async_io(io_req->client, num_regions, where,
54162306a36Sopenharmony_ci			io_req->bi_opf, &dp, io_req->notify.fn,
54262306a36Sopenharmony_ci			io_req->notify.context, ioprio);
54362306a36Sopenharmony_ci}
54462306a36Sopenharmony_ciEXPORT_SYMBOL(dm_io);
54562306a36Sopenharmony_ci
54662306a36Sopenharmony_ciint __init dm_io_init(void)
54762306a36Sopenharmony_ci{
54862306a36Sopenharmony_ci	_dm_io_cache = KMEM_CACHE(io, 0);
54962306a36Sopenharmony_ci	if (!_dm_io_cache)
55062306a36Sopenharmony_ci		return -ENOMEM;
55162306a36Sopenharmony_ci
55262306a36Sopenharmony_ci	return 0;
55362306a36Sopenharmony_ci}
55462306a36Sopenharmony_ci
55562306a36Sopenharmony_civoid dm_io_exit(void)
55662306a36Sopenharmony_ci{
55762306a36Sopenharmony_ci	kmem_cache_destroy(_dm_io_cache);
55862306a36Sopenharmony_ci	_dm_io_cache = NULL;
55962306a36Sopenharmony_ci}
560