162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
462306a36Sopenharmony_ci * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci * This file is released under the GPL.
762306a36Sopenharmony_ci */
862306a36Sopenharmony_ci
962306a36Sopenharmony_ci#include "dm-core.h"
1062306a36Sopenharmony_ci#include "dm-rq.h"
1162306a36Sopenharmony_ci#include "dm-uevent.h"
1262306a36Sopenharmony_ci#include "dm-ima.h"
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci#include <linux/init.h>
1562306a36Sopenharmony_ci#include <linux/module.h>
1662306a36Sopenharmony_ci#include <linux/mutex.h>
1762306a36Sopenharmony_ci#include <linux/sched/mm.h>
1862306a36Sopenharmony_ci#include <linux/sched/signal.h>
1962306a36Sopenharmony_ci#include <linux/blkpg.h>
2062306a36Sopenharmony_ci#include <linux/bio.h>
2162306a36Sopenharmony_ci#include <linux/mempool.h>
2262306a36Sopenharmony_ci#include <linux/dax.h>
2362306a36Sopenharmony_ci#include <linux/slab.h>
2462306a36Sopenharmony_ci#include <linux/idr.h>
2562306a36Sopenharmony_ci#include <linux/uio.h>
2662306a36Sopenharmony_ci#include <linux/hdreg.h>
2762306a36Sopenharmony_ci#include <linux/delay.h>
2862306a36Sopenharmony_ci#include <linux/wait.h>
2962306a36Sopenharmony_ci#include <linux/pr.h>
3062306a36Sopenharmony_ci#include <linux/refcount.h>
3162306a36Sopenharmony_ci#include <linux/part_stat.h>
3262306a36Sopenharmony_ci#include <linux/blk-crypto.h>
3362306a36Sopenharmony_ci#include <linux/blk-crypto-profile.h>
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci#define DM_MSG_PREFIX "core"
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_ci/*
3862306a36Sopenharmony_ci * Cookies are numeric values sent with CHANGE and REMOVE
3962306a36Sopenharmony_ci * uevents while resuming, removing or renaming the device.
4062306a36Sopenharmony_ci */
4162306a36Sopenharmony_ci#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
4262306a36Sopenharmony_ci#define DM_COOKIE_LENGTH 24
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci/*
4562306a36Sopenharmony_ci * For REQ_POLLED fs bio, this flag is set if we link mapped underlying
4662306a36Sopenharmony_ci * dm_io into one list, and reuse bio->bi_private as the list head. Before
4762306a36Sopenharmony_ci * ending this fs bio, we will recover its ->bi_private.
4862306a36Sopenharmony_ci */
4962306a36Sopenharmony_ci#define REQ_DM_POLL_LIST	REQ_DRV
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_cistatic const char *_name = DM_NAME;
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_cistatic unsigned int major;
5462306a36Sopenharmony_cistatic unsigned int _major;
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_cistatic DEFINE_IDR(_minor_idr);
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_cistatic DEFINE_SPINLOCK(_minor_lock);
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_cistatic void do_deferred_remove(struct work_struct *w);
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_cistatic DECLARE_WORK(deferred_remove_work, do_deferred_remove);
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_cistatic struct workqueue_struct *deferred_remove_workqueue;
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ciatomic_t dm_global_event_nr = ATOMIC_INIT(0);
6762306a36Sopenharmony_ciDECLARE_WAIT_QUEUE_HEAD(dm_global_eventq);
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_civoid dm_issue_global_event(void)
7062306a36Sopenharmony_ci{
7162306a36Sopenharmony_ci	atomic_inc(&dm_global_event_nr);
7262306a36Sopenharmony_ci	wake_up(&dm_global_eventq);
7362306a36Sopenharmony_ci}
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ciDEFINE_STATIC_KEY_FALSE(stats_enabled);
7662306a36Sopenharmony_ciDEFINE_STATIC_KEY_FALSE(swap_bios_enabled);
7762306a36Sopenharmony_ciDEFINE_STATIC_KEY_FALSE(zoned_enabled);
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci/*
8062306a36Sopenharmony_ci * One of these is allocated (on-stack) per original bio.
8162306a36Sopenharmony_ci */
8262306a36Sopenharmony_cistruct clone_info {
8362306a36Sopenharmony_ci	struct dm_table *map;
8462306a36Sopenharmony_ci	struct bio *bio;
8562306a36Sopenharmony_ci	struct dm_io *io;
8662306a36Sopenharmony_ci	sector_t sector;
8762306a36Sopenharmony_ci	unsigned int sector_count;
8862306a36Sopenharmony_ci	bool is_abnormal_io:1;
8962306a36Sopenharmony_ci	bool submit_as_polled:1;
9062306a36Sopenharmony_ci};
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_cistatic inline struct dm_target_io *clone_to_tio(struct bio *clone)
9362306a36Sopenharmony_ci{
9462306a36Sopenharmony_ci	return container_of(clone, struct dm_target_io, clone);
9562306a36Sopenharmony_ci}
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_civoid *dm_per_bio_data(struct bio *bio, size_t data_size)
9862306a36Sopenharmony_ci{
9962306a36Sopenharmony_ci	if (!dm_tio_flagged(clone_to_tio(bio), DM_TIO_INSIDE_DM_IO))
10062306a36Sopenharmony_ci		return (char *)bio - DM_TARGET_IO_BIO_OFFSET - data_size;
10162306a36Sopenharmony_ci	return (char *)bio - DM_IO_BIO_OFFSET - data_size;
10262306a36Sopenharmony_ci}
10362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_per_bio_data);
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_cistruct bio *dm_bio_from_per_bio_data(void *data, size_t data_size)
10662306a36Sopenharmony_ci{
10762306a36Sopenharmony_ci	struct dm_io *io = (struct dm_io *)((char *)data + data_size);
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_ci	if (io->magic == DM_IO_MAGIC)
11062306a36Sopenharmony_ci		return (struct bio *)((char *)io + DM_IO_BIO_OFFSET);
11162306a36Sopenharmony_ci	BUG_ON(io->magic != DM_TIO_MAGIC);
11262306a36Sopenharmony_ci	return (struct bio *)((char *)io + DM_TARGET_IO_BIO_OFFSET);
11362306a36Sopenharmony_ci}
11462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_bio_from_per_bio_data);
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ciunsigned int dm_bio_get_target_bio_nr(const struct bio *bio)
11762306a36Sopenharmony_ci{
11862306a36Sopenharmony_ci	return container_of(bio, struct dm_target_io, clone)->target_bio_nr;
11962306a36Sopenharmony_ci}
12062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_bio_get_target_bio_nr);
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci#define MINOR_ALLOCED ((void *)-1)
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ci#define DM_NUMA_NODE NUMA_NO_NODE
12562306a36Sopenharmony_cistatic int dm_numa_node = DM_NUMA_NODE;
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci#define DEFAULT_SWAP_BIOS	(8 * 1048576 / PAGE_SIZE)
12862306a36Sopenharmony_cistatic int swap_bios = DEFAULT_SWAP_BIOS;
12962306a36Sopenharmony_cistatic int get_swap_bios(void)
13062306a36Sopenharmony_ci{
13162306a36Sopenharmony_ci	int latch = READ_ONCE(swap_bios);
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci	if (unlikely(latch <= 0))
13462306a36Sopenharmony_ci		latch = DEFAULT_SWAP_BIOS;
13562306a36Sopenharmony_ci	return latch;
13662306a36Sopenharmony_ci}
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_cistruct table_device {
13962306a36Sopenharmony_ci	struct list_head list;
14062306a36Sopenharmony_ci	refcount_t count;
14162306a36Sopenharmony_ci	struct dm_dev dm_dev;
14262306a36Sopenharmony_ci};
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci/*
14562306a36Sopenharmony_ci * Bio-based DM's mempools' reserved IOs set by the user.
14662306a36Sopenharmony_ci */
14762306a36Sopenharmony_ci#define RESERVED_BIO_BASED_IOS		16
14862306a36Sopenharmony_cistatic unsigned int reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_cistatic int __dm_get_module_param_int(int *module_param, int min, int max)
15162306a36Sopenharmony_ci{
15262306a36Sopenharmony_ci	int param = READ_ONCE(*module_param);
15362306a36Sopenharmony_ci	int modified_param = 0;
15462306a36Sopenharmony_ci	bool modified = true;
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci	if (param < min)
15762306a36Sopenharmony_ci		modified_param = min;
15862306a36Sopenharmony_ci	else if (param > max)
15962306a36Sopenharmony_ci		modified_param = max;
16062306a36Sopenharmony_ci	else
16162306a36Sopenharmony_ci		modified = false;
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci	if (modified) {
16462306a36Sopenharmony_ci		(void)cmpxchg(module_param, param, modified_param);
16562306a36Sopenharmony_ci		param = modified_param;
16662306a36Sopenharmony_ci	}
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	return param;
16962306a36Sopenharmony_ci}
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ciunsigned int __dm_get_module_param(unsigned int *module_param, unsigned int def, unsigned int max)
17262306a36Sopenharmony_ci{
17362306a36Sopenharmony_ci	unsigned int param = READ_ONCE(*module_param);
17462306a36Sopenharmony_ci	unsigned int modified_param = 0;
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	if (!param)
17762306a36Sopenharmony_ci		modified_param = def;
17862306a36Sopenharmony_ci	else if (param > max)
17962306a36Sopenharmony_ci		modified_param = max;
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	if (modified_param) {
18262306a36Sopenharmony_ci		(void)cmpxchg(module_param, param, modified_param);
18362306a36Sopenharmony_ci		param = modified_param;
18462306a36Sopenharmony_ci	}
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci	return param;
18762306a36Sopenharmony_ci}
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ciunsigned int dm_get_reserved_bio_based_ios(void)
19062306a36Sopenharmony_ci{
19162306a36Sopenharmony_ci	return __dm_get_module_param(&reserved_bio_based_ios,
19262306a36Sopenharmony_ci				     RESERVED_BIO_BASED_IOS, DM_RESERVED_MAX_IOS);
19362306a36Sopenharmony_ci}
19462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_cistatic unsigned int dm_get_numa_node(void)
19762306a36Sopenharmony_ci{
19862306a36Sopenharmony_ci	return __dm_get_module_param_int(&dm_numa_node,
19962306a36Sopenharmony_ci					 DM_NUMA_NODE, num_online_nodes() - 1);
20062306a36Sopenharmony_ci}
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_cistatic int __init local_init(void)
20362306a36Sopenharmony_ci{
20462306a36Sopenharmony_ci	int r;
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_ci	r = dm_uevent_init();
20762306a36Sopenharmony_ci	if (r)
20862306a36Sopenharmony_ci		return r;
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_ci	deferred_remove_workqueue = alloc_ordered_workqueue("kdmremove", 0);
21162306a36Sopenharmony_ci	if (!deferred_remove_workqueue) {
21262306a36Sopenharmony_ci		r = -ENOMEM;
21362306a36Sopenharmony_ci		goto out_uevent_exit;
21462306a36Sopenharmony_ci	}
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	_major = major;
21762306a36Sopenharmony_ci	r = register_blkdev(_major, _name);
21862306a36Sopenharmony_ci	if (r < 0)
21962306a36Sopenharmony_ci		goto out_free_workqueue;
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci	if (!_major)
22262306a36Sopenharmony_ci		_major = r;
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ci	return 0;
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ciout_free_workqueue:
22762306a36Sopenharmony_ci	destroy_workqueue(deferred_remove_workqueue);
22862306a36Sopenharmony_ciout_uevent_exit:
22962306a36Sopenharmony_ci	dm_uevent_exit();
23062306a36Sopenharmony_ci
23162306a36Sopenharmony_ci	return r;
23262306a36Sopenharmony_ci}
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_cistatic void local_exit(void)
23562306a36Sopenharmony_ci{
23662306a36Sopenharmony_ci	destroy_workqueue(deferred_remove_workqueue);
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_ci	unregister_blkdev(_major, _name);
23962306a36Sopenharmony_ci	dm_uevent_exit();
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci	_major = 0;
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_ci	DMINFO("cleaned up");
24462306a36Sopenharmony_ci}
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_cistatic int (*_inits[])(void) __initdata = {
24762306a36Sopenharmony_ci	local_init,
24862306a36Sopenharmony_ci	dm_target_init,
24962306a36Sopenharmony_ci	dm_linear_init,
25062306a36Sopenharmony_ci	dm_stripe_init,
25162306a36Sopenharmony_ci	dm_io_init,
25262306a36Sopenharmony_ci	dm_kcopyd_init,
25362306a36Sopenharmony_ci	dm_interface_init,
25462306a36Sopenharmony_ci	dm_statistics_init,
25562306a36Sopenharmony_ci};
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_cistatic void (*_exits[])(void) = {
25862306a36Sopenharmony_ci	local_exit,
25962306a36Sopenharmony_ci	dm_target_exit,
26062306a36Sopenharmony_ci	dm_linear_exit,
26162306a36Sopenharmony_ci	dm_stripe_exit,
26262306a36Sopenharmony_ci	dm_io_exit,
26362306a36Sopenharmony_ci	dm_kcopyd_exit,
26462306a36Sopenharmony_ci	dm_interface_exit,
26562306a36Sopenharmony_ci	dm_statistics_exit,
26662306a36Sopenharmony_ci};
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_cistatic int __init dm_init(void)
26962306a36Sopenharmony_ci{
27062306a36Sopenharmony_ci	const int count = ARRAY_SIZE(_inits);
27162306a36Sopenharmony_ci	int r, i;
27262306a36Sopenharmony_ci
27362306a36Sopenharmony_ci#if (IS_ENABLED(CONFIG_IMA) && !IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE))
27462306a36Sopenharmony_ci	DMWARN("CONFIG_IMA_DISABLE_HTABLE is disabled."
27562306a36Sopenharmony_ci	       " Duplicate IMA measurements will not be recorded in the IMA log.");
27662306a36Sopenharmony_ci#endif
27762306a36Sopenharmony_ci
27862306a36Sopenharmony_ci	for (i = 0; i < count; i++) {
27962306a36Sopenharmony_ci		r = _inits[i]();
28062306a36Sopenharmony_ci		if (r)
28162306a36Sopenharmony_ci			goto bad;
28262306a36Sopenharmony_ci	}
28362306a36Sopenharmony_ci
28462306a36Sopenharmony_ci	return 0;
28562306a36Sopenharmony_cibad:
28662306a36Sopenharmony_ci	while (i--)
28762306a36Sopenharmony_ci		_exits[i]();
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci	return r;
29062306a36Sopenharmony_ci}
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_cistatic void __exit dm_exit(void)
29362306a36Sopenharmony_ci{
29462306a36Sopenharmony_ci	int i = ARRAY_SIZE(_exits);
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci	while (i--)
29762306a36Sopenharmony_ci		_exits[i]();
29862306a36Sopenharmony_ci
29962306a36Sopenharmony_ci	/*
30062306a36Sopenharmony_ci	 * Should be empty by this point.
30162306a36Sopenharmony_ci	 */
30262306a36Sopenharmony_ci	idr_destroy(&_minor_idr);
30362306a36Sopenharmony_ci}
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_ci/*
30662306a36Sopenharmony_ci * Block device functions
30762306a36Sopenharmony_ci */
30862306a36Sopenharmony_ciint dm_deleting_md(struct mapped_device *md)
30962306a36Sopenharmony_ci{
31062306a36Sopenharmony_ci	return test_bit(DMF_DELETING, &md->flags);
31162306a36Sopenharmony_ci}
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_cistatic int dm_blk_open(struct gendisk *disk, blk_mode_t mode)
31462306a36Sopenharmony_ci{
31562306a36Sopenharmony_ci	struct mapped_device *md;
31662306a36Sopenharmony_ci
31762306a36Sopenharmony_ci	spin_lock(&_minor_lock);
31862306a36Sopenharmony_ci
31962306a36Sopenharmony_ci	md = disk->private_data;
32062306a36Sopenharmony_ci	if (!md)
32162306a36Sopenharmony_ci		goto out;
32262306a36Sopenharmony_ci
32362306a36Sopenharmony_ci	if (test_bit(DMF_FREEING, &md->flags) ||
32462306a36Sopenharmony_ci	    dm_deleting_md(md)) {
32562306a36Sopenharmony_ci		md = NULL;
32662306a36Sopenharmony_ci		goto out;
32762306a36Sopenharmony_ci	}
32862306a36Sopenharmony_ci
32962306a36Sopenharmony_ci	dm_get(md);
33062306a36Sopenharmony_ci	atomic_inc(&md->open_count);
33162306a36Sopenharmony_ciout:
33262306a36Sopenharmony_ci	spin_unlock(&_minor_lock);
33362306a36Sopenharmony_ci
33462306a36Sopenharmony_ci	return md ? 0 : -ENXIO;
33562306a36Sopenharmony_ci}
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_cistatic void dm_blk_close(struct gendisk *disk)
33862306a36Sopenharmony_ci{
33962306a36Sopenharmony_ci	struct mapped_device *md;
34062306a36Sopenharmony_ci
34162306a36Sopenharmony_ci	spin_lock(&_minor_lock);
34262306a36Sopenharmony_ci
34362306a36Sopenharmony_ci	md = disk->private_data;
34462306a36Sopenharmony_ci	if (WARN_ON(!md))
34562306a36Sopenharmony_ci		goto out;
34662306a36Sopenharmony_ci
34762306a36Sopenharmony_ci	if (atomic_dec_and_test(&md->open_count) &&
34862306a36Sopenharmony_ci	    (test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
34962306a36Sopenharmony_ci		queue_work(deferred_remove_workqueue, &deferred_remove_work);
35062306a36Sopenharmony_ci
35162306a36Sopenharmony_ci	dm_put(md);
35262306a36Sopenharmony_ciout:
35362306a36Sopenharmony_ci	spin_unlock(&_minor_lock);
35462306a36Sopenharmony_ci}
35562306a36Sopenharmony_ci
35662306a36Sopenharmony_ciint dm_open_count(struct mapped_device *md)
35762306a36Sopenharmony_ci{
35862306a36Sopenharmony_ci	return atomic_read(&md->open_count);
35962306a36Sopenharmony_ci}
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_ci/*
36262306a36Sopenharmony_ci * Guarantees nothing is using the device before it's deleted.
36362306a36Sopenharmony_ci */
36462306a36Sopenharmony_ciint dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only_deferred)
36562306a36Sopenharmony_ci{
36662306a36Sopenharmony_ci	int r = 0;
36762306a36Sopenharmony_ci
36862306a36Sopenharmony_ci	spin_lock(&_minor_lock);
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci	if (dm_open_count(md)) {
37162306a36Sopenharmony_ci		r = -EBUSY;
37262306a36Sopenharmony_ci		if (mark_deferred)
37362306a36Sopenharmony_ci			set_bit(DMF_DEFERRED_REMOVE, &md->flags);
37462306a36Sopenharmony_ci	} else if (only_deferred && !test_bit(DMF_DEFERRED_REMOVE, &md->flags))
37562306a36Sopenharmony_ci		r = -EEXIST;
37662306a36Sopenharmony_ci	else
37762306a36Sopenharmony_ci		set_bit(DMF_DELETING, &md->flags);
37862306a36Sopenharmony_ci
37962306a36Sopenharmony_ci	spin_unlock(&_minor_lock);
38062306a36Sopenharmony_ci
38162306a36Sopenharmony_ci	return r;
38262306a36Sopenharmony_ci}
38362306a36Sopenharmony_ci
38462306a36Sopenharmony_ciint dm_cancel_deferred_remove(struct mapped_device *md)
38562306a36Sopenharmony_ci{
38662306a36Sopenharmony_ci	int r = 0;
38762306a36Sopenharmony_ci
38862306a36Sopenharmony_ci	spin_lock(&_minor_lock);
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_ci	if (test_bit(DMF_DELETING, &md->flags))
39162306a36Sopenharmony_ci		r = -EBUSY;
39262306a36Sopenharmony_ci	else
39362306a36Sopenharmony_ci		clear_bit(DMF_DEFERRED_REMOVE, &md->flags);
39462306a36Sopenharmony_ci
39562306a36Sopenharmony_ci	spin_unlock(&_minor_lock);
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_ci	return r;
39862306a36Sopenharmony_ci}
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_cistatic void do_deferred_remove(struct work_struct *w)
40162306a36Sopenharmony_ci{
40262306a36Sopenharmony_ci	dm_deferred_remove();
40362306a36Sopenharmony_ci}
40462306a36Sopenharmony_ci
40562306a36Sopenharmony_cistatic int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
40662306a36Sopenharmony_ci{
40762306a36Sopenharmony_ci	struct mapped_device *md = bdev->bd_disk->private_data;
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_ci	return dm_get_geometry(md, geo);
41062306a36Sopenharmony_ci}
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_cistatic int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
41362306a36Sopenharmony_ci			    struct block_device **bdev)
41462306a36Sopenharmony_ci{
41562306a36Sopenharmony_ci	struct dm_target *ti;
41662306a36Sopenharmony_ci	struct dm_table *map;
41762306a36Sopenharmony_ci	int r;
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_ciretry:
42062306a36Sopenharmony_ci	r = -ENOTTY;
42162306a36Sopenharmony_ci	map = dm_get_live_table(md, srcu_idx);
42262306a36Sopenharmony_ci	if (!map || !dm_table_get_size(map))
42362306a36Sopenharmony_ci		return r;
42462306a36Sopenharmony_ci
42562306a36Sopenharmony_ci	/* We only support devices that have a single target */
42662306a36Sopenharmony_ci	if (map->num_targets != 1)
42762306a36Sopenharmony_ci		return r;
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci	ti = dm_table_get_target(map, 0);
43062306a36Sopenharmony_ci	if (!ti->type->prepare_ioctl)
43162306a36Sopenharmony_ci		return r;
43262306a36Sopenharmony_ci
43362306a36Sopenharmony_ci	if (dm_suspended_md(md))
43462306a36Sopenharmony_ci		return -EAGAIN;
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ci	r = ti->type->prepare_ioctl(ti, bdev);
43762306a36Sopenharmony_ci	if (r == -ENOTCONN && !fatal_signal_pending(current)) {
43862306a36Sopenharmony_ci		dm_put_live_table(md, *srcu_idx);
43962306a36Sopenharmony_ci		fsleep(10000);
44062306a36Sopenharmony_ci		goto retry;
44162306a36Sopenharmony_ci	}
44262306a36Sopenharmony_ci
44362306a36Sopenharmony_ci	return r;
44462306a36Sopenharmony_ci}
44562306a36Sopenharmony_ci
44662306a36Sopenharmony_cistatic void dm_unprepare_ioctl(struct mapped_device *md, int srcu_idx)
44762306a36Sopenharmony_ci{
44862306a36Sopenharmony_ci	dm_put_live_table(md, srcu_idx);
44962306a36Sopenharmony_ci}
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_cistatic int dm_blk_ioctl(struct block_device *bdev, blk_mode_t mode,
45262306a36Sopenharmony_ci			unsigned int cmd, unsigned long arg)
45362306a36Sopenharmony_ci{
45462306a36Sopenharmony_ci	struct mapped_device *md = bdev->bd_disk->private_data;
45562306a36Sopenharmony_ci	int r, srcu_idx;
45662306a36Sopenharmony_ci
45762306a36Sopenharmony_ci	r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
45862306a36Sopenharmony_ci	if (r < 0)
45962306a36Sopenharmony_ci		goto out;
46062306a36Sopenharmony_ci
46162306a36Sopenharmony_ci	if (r > 0) {
46262306a36Sopenharmony_ci		/*
46362306a36Sopenharmony_ci		 * Target determined this ioctl is being issued against a
46462306a36Sopenharmony_ci		 * subset of the parent bdev; require extra privileges.
46562306a36Sopenharmony_ci		 */
46662306a36Sopenharmony_ci		if (!capable(CAP_SYS_RAWIO)) {
46762306a36Sopenharmony_ci			DMDEBUG_LIMIT(
46862306a36Sopenharmony_ci	"%s: sending ioctl %x to DM device without required privilege.",
46962306a36Sopenharmony_ci				current->comm, cmd);
47062306a36Sopenharmony_ci			r = -ENOIOCTLCMD;
47162306a36Sopenharmony_ci			goto out;
47262306a36Sopenharmony_ci		}
47362306a36Sopenharmony_ci	}
47462306a36Sopenharmony_ci
47562306a36Sopenharmony_ci	if (!bdev->bd_disk->fops->ioctl)
47662306a36Sopenharmony_ci		r = -ENOTTY;
47762306a36Sopenharmony_ci	else
47862306a36Sopenharmony_ci		r = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg);
47962306a36Sopenharmony_ciout:
48062306a36Sopenharmony_ci	dm_unprepare_ioctl(md, srcu_idx);
48162306a36Sopenharmony_ci	return r;
48262306a36Sopenharmony_ci}
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_ciu64 dm_start_time_ns_from_clone(struct bio *bio)
48562306a36Sopenharmony_ci{
48662306a36Sopenharmony_ci	return jiffies_to_nsecs(clone_to_tio(bio)->io->start_time);
48762306a36Sopenharmony_ci}
48862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone);
48962306a36Sopenharmony_ci
49062306a36Sopenharmony_cistatic inline bool bio_is_flush_with_data(struct bio *bio)
49162306a36Sopenharmony_ci{
49262306a36Sopenharmony_ci	return ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size);
49362306a36Sopenharmony_ci}
49462306a36Sopenharmony_ci
49562306a36Sopenharmony_cistatic inline unsigned int dm_io_sectors(struct dm_io *io, struct bio *bio)
49662306a36Sopenharmony_ci{
49762306a36Sopenharmony_ci	/*
49862306a36Sopenharmony_ci	 * If REQ_PREFLUSH set, don't account payload, it will be
49962306a36Sopenharmony_ci	 * submitted (and accounted) after this flush completes.
50062306a36Sopenharmony_ci	 */
50162306a36Sopenharmony_ci	if (bio_is_flush_with_data(bio))
50262306a36Sopenharmony_ci		return 0;
50362306a36Sopenharmony_ci	if (unlikely(dm_io_flagged(io, DM_IO_WAS_SPLIT)))
50462306a36Sopenharmony_ci		return io->sectors;
50562306a36Sopenharmony_ci	return bio_sectors(bio);
50662306a36Sopenharmony_ci}
50762306a36Sopenharmony_ci
50862306a36Sopenharmony_cistatic void dm_io_acct(struct dm_io *io, bool end)
50962306a36Sopenharmony_ci{
51062306a36Sopenharmony_ci	struct bio *bio = io->orig_bio;
51162306a36Sopenharmony_ci
51262306a36Sopenharmony_ci	if (dm_io_flagged(io, DM_IO_BLK_STAT)) {
51362306a36Sopenharmony_ci		if (!end)
51462306a36Sopenharmony_ci			bdev_start_io_acct(bio->bi_bdev, bio_op(bio),
51562306a36Sopenharmony_ci					   io->start_time);
51662306a36Sopenharmony_ci		else
51762306a36Sopenharmony_ci			bdev_end_io_acct(bio->bi_bdev, bio_op(bio),
51862306a36Sopenharmony_ci					 dm_io_sectors(io, bio),
51962306a36Sopenharmony_ci					 io->start_time);
52062306a36Sopenharmony_ci	}
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci	if (static_branch_unlikely(&stats_enabled) &&
52362306a36Sopenharmony_ci	    unlikely(dm_stats_used(&io->md->stats))) {
52462306a36Sopenharmony_ci		sector_t sector;
52562306a36Sopenharmony_ci
52662306a36Sopenharmony_ci		if (unlikely(dm_io_flagged(io, DM_IO_WAS_SPLIT)))
52762306a36Sopenharmony_ci			sector = bio_end_sector(bio) - io->sector_offset;
52862306a36Sopenharmony_ci		else
52962306a36Sopenharmony_ci			sector = bio->bi_iter.bi_sector;
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_ci		dm_stats_account_io(&io->md->stats, bio_data_dir(bio),
53262306a36Sopenharmony_ci				    sector, dm_io_sectors(io, bio),
53362306a36Sopenharmony_ci				    end, io->start_time, &io->stats_aux);
53462306a36Sopenharmony_ci	}
53562306a36Sopenharmony_ci}
53662306a36Sopenharmony_ci
53762306a36Sopenharmony_cistatic void __dm_start_io_acct(struct dm_io *io)
53862306a36Sopenharmony_ci{
53962306a36Sopenharmony_ci	dm_io_acct(io, false);
54062306a36Sopenharmony_ci}
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_cistatic void dm_start_io_acct(struct dm_io *io, struct bio *clone)
54362306a36Sopenharmony_ci{
54462306a36Sopenharmony_ci	/*
54562306a36Sopenharmony_ci	 * Ensure IO accounting is only ever started once.
54662306a36Sopenharmony_ci	 */
54762306a36Sopenharmony_ci	if (dm_io_flagged(io, DM_IO_ACCOUNTED))
54862306a36Sopenharmony_ci		return;
54962306a36Sopenharmony_ci
55062306a36Sopenharmony_ci	/* Expect no possibility for race unless DM_TIO_IS_DUPLICATE_BIO. */
55162306a36Sopenharmony_ci	if (!clone || likely(dm_tio_is_normal(clone_to_tio(clone)))) {
55262306a36Sopenharmony_ci		dm_io_set_flag(io, DM_IO_ACCOUNTED);
55362306a36Sopenharmony_ci	} else {
55462306a36Sopenharmony_ci		unsigned long flags;
55562306a36Sopenharmony_ci		/* Can afford locking given DM_TIO_IS_DUPLICATE_BIO */
55662306a36Sopenharmony_ci		spin_lock_irqsave(&io->lock, flags);
55762306a36Sopenharmony_ci		if (dm_io_flagged(io, DM_IO_ACCOUNTED)) {
55862306a36Sopenharmony_ci			spin_unlock_irqrestore(&io->lock, flags);
55962306a36Sopenharmony_ci			return;
56062306a36Sopenharmony_ci		}
56162306a36Sopenharmony_ci		dm_io_set_flag(io, DM_IO_ACCOUNTED);
56262306a36Sopenharmony_ci		spin_unlock_irqrestore(&io->lock, flags);
56362306a36Sopenharmony_ci	}
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci	__dm_start_io_acct(io);
56662306a36Sopenharmony_ci}
56762306a36Sopenharmony_ci
56862306a36Sopenharmony_cistatic void dm_end_io_acct(struct dm_io *io)
56962306a36Sopenharmony_ci{
57062306a36Sopenharmony_ci	dm_io_acct(io, true);
57162306a36Sopenharmony_ci}
57262306a36Sopenharmony_ci
57362306a36Sopenharmony_cistatic struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
57462306a36Sopenharmony_ci{
57562306a36Sopenharmony_ci	struct dm_io *io;
57662306a36Sopenharmony_ci	struct dm_target_io *tio;
57762306a36Sopenharmony_ci	struct bio *clone;
57862306a36Sopenharmony_ci
57962306a36Sopenharmony_ci	clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs);
58062306a36Sopenharmony_ci	tio = clone_to_tio(clone);
58162306a36Sopenharmony_ci	tio->flags = 0;
58262306a36Sopenharmony_ci	dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO);
58362306a36Sopenharmony_ci	tio->io = NULL;
58462306a36Sopenharmony_ci
58562306a36Sopenharmony_ci	io = container_of(tio, struct dm_io, tio);
58662306a36Sopenharmony_ci	io->magic = DM_IO_MAGIC;
58762306a36Sopenharmony_ci	io->status = BLK_STS_OK;
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ci	/* one ref is for submission, the other is for completion */
59062306a36Sopenharmony_ci	atomic_set(&io->io_count, 2);
59162306a36Sopenharmony_ci	this_cpu_inc(*md->pending_io);
59262306a36Sopenharmony_ci	io->orig_bio = bio;
59362306a36Sopenharmony_ci	io->md = md;
59462306a36Sopenharmony_ci	spin_lock_init(&io->lock);
59562306a36Sopenharmony_ci	io->start_time = jiffies;
59662306a36Sopenharmony_ci	io->flags = 0;
59762306a36Sopenharmony_ci	if (blk_queue_io_stat(md->queue))
59862306a36Sopenharmony_ci		dm_io_set_flag(io, DM_IO_BLK_STAT);
59962306a36Sopenharmony_ci
60062306a36Sopenharmony_ci	if (static_branch_unlikely(&stats_enabled) &&
60162306a36Sopenharmony_ci	    unlikely(dm_stats_used(&md->stats)))
60262306a36Sopenharmony_ci		dm_stats_record_start(&md->stats, &io->stats_aux);
60362306a36Sopenharmony_ci
60462306a36Sopenharmony_ci	return io;
60562306a36Sopenharmony_ci}
60662306a36Sopenharmony_ci
60762306a36Sopenharmony_cistatic void free_io(struct dm_io *io)
60862306a36Sopenharmony_ci{
60962306a36Sopenharmony_ci	bio_put(&io->tio.clone);
61062306a36Sopenharmony_ci}
61162306a36Sopenharmony_ci
61262306a36Sopenharmony_cistatic struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
61362306a36Sopenharmony_ci			     unsigned int target_bio_nr, unsigned int *len, gfp_t gfp_mask)
61462306a36Sopenharmony_ci{
61562306a36Sopenharmony_ci	struct mapped_device *md = ci->io->md;
61662306a36Sopenharmony_ci	struct dm_target_io *tio;
61762306a36Sopenharmony_ci	struct bio *clone;
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_ci	if (!ci->io->tio.io) {
62062306a36Sopenharmony_ci		/* the dm_target_io embedded in ci->io is available */
62162306a36Sopenharmony_ci		tio = &ci->io->tio;
62262306a36Sopenharmony_ci		/* alloc_io() already initialized embedded clone */
62362306a36Sopenharmony_ci		clone = &tio->clone;
62462306a36Sopenharmony_ci	} else {
62562306a36Sopenharmony_ci		clone = bio_alloc_clone(NULL, ci->bio, gfp_mask,
62662306a36Sopenharmony_ci					&md->mempools->bs);
62762306a36Sopenharmony_ci		if (!clone)
62862306a36Sopenharmony_ci			return NULL;
62962306a36Sopenharmony_ci
63062306a36Sopenharmony_ci		/* REQ_DM_POLL_LIST shouldn't be inherited */
63162306a36Sopenharmony_ci		clone->bi_opf &= ~REQ_DM_POLL_LIST;
63262306a36Sopenharmony_ci
63362306a36Sopenharmony_ci		tio = clone_to_tio(clone);
63462306a36Sopenharmony_ci		tio->flags = 0; /* also clears DM_TIO_INSIDE_DM_IO */
63562306a36Sopenharmony_ci	}
63662306a36Sopenharmony_ci
63762306a36Sopenharmony_ci	tio->magic = DM_TIO_MAGIC;
63862306a36Sopenharmony_ci	tio->io = ci->io;
63962306a36Sopenharmony_ci	tio->ti = ti;
64062306a36Sopenharmony_ci	tio->target_bio_nr = target_bio_nr;
64162306a36Sopenharmony_ci	tio->len_ptr = len;
64262306a36Sopenharmony_ci	tio->old_sector = 0;
64362306a36Sopenharmony_ci
64462306a36Sopenharmony_ci	/* Set default bdev, but target must bio_set_dev() before issuing IO */
64562306a36Sopenharmony_ci	clone->bi_bdev = md->disk->part0;
64662306a36Sopenharmony_ci	if (unlikely(ti->needs_bio_set_dev))
64762306a36Sopenharmony_ci		bio_set_dev(clone, md->disk->part0);
64862306a36Sopenharmony_ci
64962306a36Sopenharmony_ci	if (len) {
65062306a36Sopenharmony_ci		clone->bi_iter.bi_size = to_bytes(*len);
65162306a36Sopenharmony_ci		if (bio_integrity(clone))
65262306a36Sopenharmony_ci			bio_integrity_trim(clone);
65362306a36Sopenharmony_ci	}
65462306a36Sopenharmony_ci
65562306a36Sopenharmony_ci	return clone;
65662306a36Sopenharmony_ci}
65762306a36Sopenharmony_ci
65862306a36Sopenharmony_cistatic void free_tio(struct bio *clone)
65962306a36Sopenharmony_ci{
66062306a36Sopenharmony_ci	if (dm_tio_flagged(clone_to_tio(clone), DM_TIO_INSIDE_DM_IO))
66162306a36Sopenharmony_ci		return;
66262306a36Sopenharmony_ci	bio_put(clone);
66362306a36Sopenharmony_ci}
66462306a36Sopenharmony_ci
66562306a36Sopenharmony_ci/*
66662306a36Sopenharmony_ci * Add the bio to the list of deferred io.
66762306a36Sopenharmony_ci */
66862306a36Sopenharmony_cistatic void queue_io(struct mapped_device *md, struct bio *bio)
66962306a36Sopenharmony_ci{
67062306a36Sopenharmony_ci	unsigned long flags;
67162306a36Sopenharmony_ci
67262306a36Sopenharmony_ci	spin_lock_irqsave(&md->deferred_lock, flags);
67362306a36Sopenharmony_ci	bio_list_add(&md->deferred, bio);
67462306a36Sopenharmony_ci	spin_unlock_irqrestore(&md->deferred_lock, flags);
67562306a36Sopenharmony_ci	queue_work(md->wq, &md->work);
67662306a36Sopenharmony_ci}
67762306a36Sopenharmony_ci
67862306a36Sopenharmony_ci/*
67962306a36Sopenharmony_ci * Everyone (including functions in this file), should use this
68062306a36Sopenharmony_ci * function to access the md->map field, and make sure they call
68162306a36Sopenharmony_ci * dm_put_live_table() when finished.
68262306a36Sopenharmony_ci */
68362306a36Sopenharmony_cistruct dm_table *dm_get_live_table(struct mapped_device *md,
68462306a36Sopenharmony_ci				   int *srcu_idx) __acquires(md->io_barrier)
68562306a36Sopenharmony_ci{
68662306a36Sopenharmony_ci	*srcu_idx = srcu_read_lock(&md->io_barrier);
68762306a36Sopenharmony_ci
68862306a36Sopenharmony_ci	return srcu_dereference(md->map, &md->io_barrier);
68962306a36Sopenharmony_ci}
69062306a36Sopenharmony_ci
69162306a36Sopenharmony_civoid dm_put_live_table(struct mapped_device *md,
69262306a36Sopenharmony_ci		       int srcu_idx) __releases(md->io_barrier)
69362306a36Sopenharmony_ci{
69462306a36Sopenharmony_ci	srcu_read_unlock(&md->io_barrier, srcu_idx);
69562306a36Sopenharmony_ci}
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_civoid dm_sync_table(struct mapped_device *md)
69862306a36Sopenharmony_ci{
69962306a36Sopenharmony_ci	synchronize_srcu(&md->io_barrier);
70062306a36Sopenharmony_ci	synchronize_rcu_expedited();
70162306a36Sopenharmony_ci}
70262306a36Sopenharmony_ci
70362306a36Sopenharmony_ci/*
70462306a36Sopenharmony_ci * A fast alternative to dm_get_live_table/dm_put_live_table.
70562306a36Sopenharmony_ci * The caller must not block between these two functions.
70662306a36Sopenharmony_ci */
70762306a36Sopenharmony_cistatic struct dm_table *dm_get_live_table_fast(struct mapped_device *md) __acquires(RCU)
70862306a36Sopenharmony_ci{
70962306a36Sopenharmony_ci	rcu_read_lock();
71062306a36Sopenharmony_ci	return rcu_dereference(md->map);
71162306a36Sopenharmony_ci}
71262306a36Sopenharmony_ci
71362306a36Sopenharmony_cistatic void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
71462306a36Sopenharmony_ci{
71562306a36Sopenharmony_ci	rcu_read_unlock();
71662306a36Sopenharmony_ci}
71762306a36Sopenharmony_ci
71862306a36Sopenharmony_cistatic char *_dm_claim_ptr = "I belong to device-mapper";
71962306a36Sopenharmony_ci
72062306a36Sopenharmony_ci/*
72162306a36Sopenharmony_ci * Open a table device so we can use it as a map destination.
72262306a36Sopenharmony_ci */
72362306a36Sopenharmony_cistatic struct table_device *open_table_device(struct mapped_device *md,
72462306a36Sopenharmony_ci		dev_t dev, blk_mode_t mode)
72562306a36Sopenharmony_ci{
72662306a36Sopenharmony_ci	struct table_device *td;
72762306a36Sopenharmony_ci	struct block_device *bdev;
72862306a36Sopenharmony_ci	u64 part_off;
72962306a36Sopenharmony_ci	int r;
73062306a36Sopenharmony_ci
73162306a36Sopenharmony_ci	td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id);
73262306a36Sopenharmony_ci	if (!td)
73362306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
73462306a36Sopenharmony_ci	refcount_set(&td->count, 1);
73562306a36Sopenharmony_ci
73662306a36Sopenharmony_ci	bdev = blkdev_get_by_dev(dev, mode, _dm_claim_ptr, NULL);
73762306a36Sopenharmony_ci	if (IS_ERR(bdev)) {
73862306a36Sopenharmony_ci		r = PTR_ERR(bdev);
73962306a36Sopenharmony_ci		goto out_free_td;
74062306a36Sopenharmony_ci	}
74162306a36Sopenharmony_ci
74262306a36Sopenharmony_ci	/*
74362306a36Sopenharmony_ci	 * We can be called before the dm disk is added.  In that case we can't
74462306a36Sopenharmony_ci	 * register the holder relation here.  It will be done once add_disk was
74562306a36Sopenharmony_ci	 * called.
74662306a36Sopenharmony_ci	 */
74762306a36Sopenharmony_ci	if (md->disk->slave_dir) {
74862306a36Sopenharmony_ci		r = bd_link_disk_holder(bdev, md->disk);
74962306a36Sopenharmony_ci		if (r)
75062306a36Sopenharmony_ci			goto out_blkdev_put;
75162306a36Sopenharmony_ci	}
75262306a36Sopenharmony_ci
75362306a36Sopenharmony_ci	td->dm_dev.mode = mode;
75462306a36Sopenharmony_ci	td->dm_dev.bdev = bdev;
75562306a36Sopenharmony_ci	td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev, &part_off, NULL, NULL);
75662306a36Sopenharmony_ci	format_dev_t(td->dm_dev.name, dev);
75762306a36Sopenharmony_ci	list_add(&td->list, &md->table_devices);
75862306a36Sopenharmony_ci	return td;
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_ciout_blkdev_put:
76162306a36Sopenharmony_ci	blkdev_put(bdev, _dm_claim_ptr);
76262306a36Sopenharmony_ciout_free_td:
76362306a36Sopenharmony_ci	kfree(td);
76462306a36Sopenharmony_ci	return ERR_PTR(r);
76562306a36Sopenharmony_ci}
76662306a36Sopenharmony_ci
76762306a36Sopenharmony_ci/*
76862306a36Sopenharmony_ci * Close a table device that we've been using.
76962306a36Sopenharmony_ci */
77062306a36Sopenharmony_cistatic void close_table_device(struct table_device *td, struct mapped_device *md)
77162306a36Sopenharmony_ci{
77262306a36Sopenharmony_ci	if (md->disk->slave_dir)
77362306a36Sopenharmony_ci		bd_unlink_disk_holder(td->dm_dev.bdev, md->disk);
77462306a36Sopenharmony_ci	blkdev_put(td->dm_dev.bdev, _dm_claim_ptr);
77562306a36Sopenharmony_ci	put_dax(td->dm_dev.dax_dev);
77662306a36Sopenharmony_ci	list_del(&td->list);
77762306a36Sopenharmony_ci	kfree(td);
77862306a36Sopenharmony_ci}
77962306a36Sopenharmony_ci
78062306a36Sopenharmony_cistatic struct table_device *find_table_device(struct list_head *l, dev_t dev,
78162306a36Sopenharmony_ci					      blk_mode_t mode)
78262306a36Sopenharmony_ci{
78362306a36Sopenharmony_ci	struct table_device *td;
78462306a36Sopenharmony_ci
78562306a36Sopenharmony_ci	list_for_each_entry(td, l, list)
78662306a36Sopenharmony_ci		if (td->dm_dev.bdev->bd_dev == dev && td->dm_dev.mode == mode)
78762306a36Sopenharmony_ci			return td;
78862306a36Sopenharmony_ci
78962306a36Sopenharmony_ci	return NULL;
79062306a36Sopenharmony_ci}
79162306a36Sopenharmony_ci
79262306a36Sopenharmony_ciint dm_get_table_device(struct mapped_device *md, dev_t dev, blk_mode_t mode,
79362306a36Sopenharmony_ci			struct dm_dev **result)
79462306a36Sopenharmony_ci{
79562306a36Sopenharmony_ci	struct table_device *td;
79662306a36Sopenharmony_ci
79762306a36Sopenharmony_ci	mutex_lock(&md->table_devices_lock);
79862306a36Sopenharmony_ci	td = find_table_device(&md->table_devices, dev, mode);
79962306a36Sopenharmony_ci	if (!td) {
80062306a36Sopenharmony_ci		td = open_table_device(md, dev, mode);
80162306a36Sopenharmony_ci		if (IS_ERR(td)) {
80262306a36Sopenharmony_ci			mutex_unlock(&md->table_devices_lock);
80362306a36Sopenharmony_ci			return PTR_ERR(td);
80462306a36Sopenharmony_ci		}
80562306a36Sopenharmony_ci	} else {
80662306a36Sopenharmony_ci		refcount_inc(&td->count);
80762306a36Sopenharmony_ci	}
80862306a36Sopenharmony_ci	mutex_unlock(&md->table_devices_lock);
80962306a36Sopenharmony_ci
81062306a36Sopenharmony_ci	*result = &td->dm_dev;
81162306a36Sopenharmony_ci	return 0;
81262306a36Sopenharmony_ci}
81362306a36Sopenharmony_ci
81462306a36Sopenharmony_civoid dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
81562306a36Sopenharmony_ci{
81662306a36Sopenharmony_ci	struct table_device *td = container_of(d, struct table_device, dm_dev);
81762306a36Sopenharmony_ci
81862306a36Sopenharmony_ci	mutex_lock(&md->table_devices_lock);
81962306a36Sopenharmony_ci	if (refcount_dec_and_test(&td->count))
82062306a36Sopenharmony_ci		close_table_device(td, md);
82162306a36Sopenharmony_ci	mutex_unlock(&md->table_devices_lock);
82262306a36Sopenharmony_ci}
82362306a36Sopenharmony_ci
82462306a36Sopenharmony_ci/*
82562306a36Sopenharmony_ci * Get the geometry associated with a dm device
82662306a36Sopenharmony_ci */
82762306a36Sopenharmony_ciint dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
82862306a36Sopenharmony_ci{
82962306a36Sopenharmony_ci	*geo = md->geometry;
83062306a36Sopenharmony_ci
83162306a36Sopenharmony_ci	return 0;
83262306a36Sopenharmony_ci}
83362306a36Sopenharmony_ci
83462306a36Sopenharmony_ci/*
83562306a36Sopenharmony_ci * Set the geometry of a device.
83662306a36Sopenharmony_ci */
83762306a36Sopenharmony_ciint dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
83862306a36Sopenharmony_ci{
83962306a36Sopenharmony_ci	sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;
84062306a36Sopenharmony_ci
84162306a36Sopenharmony_ci	if (geo->start > sz) {
84262306a36Sopenharmony_ci		DMERR("Start sector is beyond the geometry limits.");
84362306a36Sopenharmony_ci		return -EINVAL;
84462306a36Sopenharmony_ci	}
84562306a36Sopenharmony_ci
84662306a36Sopenharmony_ci	md->geometry = *geo;
84762306a36Sopenharmony_ci
84862306a36Sopenharmony_ci	return 0;
84962306a36Sopenharmony_ci}
85062306a36Sopenharmony_ci
85162306a36Sopenharmony_cistatic int __noflush_suspending(struct mapped_device *md)
85262306a36Sopenharmony_ci{
85362306a36Sopenharmony_ci	return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
85462306a36Sopenharmony_ci}
85562306a36Sopenharmony_ci
85662306a36Sopenharmony_cistatic void dm_requeue_add_io(struct dm_io *io, bool first_stage)
85762306a36Sopenharmony_ci{
85862306a36Sopenharmony_ci	struct mapped_device *md = io->md;
85962306a36Sopenharmony_ci
86062306a36Sopenharmony_ci	if (first_stage) {
86162306a36Sopenharmony_ci		struct dm_io *next = md->requeue_list;
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ci		md->requeue_list = io;
86462306a36Sopenharmony_ci		io->next = next;
86562306a36Sopenharmony_ci	} else {
86662306a36Sopenharmony_ci		bio_list_add_head(&md->deferred, io->orig_bio);
86762306a36Sopenharmony_ci	}
86862306a36Sopenharmony_ci}
86962306a36Sopenharmony_ci
87062306a36Sopenharmony_cistatic void dm_kick_requeue(struct mapped_device *md, bool first_stage)
87162306a36Sopenharmony_ci{
87262306a36Sopenharmony_ci	if (first_stage)
87362306a36Sopenharmony_ci		queue_work(md->wq, &md->requeue_work);
87462306a36Sopenharmony_ci	else
87562306a36Sopenharmony_ci		queue_work(md->wq, &md->work);
87662306a36Sopenharmony_ci}
87762306a36Sopenharmony_ci
87862306a36Sopenharmony_ci/*
87962306a36Sopenharmony_ci * Return true if the dm_io's original bio is requeued.
88062306a36Sopenharmony_ci * io->status is updated with error if requeue disallowed.
88162306a36Sopenharmony_ci */
88262306a36Sopenharmony_cistatic bool dm_handle_requeue(struct dm_io *io, bool first_stage)
88362306a36Sopenharmony_ci{
88462306a36Sopenharmony_ci	struct bio *bio = io->orig_bio;
88562306a36Sopenharmony_ci	bool handle_requeue = (io->status == BLK_STS_DM_REQUEUE);
88662306a36Sopenharmony_ci	bool handle_polled_eagain = ((io->status == BLK_STS_AGAIN) &&
88762306a36Sopenharmony_ci				     (bio->bi_opf & REQ_POLLED));
88862306a36Sopenharmony_ci	struct mapped_device *md = io->md;
88962306a36Sopenharmony_ci	bool requeued = false;
89062306a36Sopenharmony_ci
89162306a36Sopenharmony_ci	if (handle_requeue || handle_polled_eagain) {
89262306a36Sopenharmony_ci		unsigned long flags;
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_ci		if (bio->bi_opf & REQ_POLLED) {
89562306a36Sopenharmony_ci			/*
89662306a36Sopenharmony_ci			 * Upper layer won't help us poll split bio
89762306a36Sopenharmony_ci			 * (io->orig_bio may only reflect a subset of the
89862306a36Sopenharmony_ci			 * pre-split original) so clear REQ_POLLED.
89962306a36Sopenharmony_ci			 */
90062306a36Sopenharmony_ci			bio_clear_polled(bio);
90162306a36Sopenharmony_ci		}
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_ci		/*
90462306a36Sopenharmony_ci		 * Target requested pushing back the I/O or
90562306a36Sopenharmony_ci		 * polled IO hit BLK_STS_AGAIN.
90662306a36Sopenharmony_ci		 */
90762306a36Sopenharmony_ci		spin_lock_irqsave(&md->deferred_lock, flags);
90862306a36Sopenharmony_ci		if ((__noflush_suspending(md) &&
90962306a36Sopenharmony_ci		     !WARN_ON_ONCE(dm_is_zone_write(md, bio))) ||
91062306a36Sopenharmony_ci		    handle_polled_eagain || first_stage) {
91162306a36Sopenharmony_ci			dm_requeue_add_io(io, first_stage);
91262306a36Sopenharmony_ci			requeued = true;
91362306a36Sopenharmony_ci		} else {
91462306a36Sopenharmony_ci			/*
91562306a36Sopenharmony_ci			 * noflush suspend was interrupted or this is
91662306a36Sopenharmony_ci			 * a write to a zoned target.
91762306a36Sopenharmony_ci			 */
91862306a36Sopenharmony_ci			io->status = BLK_STS_IOERR;
91962306a36Sopenharmony_ci		}
92062306a36Sopenharmony_ci		spin_unlock_irqrestore(&md->deferred_lock, flags);
92162306a36Sopenharmony_ci	}
92262306a36Sopenharmony_ci
92362306a36Sopenharmony_ci	if (requeued)
92462306a36Sopenharmony_ci		dm_kick_requeue(md, first_stage);
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_ci	return requeued;
92762306a36Sopenharmony_ci}
92862306a36Sopenharmony_ci
92962306a36Sopenharmony_cistatic void __dm_io_complete(struct dm_io *io, bool first_stage)
93062306a36Sopenharmony_ci{
93162306a36Sopenharmony_ci	struct bio *bio = io->orig_bio;
93262306a36Sopenharmony_ci	struct mapped_device *md = io->md;
93362306a36Sopenharmony_ci	blk_status_t io_error;
93462306a36Sopenharmony_ci	bool requeued;
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_ci	requeued = dm_handle_requeue(io, first_stage);
93762306a36Sopenharmony_ci	if (requeued && first_stage)
93862306a36Sopenharmony_ci		return;
93962306a36Sopenharmony_ci
94062306a36Sopenharmony_ci	io_error = io->status;
94162306a36Sopenharmony_ci	if (dm_io_flagged(io, DM_IO_ACCOUNTED))
94262306a36Sopenharmony_ci		dm_end_io_acct(io);
94362306a36Sopenharmony_ci	else if (!io_error) {
94462306a36Sopenharmony_ci		/*
94562306a36Sopenharmony_ci		 * Must handle target that DM_MAPIO_SUBMITTED only to
94662306a36Sopenharmony_ci		 * then bio_endio() rather than dm_submit_bio_remap()
94762306a36Sopenharmony_ci		 */
94862306a36Sopenharmony_ci		__dm_start_io_acct(io);
94962306a36Sopenharmony_ci		dm_end_io_acct(io);
95062306a36Sopenharmony_ci	}
95162306a36Sopenharmony_ci	free_io(io);
95262306a36Sopenharmony_ci	smp_wmb();
95362306a36Sopenharmony_ci	this_cpu_dec(*md->pending_io);
95462306a36Sopenharmony_ci
95562306a36Sopenharmony_ci	/* nudge anyone waiting on suspend queue */
95662306a36Sopenharmony_ci	if (unlikely(wq_has_sleeper(&md->wait)))
95762306a36Sopenharmony_ci		wake_up(&md->wait);
95862306a36Sopenharmony_ci
95962306a36Sopenharmony_ci	/* Return early if the original bio was requeued */
96062306a36Sopenharmony_ci	if (requeued)
96162306a36Sopenharmony_ci		return;
96262306a36Sopenharmony_ci
96362306a36Sopenharmony_ci	if (bio_is_flush_with_data(bio)) {
96462306a36Sopenharmony_ci		/*
96562306a36Sopenharmony_ci		 * Preflush done for flush with data, reissue
96662306a36Sopenharmony_ci		 * without REQ_PREFLUSH.
96762306a36Sopenharmony_ci		 */
96862306a36Sopenharmony_ci		bio->bi_opf &= ~REQ_PREFLUSH;
96962306a36Sopenharmony_ci		queue_io(md, bio);
97062306a36Sopenharmony_ci	} else {
97162306a36Sopenharmony_ci		/* done with normal IO or empty flush */
97262306a36Sopenharmony_ci		if (io_error)
97362306a36Sopenharmony_ci			bio->bi_status = io_error;
97462306a36Sopenharmony_ci		bio_endio(bio);
97562306a36Sopenharmony_ci	}
97662306a36Sopenharmony_ci}
97762306a36Sopenharmony_ci
97862306a36Sopenharmony_cistatic void dm_wq_requeue_work(struct work_struct *work)
97962306a36Sopenharmony_ci{
98062306a36Sopenharmony_ci	struct mapped_device *md = container_of(work, struct mapped_device,
98162306a36Sopenharmony_ci						requeue_work);
98262306a36Sopenharmony_ci	unsigned long flags;
98362306a36Sopenharmony_ci	struct dm_io *io;
98462306a36Sopenharmony_ci
98562306a36Sopenharmony_ci	/* reuse deferred lock to simplify dm_handle_requeue */
98662306a36Sopenharmony_ci	spin_lock_irqsave(&md->deferred_lock, flags);
98762306a36Sopenharmony_ci	io = md->requeue_list;
98862306a36Sopenharmony_ci	md->requeue_list = NULL;
98962306a36Sopenharmony_ci	spin_unlock_irqrestore(&md->deferred_lock, flags);
99062306a36Sopenharmony_ci
99162306a36Sopenharmony_ci	while (io) {
99262306a36Sopenharmony_ci		struct dm_io *next = io->next;
99362306a36Sopenharmony_ci
99462306a36Sopenharmony_ci		dm_io_rewind(io, &md->disk->bio_split);
99562306a36Sopenharmony_ci
99662306a36Sopenharmony_ci		io->next = NULL;
99762306a36Sopenharmony_ci		__dm_io_complete(io, false);
99862306a36Sopenharmony_ci		io = next;
99962306a36Sopenharmony_ci		cond_resched();
100062306a36Sopenharmony_ci	}
100162306a36Sopenharmony_ci}
100262306a36Sopenharmony_ci
100362306a36Sopenharmony_ci/*
100462306a36Sopenharmony_ci * Two staged requeue:
100562306a36Sopenharmony_ci *
100662306a36Sopenharmony_ci * 1) io->orig_bio points to the real original bio, and the part mapped to
100762306a36Sopenharmony_ci *    this io must be requeued, instead of other parts of the original bio.
100862306a36Sopenharmony_ci *
100962306a36Sopenharmony_ci * 2) io->orig_bio points to new cloned bio which matches the requeued dm_io.
101062306a36Sopenharmony_ci */
101162306a36Sopenharmony_cistatic void dm_io_complete(struct dm_io *io)
101262306a36Sopenharmony_ci{
101362306a36Sopenharmony_ci	bool first_requeue;
101462306a36Sopenharmony_ci
101562306a36Sopenharmony_ci	/*
101662306a36Sopenharmony_ci	 * Only dm_io that has been split needs two stage requeue, otherwise
101762306a36Sopenharmony_ci	 * we may run into long bio clone chain during suspend and OOM could
101862306a36Sopenharmony_ci	 * be triggered.
101962306a36Sopenharmony_ci	 *
102062306a36Sopenharmony_ci	 * Also flush data dm_io won't be marked as DM_IO_WAS_SPLIT, so they
102162306a36Sopenharmony_ci	 * also aren't handled via the first stage requeue.
102262306a36Sopenharmony_ci	 */
102362306a36Sopenharmony_ci	if (dm_io_flagged(io, DM_IO_WAS_SPLIT))
102462306a36Sopenharmony_ci		first_requeue = true;
102562306a36Sopenharmony_ci	else
102662306a36Sopenharmony_ci		first_requeue = false;
102762306a36Sopenharmony_ci
102862306a36Sopenharmony_ci	__dm_io_complete(io, first_requeue);
102962306a36Sopenharmony_ci}
103062306a36Sopenharmony_ci
103162306a36Sopenharmony_ci/*
103262306a36Sopenharmony_ci * Decrements the number of outstanding ios that a bio has been
103362306a36Sopenharmony_ci * cloned into, completing the original io if necc.
103462306a36Sopenharmony_ci */
103562306a36Sopenharmony_cistatic inline void __dm_io_dec_pending(struct dm_io *io)
103662306a36Sopenharmony_ci{
103762306a36Sopenharmony_ci	if (atomic_dec_and_test(&io->io_count))
103862306a36Sopenharmony_ci		dm_io_complete(io);
103962306a36Sopenharmony_ci}
104062306a36Sopenharmony_ci
104162306a36Sopenharmony_cistatic void dm_io_set_error(struct dm_io *io, blk_status_t error)
104262306a36Sopenharmony_ci{
104362306a36Sopenharmony_ci	unsigned long flags;
104462306a36Sopenharmony_ci
104562306a36Sopenharmony_ci	/* Push-back supersedes any I/O errors */
104662306a36Sopenharmony_ci	spin_lock_irqsave(&io->lock, flags);
104762306a36Sopenharmony_ci	if (!(io->status == BLK_STS_DM_REQUEUE &&
104862306a36Sopenharmony_ci	      __noflush_suspending(io->md))) {
104962306a36Sopenharmony_ci		io->status = error;
105062306a36Sopenharmony_ci	}
105162306a36Sopenharmony_ci	spin_unlock_irqrestore(&io->lock, flags);
105262306a36Sopenharmony_ci}
105362306a36Sopenharmony_ci
105462306a36Sopenharmony_cistatic void dm_io_dec_pending(struct dm_io *io, blk_status_t error)
105562306a36Sopenharmony_ci{
105662306a36Sopenharmony_ci	if (unlikely(error))
105762306a36Sopenharmony_ci		dm_io_set_error(io, error);
105862306a36Sopenharmony_ci
105962306a36Sopenharmony_ci	__dm_io_dec_pending(io);
106062306a36Sopenharmony_ci}
106162306a36Sopenharmony_ci
106262306a36Sopenharmony_ci/*
106362306a36Sopenharmony_ci * The queue_limits are only valid as long as you have a reference
106462306a36Sopenharmony_ci * count on 'md'. But _not_ imposing verification to avoid atomic_read(),
106562306a36Sopenharmony_ci */
106662306a36Sopenharmony_cistatic inline struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
106762306a36Sopenharmony_ci{
106862306a36Sopenharmony_ci	return &md->queue->limits;
106962306a36Sopenharmony_ci}
107062306a36Sopenharmony_ci
107162306a36Sopenharmony_civoid disable_discard(struct mapped_device *md)
107262306a36Sopenharmony_ci{
107362306a36Sopenharmony_ci	struct queue_limits *limits = dm_get_queue_limits(md);
107462306a36Sopenharmony_ci
107562306a36Sopenharmony_ci	/* device doesn't really support DISCARD, disable it */
107662306a36Sopenharmony_ci	limits->max_discard_sectors = 0;
107762306a36Sopenharmony_ci}
107862306a36Sopenharmony_ci
107962306a36Sopenharmony_civoid disable_write_zeroes(struct mapped_device *md)
108062306a36Sopenharmony_ci{
108162306a36Sopenharmony_ci	struct queue_limits *limits = dm_get_queue_limits(md);
108262306a36Sopenharmony_ci
108362306a36Sopenharmony_ci	/* device doesn't really support WRITE ZEROES, disable it */
108462306a36Sopenharmony_ci	limits->max_write_zeroes_sectors = 0;
108562306a36Sopenharmony_ci}
108662306a36Sopenharmony_ci
108762306a36Sopenharmony_cistatic bool swap_bios_limit(struct dm_target *ti, struct bio *bio)
108862306a36Sopenharmony_ci{
108962306a36Sopenharmony_ci	return unlikely((bio->bi_opf & REQ_SWAP) != 0) && unlikely(ti->limit_swap_bios);
109062306a36Sopenharmony_ci}
109162306a36Sopenharmony_ci
109262306a36Sopenharmony_cistatic void clone_endio(struct bio *bio)
109362306a36Sopenharmony_ci{
109462306a36Sopenharmony_ci	blk_status_t error = bio->bi_status;
109562306a36Sopenharmony_ci	struct dm_target_io *tio = clone_to_tio(bio);
109662306a36Sopenharmony_ci	struct dm_target *ti = tio->ti;
109762306a36Sopenharmony_ci	dm_endio_fn endio = ti->type->end_io;
109862306a36Sopenharmony_ci	struct dm_io *io = tio->io;
109962306a36Sopenharmony_ci	struct mapped_device *md = io->md;
110062306a36Sopenharmony_ci
110162306a36Sopenharmony_ci	if (unlikely(error == BLK_STS_TARGET)) {
110262306a36Sopenharmony_ci		if (bio_op(bio) == REQ_OP_DISCARD &&
110362306a36Sopenharmony_ci		    !bdev_max_discard_sectors(bio->bi_bdev))
110462306a36Sopenharmony_ci			disable_discard(md);
110562306a36Sopenharmony_ci		else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
110662306a36Sopenharmony_ci			 !bdev_write_zeroes_sectors(bio->bi_bdev))
110762306a36Sopenharmony_ci			disable_write_zeroes(md);
110862306a36Sopenharmony_ci	}
110962306a36Sopenharmony_ci
111062306a36Sopenharmony_ci	if (static_branch_unlikely(&zoned_enabled) &&
111162306a36Sopenharmony_ci	    unlikely(bdev_is_zoned(bio->bi_bdev)))
111262306a36Sopenharmony_ci		dm_zone_endio(io, bio);
111362306a36Sopenharmony_ci
111462306a36Sopenharmony_ci	if (endio) {
111562306a36Sopenharmony_ci		int r = endio(ti, bio, &error);
111662306a36Sopenharmony_ci
111762306a36Sopenharmony_ci		switch (r) {
111862306a36Sopenharmony_ci		case DM_ENDIO_REQUEUE:
111962306a36Sopenharmony_ci			if (static_branch_unlikely(&zoned_enabled)) {
112062306a36Sopenharmony_ci				/*
112162306a36Sopenharmony_ci				 * Requeuing writes to a sequential zone of a zoned
112262306a36Sopenharmony_ci				 * target will break the sequential write pattern:
112362306a36Sopenharmony_ci				 * fail such IO.
112462306a36Sopenharmony_ci				 */
112562306a36Sopenharmony_ci				if (WARN_ON_ONCE(dm_is_zone_write(md, bio)))
112662306a36Sopenharmony_ci					error = BLK_STS_IOERR;
112762306a36Sopenharmony_ci				else
112862306a36Sopenharmony_ci					error = BLK_STS_DM_REQUEUE;
112962306a36Sopenharmony_ci			} else
113062306a36Sopenharmony_ci				error = BLK_STS_DM_REQUEUE;
113162306a36Sopenharmony_ci			fallthrough;
113262306a36Sopenharmony_ci		case DM_ENDIO_DONE:
113362306a36Sopenharmony_ci			break;
113462306a36Sopenharmony_ci		case DM_ENDIO_INCOMPLETE:
113562306a36Sopenharmony_ci			/* The target will handle the io */
113662306a36Sopenharmony_ci			return;
113762306a36Sopenharmony_ci		default:
113862306a36Sopenharmony_ci			DMCRIT("unimplemented target endio return value: %d", r);
113962306a36Sopenharmony_ci			BUG();
114062306a36Sopenharmony_ci		}
114162306a36Sopenharmony_ci	}
114262306a36Sopenharmony_ci
114362306a36Sopenharmony_ci	if (static_branch_unlikely(&swap_bios_enabled) &&
114462306a36Sopenharmony_ci	    unlikely(swap_bios_limit(ti, bio)))
114562306a36Sopenharmony_ci		up(&md->swap_bios_semaphore);
114662306a36Sopenharmony_ci
114762306a36Sopenharmony_ci	free_tio(bio);
114862306a36Sopenharmony_ci	dm_io_dec_pending(io, error);
114962306a36Sopenharmony_ci}
115062306a36Sopenharmony_ci
115162306a36Sopenharmony_ci/*
115262306a36Sopenharmony_ci * Return maximum size of I/O possible at the supplied sector up to the current
115362306a36Sopenharmony_ci * target boundary.
115462306a36Sopenharmony_ci */
115562306a36Sopenharmony_cistatic inline sector_t max_io_len_target_boundary(struct dm_target *ti,
115662306a36Sopenharmony_ci						  sector_t target_offset)
115762306a36Sopenharmony_ci{
115862306a36Sopenharmony_ci	return ti->len - target_offset;
115962306a36Sopenharmony_ci}
116062306a36Sopenharmony_ci
116162306a36Sopenharmony_cistatic sector_t __max_io_len(struct dm_target *ti, sector_t sector,
116262306a36Sopenharmony_ci			     unsigned int max_granularity,
116362306a36Sopenharmony_ci			     unsigned int max_sectors)
116462306a36Sopenharmony_ci{
116562306a36Sopenharmony_ci	sector_t target_offset = dm_target_offset(ti, sector);
116662306a36Sopenharmony_ci	sector_t len = max_io_len_target_boundary(ti, target_offset);
116762306a36Sopenharmony_ci
116862306a36Sopenharmony_ci	/*
116962306a36Sopenharmony_ci	 * Does the target need to split IO even further?
117062306a36Sopenharmony_ci	 * - varied (per target) IO splitting is a tenet of DM; this
117162306a36Sopenharmony_ci	 *   explains why stacked chunk_sectors based splitting via
117262306a36Sopenharmony_ci	 *   bio_split_to_limits() isn't possible here.
117362306a36Sopenharmony_ci	 */
117462306a36Sopenharmony_ci	if (!max_granularity)
117562306a36Sopenharmony_ci		return len;
117662306a36Sopenharmony_ci	return min_t(sector_t, len,
117762306a36Sopenharmony_ci		min(max_sectors ? : queue_max_sectors(ti->table->md->queue),
117862306a36Sopenharmony_ci		    blk_chunk_sectors_left(target_offset, max_granularity)));
117962306a36Sopenharmony_ci}
118062306a36Sopenharmony_ci
118162306a36Sopenharmony_cistatic inline sector_t max_io_len(struct dm_target *ti, sector_t sector)
118262306a36Sopenharmony_ci{
118362306a36Sopenharmony_ci	return __max_io_len(ti, sector, ti->max_io_len, 0);
118462306a36Sopenharmony_ci}
118562306a36Sopenharmony_ci
118662306a36Sopenharmony_ciint dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
118762306a36Sopenharmony_ci{
118862306a36Sopenharmony_ci	if (len > UINT_MAX) {
118962306a36Sopenharmony_ci		DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)",
119062306a36Sopenharmony_ci		      (unsigned long long)len, UINT_MAX);
119162306a36Sopenharmony_ci		ti->error = "Maximum size of target IO is too large";
119262306a36Sopenharmony_ci		return -EINVAL;
119362306a36Sopenharmony_ci	}
119462306a36Sopenharmony_ci
119562306a36Sopenharmony_ci	ti->max_io_len = (uint32_t) len;
119662306a36Sopenharmony_ci
119762306a36Sopenharmony_ci	return 0;
119862306a36Sopenharmony_ci}
119962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
120062306a36Sopenharmony_ci
120162306a36Sopenharmony_cistatic struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
120262306a36Sopenharmony_ci						sector_t sector, int *srcu_idx)
120362306a36Sopenharmony_ci	__acquires(md->io_barrier)
120462306a36Sopenharmony_ci{
120562306a36Sopenharmony_ci	struct dm_table *map;
120662306a36Sopenharmony_ci	struct dm_target *ti;
120762306a36Sopenharmony_ci
120862306a36Sopenharmony_ci	map = dm_get_live_table(md, srcu_idx);
120962306a36Sopenharmony_ci	if (!map)
121062306a36Sopenharmony_ci		return NULL;
121162306a36Sopenharmony_ci
121262306a36Sopenharmony_ci	ti = dm_table_find_target(map, sector);
121362306a36Sopenharmony_ci	if (!ti)
121462306a36Sopenharmony_ci		return NULL;
121562306a36Sopenharmony_ci
121662306a36Sopenharmony_ci	return ti;
121762306a36Sopenharmony_ci}
121862306a36Sopenharmony_ci
121962306a36Sopenharmony_cistatic long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
122062306a36Sopenharmony_ci		long nr_pages, enum dax_access_mode mode, void **kaddr,
122162306a36Sopenharmony_ci		pfn_t *pfn)
122262306a36Sopenharmony_ci{
122362306a36Sopenharmony_ci	struct mapped_device *md = dax_get_private(dax_dev);
122462306a36Sopenharmony_ci	sector_t sector = pgoff * PAGE_SECTORS;
122562306a36Sopenharmony_ci	struct dm_target *ti;
122662306a36Sopenharmony_ci	long len, ret = -EIO;
122762306a36Sopenharmony_ci	int srcu_idx;
122862306a36Sopenharmony_ci
122962306a36Sopenharmony_ci	ti = dm_dax_get_live_target(md, sector, &srcu_idx);
123062306a36Sopenharmony_ci
123162306a36Sopenharmony_ci	if (!ti)
123262306a36Sopenharmony_ci		goto out;
123362306a36Sopenharmony_ci	if (!ti->type->direct_access)
123462306a36Sopenharmony_ci		goto out;
123562306a36Sopenharmony_ci	len = max_io_len(ti, sector) / PAGE_SECTORS;
123662306a36Sopenharmony_ci	if (len < 1)
123762306a36Sopenharmony_ci		goto out;
123862306a36Sopenharmony_ci	nr_pages = min(len, nr_pages);
123962306a36Sopenharmony_ci	ret = ti->type->direct_access(ti, pgoff, nr_pages, mode, kaddr, pfn);
124062306a36Sopenharmony_ci
124162306a36Sopenharmony_ci out:
124262306a36Sopenharmony_ci	dm_put_live_table(md, srcu_idx);
124362306a36Sopenharmony_ci
124462306a36Sopenharmony_ci	return ret;
124562306a36Sopenharmony_ci}
124662306a36Sopenharmony_ci
124762306a36Sopenharmony_cistatic int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
124862306a36Sopenharmony_ci				  size_t nr_pages)
124962306a36Sopenharmony_ci{
125062306a36Sopenharmony_ci	struct mapped_device *md = dax_get_private(dax_dev);
125162306a36Sopenharmony_ci	sector_t sector = pgoff * PAGE_SECTORS;
125262306a36Sopenharmony_ci	struct dm_target *ti;
125362306a36Sopenharmony_ci	int ret = -EIO;
125462306a36Sopenharmony_ci	int srcu_idx;
125562306a36Sopenharmony_ci
125662306a36Sopenharmony_ci	ti = dm_dax_get_live_target(md, sector, &srcu_idx);
125762306a36Sopenharmony_ci
125862306a36Sopenharmony_ci	if (!ti)
125962306a36Sopenharmony_ci		goto out;
126062306a36Sopenharmony_ci	if (WARN_ON(!ti->type->dax_zero_page_range)) {
126162306a36Sopenharmony_ci		/*
126262306a36Sopenharmony_ci		 * ->zero_page_range() is mandatory dax operation. If we are
126362306a36Sopenharmony_ci		 *  here, something is wrong.
126462306a36Sopenharmony_ci		 */
126562306a36Sopenharmony_ci		goto out;
126662306a36Sopenharmony_ci	}
126762306a36Sopenharmony_ci	ret = ti->type->dax_zero_page_range(ti, pgoff, nr_pages);
126862306a36Sopenharmony_ci out:
126962306a36Sopenharmony_ci	dm_put_live_table(md, srcu_idx);
127062306a36Sopenharmony_ci
127162306a36Sopenharmony_ci	return ret;
127262306a36Sopenharmony_ci}
127362306a36Sopenharmony_ci
127462306a36Sopenharmony_cistatic size_t dm_dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
127562306a36Sopenharmony_ci		void *addr, size_t bytes, struct iov_iter *i)
127662306a36Sopenharmony_ci{
127762306a36Sopenharmony_ci	struct mapped_device *md = dax_get_private(dax_dev);
127862306a36Sopenharmony_ci	sector_t sector = pgoff * PAGE_SECTORS;
127962306a36Sopenharmony_ci	struct dm_target *ti;
128062306a36Sopenharmony_ci	int srcu_idx;
128162306a36Sopenharmony_ci	long ret = 0;
128262306a36Sopenharmony_ci
128362306a36Sopenharmony_ci	ti = dm_dax_get_live_target(md, sector, &srcu_idx);
128462306a36Sopenharmony_ci	if (!ti || !ti->type->dax_recovery_write)
128562306a36Sopenharmony_ci		goto out;
128662306a36Sopenharmony_ci
128762306a36Sopenharmony_ci	ret = ti->type->dax_recovery_write(ti, pgoff, addr, bytes, i);
128862306a36Sopenharmony_ciout:
128962306a36Sopenharmony_ci	dm_put_live_table(md, srcu_idx);
129062306a36Sopenharmony_ci	return ret;
129162306a36Sopenharmony_ci}
129262306a36Sopenharmony_ci
129362306a36Sopenharmony_ci/*
129462306a36Sopenharmony_ci * A target may call dm_accept_partial_bio only from the map routine.  It is
129562306a36Sopenharmony_ci * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_* zone management
129662306a36Sopenharmony_ci * operations, REQ_OP_ZONE_APPEND (zone append writes) and any bio serviced by
129762306a36Sopenharmony_ci * __send_duplicate_bios().
129862306a36Sopenharmony_ci *
129962306a36Sopenharmony_ci * dm_accept_partial_bio informs the dm that the target only wants to process
130062306a36Sopenharmony_ci * additional n_sectors sectors of the bio and the rest of the data should be
130162306a36Sopenharmony_ci * sent in a next bio.
130262306a36Sopenharmony_ci *
130362306a36Sopenharmony_ci * A diagram that explains the arithmetics:
130462306a36Sopenharmony_ci * +--------------------+---------------+-------+
130562306a36Sopenharmony_ci * |         1          |       2       |   3   |
130662306a36Sopenharmony_ci * +--------------------+---------------+-------+
130762306a36Sopenharmony_ci *
130862306a36Sopenharmony_ci * <-------------- *tio->len_ptr --------------->
130962306a36Sopenharmony_ci *                      <----- bio_sectors ----->
131062306a36Sopenharmony_ci *                      <-- n_sectors -->
131162306a36Sopenharmony_ci *
131262306a36Sopenharmony_ci * Region 1 was already iterated over with bio_advance or similar function.
131362306a36Sopenharmony_ci *	(it may be empty if the target doesn't use bio_advance)
131462306a36Sopenharmony_ci * Region 2 is the remaining bio size that the target wants to process.
131562306a36Sopenharmony_ci *	(it may be empty if region 1 is non-empty, although there is no reason
131662306a36Sopenharmony_ci *	 to make it empty)
131762306a36Sopenharmony_ci * The target requires that region 3 is to be sent in the next bio.
131862306a36Sopenharmony_ci *
131962306a36Sopenharmony_ci * If the target wants to receive multiple copies of the bio (via num_*bios, etc),
132062306a36Sopenharmony_ci * the partially processed part (the sum of regions 1+2) must be the same for all
132162306a36Sopenharmony_ci * copies of the bio.
132262306a36Sopenharmony_ci */
132362306a36Sopenharmony_civoid dm_accept_partial_bio(struct bio *bio, unsigned int n_sectors)
132462306a36Sopenharmony_ci{
132562306a36Sopenharmony_ci	struct dm_target_io *tio = clone_to_tio(bio);
132662306a36Sopenharmony_ci	struct dm_io *io = tio->io;
132762306a36Sopenharmony_ci	unsigned int bio_sectors = bio_sectors(bio);
132862306a36Sopenharmony_ci
132962306a36Sopenharmony_ci	BUG_ON(dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO));
133062306a36Sopenharmony_ci	BUG_ON(op_is_zone_mgmt(bio_op(bio)));
133162306a36Sopenharmony_ci	BUG_ON(bio_op(bio) == REQ_OP_ZONE_APPEND);
133262306a36Sopenharmony_ci	BUG_ON(bio_sectors > *tio->len_ptr);
133362306a36Sopenharmony_ci	BUG_ON(n_sectors > bio_sectors);
133462306a36Sopenharmony_ci
133562306a36Sopenharmony_ci	*tio->len_ptr -= bio_sectors - n_sectors;
133662306a36Sopenharmony_ci	bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
133762306a36Sopenharmony_ci
133862306a36Sopenharmony_ci	/*
133962306a36Sopenharmony_ci	 * __split_and_process_bio() may have already saved mapped part
134062306a36Sopenharmony_ci	 * for accounting but it is being reduced so update accordingly.
134162306a36Sopenharmony_ci	 */
134262306a36Sopenharmony_ci	dm_io_set_flag(io, DM_IO_WAS_SPLIT);
134362306a36Sopenharmony_ci	io->sectors = n_sectors;
134462306a36Sopenharmony_ci	io->sector_offset = bio_sectors(io->orig_bio);
134562306a36Sopenharmony_ci}
134662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_accept_partial_bio);
134762306a36Sopenharmony_ci
134862306a36Sopenharmony_ci/*
134962306a36Sopenharmony_ci * @clone: clone bio that DM core passed to target's .map function
135062306a36Sopenharmony_ci * @tgt_clone: clone of @clone bio that target needs submitted
135162306a36Sopenharmony_ci *
135262306a36Sopenharmony_ci * Targets should use this interface to submit bios they take
135362306a36Sopenharmony_ci * ownership of when returning DM_MAPIO_SUBMITTED.
135462306a36Sopenharmony_ci *
135562306a36Sopenharmony_ci * Target should also enable ti->accounts_remapped_io
135662306a36Sopenharmony_ci */
135762306a36Sopenharmony_civoid dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone)
135862306a36Sopenharmony_ci{
135962306a36Sopenharmony_ci	struct dm_target_io *tio = clone_to_tio(clone);
136062306a36Sopenharmony_ci	struct dm_io *io = tio->io;
136162306a36Sopenharmony_ci
136262306a36Sopenharmony_ci	/* establish bio that will get submitted */
136362306a36Sopenharmony_ci	if (!tgt_clone)
136462306a36Sopenharmony_ci		tgt_clone = clone;
136562306a36Sopenharmony_ci
136662306a36Sopenharmony_ci	/*
136762306a36Sopenharmony_ci	 * Account io->origin_bio to DM dev on behalf of target
136862306a36Sopenharmony_ci	 * that took ownership of IO with DM_MAPIO_SUBMITTED.
136962306a36Sopenharmony_ci	 */
137062306a36Sopenharmony_ci	dm_start_io_acct(io, clone);
137162306a36Sopenharmony_ci
137262306a36Sopenharmony_ci	trace_block_bio_remap(tgt_clone, disk_devt(io->md->disk),
137362306a36Sopenharmony_ci			      tio->old_sector);
137462306a36Sopenharmony_ci	submit_bio_noacct(tgt_clone);
137562306a36Sopenharmony_ci}
137662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_submit_bio_remap);
137762306a36Sopenharmony_ci
137862306a36Sopenharmony_cistatic noinline void __set_swap_bios_limit(struct mapped_device *md, int latch)
137962306a36Sopenharmony_ci{
138062306a36Sopenharmony_ci	mutex_lock(&md->swap_bios_lock);
138162306a36Sopenharmony_ci	while (latch < md->swap_bios) {
138262306a36Sopenharmony_ci		cond_resched();
138362306a36Sopenharmony_ci		down(&md->swap_bios_semaphore);
138462306a36Sopenharmony_ci		md->swap_bios--;
138562306a36Sopenharmony_ci	}
138662306a36Sopenharmony_ci	while (latch > md->swap_bios) {
138762306a36Sopenharmony_ci		cond_resched();
138862306a36Sopenharmony_ci		up(&md->swap_bios_semaphore);
138962306a36Sopenharmony_ci		md->swap_bios++;
139062306a36Sopenharmony_ci	}
139162306a36Sopenharmony_ci	mutex_unlock(&md->swap_bios_lock);
139262306a36Sopenharmony_ci}
139362306a36Sopenharmony_ci
139462306a36Sopenharmony_cistatic void __map_bio(struct bio *clone)
139562306a36Sopenharmony_ci{
139662306a36Sopenharmony_ci	struct dm_target_io *tio = clone_to_tio(clone);
139762306a36Sopenharmony_ci	struct dm_target *ti = tio->ti;
139862306a36Sopenharmony_ci	struct dm_io *io = tio->io;
139962306a36Sopenharmony_ci	struct mapped_device *md = io->md;
140062306a36Sopenharmony_ci	int r;
140162306a36Sopenharmony_ci
140262306a36Sopenharmony_ci	clone->bi_end_io = clone_endio;
140362306a36Sopenharmony_ci
140462306a36Sopenharmony_ci	/*
140562306a36Sopenharmony_ci	 * Map the clone.
140662306a36Sopenharmony_ci	 */
140762306a36Sopenharmony_ci	tio->old_sector = clone->bi_iter.bi_sector;
140862306a36Sopenharmony_ci
140962306a36Sopenharmony_ci	if (static_branch_unlikely(&swap_bios_enabled) &&
141062306a36Sopenharmony_ci	    unlikely(swap_bios_limit(ti, clone))) {
141162306a36Sopenharmony_ci		int latch = get_swap_bios();
141262306a36Sopenharmony_ci
141362306a36Sopenharmony_ci		if (unlikely(latch != md->swap_bios))
141462306a36Sopenharmony_ci			__set_swap_bios_limit(md, latch);
141562306a36Sopenharmony_ci		down(&md->swap_bios_semaphore);
141662306a36Sopenharmony_ci	}
141762306a36Sopenharmony_ci
141862306a36Sopenharmony_ci	if (static_branch_unlikely(&zoned_enabled)) {
141962306a36Sopenharmony_ci		/*
142062306a36Sopenharmony_ci		 * Check if the IO needs a special mapping due to zone append
142162306a36Sopenharmony_ci		 * emulation on zoned target. In this case, dm_zone_map_bio()
142262306a36Sopenharmony_ci		 * calls the target map operation.
142362306a36Sopenharmony_ci		 */
142462306a36Sopenharmony_ci		if (unlikely(dm_emulate_zone_append(md)))
142562306a36Sopenharmony_ci			r = dm_zone_map_bio(tio);
142662306a36Sopenharmony_ci		else
142762306a36Sopenharmony_ci			r = ti->type->map(ti, clone);
142862306a36Sopenharmony_ci	} else
142962306a36Sopenharmony_ci		r = ti->type->map(ti, clone);
143062306a36Sopenharmony_ci
143162306a36Sopenharmony_ci	switch (r) {
143262306a36Sopenharmony_ci	case DM_MAPIO_SUBMITTED:
143362306a36Sopenharmony_ci		/* target has assumed ownership of this io */
143462306a36Sopenharmony_ci		if (!ti->accounts_remapped_io)
143562306a36Sopenharmony_ci			dm_start_io_acct(io, clone);
143662306a36Sopenharmony_ci		break;
143762306a36Sopenharmony_ci	case DM_MAPIO_REMAPPED:
143862306a36Sopenharmony_ci		dm_submit_bio_remap(clone, NULL);
143962306a36Sopenharmony_ci		break;
144062306a36Sopenharmony_ci	case DM_MAPIO_KILL:
144162306a36Sopenharmony_ci	case DM_MAPIO_REQUEUE:
144262306a36Sopenharmony_ci		if (static_branch_unlikely(&swap_bios_enabled) &&
144362306a36Sopenharmony_ci		    unlikely(swap_bios_limit(ti, clone)))
144462306a36Sopenharmony_ci			up(&md->swap_bios_semaphore);
144562306a36Sopenharmony_ci		free_tio(clone);
144662306a36Sopenharmony_ci		if (r == DM_MAPIO_KILL)
144762306a36Sopenharmony_ci			dm_io_dec_pending(io, BLK_STS_IOERR);
144862306a36Sopenharmony_ci		else
144962306a36Sopenharmony_ci			dm_io_dec_pending(io, BLK_STS_DM_REQUEUE);
145062306a36Sopenharmony_ci		break;
145162306a36Sopenharmony_ci	default:
145262306a36Sopenharmony_ci		DMCRIT("unimplemented target map return value: %d", r);
145362306a36Sopenharmony_ci		BUG();
145462306a36Sopenharmony_ci	}
145562306a36Sopenharmony_ci}
145662306a36Sopenharmony_ci
145762306a36Sopenharmony_cistatic void setup_split_accounting(struct clone_info *ci, unsigned int len)
145862306a36Sopenharmony_ci{
145962306a36Sopenharmony_ci	struct dm_io *io = ci->io;
146062306a36Sopenharmony_ci
146162306a36Sopenharmony_ci	if (ci->sector_count > len) {
146262306a36Sopenharmony_ci		/*
146362306a36Sopenharmony_ci		 * Split needed, save the mapped part for accounting.
146462306a36Sopenharmony_ci		 * NOTE: dm_accept_partial_bio() will update accordingly.
146562306a36Sopenharmony_ci		 */
146662306a36Sopenharmony_ci		dm_io_set_flag(io, DM_IO_WAS_SPLIT);
146762306a36Sopenharmony_ci		io->sectors = len;
146862306a36Sopenharmony_ci		io->sector_offset = bio_sectors(ci->bio);
146962306a36Sopenharmony_ci	}
147062306a36Sopenharmony_ci}
147162306a36Sopenharmony_ci
147262306a36Sopenharmony_cistatic void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
147362306a36Sopenharmony_ci				struct dm_target *ti, unsigned int num_bios,
147462306a36Sopenharmony_ci				unsigned *len)
147562306a36Sopenharmony_ci{
147662306a36Sopenharmony_ci	struct bio *bio;
147762306a36Sopenharmony_ci	int try;
147862306a36Sopenharmony_ci
147962306a36Sopenharmony_ci	for (try = 0; try < 2; try++) {
148062306a36Sopenharmony_ci		int bio_nr;
148162306a36Sopenharmony_ci
148262306a36Sopenharmony_ci		if (try)
148362306a36Sopenharmony_ci			mutex_lock(&ci->io->md->table_devices_lock);
148462306a36Sopenharmony_ci		for (bio_nr = 0; bio_nr < num_bios; bio_nr++) {
148562306a36Sopenharmony_ci			bio = alloc_tio(ci, ti, bio_nr, len,
148662306a36Sopenharmony_ci					try ? GFP_NOIO : GFP_NOWAIT);
148762306a36Sopenharmony_ci			if (!bio)
148862306a36Sopenharmony_ci				break;
148962306a36Sopenharmony_ci
149062306a36Sopenharmony_ci			bio_list_add(blist, bio);
149162306a36Sopenharmony_ci		}
149262306a36Sopenharmony_ci		if (try)
149362306a36Sopenharmony_ci			mutex_unlock(&ci->io->md->table_devices_lock);
149462306a36Sopenharmony_ci		if (bio_nr == num_bios)
149562306a36Sopenharmony_ci			return;
149662306a36Sopenharmony_ci
149762306a36Sopenharmony_ci		while ((bio = bio_list_pop(blist)))
149862306a36Sopenharmony_ci			free_tio(bio);
149962306a36Sopenharmony_ci	}
150062306a36Sopenharmony_ci}
150162306a36Sopenharmony_ci
150262306a36Sopenharmony_cistatic int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
150362306a36Sopenharmony_ci				 unsigned int num_bios, unsigned int *len)
150462306a36Sopenharmony_ci{
150562306a36Sopenharmony_ci	struct bio_list blist = BIO_EMPTY_LIST;
150662306a36Sopenharmony_ci	struct bio *clone;
150762306a36Sopenharmony_ci	unsigned int ret = 0;
150862306a36Sopenharmony_ci
150962306a36Sopenharmony_ci	switch (num_bios) {
151062306a36Sopenharmony_ci	case 0:
151162306a36Sopenharmony_ci		break;
151262306a36Sopenharmony_ci	case 1:
151362306a36Sopenharmony_ci		if (len)
151462306a36Sopenharmony_ci			setup_split_accounting(ci, *len);
151562306a36Sopenharmony_ci		clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
151662306a36Sopenharmony_ci		__map_bio(clone);
151762306a36Sopenharmony_ci		ret = 1;
151862306a36Sopenharmony_ci		break;
151962306a36Sopenharmony_ci	default:
152062306a36Sopenharmony_ci		if (len)
152162306a36Sopenharmony_ci			setup_split_accounting(ci, *len);
152262306a36Sopenharmony_ci		/* dm_accept_partial_bio() is not supported with shared tio->len_ptr */
152362306a36Sopenharmony_ci		alloc_multiple_bios(&blist, ci, ti, num_bios, len);
152462306a36Sopenharmony_ci		while ((clone = bio_list_pop(&blist))) {
152562306a36Sopenharmony_ci			dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO);
152662306a36Sopenharmony_ci			__map_bio(clone);
152762306a36Sopenharmony_ci			ret += 1;
152862306a36Sopenharmony_ci		}
152962306a36Sopenharmony_ci		break;
153062306a36Sopenharmony_ci	}
153162306a36Sopenharmony_ci
153262306a36Sopenharmony_ci	return ret;
153362306a36Sopenharmony_ci}
153462306a36Sopenharmony_ci
153562306a36Sopenharmony_cistatic void __send_empty_flush(struct clone_info *ci)
153662306a36Sopenharmony_ci{
153762306a36Sopenharmony_ci	struct dm_table *t = ci->map;
153862306a36Sopenharmony_ci	struct bio flush_bio;
153962306a36Sopenharmony_ci
154062306a36Sopenharmony_ci	/*
154162306a36Sopenharmony_ci	 * Use an on-stack bio for this, it's safe since we don't
154262306a36Sopenharmony_ci	 * need to reference it after submit. It's just used as
154362306a36Sopenharmony_ci	 * the basis for the clone(s).
154462306a36Sopenharmony_ci	 */
154562306a36Sopenharmony_ci	bio_init(&flush_bio, ci->io->md->disk->part0, NULL, 0,
154662306a36Sopenharmony_ci		 REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC);
154762306a36Sopenharmony_ci
154862306a36Sopenharmony_ci	ci->bio = &flush_bio;
154962306a36Sopenharmony_ci	ci->sector_count = 0;
155062306a36Sopenharmony_ci	ci->io->tio.clone.bi_iter.bi_size = 0;
155162306a36Sopenharmony_ci
155262306a36Sopenharmony_ci	for (unsigned int i = 0; i < t->num_targets; i++) {
155362306a36Sopenharmony_ci		unsigned int bios;
155462306a36Sopenharmony_ci		struct dm_target *ti = dm_table_get_target(t, i);
155562306a36Sopenharmony_ci
155662306a36Sopenharmony_ci		atomic_add(ti->num_flush_bios, &ci->io->io_count);
155762306a36Sopenharmony_ci		bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
155862306a36Sopenharmony_ci		atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
155962306a36Sopenharmony_ci	}
156062306a36Sopenharmony_ci
156162306a36Sopenharmony_ci	/*
156262306a36Sopenharmony_ci	 * alloc_io() takes one extra reference for submission, so the
156362306a36Sopenharmony_ci	 * reference won't reach 0 without the following subtraction
156462306a36Sopenharmony_ci	 */
156562306a36Sopenharmony_ci	atomic_sub(1, &ci->io->io_count);
156662306a36Sopenharmony_ci
156762306a36Sopenharmony_ci	bio_uninit(ci->bio);
156862306a36Sopenharmony_ci}
156962306a36Sopenharmony_ci
157062306a36Sopenharmony_cistatic void __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
157162306a36Sopenharmony_ci					unsigned int num_bios,
157262306a36Sopenharmony_ci					unsigned int max_granularity,
157362306a36Sopenharmony_ci					unsigned int max_sectors)
157462306a36Sopenharmony_ci{
157562306a36Sopenharmony_ci	unsigned int len, bios;
157662306a36Sopenharmony_ci
157762306a36Sopenharmony_ci	len = min_t(sector_t, ci->sector_count,
157862306a36Sopenharmony_ci		    __max_io_len(ti, ci->sector, max_granularity, max_sectors));
157962306a36Sopenharmony_ci
158062306a36Sopenharmony_ci	atomic_add(num_bios, &ci->io->io_count);
158162306a36Sopenharmony_ci	bios = __send_duplicate_bios(ci, ti, num_bios, &len);
158262306a36Sopenharmony_ci	/*
158362306a36Sopenharmony_ci	 * alloc_io() takes one extra reference for submission, so the
158462306a36Sopenharmony_ci	 * reference won't reach 0 without the following (+1) subtraction
158562306a36Sopenharmony_ci	 */
158662306a36Sopenharmony_ci	atomic_sub(num_bios - bios + 1, &ci->io->io_count);
158762306a36Sopenharmony_ci
158862306a36Sopenharmony_ci	ci->sector += len;
158962306a36Sopenharmony_ci	ci->sector_count -= len;
159062306a36Sopenharmony_ci}
159162306a36Sopenharmony_ci
159262306a36Sopenharmony_cistatic bool is_abnormal_io(struct bio *bio)
159362306a36Sopenharmony_ci{
159462306a36Sopenharmony_ci	enum req_op op = bio_op(bio);
159562306a36Sopenharmony_ci
159662306a36Sopenharmony_ci	if (op != REQ_OP_READ && op != REQ_OP_WRITE && op != REQ_OP_FLUSH) {
159762306a36Sopenharmony_ci		switch (op) {
159862306a36Sopenharmony_ci		case REQ_OP_DISCARD:
159962306a36Sopenharmony_ci		case REQ_OP_SECURE_ERASE:
160062306a36Sopenharmony_ci		case REQ_OP_WRITE_ZEROES:
160162306a36Sopenharmony_ci			return true;
160262306a36Sopenharmony_ci		default:
160362306a36Sopenharmony_ci			break;
160462306a36Sopenharmony_ci		}
160562306a36Sopenharmony_ci	}
160662306a36Sopenharmony_ci
160762306a36Sopenharmony_ci	return false;
160862306a36Sopenharmony_ci}
160962306a36Sopenharmony_ci
161062306a36Sopenharmony_cistatic blk_status_t __process_abnormal_io(struct clone_info *ci,
161162306a36Sopenharmony_ci					  struct dm_target *ti)
161262306a36Sopenharmony_ci{
161362306a36Sopenharmony_ci	unsigned int num_bios = 0;
161462306a36Sopenharmony_ci	unsigned int max_granularity = 0;
161562306a36Sopenharmony_ci	unsigned int max_sectors = 0;
161662306a36Sopenharmony_ci	struct queue_limits *limits = dm_get_queue_limits(ti->table->md);
161762306a36Sopenharmony_ci
161862306a36Sopenharmony_ci	switch (bio_op(ci->bio)) {
161962306a36Sopenharmony_ci	case REQ_OP_DISCARD:
162062306a36Sopenharmony_ci		num_bios = ti->num_discard_bios;
162162306a36Sopenharmony_ci		max_sectors = limits->max_discard_sectors;
162262306a36Sopenharmony_ci		if (ti->max_discard_granularity)
162362306a36Sopenharmony_ci			max_granularity = max_sectors;
162462306a36Sopenharmony_ci		break;
162562306a36Sopenharmony_ci	case REQ_OP_SECURE_ERASE:
162662306a36Sopenharmony_ci		num_bios = ti->num_secure_erase_bios;
162762306a36Sopenharmony_ci		max_sectors = limits->max_secure_erase_sectors;
162862306a36Sopenharmony_ci		if (ti->max_secure_erase_granularity)
162962306a36Sopenharmony_ci			max_granularity = max_sectors;
163062306a36Sopenharmony_ci		break;
163162306a36Sopenharmony_ci	case REQ_OP_WRITE_ZEROES:
163262306a36Sopenharmony_ci		num_bios = ti->num_write_zeroes_bios;
163362306a36Sopenharmony_ci		max_sectors = limits->max_write_zeroes_sectors;
163462306a36Sopenharmony_ci		if (ti->max_write_zeroes_granularity)
163562306a36Sopenharmony_ci			max_granularity = max_sectors;
163662306a36Sopenharmony_ci		break;
163762306a36Sopenharmony_ci	default:
163862306a36Sopenharmony_ci		break;
163962306a36Sopenharmony_ci	}
164062306a36Sopenharmony_ci
164162306a36Sopenharmony_ci	/*
164262306a36Sopenharmony_ci	 * Even though the device advertised support for this type of
164362306a36Sopenharmony_ci	 * request, that does not mean every target supports it, and
164462306a36Sopenharmony_ci	 * reconfiguration might also have changed that since the
164562306a36Sopenharmony_ci	 * check was performed.
164662306a36Sopenharmony_ci	 */
164762306a36Sopenharmony_ci	if (unlikely(!num_bios))
164862306a36Sopenharmony_ci		return BLK_STS_NOTSUPP;
164962306a36Sopenharmony_ci
165062306a36Sopenharmony_ci	__send_changing_extent_only(ci, ti, num_bios,
165162306a36Sopenharmony_ci				    max_granularity, max_sectors);
165262306a36Sopenharmony_ci	return BLK_STS_OK;
165362306a36Sopenharmony_ci}
165462306a36Sopenharmony_ci
165562306a36Sopenharmony_ci/*
165662306a36Sopenharmony_ci * Reuse ->bi_private as dm_io list head for storing all dm_io instances
165762306a36Sopenharmony_ci * associated with this bio, and this bio's bi_private needs to be
165862306a36Sopenharmony_ci * stored in dm_io->data before the reuse.
165962306a36Sopenharmony_ci *
166062306a36Sopenharmony_ci * bio->bi_private is owned by fs or upper layer, so block layer won't
166162306a36Sopenharmony_ci * touch it after splitting. Meantime it won't be changed by anyone after
166262306a36Sopenharmony_ci * bio is submitted. So this reuse is safe.
166362306a36Sopenharmony_ci */
166462306a36Sopenharmony_cistatic inline struct dm_io **dm_poll_list_head(struct bio *bio)
166562306a36Sopenharmony_ci{
166662306a36Sopenharmony_ci	return (struct dm_io **)&bio->bi_private;
166762306a36Sopenharmony_ci}
166862306a36Sopenharmony_ci
166962306a36Sopenharmony_cistatic void dm_queue_poll_io(struct bio *bio, struct dm_io *io)
167062306a36Sopenharmony_ci{
167162306a36Sopenharmony_ci	struct dm_io **head = dm_poll_list_head(bio);
167262306a36Sopenharmony_ci
167362306a36Sopenharmony_ci	if (!(bio->bi_opf & REQ_DM_POLL_LIST)) {
167462306a36Sopenharmony_ci		bio->bi_opf |= REQ_DM_POLL_LIST;
167562306a36Sopenharmony_ci		/*
167662306a36Sopenharmony_ci		 * Save .bi_private into dm_io, so that we can reuse
167762306a36Sopenharmony_ci		 * .bi_private as dm_io list head for storing dm_io list
167862306a36Sopenharmony_ci		 */
167962306a36Sopenharmony_ci		io->data = bio->bi_private;
168062306a36Sopenharmony_ci
168162306a36Sopenharmony_ci		/* tell block layer to poll for completion */
168262306a36Sopenharmony_ci		bio->bi_cookie = ~BLK_QC_T_NONE;
168362306a36Sopenharmony_ci
168462306a36Sopenharmony_ci		io->next = NULL;
168562306a36Sopenharmony_ci	} else {
168662306a36Sopenharmony_ci		/*
168762306a36Sopenharmony_ci		 * bio recursed due to split, reuse original poll list,
168862306a36Sopenharmony_ci		 * and save bio->bi_private too.
168962306a36Sopenharmony_ci		 */
169062306a36Sopenharmony_ci		io->data = (*head)->data;
169162306a36Sopenharmony_ci		io->next = *head;
169262306a36Sopenharmony_ci	}
169362306a36Sopenharmony_ci
169462306a36Sopenharmony_ci	*head = io;
169562306a36Sopenharmony_ci}
169662306a36Sopenharmony_ci
169762306a36Sopenharmony_ci/*
169862306a36Sopenharmony_ci * Select the correct strategy for processing a non-flush bio.
169962306a36Sopenharmony_ci */
170062306a36Sopenharmony_cistatic blk_status_t __split_and_process_bio(struct clone_info *ci)
170162306a36Sopenharmony_ci{
170262306a36Sopenharmony_ci	struct bio *clone;
170362306a36Sopenharmony_ci	struct dm_target *ti;
170462306a36Sopenharmony_ci	unsigned int len;
170562306a36Sopenharmony_ci
170662306a36Sopenharmony_ci	ti = dm_table_find_target(ci->map, ci->sector);
170762306a36Sopenharmony_ci	if (unlikely(!ti))
170862306a36Sopenharmony_ci		return BLK_STS_IOERR;
170962306a36Sopenharmony_ci
171062306a36Sopenharmony_ci	if (unlikely((ci->bio->bi_opf & REQ_NOWAIT) != 0) &&
171162306a36Sopenharmony_ci	    unlikely(!dm_target_supports_nowait(ti->type)))
171262306a36Sopenharmony_ci		return BLK_STS_NOTSUPP;
171362306a36Sopenharmony_ci
171462306a36Sopenharmony_ci	if (unlikely(ci->is_abnormal_io))
171562306a36Sopenharmony_ci		return __process_abnormal_io(ci, ti);
171662306a36Sopenharmony_ci
171762306a36Sopenharmony_ci	/*
171862306a36Sopenharmony_ci	 * Only support bio polling for normal IO, and the target io is
171962306a36Sopenharmony_ci	 * exactly inside the dm_io instance (verified in dm_poll_dm_io)
172062306a36Sopenharmony_ci	 */
172162306a36Sopenharmony_ci	ci->submit_as_polled = !!(ci->bio->bi_opf & REQ_POLLED);
172262306a36Sopenharmony_ci
172362306a36Sopenharmony_ci	len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count);
172462306a36Sopenharmony_ci	setup_split_accounting(ci, len);
172562306a36Sopenharmony_ci	clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO);
172662306a36Sopenharmony_ci	__map_bio(clone);
172762306a36Sopenharmony_ci
172862306a36Sopenharmony_ci	ci->sector += len;
172962306a36Sopenharmony_ci	ci->sector_count -= len;
173062306a36Sopenharmony_ci
173162306a36Sopenharmony_ci	return BLK_STS_OK;
173262306a36Sopenharmony_ci}
173362306a36Sopenharmony_ci
173462306a36Sopenharmony_cistatic void init_clone_info(struct clone_info *ci, struct mapped_device *md,
173562306a36Sopenharmony_ci			    struct dm_table *map, struct bio *bio, bool is_abnormal)
173662306a36Sopenharmony_ci{
173762306a36Sopenharmony_ci	ci->map = map;
173862306a36Sopenharmony_ci	ci->io = alloc_io(md, bio);
173962306a36Sopenharmony_ci	ci->bio = bio;
174062306a36Sopenharmony_ci	ci->is_abnormal_io = is_abnormal;
174162306a36Sopenharmony_ci	ci->submit_as_polled = false;
174262306a36Sopenharmony_ci	ci->sector = bio->bi_iter.bi_sector;
174362306a36Sopenharmony_ci	ci->sector_count = bio_sectors(bio);
174462306a36Sopenharmony_ci
174562306a36Sopenharmony_ci	/* Shouldn't happen but sector_count was being set to 0 so... */
174662306a36Sopenharmony_ci	if (static_branch_unlikely(&zoned_enabled) &&
174762306a36Sopenharmony_ci	    WARN_ON_ONCE(op_is_zone_mgmt(bio_op(bio)) && ci->sector_count))
174862306a36Sopenharmony_ci		ci->sector_count = 0;
174962306a36Sopenharmony_ci}
175062306a36Sopenharmony_ci
175162306a36Sopenharmony_ci/*
175262306a36Sopenharmony_ci * Entry point to split a bio into clones and submit them to the targets.
175362306a36Sopenharmony_ci */
175462306a36Sopenharmony_cistatic void dm_split_and_process_bio(struct mapped_device *md,
175562306a36Sopenharmony_ci				     struct dm_table *map, struct bio *bio)
175662306a36Sopenharmony_ci{
175762306a36Sopenharmony_ci	struct clone_info ci;
175862306a36Sopenharmony_ci	struct dm_io *io;
175962306a36Sopenharmony_ci	blk_status_t error = BLK_STS_OK;
176062306a36Sopenharmony_ci	bool is_abnormal;
176162306a36Sopenharmony_ci
176262306a36Sopenharmony_ci	is_abnormal = is_abnormal_io(bio);
176362306a36Sopenharmony_ci	if (unlikely(is_abnormal)) {
176462306a36Sopenharmony_ci		/*
176562306a36Sopenharmony_ci		 * Use bio_split_to_limits() for abnormal IO (e.g. discard, etc)
176662306a36Sopenharmony_ci		 * otherwise associated queue_limits won't be imposed.
176762306a36Sopenharmony_ci		 */
176862306a36Sopenharmony_ci		bio = bio_split_to_limits(bio);
176962306a36Sopenharmony_ci		if (!bio)
177062306a36Sopenharmony_ci			return;
177162306a36Sopenharmony_ci	}
177262306a36Sopenharmony_ci
177362306a36Sopenharmony_ci	init_clone_info(&ci, md, map, bio, is_abnormal);
177462306a36Sopenharmony_ci	io = ci.io;
177562306a36Sopenharmony_ci
177662306a36Sopenharmony_ci	if (bio->bi_opf & REQ_PREFLUSH) {
177762306a36Sopenharmony_ci		__send_empty_flush(&ci);
177862306a36Sopenharmony_ci		/* dm_io_complete submits any data associated with flush */
177962306a36Sopenharmony_ci		goto out;
178062306a36Sopenharmony_ci	}
178162306a36Sopenharmony_ci
178262306a36Sopenharmony_ci	error = __split_and_process_bio(&ci);
178362306a36Sopenharmony_ci	if (error || !ci.sector_count)
178462306a36Sopenharmony_ci		goto out;
178562306a36Sopenharmony_ci	/*
178662306a36Sopenharmony_ci	 * Remainder must be passed to submit_bio_noacct() so it gets handled
178762306a36Sopenharmony_ci	 * *after* bios already submitted have been completely processed.
178862306a36Sopenharmony_ci	 */
178962306a36Sopenharmony_ci	bio_trim(bio, io->sectors, ci.sector_count);
179062306a36Sopenharmony_ci	trace_block_split(bio, bio->bi_iter.bi_sector);
179162306a36Sopenharmony_ci	bio_inc_remaining(bio);
179262306a36Sopenharmony_ci	submit_bio_noacct(bio);
179362306a36Sopenharmony_ciout:
179462306a36Sopenharmony_ci	/*
179562306a36Sopenharmony_ci	 * Drop the extra reference count for non-POLLED bio, and hold one
179662306a36Sopenharmony_ci	 * reference for POLLED bio, which will be released in dm_poll_bio
179762306a36Sopenharmony_ci	 *
179862306a36Sopenharmony_ci	 * Add every dm_io instance into the dm_io list head which is stored
179962306a36Sopenharmony_ci	 * in bio->bi_private, so that dm_poll_bio can poll them all.
180062306a36Sopenharmony_ci	 */
180162306a36Sopenharmony_ci	if (error || !ci.submit_as_polled) {
180262306a36Sopenharmony_ci		/*
180362306a36Sopenharmony_ci		 * In case of submission failure, the extra reference for
180462306a36Sopenharmony_ci		 * submitting io isn't consumed yet
180562306a36Sopenharmony_ci		 */
180662306a36Sopenharmony_ci		if (error)
180762306a36Sopenharmony_ci			atomic_dec(&io->io_count);
180862306a36Sopenharmony_ci		dm_io_dec_pending(io, error);
180962306a36Sopenharmony_ci	} else
181062306a36Sopenharmony_ci		dm_queue_poll_io(bio, io);
181162306a36Sopenharmony_ci}
181262306a36Sopenharmony_ci
181362306a36Sopenharmony_cistatic void dm_submit_bio(struct bio *bio)
181462306a36Sopenharmony_ci{
181562306a36Sopenharmony_ci	struct mapped_device *md = bio->bi_bdev->bd_disk->private_data;
181662306a36Sopenharmony_ci	int srcu_idx;
181762306a36Sopenharmony_ci	struct dm_table *map;
181862306a36Sopenharmony_ci
181962306a36Sopenharmony_ci	map = dm_get_live_table(md, &srcu_idx);
182062306a36Sopenharmony_ci
182162306a36Sopenharmony_ci	/* If suspended, or map not yet available, queue this IO for later */
182262306a36Sopenharmony_ci	if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) ||
182362306a36Sopenharmony_ci	    unlikely(!map)) {
182462306a36Sopenharmony_ci		if (bio->bi_opf & REQ_NOWAIT)
182562306a36Sopenharmony_ci			bio_wouldblock_error(bio);
182662306a36Sopenharmony_ci		else if (bio->bi_opf & REQ_RAHEAD)
182762306a36Sopenharmony_ci			bio_io_error(bio);
182862306a36Sopenharmony_ci		else
182962306a36Sopenharmony_ci			queue_io(md, bio);
183062306a36Sopenharmony_ci		goto out;
183162306a36Sopenharmony_ci	}
183262306a36Sopenharmony_ci
183362306a36Sopenharmony_ci	dm_split_and_process_bio(md, map, bio);
183462306a36Sopenharmony_ciout:
183562306a36Sopenharmony_ci	dm_put_live_table(md, srcu_idx);
183662306a36Sopenharmony_ci}
183762306a36Sopenharmony_ci
183862306a36Sopenharmony_cistatic bool dm_poll_dm_io(struct dm_io *io, struct io_comp_batch *iob,
183962306a36Sopenharmony_ci			  unsigned int flags)
184062306a36Sopenharmony_ci{
184162306a36Sopenharmony_ci	WARN_ON_ONCE(!dm_tio_is_normal(&io->tio));
184262306a36Sopenharmony_ci
184362306a36Sopenharmony_ci	/* don't poll if the mapped io is done */
184462306a36Sopenharmony_ci	if (atomic_read(&io->io_count) > 1)
184562306a36Sopenharmony_ci		bio_poll(&io->tio.clone, iob, flags);
184662306a36Sopenharmony_ci
184762306a36Sopenharmony_ci	/* bio_poll holds the last reference */
184862306a36Sopenharmony_ci	return atomic_read(&io->io_count) == 1;
184962306a36Sopenharmony_ci}
185062306a36Sopenharmony_ci
185162306a36Sopenharmony_cistatic int dm_poll_bio(struct bio *bio, struct io_comp_batch *iob,
185262306a36Sopenharmony_ci		       unsigned int flags)
185362306a36Sopenharmony_ci{
185462306a36Sopenharmony_ci	struct dm_io **head = dm_poll_list_head(bio);
185562306a36Sopenharmony_ci	struct dm_io *list = *head;
185662306a36Sopenharmony_ci	struct dm_io *tmp = NULL;
185762306a36Sopenharmony_ci	struct dm_io *curr, *next;
185862306a36Sopenharmony_ci
185962306a36Sopenharmony_ci	/* Only poll normal bio which was marked as REQ_DM_POLL_LIST */
186062306a36Sopenharmony_ci	if (!(bio->bi_opf & REQ_DM_POLL_LIST))
186162306a36Sopenharmony_ci		return 0;
186262306a36Sopenharmony_ci
186362306a36Sopenharmony_ci	WARN_ON_ONCE(!list);
186462306a36Sopenharmony_ci
186562306a36Sopenharmony_ci	/*
186662306a36Sopenharmony_ci	 * Restore .bi_private before possibly completing dm_io.
186762306a36Sopenharmony_ci	 *
186862306a36Sopenharmony_ci	 * bio_poll() is only possible once @bio has been completely
186962306a36Sopenharmony_ci	 * submitted via submit_bio_noacct()'s depth-first submission.
187062306a36Sopenharmony_ci	 * So there is no dm_queue_poll_io() race associated with
187162306a36Sopenharmony_ci	 * clearing REQ_DM_POLL_LIST here.
187262306a36Sopenharmony_ci	 */
187362306a36Sopenharmony_ci	bio->bi_opf &= ~REQ_DM_POLL_LIST;
187462306a36Sopenharmony_ci	bio->bi_private = list->data;
187562306a36Sopenharmony_ci
187662306a36Sopenharmony_ci	for (curr = list, next = curr->next; curr; curr = next, next =
187762306a36Sopenharmony_ci			curr ? curr->next : NULL) {
187862306a36Sopenharmony_ci		if (dm_poll_dm_io(curr, iob, flags)) {
187962306a36Sopenharmony_ci			/*
188062306a36Sopenharmony_ci			 * clone_endio() has already occurred, so no
188162306a36Sopenharmony_ci			 * error handling is needed here.
188262306a36Sopenharmony_ci			 */
188362306a36Sopenharmony_ci			__dm_io_dec_pending(curr);
188462306a36Sopenharmony_ci		} else {
188562306a36Sopenharmony_ci			curr->next = tmp;
188662306a36Sopenharmony_ci			tmp = curr;
188762306a36Sopenharmony_ci		}
188862306a36Sopenharmony_ci	}
188962306a36Sopenharmony_ci
189062306a36Sopenharmony_ci	/* Not done? */
189162306a36Sopenharmony_ci	if (tmp) {
189262306a36Sopenharmony_ci		bio->bi_opf |= REQ_DM_POLL_LIST;
189362306a36Sopenharmony_ci		/* Reset bio->bi_private to dm_io list head */
189462306a36Sopenharmony_ci		*head = tmp;
189562306a36Sopenharmony_ci		return 0;
189662306a36Sopenharmony_ci	}
189762306a36Sopenharmony_ci	return 1;
189862306a36Sopenharmony_ci}
189962306a36Sopenharmony_ci
190062306a36Sopenharmony_ci/*
190162306a36Sopenharmony_ci *---------------------------------------------------------------
190262306a36Sopenharmony_ci * An IDR is used to keep track of allocated minor numbers.
190362306a36Sopenharmony_ci *---------------------------------------------------------------
190462306a36Sopenharmony_ci */
/* Return a minor number to the IDR pool; _minor_lock serialises updates. */
static void free_minor(int minor)
{
	spin_lock(&_minor_lock);
	idr_remove(&_minor_idr, minor);
	spin_unlock(&_minor_lock);
}
191162306a36Sopenharmony_ci
191262306a36Sopenharmony_ci/*
191362306a36Sopenharmony_ci * See if the device with a specific minor # is free.
191462306a36Sopenharmony_ci */
191562306a36Sopenharmony_cistatic int specific_minor(int minor)
191662306a36Sopenharmony_ci{
191762306a36Sopenharmony_ci	int r;
191862306a36Sopenharmony_ci
191962306a36Sopenharmony_ci	if (minor >= (1 << MINORBITS))
192062306a36Sopenharmony_ci		return -EINVAL;
192162306a36Sopenharmony_ci
192262306a36Sopenharmony_ci	idr_preload(GFP_KERNEL);
192362306a36Sopenharmony_ci	spin_lock(&_minor_lock);
192462306a36Sopenharmony_ci
192562306a36Sopenharmony_ci	r = idr_alloc(&_minor_idr, MINOR_ALLOCED, minor, minor + 1, GFP_NOWAIT);
192662306a36Sopenharmony_ci
192762306a36Sopenharmony_ci	spin_unlock(&_minor_lock);
192862306a36Sopenharmony_ci	idr_preload_end();
192962306a36Sopenharmony_ci	if (r < 0)
193062306a36Sopenharmony_ci		return r == -ENOSPC ? -EBUSY : r;
193162306a36Sopenharmony_ci	return 0;
193262306a36Sopenharmony_ci}
193362306a36Sopenharmony_ci
193462306a36Sopenharmony_cistatic int next_free_minor(int *minor)
193562306a36Sopenharmony_ci{
193662306a36Sopenharmony_ci	int r;
193762306a36Sopenharmony_ci
193862306a36Sopenharmony_ci	idr_preload(GFP_KERNEL);
193962306a36Sopenharmony_ci	spin_lock(&_minor_lock);
194062306a36Sopenharmony_ci
194162306a36Sopenharmony_ci	r = idr_alloc(&_minor_idr, MINOR_ALLOCED, 0, 1 << MINORBITS, GFP_NOWAIT);
194262306a36Sopenharmony_ci
194362306a36Sopenharmony_ci	spin_unlock(&_minor_lock);
194462306a36Sopenharmony_ci	idr_preload_end();
194562306a36Sopenharmony_ci	if (r < 0)
194662306a36Sopenharmony_ci		return r;
194762306a36Sopenharmony_ci	*minor = r;
194862306a36Sopenharmony_ci	return 0;
194962306a36Sopenharmony_ci}
195062306a36Sopenharmony_ci
195162306a36Sopenharmony_cistatic const struct block_device_operations dm_blk_dops;
195262306a36Sopenharmony_cistatic const struct block_device_operations dm_rq_blk_dops;
195362306a36Sopenharmony_cistatic const struct dax_operations dm_dax_ops;
195462306a36Sopenharmony_ci
195562306a36Sopenharmony_cistatic void dm_wq_work(struct work_struct *work);
195662306a36Sopenharmony_ci
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
/* Free the inline-encryption profile attached to @q, if any. */
static void dm_queue_destroy_crypto_profile(struct request_queue *q)
{
	dm_destroy_crypto_profile(q->crypto_profile);
}

#else /* CONFIG_BLK_INLINE_ENCRYPTION */

/* No-op stub when inline encryption support is compiled out. */
static inline void dm_queue_destroy_crypto_profile(struct request_queue *q)
{
}
#endif /* !CONFIG_BLK_INLINE_ENCRYPTION */
196962306a36Sopenharmony_ci
/*
 * Undo what alloc_dev() set up.  Also called from alloc_dev()'s error
 * path (the "bad" label), so it must tolerate a partially initialised md.
 */
static void cleanup_mapped_device(struct mapped_device *md)
{
	if (md->wq)
		destroy_workqueue(md->wq);
	dm_free_md_mempools(md->mempools);

	/* Detach and drop DAX state before the disk it is bound to goes away. */
	if (md->dax_dev) {
		dax_remove_host(md->disk);
		kill_dax(md->dax_dev);
		put_dax(md->dax_dev);
		md->dax_dev = NULL;
	}

	dm_cleanup_zoned_dev(md);
	if (md->disk) {
		/* Sever the disk -> md back-pointer under _minor_lock. */
		spin_lock(&_minor_lock);
		md->disk->private_data = NULL;
		spin_unlock(&_minor_lock);
		/*
		 * The disk was only added (and sysfs/holders registered) in
		 * dm_setup_md_queue() once a table type was set, so only
		 * unwind those steps for a typed device.
		 */
		if (dm_get_md_type(md) != DM_TYPE_NONE) {
			struct table_device *td;

			dm_sysfs_exit(md);
			list_for_each_entry(td, &md->table_devices, list) {
				bd_unlink_disk_holder(td->dm_dev.bdev,
						      md->disk);
			}

			/*
			 * Hold lock to make sure del_gendisk() won't concurrent
			 * with open/close_table_device().
			 */
			mutex_lock(&md->table_devices_lock);
			del_gendisk(md->disk);
			mutex_unlock(&md->table_devices_lock);
		}
		dm_queue_destroy_crypto_profile(md->queue);
		put_disk(md->disk);
	}

	if (md->pending_io) {
		free_percpu(md->pending_io);
		md->pending_io = NULL;
	}

	cleanup_srcu_struct(&md->io_barrier);

	mutex_destroy(&md->suspend_lock);
	mutex_destroy(&md->type_lock);
	mutex_destroy(&md->table_devices_lock);
	mutex_destroy(&md->swap_bios_lock);

	dm_mq_cleanup_mapped_device(md);
}
202362306a36Sopenharmony_ci
202462306a36Sopenharmony_ci/*
202562306a36Sopenharmony_ci * Allocate and initialise a blank device with a given minor.
202662306a36Sopenharmony_ci */
static struct mapped_device *alloc_dev(int minor)
{
	int r, numa_node_id = dm_get_numa_node();
	struct mapped_device *md;
	void *old_md;

	md = kvzalloc_node(sizeof(*md), GFP_KERNEL, numa_node_id);
	if (!md) {
		DMERR("unable to allocate device, out of memory.");
		return NULL;
	}

	/* Pin the module for the device's lifetime; dropped in free_dev(). */
	if (!try_module_get(THIS_MODULE))
		goto bad_module_get;

	/* get a minor number for the dev */
	if (minor == DM_ANY_MINOR)
		r = next_free_minor(&minor);
	else
		r = specific_minor(minor);
	if (r < 0)
		goto bad_minor;

	r = init_srcu_struct(&md->io_barrier);
	if (r < 0)
		goto bad_io_barrier;

	md->numa_node_id = numa_node_id;
	md->init_tio_pdu = false;
	md->type = DM_TYPE_NONE;
	mutex_init(&md->suspend_lock);
	mutex_init(&md->type_lock);
	mutex_init(&md->table_devices_lock);
	spin_lock_init(&md->deferred_lock);
	/* One holder: the creator.  Dropped via dm_put()/__dm_destroy(). */
	atomic_set(&md->holders, 1);
	atomic_set(&md->open_count, 0);
	atomic_set(&md->event_nr, 0);
	atomic_set(&md->uevent_seq, 0);
	INIT_LIST_HEAD(&md->uevent_list);
	INIT_LIST_HEAD(&md->table_devices);
	spin_lock_init(&md->uevent_lock);

	/*
	 * default to bio-based until DM table is loaded and md->type
	 * established. If request-based table is loaded: blk-mq will
	 * override accordingly.
	 */
	md->disk = blk_alloc_disk(md->numa_node_id);
	if (!md->disk)
		goto bad;
	md->queue = md->disk->queue;

	init_waitqueue_head(&md->wait);
	INIT_WORK(&md->work, dm_wq_work);
	INIT_WORK(&md->requeue_work, dm_wq_requeue_work);
	init_waitqueue_head(&md->eventq);
	init_completion(&md->kobj_holder.completion);

	md->requeue_list = NULL;
	md->swap_bios = get_swap_bios();
	sema_init(&md->swap_bios_semaphore, md->swap_bios);
	mutex_init(&md->swap_bios_lock);

	md->disk->major = _major;
	md->disk->first_minor = minor;
	md->disk->minors = 1;
	md->disk->flags |= GENHD_FL_NO_PART;
	md->disk->fops = &dm_blk_dops;
	md->disk->private_data = md;
	sprintf(md->disk->disk_name, "dm-%d", minor);

	if (IS_ENABLED(CONFIG_FS_DAX)) {
		md->dax_dev = alloc_dax(md, &dm_dax_ops);
		if (IS_ERR(md->dax_dev)) {
			/* NULL it so cleanup_mapped_device() skips DAX teardown. */
			md->dax_dev = NULL;
			goto bad;
		}
		set_dax_nocache(md->dax_dev);
		set_dax_nomc(md->dax_dev);
		if (dax_add_host(md->dax_dev, md->disk))
			goto bad;
	}

	format_dev_t(md->name, MKDEV(_major, minor));

	md->wq = alloc_workqueue("kdmflush/%s", WQ_MEM_RECLAIM, 0, md->name);
	if (!md->wq)
		goto bad;

	md->pending_io = alloc_percpu(unsigned long);
	if (!md->pending_io)
		goto bad;

	r = dm_stats_init(&md->stats);
	if (r < 0)
		goto bad;

	/* Populate the mapping, nobody knows we exist yet */
	spin_lock(&_minor_lock);
	old_md = idr_replace(&_minor_idr, md, minor);
	spin_unlock(&_minor_lock);

	/* The slot must still hold the placeholder set by *_minor(). */
	BUG_ON(old_md != MINOR_ALLOCED);

	return md;

	/* Error labels unwind in reverse order of setup. */
bad:
	cleanup_mapped_device(md);
bad_io_barrier:
	free_minor(minor);
bad_minor:
	module_put(THIS_MODULE);
bad_module_get:
	kvfree(md);
	return NULL;
}
214362306a36Sopenharmony_ci
214462306a36Sopenharmony_cistatic void unlock_fs(struct mapped_device *md);
214562306a36Sopenharmony_ci
/* Final teardown of a mapped_device once all holders are gone. */
static void free_dev(struct mapped_device *md)
{
	/* Capture the minor before cleanup_mapped_device() puts the disk. */
	int minor = MINOR(disk_devt(md->disk));

	unlock_fs(md);

	cleanup_mapped_device(md);

	WARN_ON_ONCE(!list_empty(&md->table_devices));
	dm_stats_cleanup(&md->stats);
	free_minor(minor);

	/* Drop the module reference taken in alloc_dev(). */
	module_put(THIS_MODULE);
	kvfree(md);
}
216162306a36Sopenharmony_ci
216262306a36Sopenharmony_ci/*
216362306a36Sopenharmony_ci * Bind a table to the device.
216462306a36Sopenharmony_ci */
216562306a36Sopenharmony_cistatic void event_callback(void *context)
216662306a36Sopenharmony_ci{
216762306a36Sopenharmony_ci	unsigned long flags;
216862306a36Sopenharmony_ci	LIST_HEAD(uevents);
216962306a36Sopenharmony_ci	struct mapped_device *md = context;
217062306a36Sopenharmony_ci
217162306a36Sopenharmony_ci	spin_lock_irqsave(&md->uevent_lock, flags);
217262306a36Sopenharmony_ci	list_splice_init(&md->uevent_list, &uevents);
217362306a36Sopenharmony_ci	spin_unlock_irqrestore(&md->uevent_lock, flags);
217462306a36Sopenharmony_ci
217562306a36Sopenharmony_ci	dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
217662306a36Sopenharmony_ci
217762306a36Sopenharmony_ci	atomic_inc(&md->event_nr);
217862306a36Sopenharmony_ci	wake_up(&md->eventq);
217962306a36Sopenharmony_ci	dm_issue_global_event();
218062306a36Sopenharmony_ci}
218162306a36Sopenharmony_ci
218262306a36Sopenharmony_ci/*
218362306a36Sopenharmony_ci * Returns old map, which caller must destroy.
218462306a36Sopenharmony_ci */
static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
			       struct queue_limits *limits)
{
	struct dm_table *old_map;
	sector_t size;
	int ret;

	lockdep_assert_held(&md->suspend_lock);

	size = dm_table_get_size(t);

	/*
	 * Wipe any geometry if the size of the table changed.
	 */
	if (size != dm_get_size(md))
		memset(&md->geometry, 0, sizeof(md->geometry));

	set_capacity(md->disk, size);

	/* Route table events (e.g. for uevents) to this md. */
	dm_table_event_callback(t, event_callback, md);

	if (dm_table_request_based(t)) {
		/*
		 * Leverage the fact that request-based DM targets are
		 * immutable singletons - used to optimize dm_mq_queue_rq.
		 */
		md->immutable_target = dm_table_get_immutable_target(t);

		/*
		 * There is no need to reload with request-based dm because the
		 * size of front_pad doesn't change.
		 *
		 * Note for future: If you are to reload bioset, prep-ed
		 * requests in the queue may refer to bio from the old bioset,
		 * so you must walk through the queue to unprep.
		 */
		if (!md->mempools) {
			/* Take ownership of the table's mempools. */
			md->mempools = t->mempools;
			t->mempools = NULL;
		}
	} else {
		/*
		 * The md may already have mempools that need changing.
		 * If so, reload bioset because front_pad may have changed
		 * because a different table was loaded.
		 */
		dm_free_md_mempools(md->mempools);
		md->mempools = t->mempools;
		t->mempools = NULL;
	}

	ret = dm_table_set_restrictions(t, md->queue, limits);
	if (ret) {
		/* Caller distinguishes failure via IS_ERR() on the return. */
		old_map = ERR_PTR(ret);
		goto out;
	}

	/* Publish the new table; readers access md->map under RCU/SRCU. */
	old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
	rcu_assign_pointer(md->map, (void *)t);
	md->immutable_target_type = dm_table_get_immutable_target_type(t);

	if (old_map)
		dm_sync_table(md);
out:
	return old_map;
}
225162306a36Sopenharmony_ci
225262306a36Sopenharmony_ci/*
225362306a36Sopenharmony_ci * Returns unbound table for the caller to free.
225462306a36Sopenharmony_ci */
static struct dm_table *__unbind(struct mapped_device *md)
{
	struct dm_table *map = rcu_dereference_protected(md->map, 1);

	if (!map)
		return NULL;

	/* Stop further table events before tearing down the mapping. */
	dm_table_event_callback(map, NULL, NULL);
	RCU_INIT_POINTER(md->map, NULL);
	/* dm_sync_table(): presumably lets in-flight readers of the old map drain. */
	dm_sync_table(md);

	return map;
}
226862306a36Sopenharmony_ci
226962306a36Sopenharmony_ci/*
227062306a36Sopenharmony_ci * Constructor for a new device.
227162306a36Sopenharmony_ci */
227262306a36Sopenharmony_ciint dm_create(int minor, struct mapped_device **result)
227362306a36Sopenharmony_ci{
227462306a36Sopenharmony_ci	struct mapped_device *md;
227562306a36Sopenharmony_ci
227662306a36Sopenharmony_ci	md = alloc_dev(minor);
227762306a36Sopenharmony_ci	if (!md)
227862306a36Sopenharmony_ci		return -ENXIO;
227962306a36Sopenharmony_ci
228062306a36Sopenharmony_ci	dm_ima_reset_data(md);
228162306a36Sopenharmony_ci
228262306a36Sopenharmony_ci	*result = md;
228362306a36Sopenharmony_ci	return 0;
228462306a36Sopenharmony_ci}
228562306a36Sopenharmony_ci
228662306a36Sopenharmony_ci/*
228762306a36Sopenharmony_ci * Functions to manage md->type.
228862306a36Sopenharmony_ci * All are required to hold md->type_lock.
228962306a36Sopenharmony_ci */
/* Acquire md->type_lock, which guards md->type. */
void dm_lock_md_type(struct mapped_device *md)
{
	mutex_lock(&md->type_lock);
}
229462306a36Sopenharmony_ci
/* Release md->type_lock. */
void dm_unlock_md_type(struct mapped_device *md)
{
	mutex_unlock(&md->type_lock);
}
229962306a36Sopenharmony_ci
/* Set the device's queue type; caller must hold md->type_lock. */
void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type)
{
	BUG_ON(!mutex_is_locked(&md->type_lock));
	md->type = type;
}
230562306a36Sopenharmony_ci
/* Read the device's queue type (DM_TYPE_NONE until a table is loaded). */
enum dm_queue_mode dm_get_md_type(struct mapped_device *md)
{
	return md->type;
}
231062306a36Sopenharmony_ci
/* Return the immutable target type cached by __bind(), if any. */
struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
{
	return md->immutable_target_type;
}
231562306a36Sopenharmony_ci
231662306a36Sopenharmony_ci/*
231762306a36Sopenharmony_ci * Setup the DM device's queue based on md's type
231862306a36Sopenharmony_ci */
int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
{
	enum dm_queue_mode type = dm_table_get_type(t);
	struct queue_limits limits;
	struct table_device *td;
	int r;

	switch (type) {
	case DM_TYPE_REQUEST_BASED:
		/* Request-based devices use the blk-mq fops and queue. */
		md->disk->fops = &dm_rq_blk_dops;
		r = dm_mq_init_request_queue(md, t);
		if (r) {
			DMERR("Cannot initialize queue for request-based dm mapped device");
			return r;
		}
		break;
	case DM_TYPE_BIO_BASED:
	case DM_TYPE_DAX_BIO_BASED:
		/* Enable I/O accounting (QUEUE_FLAG_IO_STAT) for bio-based. */
		blk_queue_flag_set(QUEUE_FLAG_IO_STAT, md->queue);
		break;
	case DM_TYPE_NONE:
		/* The table type must have been decided before queue setup. */
		WARN_ON_ONCE(true);
		break;
	}

	r = dm_calculate_queue_limits(t, &limits);
	if (r) {
		DMERR("Cannot calculate initial queue limits");
		return r;
	}
	r = dm_table_set_restrictions(t, md->queue, &limits);
	if (r)
		return r;

	/*
	 * Hold lock to make sure add_disk() and del_gendisk() won't concurrent
	 * with open_table_device() and close_table_device().
	 */
	mutex_lock(&md->table_devices_lock);
	r = add_disk(md->disk);
	mutex_unlock(&md->table_devices_lock);
	if (r)
		return r;

	/*
	 * Register the holder relationship for devices added before the disk
	 * was live.
	 */
	list_for_each_entry(td, &md->table_devices, list) {
		r = bd_link_disk_holder(td->dm_dev.bdev, md->disk);
		if (r)
			goto out_undo_holders;
	}

	r = dm_sysfs_init(md);
	if (r)
		goto out_undo_holders;

	/* Only now commit the type: cleanup_mapped_device() keys off it. */
	md->type = type;
	return 0;

out_undo_holders:
	/* Unlink only the holders registered above, in reverse order. */
	list_for_each_entry_continue_reverse(td, &md->table_devices, list)
		bd_unlink_disk_holder(td->dm_dev.bdev, md->disk);
	mutex_lock(&md->table_devices_lock);
	del_gendisk(md->disk);
	mutex_unlock(&md->table_devices_lock);
	return r;
}
238862306a36Sopenharmony_ci
/*
 * Look up an active mapped device by dev_t and take a reference on it.
 * Returns NULL if the minor is unknown, still only reserved
 * (MINOR_ALLOCED), being freed or being deleted.
 */
struct mapped_device *dm_get_md(dev_t dev)
{
	struct mapped_device *md;
	unsigned int minor = MINOR(dev);

	if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
		return NULL;

	spin_lock(&_minor_lock);

	md = idr_find(&_minor_idr, minor);
	if (!md || md == MINOR_ALLOCED || (MINOR(disk_devt(dm_disk(md))) != minor) ||
	    test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) {
		md = NULL;
		goto out;
	}
	/* Take the reference while _minor_lock still excludes DMF_FREEING. */
	dm_get(md);
out:
	spin_unlock(&_minor_lock);

	return md;
}
EXPORT_SYMBOL_GPL(dm_get_md);
241262306a36Sopenharmony_ci
/* Return the opaque interface pointer stored via dm_set_mdptr(). */
void *dm_get_mdptr(struct mapped_device *md)
{
	return md->interface_ptr;
}
241762306a36Sopenharmony_ci
/* Attach an opaque interface pointer to the device. */
void dm_set_mdptr(struct mapped_device *md, void *ptr)
{
	md->interface_ptr = ptr;
}
242262306a36Sopenharmony_ci
/* Take a reference; must never be called once DMF_FREEING is set. */
void dm_get(struct mapped_device *md)
{
	atomic_inc(&md->holders);
	BUG_ON(test_bit(DMF_FREEING, &md->flags));
}
242862306a36Sopenharmony_ci
242962306a36Sopenharmony_ciint dm_hold(struct mapped_device *md)
243062306a36Sopenharmony_ci{
243162306a36Sopenharmony_ci	spin_lock(&_minor_lock);
243262306a36Sopenharmony_ci	if (test_bit(DMF_FREEING, &md->flags)) {
243362306a36Sopenharmony_ci		spin_unlock(&_minor_lock);
243462306a36Sopenharmony_ci		return -EBUSY;
243562306a36Sopenharmony_ci	}
243662306a36Sopenharmony_ci	dm_get(md);
243762306a36Sopenharmony_ci	spin_unlock(&_minor_lock);
243862306a36Sopenharmony_ci	return 0;
243962306a36Sopenharmony_ci}
244062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_hold);
244162306a36Sopenharmony_ci
/* Return the device name formatted in alloc_dev() ("major:minor"). */
const char *dm_device_name(struct mapped_device *md)
{
	return md->name;
}
EXPORT_SYMBOL_GPL(dm_device_name);
244762306a36Sopenharmony_ci
/*
 * Tear down a mapped_device.  @wait selects whether to block until all
 * holders drop their references (dm_destroy) or merely warn if the
 * device is still in use (dm_destroy_immediate).
 */
static void __dm_destroy(struct mapped_device *md, bool wait)
{
	struct dm_table *map;
	int srcu_idx;

	might_sleep();

	/*
	 * Put the placeholder back in the IDR and set DMF_FREEING under
	 * _minor_lock so dm_get_md()/dm_hold() can no longer find or
	 * reference this md.
	 */
	spin_lock(&_minor_lock);
	idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
	set_bit(DMF_FREEING, &md->flags);
	spin_unlock(&_minor_lock);

	blk_mark_disk_dead(md->disk);

	/*
	 * Take suspend_lock so that presuspend and postsuspend methods
	 * do not race with internal suspend.
	 */
	mutex_lock(&md->suspend_lock);
	map = dm_get_live_table(md, &srcu_idx);
	if (!dm_suspended_md(md)) {
		dm_table_presuspend_targets(map);
		set_bit(DMF_SUSPENDED, &md->flags);
		set_bit(DMF_POST_SUSPENDING, &md->flags);
		dm_table_postsuspend_targets(map);
	}
	/* dm_put_live_table must be before fsleep, otherwise deadlock is possible */
	dm_put_live_table(md, srcu_idx);
	mutex_unlock(&md->suspend_lock);

	/*
	 * Rare, but there may be I/O requests still going to complete,
	 * for example.  Wait for all references to disappear.
	 * No one should increment the reference count of the mapped_device,
	 * after the mapped_device state becomes DMF_FREEING.
	 */
	if (wait)
		while (atomic_read(&md->holders))
			fsleep(1000);
	else if (atomic_read(&md->holders))
		DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
		       dm_device_name(md), atomic_read(&md->holders));

	dm_table_destroy(__unbind(md));
	free_dev(md);
}
249462306a36Sopenharmony_ci
/* Destroy the device, waiting for all holders to drop their references. */
void dm_destroy(struct mapped_device *md)
{
	__dm_destroy(md, true);
}
249962306a36Sopenharmony_ci
/* Destroy the device without waiting; warns if holders remain. */
void dm_destroy_immediate(struct mapped_device *md)
{
	__dm_destroy(md, false);
}
250462306a36Sopenharmony_ci
/* Drop a reference taken by dm_get()/dm_hold(); __dm_destroy() waits for zero. */
void dm_put(struct mapped_device *md)
{
	atomic_dec(&md->holders);
}
EXPORT_SYMBOL_GPL(dm_put);
251062306a36Sopenharmony_ci
251162306a36Sopenharmony_cistatic bool dm_in_flight_bios(struct mapped_device *md)
251262306a36Sopenharmony_ci{
251362306a36Sopenharmony_ci	int cpu;
251462306a36Sopenharmony_ci	unsigned long sum = 0;
251562306a36Sopenharmony_ci
251662306a36Sopenharmony_ci	for_each_possible_cpu(cpu)
251762306a36Sopenharmony_ci		sum += *per_cpu_ptr(md->pending_io, cpu);
251862306a36Sopenharmony_ci
251962306a36Sopenharmony_ci	return sum != 0;
252062306a36Sopenharmony_ci}
252162306a36Sopenharmony_ci
/*
 * Sleep on md->wait until all in-flight bios have completed, or until a
 * signal arrives (returns -EINTR) when @task_state permits interruption.
 */
static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int task_state)
{
	int r = 0;
	DEFINE_WAIT(wait);

	while (true) {
		/* Queue on the waitqueue before re-checking to avoid lost wakeups. */
		prepare_to_wait(&md->wait, &wait, task_state);

		if (!dm_in_flight_bios(md))
			break;

		if (signal_pending_state(task_state, current)) {
			r = -EINTR;
			break;
		}

		io_schedule();
	}
	finish_wait(&md->wait, &wait);

	/*
	 * NOTE(review): presumably pairs with a write barrier on the bio
	 * completion side so subsequent reads see completed state — confirm.
	 */
	smp_rmb();

	return r;
}
254662306a36Sopenharmony_ci
254762306a36Sopenharmony_cistatic int dm_wait_for_completion(struct mapped_device *md, unsigned int task_state)
254862306a36Sopenharmony_ci{
254962306a36Sopenharmony_ci	int r = 0;
255062306a36Sopenharmony_ci
255162306a36Sopenharmony_ci	if (!queue_is_mq(md->queue))
255262306a36Sopenharmony_ci		return dm_wait_for_bios_completion(md, task_state);
255362306a36Sopenharmony_ci
255462306a36Sopenharmony_ci	while (true) {
255562306a36Sopenharmony_ci		if (!blk_mq_queue_inflight(md->queue))
255662306a36Sopenharmony_ci			break;
255762306a36Sopenharmony_ci
255862306a36Sopenharmony_ci		if (signal_pending_state(task_state, current)) {
255962306a36Sopenharmony_ci			r = -EINTR;
256062306a36Sopenharmony_ci			break;
256162306a36Sopenharmony_ci		}
256262306a36Sopenharmony_ci
256362306a36Sopenharmony_ci		fsleep(5000);
256462306a36Sopenharmony_ci	}
256562306a36Sopenharmony_ci
256662306a36Sopenharmony_ci	return r;
256762306a36Sopenharmony_ci}
256862306a36Sopenharmony_ci
256962306a36Sopenharmony_ci/*
257062306a36Sopenharmony_ci * Process the deferred bios
257162306a36Sopenharmony_ci */
257262306a36Sopenharmony_cistatic void dm_wq_work(struct work_struct *work)
257362306a36Sopenharmony_ci{
257462306a36Sopenharmony_ci	struct mapped_device *md = container_of(work, struct mapped_device, work);
257562306a36Sopenharmony_ci	struct bio *bio;
257662306a36Sopenharmony_ci
257762306a36Sopenharmony_ci	while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
257862306a36Sopenharmony_ci		spin_lock_irq(&md->deferred_lock);
257962306a36Sopenharmony_ci		bio = bio_list_pop(&md->deferred);
258062306a36Sopenharmony_ci		spin_unlock_irq(&md->deferred_lock);
258162306a36Sopenharmony_ci
258262306a36Sopenharmony_ci		if (!bio)
258362306a36Sopenharmony_ci			break;
258462306a36Sopenharmony_ci
258562306a36Sopenharmony_ci		submit_bio_noacct(bio);
258662306a36Sopenharmony_ci		cond_resched();
258762306a36Sopenharmony_ci	}
258862306a36Sopenharmony_ci}
258962306a36Sopenharmony_ci
259062306a36Sopenharmony_cistatic void dm_queue_flush(struct mapped_device *md)
259162306a36Sopenharmony_ci{
259262306a36Sopenharmony_ci	clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
259362306a36Sopenharmony_ci	smp_mb__after_atomic();
259462306a36Sopenharmony_ci	queue_work(md->wq, &md->work);
259562306a36Sopenharmony_ci}
259662306a36Sopenharmony_ci
259762306a36Sopenharmony_ci/*
259862306a36Sopenharmony_ci * Swap in a new table, returning the old one for the caller to destroy.
259962306a36Sopenharmony_ci */
260062306a36Sopenharmony_cistruct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
260162306a36Sopenharmony_ci{
260262306a36Sopenharmony_ci	struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL);
260362306a36Sopenharmony_ci	struct queue_limits limits;
260462306a36Sopenharmony_ci	int r;
260562306a36Sopenharmony_ci
260662306a36Sopenharmony_ci	mutex_lock(&md->suspend_lock);
260762306a36Sopenharmony_ci
260862306a36Sopenharmony_ci	/* device must be suspended */
260962306a36Sopenharmony_ci	if (!dm_suspended_md(md))
261062306a36Sopenharmony_ci		goto out;
261162306a36Sopenharmony_ci
261262306a36Sopenharmony_ci	/*
261362306a36Sopenharmony_ci	 * If the new table has no data devices, retain the existing limits.
261462306a36Sopenharmony_ci	 * This helps multipath with queue_if_no_path if all paths disappear,
261562306a36Sopenharmony_ci	 * then new I/O is queued based on these limits, and then some paths
261662306a36Sopenharmony_ci	 * reappear.
261762306a36Sopenharmony_ci	 */
261862306a36Sopenharmony_ci	if (dm_table_has_no_data_devices(table)) {
261962306a36Sopenharmony_ci		live_map = dm_get_live_table_fast(md);
262062306a36Sopenharmony_ci		if (live_map)
262162306a36Sopenharmony_ci			limits = md->queue->limits;
262262306a36Sopenharmony_ci		dm_put_live_table_fast(md);
262362306a36Sopenharmony_ci	}
262462306a36Sopenharmony_ci
262562306a36Sopenharmony_ci	if (!live_map) {
262662306a36Sopenharmony_ci		r = dm_calculate_queue_limits(table, &limits);
262762306a36Sopenharmony_ci		if (r) {
262862306a36Sopenharmony_ci			map = ERR_PTR(r);
262962306a36Sopenharmony_ci			goto out;
263062306a36Sopenharmony_ci		}
263162306a36Sopenharmony_ci	}
263262306a36Sopenharmony_ci
263362306a36Sopenharmony_ci	map = __bind(md, table, &limits);
263462306a36Sopenharmony_ci	dm_issue_global_event();
263562306a36Sopenharmony_ci
263662306a36Sopenharmony_ciout:
263762306a36Sopenharmony_ci	mutex_unlock(&md->suspend_lock);
263862306a36Sopenharmony_ci	return map;
263962306a36Sopenharmony_ci}
264062306a36Sopenharmony_ci
264162306a36Sopenharmony_ci/*
264262306a36Sopenharmony_ci * Functions to lock and unlock any filesystem running on the
264362306a36Sopenharmony_ci * device.
264462306a36Sopenharmony_ci */
264562306a36Sopenharmony_cistatic int lock_fs(struct mapped_device *md)
264662306a36Sopenharmony_ci{
264762306a36Sopenharmony_ci	int r;
264862306a36Sopenharmony_ci
264962306a36Sopenharmony_ci	WARN_ON(test_bit(DMF_FROZEN, &md->flags));
265062306a36Sopenharmony_ci
265162306a36Sopenharmony_ci	r = freeze_bdev(md->disk->part0);
265262306a36Sopenharmony_ci	if (!r)
265362306a36Sopenharmony_ci		set_bit(DMF_FROZEN, &md->flags);
265462306a36Sopenharmony_ci	return r;
265562306a36Sopenharmony_ci}
265662306a36Sopenharmony_ci
265762306a36Sopenharmony_cistatic void unlock_fs(struct mapped_device *md)
265862306a36Sopenharmony_ci{
265962306a36Sopenharmony_ci	if (!test_bit(DMF_FROZEN, &md->flags))
266062306a36Sopenharmony_ci		return;
266162306a36Sopenharmony_ci	thaw_bdev(md->disk->part0);
266262306a36Sopenharmony_ci	clear_bit(DMF_FROZEN, &md->flags);
266362306a36Sopenharmony_ci}
266462306a36Sopenharmony_ci
266562306a36Sopenharmony_ci/*
266662306a36Sopenharmony_ci * @suspend_flags: DM_SUSPEND_LOCKFS_FLAG and/or DM_SUSPEND_NOFLUSH_FLAG
266762306a36Sopenharmony_ci * @task_state: e.g. TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE
266862306a36Sopenharmony_ci * @dmf_suspended_flag: DMF_SUSPENDED or DMF_SUSPENDED_INTERNALLY
266962306a36Sopenharmony_ci *
267062306a36Sopenharmony_ci * If __dm_suspend returns 0, the device is completely quiescent
267162306a36Sopenharmony_ci * now. There is no request-processing activity. All new requests
267262306a36Sopenharmony_ci * are being added to md->deferred list.
267362306a36Sopenharmony_ci */
267462306a36Sopenharmony_cistatic int __dm_suspend(struct mapped_device *md, struct dm_table *map,
267562306a36Sopenharmony_ci			unsigned int suspend_flags, unsigned int task_state,
267662306a36Sopenharmony_ci			int dmf_suspended_flag)
267762306a36Sopenharmony_ci{
267862306a36Sopenharmony_ci	bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
267962306a36Sopenharmony_ci	bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
268062306a36Sopenharmony_ci	int r;
268162306a36Sopenharmony_ci
268262306a36Sopenharmony_ci	lockdep_assert_held(&md->suspend_lock);
268362306a36Sopenharmony_ci
268462306a36Sopenharmony_ci	/*
268562306a36Sopenharmony_ci	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
268662306a36Sopenharmony_ci	 * This flag is cleared before dm_suspend returns.
268762306a36Sopenharmony_ci	 */
268862306a36Sopenharmony_ci	if (noflush)
268962306a36Sopenharmony_ci		set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
269062306a36Sopenharmony_ci	else
269162306a36Sopenharmony_ci		DMDEBUG("%s: suspending with flush", dm_device_name(md));
269262306a36Sopenharmony_ci
269362306a36Sopenharmony_ci	/*
269462306a36Sopenharmony_ci	 * This gets reverted if there's an error later and the targets
269562306a36Sopenharmony_ci	 * provide the .presuspend_undo hook.
269662306a36Sopenharmony_ci	 */
269762306a36Sopenharmony_ci	dm_table_presuspend_targets(map);
269862306a36Sopenharmony_ci
269962306a36Sopenharmony_ci	/*
270062306a36Sopenharmony_ci	 * Flush I/O to the device.
270162306a36Sopenharmony_ci	 * Any I/O submitted after lock_fs() may not be flushed.
270262306a36Sopenharmony_ci	 * noflush takes precedence over do_lockfs.
270362306a36Sopenharmony_ci	 * (lock_fs() flushes I/Os and waits for them to complete.)
270462306a36Sopenharmony_ci	 */
270562306a36Sopenharmony_ci	if (!noflush && do_lockfs) {
270662306a36Sopenharmony_ci		r = lock_fs(md);
270762306a36Sopenharmony_ci		if (r) {
270862306a36Sopenharmony_ci			dm_table_presuspend_undo_targets(map);
270962306a36Sopenharmony_ci			return r;
271062306a36Sopenharmony_ci		}
271162306a36Sopenharmony_ci	}
271262306a36Sopenharmony_ci
271362306a36Sopenharmony_ci	/*
271462306a36Sopenharmony_ci	 * Here we must make sure that no processes are submitting requests
271562306a36Sopenharmony_ci	 * to target drivers i.e. no one may be executing
271662306a36Sopenharmony_ci	 * dm_split_and_process_bio from dm_submit_bio.
271762306a36Sopenharmony_ci	 *
271862306a36Sopenharmony_ci	 * To get all processes out of dm_split_and_process_bio in dm_submit_bio,
271962306a36Sopenharmony_ci	 * we take the write lock. To prevent any process from reentering
272062306a36Sopenharmony_ci	 * dm_split_and_process_bio from dm_submit_bio and quiesce the thread
272162306a36Sopenharmony_ci	 * (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND and call
272262306a36Sopenharmony_ci	 * flush_workqueue(md->wq).
272362306a36Sopenharmony_ci	 */
272462306a36Sopenharmony_ci	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
272562306a36Sopenharmony_ci	if (map)
272662306a36Sopenharmony_ci		synchronize_srcu(&md->io_barrier);
272762306a36Sopenharmony_ci
272862306a36Sopenharmony_ci	/*
272962306a36Sopenharmony_ci	 * Stop md->queue before flushing md->wq in case request-based
273062306a36Sopenharmony_ci	 * dm defers requests to md->wq from md->queue.
273162306a36Sopenharmony_ci	 */
273262306a36Sopenharmony_ci	if (dm_request_based(md))
273362306a36Sopenharmony_ci		dm_stop_queue(md->queue);
273462306a36Sopenharmony_ci
273562306a36Sopenharmony_ci	flush_workqueue(md->wq);
273662306a36Sopenharmony_ci
273762306a36Sopenharmony_ci	/*
273862306a36Sopenharmony_ci	 * At this point no more requests are entering target request routines.
273962306a36Sopenharmony_ci	 * We call dm_wait_for_completion to wait for all existing requests
274062306a36Sopenharmony_ci	 * to finish.
274162306a36Sopenharmony_ci	 */
274262306a36Sopenharmony_ci	r = dm_wait_for_completion(md, task_state);
274362306a36Sopenharmony_ci	if (!r)
274462306a36Sopenharmony_ci		set_bit(dmf_suspended_flag, &md->flags);
274562306a36Sopenharmony_ci
274662306a36Sopenharmony_ci	if (noflush)
274762306a36Sopenharmony_ci		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
274862306a36Sopenharmony_ci	if (map)
274962306a36Sopenharmony_ci		synchronize_srcu(&md->io_barrier);
275062306a36Sopenharmony_ci
275162306a36Sopenharmony_ci	/* were we interrupted ? */
275262306a36Sopenharmony_ci	if (r < 0) {
275362306a36Sopenharmony_ci		dm_queue_flush(md);
275462306a36Sopenharmony_ci
275562306a36Sopenharmony_ci		if (dm_request_based(md))
275662306a36Sopenharmony_ci			dm_start_queue(md->queue);
275762306a36Sopenharmony_ci
275862306a36Sopenharmony_ci		unlock_fs(md);
275962306a36Sopenharmony_ci		dm_table_presuspend_undo_targets(map);
276062306a36Sopenharmony_ci		/* pushback list is already flushed, so skip flush */
276162306a36Sopenharmony_ci	}
276262306a36Sopenharmony_ci
276362306a36Sopenharmony_ci	return r;
276462306a36Sopenharmony_ci}
276562306a36Sopenharmony_ci
276662306a36Sopenharmony_ci/*
276762306a36Sopenharmony_ci * We need to be able to change a mapping table under a mounted
276862306a36Sopenharmony_ci * filesystem.  For example we might want to move some data in
276962306a36Sopenharmony_ci * the background.  Before the table can be swapped with
277062306a36Sopenharmony_ci * dm_bind_table, dm_suspend must be called to flush any in
277162306a36Sopenharmony_ci * flight bios and ensure that any further io gets deferred.
277262306a36Sopenharmony_ci */
277362306a36Sopenharmony_ci/*
277462306a36Sopenharmony_ci * Suspend mechanism in request-based dm.
277562306a36Sopenharmony_ci *
277662306a36Sopenharmony_ci * 1. Flush all I/Os by lock_fs() if needed.
277762306a36Sopenharmony_ci * 2. Stop dispatching any I/O by stopping the request_queue.
277862306a36Sopenharmony_ci * 3. Wait for all in-flight I/Os to be completed or requeued.
277962306a36Sopenharmony_ci *
278062306a36Sopenharmony_ci * To abort suspend, start the request_queue.
278162306a36Sopenharmony_ci */
278262306a36Sopenharmony_ciint dm_suspend(struct mapped_device *md, unsigned int suspend_flags)
278362306a36Sopenharmony_ci{
278462306a36Sopenharmony_ci	struct dm_table *map = NULL;
278562306a36Sopenharmony_ci	int r = 0;
278662306a36Sopenharmony_ci
278762306a36Sopenharmony_ciretry:
278862306a36Sopenharmony_ci	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
278962306a36Sopenharmony_ci
279062306a36Sopenharmony_ci	if (dm_suspended_md(md)) {
279162306a36Sopenharmony_ci		r = -EINVAL;
279262306a36Sopenharmony_ci		goto out_unlock;
279362306a36Sopenharmony_ci	}
279462306a36Sopenharmony_ci
279562306a36Sopenharmony_ci	if (dm_suspended_internally_md(md)) {
279662306a36Sopenharmony_ci		/* already internally suspended, wait for internal resume */
279762306a36Sopenharmony_ci		mutex_unlock(&md->suspend_lock);
279862306a36Sopenharmony_ci		r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
279962306a36Sopenharmony_ci		if (r)
280062306a36Sopenharmony_ci			return r;
280162306a36Sopenharmony_ci		goto retry;
280262306a36Sopenharmony_ci	}
280362306a36Sopenharmony_ci
280462306a36Sopenharmony_ci	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
280562306a36Sopenharmony_ci	if (!map) {
280662306a36Sopenharmony_ci		/* avoid deadlock with fs/namespace.c:do_mount() */
280762306a36Sopenharmony_ci		suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG;
280862306a36Sopenharmony_ci	}
280962306a36Sopenharmony_ci
281062306a36Sopenharmony_ci	r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED);
281162306a36Sopenharmony_ci	if (r)
281262306a36Sopenharmony_ci		goto out_unlock;
281362306a36Sopenharmony_ci
281462306a36Sopenharmony_ci	set_bit(DMF_POST_SUSPENDING, &md->flags);
281562306a36Sopenharmony_ci	dm_table_postsuspend_targets(map);
281662306a36Sopenharmony_ci	clear_bit(DMF_POST_SUSPENDING, &md->flags);
281762306a36Sopenharmony_ci
281862306a36Sopenharmony_ciout_unlock:
281962306a36Sopenharmony_ci	mutex_unlock(&md->suspend_lock);
282062306a36Sopenharmony_ci	return r;
282162306a36Sopenharmony_ci}
282262306a36Sopenharmony_ci
282362306a36Sopenharmony_cistatic int __dm_resume(struct mapped_device *md, struct dm_table *map)
282462306a36Sopenharmony_ci{
282562306a36Sopenharmony_ci	if (map) {
282662306a36Sopenharmony_ci		int r = dm_table_resume_targets(map);
282762306a36Sopenharmony_ci
282862306a36Sopenharmony_ci		if (r)
282962306a36Sopenharmony_ci			return r;
283062306a36Sopenharmony_ci	}
283162306a36Sopenharmony_ci
283262306a36Sopenharmony_ci	dm_queue_flush(md);
283362306a36Sopenharmony_ci
283462306a36Sopenharmony_ci	/*
283562306a36Sopenharmony_ci	 * Flushing deferred I/Os must be done after targets are resumed
283662306a36Sopenharmony_ci	 * so that mapping of targets can work correctly.
283762306a36Sopenharmony_ci	 * Request-based dm is queueing the deferred I/Os in its request_queue.
283862306a36Sopenharmony_ci	 */
283962306a36Sopenharmony_ci	if (dm_request_based(md))
284062306a36Sopenharmony_ci		dm_start_queue(md->queue);
284162306a36Sopenharmony_ci
284262306a36Sopenharmony_ci	unlock_fs(md);
284362306a36Sopenharmony_ci
284462306a36Sopenharmony_ci	return 0;
284562306a36Sopenharmony_ci}
284662306a36Sopenharmony_ci
284762306a36Sopenharmony_ciint dm_resume(struct mapped_device *md)
284862306a36Sopenharmony_ci{
284962306a36Sopenharmony_ci	int r;
285062306a36Sopenharmony_ci	struct dm_table *map = NULL;
285162306a36Sopenharmony_ci
285262306a36Sopenharmony_ciretry:
285362306a36Sopenharmony_ci	r = -EINVAL;
285462306a36Sopenharmony_ci	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
285562306a36Sopenharmony_ci
285662306a36Sopenharmony_ci	if (!dm_suspended_md(md))
285762306a36Sopenharmony_ci		goto out;
285862306a36Sopenharmony_ci
285962306a36Sopenharmony_ci	if (dm_suspended_internally_md(md)) {
286062306a36Sopenharmony_ci		/* already internally suspended, wait for internal resume */
286162306a36Sopenharmony_ci		mutex_unlock(&md->suspend_lock);
286262306a36Sopenharmony_ci		r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
286362306a36Sopenharmony_ci		if (r)
286462306a36Sopenharmony_ci			return r;
286562306a36Sopenharmony_ci		goto retry;
286662306a36Sopenharmony_ci	}
286762306a36Sopenharmony_ci
286862306a36Sopenharmony_ci	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
286962306a36Sopenharmony_ci	if (!map || !dm_table_get_size(map))
287062306a36Sopenharmony_ci		goto out;
287162306a36Sopenharmony_ci
287262306a36Sopenharmony_ci	r = __dm_resume(md, map);
287362306a36Sopenharmony_ci	if (r)
287462306a36Sopenharmony_ci		goto out;
287562306a36Sopenharmony_ci
287662306a36Sopenharmony_ci	clear_bit(DMF_SUSPENDED, &md->flags);
287762306a36Sopenharmony_ciout:
287862306a36Sopenharmony_ci	mutex_unlock(&md->suspend_lock);
287962306a36Sopenharmony_ci
288062306a36Sopenharmony_ci	return r;
288162306a36Sopenharmony_ci}
288262306a36Sopenharmony_ci
288362306a36Sopenharmony_ci/*
288462306a36Sopenharmony_ci * Internal suspend/resume works like userspace-driven suspend. It waits
288562306a36Sopenharmony_ci * until all bios finish and prevents issuing new bios to the target drivers.
288662306a36Sopenharmony_ci * It may be used only from the kernel.
288762306a36Sopenharmony_ci */
288862306a36Sopenharmony_ci
288962306a36Sopenharmony_cistatic void __dm_internal_suspend(struct mapped_device *md, unsigned int suspend_flags)
289062306a36Sopenharmony_ci{
289162306a36Sopenharmony_ci	struct dm_table *map = NULL;
289262306a36Sopenharmony_ci
289362306a36Sopenharmony_ci	lockdep_assert_held(&md->suspend_lock);
289462306a36Sopenharmony_ci
289562306a36Sopenharmony_ci	if (md->internal_suspend_count++)
289662306a36Sopenharmony_ci		return; /* nested internal suspend */
289762306a36Sopenharmony_ci
289862306a36Sopenharmony_ci	if (dm_suspended_md(md)) {
289962306a36Sopenharmony_ci		set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
290062306a36Sopenharmony_ci		return; /* nest suspend */
290162306a36Sopenharmony_ci	}
290262306a36Sopenharmony_ci
290362306a36Sopenharmony_ci	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
290462306a36Sopenharmony_ci
290562306a36Sopenharmony_ci	/*
290662306a36Sopenharmony_ci	 * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is
290762306a36Sopenharmony_ci	 * supported.  Properly supporting a TASK_INTERRUPTIBLE internal suspend
290862306a36Sopenharmony_ci	 * would require changing .presuspend to return an error -- avoid this
290962306a36Sopenharmony_ci	 * until there is a need for more elaborate variants of internal suspend.
291062306a36Sopenharmony_ci	 */
291162306a36Sopenharmony_ci	(void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE,
291262306a36Sopenharmony_ci			    DMF_SUSPENDED_INTERNALLY);
291362306a36Sopenharmony_ci
291462306a36Sopenharmony_ci	set_bit(DMF_POST_SUSPENDING, &md->flags);
291562306a36Sopenharmony_ci	dm_table_postsuspend_targets(map);
291662306a36Sopenharmony_ci	clear_bit(DMF_POST_SUSPENDING, &md->flags);
291762306a36Sopenharmony_ci}
291862306a36Sopenharmony_ci
291962306a36Sopenharmony_cistatic void __dm_internal_resume(struct mapped_device *md)
292062306a36Sopenharmony_ci{
292162306a36Sopenharmony_ci	int r;
292262306a36Sopenharmony_ci	struct dm_table *map;
292362306a36Sopenharmony_ci
292462306a36Sopenharmony_ci	BUG_ON(!md->internal_suspend_count);
292562306a36Sopenharmony_ci
292662306a36Sopenharmony_ci	if (--md->internal_suspend_count)
292762306a36Sopenharmony_ci		return; /* resume from nested internal suspend */
292862306a36Sopenharmony_ci
292962306a36Sopenharmony_ci	if (dm_suspended_md(md))
293062306a36Sopenharmony_ci		goto done; /* resume from nested suspend */
293162306a36Sopenharmony_ci
293262306a36Sopenharmony_ci	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
293362306a36Sopenharmony_ci	r = __dm_resume(md, map);
293462306a36Sopenharmony_ci	if (r) {
293562306a36Sopenharmony_ci		/*
293662306a36Sopenharmony_ci		 * If a preresume method of some target failed, we are in a
293762306a36Sopenharmony_ci		 * tricky situation. We can't return an error to the caller. We
293862306a36Sopenharmony_ci		 * can't fake success because then the "resume" and
293962306a36Sopenharmony_ci		 * "postsuspend" methods would not be paired correctly, and it
294062306a36Sopenharmony_ci		 * would break various targets, for example it would cause list
294162306a36Sopenharmony_ci		 * corruption in the "origin" target.
294262306a36Sopenharmony_ci		 *
294362306a36Sopenharmony_ci		 * So, we fake normal suspend here, to make sure that the
294462306a36Sopenharmony_ci		 * "resume" and "postsuspend" methods will be paired correctly.
294562306a36Sopenharmony_ci		 */
294662306a36Sopenharmony_ci		DMERR("Preresume method failed: %d", r);
294762306a36Sopenharmony_ci		set_bit(DMF_SUSPENDED, &md->flags);
294862306a36Sopenharmony_ci	}
294962306a36Sopenharmony_cidone:
295062306a36Sopenharmony_ci	clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
295162306a36Sopenharmony_ci	smp_mb__after_atomic();
295262306a36Sopenharmony_ci	wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY);
295362306a36Sopenharmony_ci}
295462306a36Sopenharmony_ci
295562306a36Sopenharmony_civoid dm_internal_suspend_noflush(struct mapped_device *md)
295662306a36Sopenharmony_ci{
295762306a36Sopenharmony_ci	mutex_lock(&md->suspend_lock);
295862306a36Sopenharmony_ci	__dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG);
295962306a36Sopenharmony_ci	mutex_unlock(&md->suspend_lock);
296062306a36Sopenharmony_ci}
296162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_internal_suspend_noflush);
296262306a36Sopenharmony_ci
296362306a36Sopenharmony_civoid dm_internal_resume(struct mapped_device *md)
296462306a36Sopenharmony_ci{
296562306a36Sopenharmony_ci	mutex_lock(&md->suspend_lock);
296662306a36Sopenharmony_ci	__dm_internal_resume(md);
296762306a36Sopenharmony_ci	mutex_unlock(&md->suspend_lock);
296862306a36Sopenharmony_ci}
296962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_internal_resume);
297062306a36Sopenharmony_ci
297162306a36Sopenharmony_ci/*
297262306a36Sopenharmony_ci * Fast variants of internal suspend/resume hold md->suspend_lock,
297362306a36Sopenharmony_ci * which prevents interaction with userspace-driven suspend.
297462306a36Sopenharmony_ci */
297562306a36Sopenharmony_ci
297662306a36Sopenharmony_civoid dm_internal_suspend_fast(struct mapped_device *md)
297762306a36Sopenharmony_ci{
297862306a36Sopenharmony_ci	mutex_lock(&md->suspend_lock);
297962306a36Sopenharmony_ci	if (dm_suspended_md(md) || dm_suspended_internally_md(md))
298062306a36Sopenharmony_ci		return;
298162306a36Sopenharmony_ci
298262306a36Sopenharmony_ci	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
298362306a36Sopenharmony_ci	synchronize_srcu(&md->io_barrier);
298462306a36Sopenharmony_ci	flush_workqueue(md->wq);
298562306a36Sopenharmony_ci	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
298662306a36Sopenharmony_ci}
298762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_internal_suspend_fast);
298862306a36Sopenharmony_ci
298962306a36Sopenharmony_civoid dm_internal_resume_fast(struct mapped_device *md)
299062306a36Sopenharmony_ci{
299162306a36Sopenharmony_ci	if (dm_suspended_md(md) || dm_suspended_internally_md(md))
299262306a36Sopenharmony_ci		goto done;
299362306a36Sopenharmony_ci
299462306a36Sopenharmony_ci	dm_queue_flush(md);
299562306a36Sopenharmony_ci
299662306a36Sopenharmony_cidone:
299762306a36Sopenharmony_ci	mutex_unlock(&md->suspend_lock);
299862306a36Sopenharmony_ci}
299962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_internal_resume_fast);
300062306a36Sopenharmony_ci
300162306a36Sopenharmony_ci/*
300262306a36Sopenharmony_ci *---------------------------------------------------------------
300362306a36Sopenharmony_ci * Event notification.
300462306a36Sopenharmony_ci *---------------------------------------------------------------
300562306a36Sopenharmony_ci */
300662306a36Sopenharmony_ciint dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
300762306a36Sopenharmony_ci		      unsigned int cookie, bool need_resize_uevent)
300862306a36Sopenharmony_ci{
300962306a36Sopenharmony_ci	int r;
301062306a36Sopenharmony_ci	unsigned int noio_flag;
301162306a36Sopenharmony_ci	char udev_cookie[DM_COOKIE_LENGTH];
301262306a36Sopenharmony_ci	char *envp[3] = { NULL, NULL, NULL };
301362306a36Sopenharmony_ci	char **envpp = envp;
301462306a36Sopenharmony_ci	if (cookie) {
301562306a36Sopenharmony_ci		snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
301662306a36Sopenharmony_ci			 DM_COOKIE_ENV_VAR_NAME, cookie);
301762306a36Sopenharmony_ci		*envpp++ = udev_cookie;
301862306a36Sopenharmony_ci	}
301962306a36Sopenharmony_ci	if (need_resize_uevent) {
302062306a36Sopenharmony_ci		*envpp++ = "RESIZE=1";
302162306a36Sopenharmony_ci	}
302262306a36Sopenharmony_ci
302362306a36Sopenharmony_ci	noio_flag = memalloc_noio_save();
302462306a36Sopenharmony_ci
302562306a36Sopenharmony_ci	r = kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp);
302662306a36Sopenharmony_ci
302762306a36Sopenharmony_ci	memalloc_noio_restore(noio_flag);
302862306a36Sopenharmony_ci
302962306a36Sopenharmony_ci	return r;
303062306a36Sopenharmony_ci}
303162306a36Sopenharmony_ci
303262306a36Sopenharmony_ciuint32_t dm_next_uevent_seq(struct mapped_device *md)
303362306a36Sopenharmony_ci{
303462306a36Sopenharmony_ci	return atomic_add_return(1, &md->uevent_seq);
303562306a36Sopenharmony_ci}
303662306a36Sopenharmony_ci
303762306a36Sopenharmony_ciuint32_t dm_get_event_nr(struct mapped_device *md)
303862306a36Sopenharmony_ci{
303962306a36Sopenharmony_ci	return atomic_read(&md->event_nr);
304062306a36Sopenharmony_ci}
304162306a36Sopenharmony_ci
304262306a36Sopenharmony_ciint dm_wait_event(struct mapped_device *md, int event_nr)
304362306a36Sopenharmony_ci{
304462306a36Sopenharmony_ci	return wait_event_interruptible(md->eventq,
304562306a36Sopenharmony_ci			(event_nr != atomic_read(&md->event_nr)));
304662306a36Sopenharmony_ci}
304762306a36Sopenharmony_ci
304862306a36Sopenharmony_civoid dm_uevent_add(struct mapped_device *md, struct list_head *elist)
304962306a36Sopenharmony_ci{
305062306a36Sopenharmony_ci	unsigned long flags;
305162306a36Sopenharmony_ci
305262306a36Sopenharmony_ci	spin_lock_irqsave(&md->uevent_lock, flags);
305362306a36Sopenharmony_ci	list_add(elist, &md->uevent_list);
305462306a36Sopenharmony_ci	spin_unlock_irqrestore(&md->uevent_lock, flags);
305562306a36Sopenharmony_ci}
305662306a36Sopenharmony_ci
305762306a36Sopenharmony_ci/*
305862306a36Sopenharmony_ci * The gendisk is only valid as long as you have a reference
305962306a36Sopenharmony_ci * count on 'md'.
306062306a36Sopenharmony_ci */
306162306a36Sopenharmony_cistruct gendisk *dm_disk(struct mapped_device *md)
306262306a36Sopenharmony_ci{
306362306a36Sopenharmony_ci	return md->disk;
306462306a36Sopenharmony_ci}
306562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_disk);
306662306a36Sopenharmony_ci
306762306a36Sopenharmony_cistruct kobject *dm_kobject(struct mapped_device *md)
306862306a36Sopenharmony_ci{
306962306a36Sopenharmony_ci	return &md->kobj_holder.kobj;
307062306a36Sopenharmony_ci}
307162306a36Sopenharmony_ci
307262306a36Sopenharmony_cistruct mapped_device *dm_get_from_kobject(struct kobject *kobj)
307362306a36Sopenharmony_ci{
307462306a36Sopenharmony_ci	struct mapped_device *md;
307562306a36Sopenharmony_ci
307662306a36Sopenharmony_ci	md = container_of(kobj, struct mapped_device, kobj_holder.kobj);
307762306a36Sopenharmony_ci
307862306a36Sopenharmony_ci	spin_lock(&_minor_lock);
307962306a36Sopenharmony_ci	if (test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) {
308062306a36Sopenharmony_ci		md = NULL;
308162306a36Sopenharmony_ci		goto out;
308262306a36Sopenharmony_ci	}
308362306a36Sopenharmony_ci	dm_get(md);
308462306a36Sopenharmony_ciout:
308562306a36Sopenharmony_ci	spin_unlock(&_minor_lock);
308662306a36Sopenharmony_ci
308762306a36Sopenharmony_ci	return md;
308862306a36Sopenharmony_ci}
308962306a36Sopenharmony_ci
309062306a36Sopenharmony_ciint dm_suspended_md(struct mapped_device *md)
309162306a36Sopenharmony_ci{
309262306a36Sopenharmony_ci	return test_bit(DMF_SUSPENDED, &md->flags);
309362306a36Sopenharmony_ci}
309462306a36Sopenharmony_ci
309562306a36Sopenharmony_cistatic int dm_post_suspending_md(struct mapped_device *md)
309662306a36Sopenharmony_ci{
309762306a36Sopenharmony_ci	return test_bit(DMF_POST_SUSPENDING, &md->flags);
309862306a36Sopenharmony_ci}
309962306a36Sopenharmony_ci
310062306a36Sopenharmony_ciint dm_suspended_internally_md(struct mapped_device *md)
310162306a36Sopenharmony_ci{
310262306a36Sopenharmony_ci	return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
310362306a36Sopenharmony_ci}
310462306a36Sopenharmony_ci
310562306a36Sopenharmony_ciint dm_test_deferred_remove_flag(struct mapped_device *md)
310662306a36Sopenharmony_ci{
310762306a36Sopenharmony_ci	return test_bit(DMF_DEFERRED_REMOVE, &md->flags);
310862306a36Sopenharmony_ci}
310962306a36Sopenharmony_ci
311062306a36Sopenharmony_ciint dm_suspended(struct dm_target *ti)
311162306a36Sopenharmony_ci{
311262306a36Sopenharmony_ci	return dm_suspended_md(ti->table->md);
311362306a36Sopenharmony_ci}
311462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_suspended);
311562306a36Sopenharmony_ci
311662306a36Sopenharmony_ciint dm_post_suspending(struct dm_target *ti)
311762306a36Sopenharmony_ci{
311862306a36Sopenharmony_ci	return dm_post_suspending_md(ti->table->md);
311962306a36Sopenharmony_ci}
312062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_post_suspending);
312162306a36Sopenharmony_ci
312262306a36Sopenharmony_ciint dm_noflush_suspending(struct dm_target *ti)
312362306a36Sopenharmony_ci{
312462306a36Sopenharmony_ci	return __noflush_suspending(ti->table->md);
312562306a36Sopenharmony_ci}
312662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_noflush_suspending);
312762306a36Sopenharmony_ci
312862306a36Sopenharmony_civoid dm_free_md_mempools(struct dm_md_mempools *pools)
312962306a36Sopenharmony_ci{
313062306a36Sopenharmony_ci	if (!pools)
313162306a36Sopenharmony_ci		return;
313262306a36Sopenharmony_ci
313362306a36Sopenharmony_ci	bioset_exit(&pools->bs);
313462306a36Sopenharmony_ci	bioset_exit(&pools->io_bs);
313562306a36Sopenharmony_ci
313662306a36Sopenharmony_ci	kfree(pools);
313762306a36Sopenharmony_ci}
313862306a36Sopenharmony_ci
313962306a36Sopenharmony_cistruct dm_pr {
314062306a36Sopenharmony_ci	u64	old_key;
314162306a36Sopenharmony_ci	u64	new_key;
314262306a36Sopenharmony_ci	u32	flags;
314362306a36Sopenharmony_ci	bool	abort;
314462306a36Sopenharmony_ci	bool	fail_early;
314562306a36Sopenharmony_ci	int	ret;
314662306a36Sopenharmony_ci	enum pr_type type;
314762306a36Sopenharmony_ci	struct pr_keys *read_keys;
314862306a36Sopenharmony_ci	struct pr_held_reservation *rsv;
314962306a36Sopenharmony_ci};
315062306a36Sopenharmony_ci
315162306a36Sopenharmony_cistatic int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn,
315262306a36Sopenharmony_ci		      struct dm_pr *pr)
315362306a36Sopenharmony_ci{
315462306a36Sopenharmony_ci	struct mapped_device *md = bdev->bd_disk->private_data;
315562306a36Sopenharmony_ci	struct dm_table *table;
315662306a36Sopenharmony_ci	struct dm_target *ti;
315762306a36Sopenharmony_ci	int ret = -ENOTTY, srcu_idx;
315862306a36Sopenharmony_ci
315962306a36Sopenharmony_ci	table = dm_get_live_table(md, &srcu_idx);
316062306a36Sopenharmony_ci	if (!table || !dm_table_get_size(table))
316162306a36Sopenharmony_ci		goto out;
316262306a36Sopenharmony_ci
316362306a36Sopenharmony_ci	/* We only support devices that have a single target */
316462306a36Sopenharmony_ci	if (table->num_targets != 1)
316562306a36Sopenharmony_ci		goto out;
316662306a36Sopenharmony_ci	ti = dm_table_get_target(table, 0);
316762306a36Sopenharmony_ci
316862306a36Sopenharmony_ci	if (dm_suspended_md(md)) {
316962306a36Sopenharmony_ci		ret = -EAGAIN;
317062306a36Sopenharmony_ci		goto out;
317162306a36Sopenharmony_ci	}
317262306a36Sopenharmony_ci
317362306a36Sopenharmony_ci	ret = -EINVAL;
317462306a36Sopenharmony_ci	if (!ti->type->iterate_devices)
317562306a36Sopenharmony_ci		goto out;
317662306a36Sopenharmony_ci
317762306a36Sopenharmony_ci	ti->type->iterate_devices(ti, fn, pr);
317862306a36Sopenharmony_ci	ret = 0;
317962306a36Sopenharmony_ciout:
318062306a36Sopenharmony_ci	dm_put_live_table(md, srcu_idx);
318162306a36Sopenharmony_ci	return ret;
318262306a36Sopenharmony_ci}
318362306a36Sopenharmony_ci
318462306a36Sopenharmony_ci/*
318562306a36Sopenharmony_ci * For register / unregister we need to manually call out to every path.
318662306a36Sopenharmony_ci */
/*
 * iterate_devices callout: register the old_key -> new_key pair on a single
 * underlying path.  On failure, only the first error is kept in pr->ret so
 * the caller sees the root cause; iteration continues across the remaining
 * paths unless fail_early is set.  Returning -1 stops the iteration.
 */
static int __dm_pr_register(struct dm_target *ti, struct dm_dev *dev,
			    sector_t start, sector_t len, void *data)
{
	struct dm_pr *pr = data;
	const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
	int ret;

	if (!ops || !ops->pr_register) {
		/* This path cannot do persistent reservations at all. */
		pr->ret = -EOPNOTSUPP;
		return -1;
	}

	ret = ops->pr_register(dev->bdev, pr->old_key, pr->new_key, pr->flags);
	if (!ret)
		return 0;

	/* Preserve the first failure; later ones would mask it. */
	if (!pr->ret)
		pr->ret = ret;

	if (pr->fail_early)
		return -1;

	return 0;
}
321162306a36Sopenharmony_ci
/*
 * PR_REGISTER entry point.  Registers the key pair on every path; if any
 * path fails, any paths that did succeed are rolled back by unregistering
 * new_key on all of them, so the device never ends up partially registered.
 */
static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
			  u32 flags)
{
	struct dm_pr pr = {
		.old_key	= old_key,
		.new_key	= new_key,
		.flags		= flags,
		.fail_early	= true,
		.ret		= 0,
	};
	int ret;

	ret = dm_call_pr(bdev, __dm_pr_register, &pr);
	if (ret) {
		/* Didn't even get to register a path */
		return ret;
	}

	if (!pr.ret)
		return 0;
	ret = pr.ret;

	/* A zero new_key was itself an unregister; nothing to roll back. */
	if (!new_key)
		return ret;

	/* unregister all paths if we failed to register any path */
	pr.old_key = new_key;
	pr.new_key = 0;
	pr.flags = 0;
	pr.fail_early = false;
	/* Best effort: the original registration error is what we report. */
	(void) dm_call_pr(bdev, __dm_pr_register, &pr);
	return ret;
}
324562306a36Sopenharmony_ci
324662306a36Sopenharmony_ci
324762306a36Sopenharmony_cistatic int __dm_pr_reserve(struct dm_target *ti, struct dm_dev *dev,
324862306a36Sopenharmony_ci			   sector_t start, sector_t len, void *data)
324962306a36Sopenharmony_ci{
325062306a36Sopenharmony_ci	struct dm_pr *pr = data;
325162306a36Sopenharmony_ci	const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
325262306a36Sopenharmony_ci
325362306a36Sopenharmony_ci	if (!ops || !ops->pr_reserve) {
325462306a36Sopenharmony_ci		pr->ret = -EOPNOTSUPP;
325562306a36Sopenharmony_ci		return -1;
325662306a36Sopenharmony_ci	}
325762306a36Sopenharmony_ci
325862306a36Sopenharmony_ci	pr->ret = ops->pr_reserve(dev->bdev, pr->old_key, pr->type, pr->flags);
325962306a36Sopenharmony_ci	if (!pr->ret)
326062306a36Sopenharmony_ci		return -1;
326162306a36Sopenharmony_ci
326262306a36Sopenharmony_ci	return 0;
326362306a36Sopenharmony_ci}
326462306a36Sopenharmony_ci
326562306a36Sopenharmony_cistatic int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
326662306a36Sopenharmony_ci			 u32 flags)
326762306a36Sopenharmony_ci{
326862306a36Sopenharmony_ci	struct dm_pr pr = {
326962306a36Sopenharmony_ci		.old_key	= key,
327062306a36Sopenharmony_ci		.flags		= flags,
327162306a36Sopenharmony_ci		.type		= type,
327262306a36Sopenharmony_ci		.fail_early	= false,
327362306a36Sopenharmony_ci		.ret		= 0,
327462306a36Sopenharmony_ci	};
327562306a36Sopenharmony_ci	int ret;
327662306a36Sopenharmony_ci
327762306a36Sopenharmony_ci	ret = dm_call_pr(bdev, __dm_pr_reserve, &pr);
327862306a36Sopenharmony_ci	if (ret)
327962306a36Sopenharmony_ci		return ret;
328062306a36Sopenharmony_ci
328162306a36Sopenharmony_ci	return pr.ret;
328262306a36Sopenharmony_ci}
328362306a36Sopenharmony_ci
328462306a36Sopenharmony_ci/*
328562306a36Sopenharmony_ci * If there is a non-All Registrants type of reservation, the release must be
328662306a36Sopenharmony_ci * sent down the holding path. For the cases where there is no reservation or
328762306a36Sopenharmony_ci * the path is not the holder the device will also return success, so we must
328862306a36Sopenharmony_ci * try each path to make sure we got the correct path.
328962306a36Sopenharmony_ci */
329062306a36Sopenharmony_cistatic int __dm_pr_release(struct dm_target *ti, struct dm_dev *dev,
329162306a36Sopenharmony_ci			   sector_t start, sector_t len, void *data)
329262306a36Sopenharmony_ci{
329362306a36Sopenharmony_ci	struct dm_pr *pr = data;
329462306a36Sopenharmony_ci	const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
329562306a36Sopenharmony_ci
329662306a36Sopenharmony_ci	if (!ops || !ops->pr_release) {
329762306a36Sopenharmony_ci		pr->ret = -EOPNOTSUPP;
329862306a36Sopenharmony_ci		return -1;
329962306a36Sopenharmony_ci	}
330062306a36Sopenharmony_ci
330162306a36Sopenharmony_ci	pr->ret = ops->pr_release(dev->bdev, pr->old_key, pr->type);
330262306a36Sopenharmony_ci	if (pr->ret)
330362306a36Sopenharmony_ci		return -1;
330462306a36Sopenharmony_ci
330562306a36Sopenharmony_ci	return 0;
330662306a36Sopenharmony_ci}
330762306a36Sopenharmony_ci
330862306a36Sopenharmony_cistatic int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
330962306a36Sopenharmony_ci{
331062306a36Sopenharmony_ci	struct dm_pr pr = {
331162306a36Sopenharmony_ci		.old_key	= key,
331262306a36Sopenharmony_ci		.type		= type,
331362306a36Sopenharmony_ci		.fail_early	= false,
331462306a36Sopenharmony_ci	};
331562306a36Sopenharmony_ci	int ret;
331662306a36Sopenharmony_ci
331762306a36Sopenharmony_ci	ret = dm_call_pr(bdev, __dm_pr_release, &pr);
331862306a36Sopenharmony_ci	if (ret)
331962306a36Sopenharmony_ci		return ret;
332062306a36Sopenharmony_ci
332162306a36Sopenharmony_ci	return pr.ret;
332262306a36Sopenharmony_ci}
332362306a36Sopenharmony_ci
332462306a36Sopenharmony_cistatic int __dm_pr_preempt(struct dm_target *ti, struct dm_dev *dev,
332562306a36Sopenharmony_ci			   sector_t start, sector_t len, void *data)
332662306a36Sopenharmony_ci{
332762306a36Sopenharmony_ci	struct dm_pr *pr = data;
332862306a36Sopenharmony_ci	const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
332962306a36Sopenharmony_ci
333062306a36Sopenharmony_ci	if (!ops || !ops->pr_preempt) {
333162306a36Sopenharmony_ci		pr->ret = -EOPNOTSUPP;
333262306a36Sopenharmony_ci		return -1;
333362306a36Sopenharmony_ci	}
333462306a36Sopenharmony_ci
333562306a36Sopenharmony_ci	pr->ret = ops->pr_preempt(dev->bdev, pr->old_key, pr->new_key, pr->type,
333662306a36Sopenharmony_ci				  pr->abort);
333762306a36Sopenharmony_ci	if (!pr->ret)
333862306a36Sopenharmony_ci		return -1;
333962306a36Sopenharmony_ci
334062306a36Sopenharmony_ci	return 0;
334162306a36Sopenharmony_ci}
334262306a36Sopenharmony_ci
334362306a36Sopenharmony_cistatic int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
334462306a36Sopenharmony_ci			 enum pr_type type, bool abort)
334562306a36Sopenharmony_ci{
334662306a36Sopenharmony_ci	struct dm_pr pr = {
334762306a36Sopenharmony_ci		.new_key	= new_key,
334862306a36Sopenharmony_ci		.old_key	= old_key,
334962306a36Sopenharmony_ci		.type		= type,
335062306a36Sopenharmony_ci		.fail_early	= false,
335162306a36Sopenharmony_ci	};
335262306a36Sopenharmony_ci	int ret;
335362306a36Sopenharmony_ci
335462306a36Sopenharmony_ci	ret = dm_call_pr(bdev, __dm_pr_preempt, &pr);
335562306a36Sopenharmony_ci	if (ret)
335662306a36Sopenharmony_ci		return ret;
335762306a36Sopenharmony_ci
335862306a36Sopenharmony_ci	return pr.ret;
335962306a36Sopenharmony_ci}
336062306a36Sopenharmony_ci
336162306a36Sopenharmony_cistatic int dm_pr_clear(struct block_device *bdev, u64 key)
336262306a36Sopenharmony_ci{
336362306a36Sopenharmony_ci	struct mapped_device *md = bdev->bd_disk->private_data;
336462306a36Sopenharmony_ci	const struct pr_ops *ops;
336562306a36Sopenharmony_ci	int r, srcu_idx;
336662306a36Sopenharmony_ci
336762306a36Sopenharmony_ci	r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
336862306a36Sopenharmony_ci	if (r < 0)
336962306a36Sopenharmony_ci		goto out;
337062306a36Sopenharmony_ci
337162306a36Sopenharmony_ci	ops = bdev->bd_disk->fops->pr_ops;
337262306a36Sopenharmony_ci	if (ops && ops->pr_clear)
337362306a36Sopenharmony_ci		r = ops->pr_clear(bdev, key);
337462306a36Sopenharmony_ci	else
337562306a36Sopenharmony_ci		r = -EOPNOTSUPP;
337662306a36Sopenharmony_ciout:
337762306a36Sopenharmony_ci	dm_unprepare_ioctl(md, srcu_idx);
337862306a36Sopenharmony_ci	return r;
337962306a36Sopenharmony_ci}
338062306a36Sopenharmony_ci
338162306a36Sopenharmony_cistatic int __dm_pr_read_keys(struct dm_target *ti, struct dm_dev *dev,
338262306a36Sopenharmony_ci			     sector_t start, sector_t len, void *data)
338362306a36Sopenharmony_ci{
338462306a36Sopenharmony_ci	struct dm_pr *pr = data;
338562306a36Sopenharmony_ci	const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
338662306a36Sopenharmony_ci
338762306a36Sopenharmony_ci	if (!ops || !ops->pr_read_keys) {
338862306a36Sopenharmony_ci		pr->ret = -EOPNOTSUPP;
338962306a36Sopenharmony_ci		return -1;
339062306a36Sopenharmony_ci	}
339162306a36Sopenharmony_ci
339262306a36Sopenharmony_ci	pr->ret = ops->pr_read_keys(dev->bdev, pr->read_keys);
339362306a36Sopenharmony_ci	if (!pr->ret)
339462306a36Sopenharmony_ci		return -1;
339562306a36Sopenharmony_ci
339662306a36Sopenharmony_ci	return 0;
339762306a36Sopenharmony_ci}
339862306a36Sopenharmony_ci
339962306a36Sopenharmony_cistatic int dm_pr_read_keys(struct block_device *bdev, struct pr_keys *keys)
340062306a36Sopenharmony_ci{
340162306a36Sopenharmony_ci	struct dm_pr pr = {
340262306a36Sopenharmony_ci		.read_keys = keys,
340362306a36Sopenharmony_ci	};
340462306a36Sopenharmony_ci	int ret;
340562306a36Sopenharmony_ci
340662306a36Sopenharmony_ci	ret = dm_call_pr(bdev, __dm_pr_read_keys, &pr);
340762306a36Sopenharmony_ci	if (ret)
340862306a36Sopenharmony_ci		return ret;
340962306a36Sopenharmony_ci
341062306a36Sopenharmony_ci	return pr.ret;
341162306a36Sopenharmony_ci}
341262306a36Sopenharmony_ci
341362306a36Sopenharmony_cistatic int __dm_pr_read_reservation(struct dm_target *ti, struct dm_dev *dev,
341462306a36Sopenharmony_ci				    sector_t start, sector_t len, void *data)
341562306a36Sopenharmony_ci{
341662306a36Sopenharmony_ci	struct dm_pr *pr = data;
341762306a36Sopenharmony_ci	const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
341862306a36Sopenharmony_ci
341962306a36Sopenharmony_ci	if (!ops || !ops->pr_read_reservation) {
342062306a36Sopenharmony_ci		pr->ret = -EOPNOTSUPP;
342162306a36Sopenharmony_ci		return -1;
342262306a36Sopenharmony_ci	}
342362306a36Sopenharmony_ci
342462306a36Sopenharmony_ci	pr->ret = ops->pr_read_reservation(dev->bdev, pr->rsv);
342562306a36Sopenharmony_ci	if (!pr->ret)
342662306a36Sopenharmony_ci		return -1;
342762306a36Sopenharmony_ci
342862306a36Sopenharmony_ci	return 0;
342962306a36Sopenharmony_ci}
343062306a36Sopenharmony_ci
343162306a36Sopenharmony_cistatic int dm_pr_read_reservation(struct block_device *bdev,
343262306a36Sopenharmony_ci				  struct pr_held_reservation *rsv)
343362306a36Sopenharmony_ci{
343462306a36Sopenharmony_ci	struct dm_pr pr = {
343562306a36Sopenharmony_ci		.rsv = rsv,
343662306a36Sopenharmony_ci	};
343762306a36Sopenharmony_ci	int ret;
343862306a36Sopenharmony_ci
343962306a36Sopenharmony_ci	ret = dm_call_pr(bdev, __dm_pr_read_reservation, &pr);
344062306a36Sopenharmony_ci	if (ret)
344162306a36Sopenharmony_ci		return ret;
344262306a36Sopenharmony_ci
344362306a36Sopenharmony_ci	return pr.ret;
344462306a36Sopenharmony_ci}
344562306a36Sopenharmony_ci
/* Persistent reservation operations exposed by all DM block devices. */
static const struct pr_ops dm_pr_ops = {
	.pr_register	= dm_pr_register,
	.pr_reserve	= dm_pr_reserve,
	.pr_release	= dm_pr_release,
	.pr_preempt	= dm_pr_preempt,
	.pr_clear	= dm_pr_clear,
	.pr_read_keys	= dm_pr_read_keys,
	.pr_read_reservation = dm_pr_read_reservation,
};
345562306a36Sopenharmony_ci
/*
 * Block device operations for bio-based mapped devices (these submit and
 * poll bios directly via submit_bio/poll_bio).
 */
static const struct block_device_operations dm_blk_dops = {
	.submit_bio = dm_submit_bio,
	.poll_bio = dm_poll_bio,
	.open = dm_blk_open,
	.release = dm_blk_close,
	.ioctl = dm_blk_ioctl,
	.getgeo = dm_blk_getgeo,
	.report_zones = dm_blk_report_zones,
	.pr_ops = &dm_pr_ops,
	.owner = THIS_MODULE
};
346762306a36Sopenharmony_ci
/*
 * Block device operations for request-based mapped devices; note no
 * submit_bio/poll_bio here — presumably I/O is driven through the
 * request queue instead (see dm-rq).
 */
static const struct block_device_operations dm_rq_blk_dops = {
	.open = dm_blk_open,
	.release = dm_blk_close,
	.ioctl = dm_blk_ioctl,
	.getgeo = dm_blk_getgeo,
	.pr_ops = &dm_pr_ops,
	.owner = THIS_MODULE
};
347662306a36Sopenharmony_ci
/* DAX (direct access) operations for mapped devices that support it. */
static const struct dax_operations dm_dax_ops = {
	.direct_access = dm_dax_direct_access,
	.zero_page_range = dm_dax_zero_page_range,
	.recovery_write = dm_dax_recovery_write,
};
348262306a36Sopenharmony_ci
/*
 * module hooks
 */
module_init(dm_init);
module_exit(dm_exit);

/* Permission 0: settable only at module load time, not via sysfs. */
module_param(major, uint, 0);
MODULE_PARM_DESC(major, "The major number of the device mapper");

module_param(reserved_bio_based_ios, uint, 0644);
MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");

module_param(dm_numa_node, int, 0644);
MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations");

module_param(swap_bios, int, 0644);
MODULE_PARM_DESC(swap_bios, "Maximum allowed inflight swap IOs");

MODULE_DESCRIPTION(DM_NAME " driver");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");
3504