18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
38c2ecf20Sopenharmony_ci * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * This file is released under the GPL.
68c2ecf20Sopenharmony_ci */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include "dm-core.h"
98c2ecf20Sopenharmony_ci#include "dm-rq.h"
108c2ecf20Sopenharmony_ci#include "dm-uevent.h"
118c2ecf20Sopenharmony_ci
128c2ecf20Sopenharmony_ci#include <linux/init.h>
138c2ecf20Sopenharmony_ci#include <linux/module.h>
148c2ecf20Sopenharmony_ci#include <linux/mutex.h>
158c2ecf20Sopenharmony_ci#include <linux/sched/mm.h>
168c2ecf20Sopenharmony_ci#include <linux/sched/signal.h>
178c2ecf20Sopenharmony_ci#include <linux/blkpg.h>
188c2ecf20Sopenharmony_ci#include <linux/bio.h>
198c2ecf20Sopenharmony_ci#include <linux/mempool.h>
208c2ecf20Sopenharmony_ci#include <linux/dax.h>
218c2ecf20Sopenharmony_ci#include <linux/slab.h>
228c2ecf20Sopenharmony_ci#include <linux/idr.h>
238c2ecf20Sopenharmony_ci#include <linux/uio.h>
248c2ecf20Sopenharmony_ci#include <linux/hdreg.h>
258c2ecf20Sopenharmony_ci#include <linux/delay.h>
268c2ecf20Sopenharmony_ci#include <linux/wait.h>
278c2ecf20Sopenharmony_ci#include <linux/pr.h>
288c2ecf20Sopenharmony_ci#include <linux/refcount.h>
298c2ecf20Sopenharmony_ci#include <linux/part_stat.h>
308c2ecf20Sopenharmony_ci#include <linux/blk-crypto.h>
318c2ecf20Sopenharmony_ci
#define DM_MSG_PREFIX "core"

/*
 * Cookies are numeric values sent with CHANGE and REMOVE
 * uevents while resuming, removing or renaming the device.
 */
#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
#define DM_COOKIE_LENGTH 24

/* Name handed to register_blkdev()/unregister_blkdev(). */
static const char *_name = DM_NAME;

/*
 * local_init() copies 'major' into '_major'; when that value is 0,
 * '_major' is replaced by the value register_blkdev() returned.
 */
static unsigned int major = 0;
static unsigned int _major = 0;

static DEFINE_IDR(_minor_idr);

/* Taken by the open/close and deletion-marking paths in this file. */
static DEFINE_SPINLOCK(_minor_lock);

static void do_deferred_remove(struct work_struct *w);

/* Queued by dm_blk_close() on the last close of a deferred-remove device. */
static DECLARE_WORK(deferred_remove_work, do_deferred_remove);

/* Allocated in local_init(), destroyed in local_exit(). */
static struct workqueue_struct *deferred_remove_workqueue;

/* Incremented, and its waiters woken, by dm_issue_global_event(). */
atomic_t dm_global_event_nr = ATOMIC_INIT(0);
DECLARE_WAIT_QUEUE_HEAD(dm_global_eventq);
588c2ecf20Sopenharmony_ci
/*
 * Record a global event: increment dm_global_event_nr, then wake up
 * anything sleeping on dm_global_eventq.
 */
void dm_issue_global_event(void)
{
	atomic_inc(&dm_global_event_nr);
	wake_up(&dm_global_eventq);
}
648c2ecf20Sopenharmony_ci
658c2ecf20Sopenharmony_ci/*
668c2ecf20Sopenharmony_ci * One of these is allocated (on-stack) per original bio.
678c2ecf20Sopenharmony_ci */
688c2ecf20Sopenharmony_cistruct clone_info {
698c2ecf20Sopenharmony_ci	struct dm_table *map;
708c2ecf20Sopenharmony_ci	struct bio *bio;
718c2ecf20Sopenharmony_ci	struct dm_io *io;
728c2ecf20Sopenharmony_ci	sector_t sector;
738c2ecf20Sopenharmony_ci	unsigned sector_count;
748c2ecf20Sopenharmony_ci};
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci/*
778c2ecf20Sopenharmony_ci * One of these is allocated per clone bio.
788c2ecf20Sopenharmony_ci */
798c2ecf20Sopenharmony_ci#define DM_TIO_MAGIC 7282014
808c2ecf20Sopenharmony_cistruct dm_target_io {
818c2ecf20Sopenharmony_ci	unsigned magic;
828c2ecf20Sopenharmony_ci	struct dm_io *io;
838c2ecf20Sopenharmony_ci	struct dm_target *ti;
848c2ecf20Sopenharmony_ci	unsigned target_bio_nr;
858c2ecf20Sopenharmony_ci	unsigned *len_ptr;
868c2ecf20Sopenharmony_ci	bool inside_dm_io;
878c2ecf20Sopenharmony_ci	struct bio clone;
888c2ecf20Sopenharmony_ci};
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci/*
918c2ecf20Sopenharmony_ci * One of these is allocated per original bio.
928c2ecf20Sopenharmony_ci * It contains the first clone used for that original.
938c2ecf20Sopenharmony_ci */
948c2ecf20Sopenharmony_ci#define DM_IO_MAGIC 5191977
958c2ecf20Sopenharmony_cistruct dm_io {
968c2ecf20Sopenharmony_ci	unsigned magic;
978c2ecf20Sopenharmony_ci	struct mapped_device *md;
988c2ecf20Sopenharmony_ci	blk_status_t status;
998c2ecf20Sopenharmony_ci	atomic_t io_count;
1008c2ecf20Sopenharmony_ci	struct bio *orig_bio;
1018c2ecf20Sopenharmony_ci	unsigned long start_time;
1028c2ecf20Sopenharmony_ci	spinlock_t endio_lock;
1038c2ecf20Sopenharmony_ci	struct dm_stats_aux stats_aux;
1048c2ecf20Sopenharmony_ci	/* last member of dm_target_io is 'struct bio' */
1058c2ecf20Sopenharmony_ci	struct dm_target_io tio;
1068c2ecf20Sopenharmony_ci};
1078c2ecf20Sopenharmony_ci
1088c2ecf20Sopenharmony_civoid *dm_per_bio_data(struct bio *bio, size_t data_size)
1098c2ecf20Sopenharmony_ci{
1108c2ecf20Sopenharmony_ci	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
1118c2ecf20Sopenharmony_ci	if (!tio->inside_dm_io)
1128c2ecf20Sopenharmony_ci		return (char *)bio - offsetof(struct dm_target_io, clone) - data_size;
1138c2ecf20Sopenharmony_ci	return (char *)bio - offsetof(struct dm_target_io, clone) - offsetof(struct dm_io, tio) - data_size;
1148c2ecf20Sopenharmony_ci}
1158c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_per_bio_data);
1168c2ecf20Sopenharmony_ci
1178c2ecf20Sopenharmony_cistruct bio *dm_bio_from_per_bio_data(void *data, size_t data_size)
1188c2ecf20Sopenharmony_ci{
1198c2ecf20Sopenharmony_ci	struct dm_io *io = (struct dm_io *)((char *)data + data_size);
1208c2ecf20Sopenharmony_ci	if (io->magic == DM_IO_MAGIC)
1218c2ecf20Sopenharmony_ci		return (struct bio *)((char *)io + offsetof(struct dm_io, tio) + offsetof(struct dm_target_io, clone));
1228c2ecf20Sopenharmony_ci	BUG_ON(io->magic != DM_TIO_MAGIC);
1238c2ecf20Sopenharmony_ci	return (struct bio *)((char *)io + offsetof(struct dm_target_io, clone));
1248c2ecf20Sopenharmony_ci}
1258c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_bio_from_per_bio_data);
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ciunsigned dm_bio_get_target_bio_nr(const struct bio *bio)
1288c2ecf20Sopenharmony_ci{
1298c2ecf20Sopenharmony_ci	return container_of(bio, struct dm_target_io, clone)->target_bio_nr;
1308c2ecf20Sopenharmony_ci}
1318c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_bio_get_target_bio_nr);
1328c2ecf20Sopenharmony_ci
/* NOTE(review): presumably a placeholder stored in _minor_idr - confirm. */
#define MINOR_ALLOCED ((void *)-1)

/*
 * Bits for the md->flags field.
 * These are bit numbers, used with test_bit()/set_bit()/clear_bit().
 */
#define DMF_BLOCK_IO_FOR_SUSPEND 0
#define DMF_SUSPENDED 1
#define DMF_FROZEN 2
#define DMF_FREEING 3
#define DMF_DELETING 4
#define DMF_NOFLUSH_SUSPENDING 5
#define DMF_DEFERRED_REMOVE 6
#define DMF_SUSPENDED_INTERNALLY 7
#define DMF_POST_SUSPENDING 8
/* Read through dm_get_numa_node(), which clamps the stored value. */
#define DM_NUMA_NODE NUMA_NO_NODE
static int dm_numa_node = DM_NUMA_NODE;

/* Value get_swap_bios() falls back to when swap_bios is not positive. */
#define DEFAULT_SWAP_BIOS	(8 * 1048576 / PAGE_SIZE)
static int swap_bios = DEFAULT_SWAP_BIOS;
1538c2ecf20Sopenharmony_cistatic int get_swap_bios(void)
1548c2ecf20Sopenharmony_ci{
1558c2ecf20Sopenharmony_ci	int latch = READ_ONCE(swap_bios);
1568c2ecf20Sopenharmony_ci	if (unlikely(latch <= 0))
1578c2ecf20Sopenharmony_ci		latch = DEFAULT_SWAP_BIOS;
1588c2ecf20Sopenharmony_ci	return latch;
1598c2ecf20Sopenharmony_ci}
1608c2ecf20Sopenharmony_ci
/*
 * For mempools pre-allocation at the table loading time.
 */
struct dm_md_mempools {
	/* NOTE(review): presumably the source of md->bs (tio clones) - confirm */
	struct bio_set bs;
	/* NOTE(review): presumably the source of md->io_bs (dm_io) - confirm */
	struct bio_set io_bs;
};
1688c2ecf20Sopenharmony_ci
/*
 * A reference-counted, list-linked wrapper around a struct dm_dev.
 * NOTE(review): the owning list is not visible in this part of the file.
 */
struct table_device {
	struct list_head list;
	refcount_t count;
	struct dm_dev dm_dev;
};
1748c2ecf20Sopenharmony_ci
/*
 * Bio-based DM's mempools' reserved IOs set by the user.
 * Read through dm_get_reserved_bio_based_ios(), which uses
 * RESERVED_BIO_BASED_IOS as the default.
 */
#define RESERVED_BIO_BASED_IOS		16
static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
1808c2ecf20Sopenharmony_ci
/*
 * Read *@module_param and clamp it to [@min, @max].
 *
 * When the stored value is out of range, try to write the clamped value
 * back with cmpxchg() (the outcome of that attempt is ignored) and return
 * the clamped value. An in-range value is returned untouched.
 */
static int __dm_get_module_param_int(int *module_param, int min, int max)
{
	int param = READ_ONCE(*module_param);
	int clamped;

	if (param < min)
		clamped = min;
	else if (param > max)
		clamped = max;
	else
		return param;

	(void)cmpxchg(module_param, param, clamped);

	return clamped;
}
2018c2ecf20Sopenharmony_ci
/*
 * Read *@module_param, mapping 0 to @def and capping the value at @max.
 *
 * When a non-zero replacement is chosen, try to store it back with
 * cmpxchg() (result ignored) and return it. If the replacement itself
 * would be 0, the value that was read is returned unchanged.
 */
unsigned __dm_get_module_param(unsigned *module_param,
			       unsigned def, unsigned max)
{
	unsigned param = READ_ONCE(*module_param);
	unsigned replacement;

	if (!param)
		replacement = def;
	else if (param > max)
		replacement = max;
	else
		return param;

	/* A zero replacement means "leave the parameter alone". */
	if (!replacement)
		return param;

	(void)cmpxchg(module_param, param, replacement);

	return replacement;
}
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ciunsigned dm_get_reserved_bio_based_ios(void)
2228c2ecf20Sopenharmony_ci{
2238c2ecf20Sopenharmony_ci	return __dm_get_module_param(&reserved_bio_based_ios,
2248c2ecf20Sopenharmony_ci				     RESERVED_BIO_BASED_IOS, DM_RESERVED_MAX_IOS);
2258c2ecf20Sopenharmony_ci}
2268c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_cistatic unsigned dm_get_numa_node(void)
2298c2ecf20Sopenharmony_ci{
2308c2ecf20Sopenharmony_ci	return __dm_get_module_param_int(&dm_numa_node,
2318c2ecf20Sopenharmony_ci					 DM_NUMA_NODE, num_online_nodes() - 1);
2328c2ecf20Sopenharmony_ci}
2338c2ecf20Sopenharmony_ci
2348c2ecf20Sopenharmony_cistatic int __init local_init(void)
2358c2ecf20Sopenharmony_ci{
2368c2ecf20Sopenharmony_ci	int r;
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_ci	r = dm_uevent_init();
2398c2ecf20Sopenharmony_ci	if (r)
2408c2ecf20Sopenharmony_ci		return r;
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci	deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1);
2438c2ecf20Sopenharmony_ci	if (!deferred_remove_workqueue) {
2448c2ecf20Sopenharmony_ci		r = -ENOMEM;
2458c2ecf20Sopenharmony_ci		goto out_uevent_exit;
2468c2ecf20Sopenharmony_ci	}
2478c2ecf20Sopenharmony_ci
2488c2ecf20Sopenharmony_ci	_major = major;
2498c2ecf20Sopenharmony_ci	r = register_blkdev(_major, _name);
2508c2ecf20Sopenharmony_ci	if (r < 0)
2518c2ecf20Sopenharmony_ci		goto out_free_workqueue;
2528c2ecf20Sopenharmony_ci
2538c2ecf20Sopenharmony_ci	if (!_major)
2548c2ecf20Sopenharmony_ci		_major = r;
2558c2ecf20Sopenharmony_ci
2568c2ecf20Sopenharmony_ci	return 0;
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_ciout_free_workqueue:
2598c2ecf20Sopenharmony_ci	destroy_workqueue(deferred_remove_workqueue);
2608c2ecf20Sopenharmony_ciout_uevent_exit:
2618c2ecf20Sopenharmony_ci	dm_uevent_exit();
2628c2ecf20Sopenharmony_ci
2638c2ecf20Sopenharmony_ci	return r;
2648c2ecf20Sopenharmony_ci}
2658c2ecf20Sopenharmony_ci
/*
 * Undo local_init(): destroy the deferred-remove workqueue, unregister
 * the block major, shut down uevent support and reset _major.
 */
static void local_exit(void)
{
	destroy_workqueue(deferred_remove_workqueue);

	unregister_blkdev(_major, _name);
	dm_uevent_exit();

	_major = 0;

	DMINFO("cleaned up");
}
2778c2ecf20Sopenharmony_ci
/*
 * Initialisers run in order by dm_init(). Entry i of _exits undoes entry i
 * of _inits, so the two tables must stay the same length and order.
 */
static int (*_inits[])(void) __initdata = {
	local_init,
	dm_target_init,
	dm_linear_init,
	dm_stripe_init,
	dm_io_init,
	dm_kcopyd_init,
	dm_interface_init,
	dm_statistics_init,
};

/* Teardown counterparts of _inits; walked from last to first. */
static void (*_exits[])(void) = {
	local_exit,
	dm_target_exit,
	dm_linear_exit,
	dm_stripe_exit,
	dm_io_exit,
	dm_kcopyd_exit,
	dm_interface_exit,
	dm_statistics_exit,
};
2998c2ecf20Sopenharmony_ci
3008c2ecf20Sopenharmony_cistatic int __init dm_init(void)
3018c2ecf20Sopenharmony_ci{
3028c2ecf20Sopenharmony_ci	const int count = ARRAY_SIZE(_inits);
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci	int r, i;
3058c2ecf20Sopenharmony_ci
3068c2ecf20Sopenharmony_ci	for (i = 0; i < count; i++) {
3078c2ecf20Sopenharmony_ci		r = _inits[i]();
3088c2ecf20Sopenharmony_ci		if (r)
3098c2ecf20Sopenharmony_ci			goto bad;
3108c2ecf20Sopenharmony_ci	}
3118c2ecf20Sopenharmony_ci
3128c2ecf20Sopenharmony_ci	return 0;
3138c2ecf20Sopenharmony_ci
3148c2ecf20Sopenharmony_ci      bad:
3158c2ecf20Sopenharmony_ci	while (i--)
3168c2ecf20Sopenharmony_ci		_exits[i]();
3178c2ecf20Sopenharmony_ci
3188c2ecf20Sopenharmony_ci	return r;
3198c2ecf20Sopenharmony_ci}
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_cistatic void __exit dm_exit(void)
3228c2ecf20Sopenharmony_ci{
3238c2ecf20Sopenharmony_ci	int i = ARRAY_SIZE(_exits);
3248c2ecf20Sopenharmony_ci
3258c2ecf20Sopenharmony_ci	while (i--)
3268c2ecf20Sopenharmony_ci		_exits[i]();
3278c2ecf20Sopenharmony_ci
3288c2ecf20Sopenharmony_ci	/*
3298c2ecf20Sopenharmony_ci	 * Should be empty by this point.
3308c2ecf20Sopenharmony_ci	 */
3318c2ecf20Sopenharmony_ci	idr_destroy(&_minor_idr);
3328c2ecf20Sopenharmony_ci}
3338c2ecf20Sopenharmony_ci
3348c2ecf20Sopenharmony_ci/*
3358c2ecf20Sopenharmony_ci * Block device functions
3368c2ecf20Sopenharmony_ci */
/* Return non-zero when DMF_DELETING is set in md->flags. */
int dm_deleting_md(struct mapped_device *md)
{
	return test_bit(DMF_DELETING, &md->flags);
}
3418c2ecf20Sopenharmony_ci
3428c2ecf20Sopenharmony_cistatic int dm_blk_open(struct block_device *bdev, fmode_t mode)
3438c2ecf20Sopenharmony_ci{
3448c2ecf20Sopenharmony_ci	struct mapped_device *md;
3458c2ecf20Sopenharmony_ci
3468c2ecf20Sopenharmony_ci	spin_lock(&_minor_lock);
3478c2ecf20Sopenharmony_ci
3488c2ecf20Sopenharmony_ci	md = bdev->bd_disk->private_data;
3498c2ecf20Sopenharmony_ci	if (!md)
3508c2ecf20Sopenharmony_ci		goto out;
3518c2ecf20Sopenharmony_ci
3528c2ecf20Sopenharmony_ci	if (test_bit(DMF_FREEING, &md->flags) ||
3538c2ecf20Sopenharmony_ci	    dm_deleting_md(md)) {
3548c2ecf20Sopenharmony_ci		md = NULL;
3558c2ecf20Sopenharmony_ci		goto out;
3568c2ecf20Sopenharmony_ci	}
3578c2ecf20Sopenharmony_ci
3588c2ecf20Sopenharmony_ci	dm_get(md);
3598c2ecf20Sopenharmony_ci	atomic_inc(&md->open_count);
3608c2ecf20Sopenharmony_ciout:
3618c2ecf20Sopenharmony_ci	spin_unlock(&_minor_lock);
3628c2ecf20Sopenharmony_ci
3638c2ecf20Sopenharmony_ci	return md ? 0 : -ENXIO;
3648c2ecf20Sopenharmony_ci}
3658c2ecf20Sopenharmony_ci
3668c2ecf20Sopenharmony_cistatic void dm_blk_close(struct gendisk *disk, fmode_t mode)
3678c2ecf20Sopenharmony_ci{
3688c2ecf20Sopenharmony_ci	struct mapped_device *md;
3698c2ecf20Sopenharmony_ci
3708c2ecf20Sopenharmony_ci	spin_lock(&_minor_lock);
3718c2ecf20Sopenharmony_ci
3728c2ecf20Sopenharmony_ci	md = disk->private_data;
3738c2ecf20Sopenharmony_ci	if (WARN_ON(!md))
3748c2ecf20Sopenharmony_ci		goto out;
3758c2ecf20Sopenharmony_ci
3768c2ecf20Sopenharmony_ci	if (atomic_dec_and_test(&md->open_count) &&
3778c2ecf20Sopenharmony_ci	    (test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
3788c2ecf20Sopenharmony_ci		queue_work(deferred_remove_workqueue, &deferred_remove_work);
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_ci	dm_put(md);
3818c2ecf20Sopenharmony_ciout:
3828c2ecf20Sopenharmony_ci	spin_unlock(&_minor_lock);
3838c2ecf20Sopenharmony_ci}
3848c2ecf20Sopenharmony_ci
/* Return the current value of md->open_count. */
int dm_open_count(struct mapped_device *md)
{
	return atomic_read(&md->open_count);
}
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_ci/*
3918c2ecf20Sopenharmony_ci * Guarantees nothing is using the device before it's deleted.
3928c2ecf20Sopenharmony_ci */
3938c2ecf20Sopenharmony_ciint dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only_deferred)
3948c2ecf20Sopenharmony_ci{
3958c2ecf20Sopenharmony_ci	int r = 0;
3968c2ecf20Sopenharmony_ci
3978c2ecf20Sopenharmony_ci	spin_lock(&_minor_lock);
3988c2ecf20Sopenharmony_ci
3998c2ecf20Sopenharmony_ci	if (dm_open_count(md)) {
4008c2ecf20Sopenharmony_ci		r = -EBUSY;
4018c2ecf20Sopenharmony_ci		if (mark_deferred)
4028c2ecf20Sopenharmony_ci			set_bit(DMF_DEFERRED_REMOVE, &md->flags);
4038c2ecf20Sopenharmony_ci	} else if (only_deferred && !test_bit(DMF_DEFERRED_REMOVE, &md->flags))
4048c2ecf20Sopenharmony_ci		r = -EEXIST;
4058c2ecf20Sopenharmony_ci	else
4068c2ecf20Sopenharmony_ci		set_bit(DMF_DELETING, &md->flags);
4078c2ecf20Sopenharmony_ci
4088c2ecf20Sopenharmony_ci	spin_unlock(&_minor_lock);
4098c2ecf20Sopenharmony_ci
4108c2ecf20Sopenharmony_ci	return r;
4118c2ecf20Sopenharmony_ci}
4128c2ecf20Sopenharmony_ci
4138c2ecf20Sopenharmony_ciint dm_cancel_deferred_remove(struct mapped_device *md)
4148c2ecf20Sopenharmony_ci{
4158c2ecf20Sopenharmony_ci	int r = 0;
4168c2ecf20Sopenharmony_ci
4178c2ecf20Sopenharmony_ci	spin_lock(&_minor_lock);
4188c2ecf20Sopenharmony_ci
4198c2ecf20Sopenharmony_ci	if (test_bit(DMF_DELETING, &md->flags))
4208c2ecf20Sopenharmony_ci		r = -EBUSY;
4218c2ecf20Sopenharmony_ci	else
4228c2ecf20Sopenharmony_ci		clear_bit(DMF_DEFERRED_REMOVE, &md->flags);
4238c2ecf20Sopenharmony_ci
4248c2ecf20Sopenharmony_ci	spin_unlock(&_minor_lock);
4258c2ecf20Sopenharmony_ci
4268c2ecf20Sopenharmony_ci	return r;
4278c2ecf20Sopenharmony_ci}
4288c2ecf20Sopenharmony_ci
/* Work handler for deferred_remove_work: calls dm_deferred_remove(). */
static void do_deferred_remove(struct work_struct *w)
{
	dm_deferred_remove();
}
4338c2ecf20Sopenharmony_ci
4348c2ecf20Sopenharmony_cistatic int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
4358c2ecf20Sopenharmony_ci{
4368c2ecf20Sopenharmony_ci	struct mapped_device *md = bdev->bd_disk->private_data;
4378c2ecf20Sopenharmony_ci
4388c2ecf20Sopenharmony_ci	return dm_get_geometry(md, geo);
4398c2ecf20Sopenharmony_ci}
4408c2ecf20Sopenharmony_ci
4418c2ecf20Sopenharmony_ci#ifdef CONFIG_BLK_DEV_ZONED
4428c2ecf20Sopenharmony_ciint dm_report_zones_cb(struct blk_zone *zone, unsigned int idx, void *data)
4438c2ecf20Sopenharmony_ci{
4448c2ecf20Sopenharmony_ci	struct dm_report_zones_args *args = data;
4458c2ecf20Sopenharmony_ci	sector_t sector_diff = args->tgt->begin - args->start;
4468c2ecf20Sopenharmony_ci
4478c2ecf20Sopenharmony_ci	/*
4488c2ecf20Sopenharmony_ci	 * Ignore zones beyond the target range.
4498c2ecf20Sopenharmony_ci	 */
4508c2ecf20Sopenharmony_ci	if (zone->start >= args->start + args->tgt->len)
4518c2ecf20Sopenharmony_ci		return 0;
4528c2ecf20Sopenharmony_ci
4538c2ecf20Sopenharmony_ci	/*
4548c2ecf20Sopenharmony_ci	 * Remap the start sector and write pointer position of the zone
4558c2ecf20Sopenharmony_ci	 * to match its position in the target range.
4568c2ecf20Sopenharmony_ci	 */
4578c2ecf20Sopenharmony_ci	zone->start += sector_diff;
4588c2ecf20Sopenharmony_ci	if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
4598c2ecf20Sopenharmony_ci		if (zone->cond == BLK_ZONE_COND_FULL)
4608c2ecf20Sopenharmony_ci			zone->wp = zone->start + zone->len;
4618c2ecf20Sopenharmony_ci		else if (zone->cond == BLK_ZONE_COND_EMPTY)
4628c2ecf20Sopenharmony_ci			zone->wp = zone->start;
4638c2ecf20Sopenharmony_ci		else
4648c2ecf20Sopenharmony_ci			zone->wp += sector_diff;
4658c2ecf20Sopenharmony_ci	}
4668c2ecf20Sopenharmony_ci
4678c2ecf20Sopenharmony_ci	args->next_sector = zone->start + zone->len;
4688c2ecf20Sopenharmony_ci	return args->orig_cb(zone, args->zone_idx++, args->orig_data);
4698c2ecf20Sopenharmony_ci}
4708c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_report_zones_cb);
4718c2ecf20Sopenharmony_ci
4728c2ecf20Sopenharmony_cistatic int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
4738c2ecf20Sopenharmony_ci		unsigned int nr_zones, report_zones_cb cb, void *data)
4748c2ecf20Sopenharmony_ci{
4758c2ecf20Sopenharmony_ci	struct mapped_device *md = disk->private_data;
4768c2ecf20Sopenharmony_ci	struct dm_table *map;
4778c2ecf20Sopenharmony_ci	int srcu_idx, ret;
4788c2ecf20Sopenharmony_ci	struct dm_report_zones_args args = {
4798c2ecf20Sopenharmony_ci		.next_sector = sector,
4808c2ecf20Sopenharmony_ci		.orig_data = data,
4818c2ecf20Sopenharmony_ci		.orig_cb = cb,
4828c2ecf20Sopenharmony_ci	};
4838c2ecf20Sopenharmony_ci
4848c2ecf20Sopenharmony_ci	if (dm_suspended_md(md))
4858c2ecf20Sopenharmony_ci		return -EAGAIN;
4868c2ecf20Sopenharmony_ci
4878c2ecf20Sopenharmony_ci	map = dm_get_live_table(md, &srcu_idx);
4888c2ecf20Sopenharmony_ci	if (!map) {
4898c2ecf20Sopenharmony_ci		ret = -EIO;
4908c2ecf20Sopenharmony_ci		goto out;
4918c2ecf20Sopenharmony_ci	}
4928c2ecf20Sopenharmony_ci
4938c2ecf20Sopenharmony_ci	do {
4948c2ecf20Sopenharmony_ci		struct dm_target *tgt;
4958c2ecf20Sopenharmony_ci
4968c2ecf20Sopenharmony_ci		tgt = dm_table_find_target(map, args.next_sector);
4978c2ecf20Sopenharmony_ci		if (WARN_ON_ONCE(!tgt->type->report_zones)) {
4988c2ecf20Sopenharmony_ci			ret = -EIO;
4998c2ecf20Sopenharmony_ci			goto out;
5008c2ecf20Sopenharmony_ci		}
5018c2ecf20Sopenharmony_ci
5028c2ecf20Sopenharmony_ci		args.tgt = tgt;
5038c2ecf20Sopenharmony_ci		ret = tgt->type->report_zones(tgt, &args,
5048c2ecf20Sopenharmony_ci					      nr_zones - args.zone_idx);
5058c2ecf20Sopenharmony_ci		if (ret < 0)
5068c2ecf20Sopenharmony_ci			goto out;
5078c2ecf20Sopenharmony_ci	} while (args.zone_idx < nr_zones &&
5088c2ecf20Sopenharmony_ci		 args.next_sector < get_capacity(disk));
5098c2ecf20Sopenharmony_ci
5108c2ecf20Sopenharmony_ci	ret = args.zone_idx;
5118c2ecf20Sopenharmony_ciout:
5128c2ecf20Sopenharmony_ci	dm_put_live_table(md, srcu_idx);
5138c2ecf20Sopenharmony_ci	return ret;
5148c2ecf20Sopenharmony_ci}
5158c2ecf20Sopenharmony_ci#else
5168c2ecf20Sopenharmony_ci#define dm_blk_report_zones		NULL
5178c2ecf20Sopenharmony_ci#endif /* CONFIG_BLK_DEV_ZONED */
5188c2ecf20Sopenharmony_ci
5198c2ecf20Sopenharmony_cistatic int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
5208c2ecf20Sopenharmony_ci			    struct block_device **bdev)
5218c2ecf20Sopenharmony_ci{
5228c2ecf20Sopenharmony_ci	struct dm_target *tgt;
5238c2ecf20Sopenharmony_ci	struct dm_table *map;
5248c2ecf20Sopenharmony_ci	int r;
5258c2ecf20Sopenharmony_ci
5268c2ecf20Sopenharmony_ciretry:
5278c2ecf20Sopenharmony_ci	r = -ENOTTY;
5288c2ecf20Sopenharmony_ci	map = dm_get_live_table(md, srcu_idx);
5298c2ecf20Sopenharmony_ci	if (!map || !dm_table_get_size(map))
5308c2ecf20Sopenharmony_ci		return r;
5318c2ecf20Sopenharmony_ci
5328c2ecf20Sopenharmony_ci	/* We only support devices that have a single target */
5338c2ecf20Sopenharmony_ci	if (dm_table_get_num_targets(map) != 1)
5348c2ecf20Sopenharmony_ci		return r;
5358c2ecf20Sopenharmony_ci
5368c2ecf20Sopenharmony_ci	tgt = dm_table_get_target(map, 0);
5378c2ecf20Sopenharmony_ci	if (!tgt->type->prepare_ioctl)
5388c2ecf20Sopenharmony_ci		return r;
5398c2ecf20Sopenharmony_ci
5408c2ecf20Sopenharmony_ci	if (dm_suspended_md(md))
5418c2ecf20Sopenharmony_ci		return -EAGAIN;
5428c2ecf20Sopenharmony_ci
5438c2ecf20Sopenharmony_ci	r = tgt->type->prepare_ioctl(tgt, bdev);
5448c2ecf20Sopenharmony_ci	if (r == -ENOTCONN && !fatal_signal_pending(current)) {
5458c2ecf20Sopenharmony_ci		dm_put_live_table(md, *srcu_idx);
5468c2ecf20Sopenharmony_ci		msleep(10);
5478c2ecf20Sopenharmony_ci		goto retry;
5488c2ecf20Sopenharmony_ci	}
5498c2ecf20Sopenharmony_ci
5508c2ecf20Sopenharmony_ci	return r;
5518c2ecf20Sopenharmony_ci}
5528c2ecf20Sopenharmony_ci
/* Release the live table reference left held by dm_prepare_ioctl(). */
static void dm_unprepare_ioctl(struct mapped_device *md, int srcu_idx)
{
	dm_put_live_table(md, srcu_idx);
}
5578c2ecf20Sopenharmony_ci
5588c2ecf20Sopenharmony_cistatic int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
5598c2ecf20Sopenharmony_ci			unsigned int cmd, unsigned long arg)
5608c2ecf20Sopenharmony_ci{
5618c2ecf20Sopenharmony_ci	struct mapped_device *md = bdev->bd_disk->private_data;
5628c2ecf20Sopenharmony_ci	int r, srcu_idx;
5638c2ecf20Sopenharmony_ci
5648c2ecf20Sopenharmony_ci	r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
5658c2ecf20Sopenharmony_ci	if (r < 0)
5668c2ecf20Sopenharmony_ci		goto out;
5678c2ecf20Sopenharmony_ci
5688c2ecf20Sopenharmony_ci	if (r > 0) {
5698c2ecf20Sopenharmony_ci		/*
5708c2ecf20Sopenharmony_ci		 * Target determined this ioctl is being issued against a
5718c2ecf20Sopenharmony_ci		 * subset of the parent bdev; require extra privileges.
5728c2ecf20Sopenharmony_ci		 */
5738c2ecf20Sopenharmony_ci		if (!capable(CAP_SYS_RAWIO)) {
5748c2ecf20Sopenharmony_ci			DMDEBUG_LIMIT(
5758c2ecf20Sopenharmony_ci	"%s: sending ioctl %x to DM device without required privilege.",
5768c2ecf20Sopenharmony_ci				current->comm, cmd);
5778c2ecf20Sopenharmony_ci			r = -ENOIOCTLCMD;
5788c2ecf20Sopenharmony_ci			goto out;
5798c2ecf20Sopenharmony_ci		}
5808c2ecf20Sopenharmony_ci	}
5818c2ecf20Sopenharmony_ci
5828c2ecf20Sopenharmony_ci	r =  __blkdev_driver_ioctl(bdev, mode, cmd, arg);
5838c2ecf20Sopenharmony_ciout:
5848c2ecf20Sopenharmony_ci	dm_unprepare_ioctl(md, srcu_idx);
5858c2ecf20Sopenharmony_ci	return r;
5868c2ecf20Sopenharmony_ci}
5878c2ecf20Sopenharmony_ci
5888c2ecf20Sopenharmony_ciu64 dm_start_time_ns_from_clone(struct bio *bio)
5898c2ecf20Sopenharmony_ci{
5908c2ecf20Sopenharmony_ci	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
5918c2ecf20Sopenharmony_ci	struct dm_io *io = tio->io;
5928c2ecf20Sopenharmony_ci
5938c2ecf20Sopenharmony_ci	return jiffies_to_nsecs(io->start_time);
5948c2ecf20Sopenharmony_ci}
5958c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone);
5968c2ecf20Sopenharmony_ci
5978c2ecf20Sopenharmony_cistatic void start_io_acct(struct dm_io *io)
5988c2ecf20Sopenharmony_ci{
5998c2ecf20Sopenharmony_ci	struct mapped_device *md = io->md;
6008c2ecf20Sopenharmony_ci	struct bio *bio = io->orig_bio;
6018c2ecf20Sopenharmony_ci
6028c2ecf20Sopenharmony_ci	io->start_time = bio_start_io_acct(bio);
6038c2ecf20Sopenharmony_ci	if (unlikely(dm_stats_used(&md->stats)))
6048c2ecf20Sopenharmony_ci		dm_stats_account_io(&md->stats, bio_data_dir(bio),
6058c2ecf20Sopenharmony_ci				    bio->bi_iter.bi_sector, bio_sectors(bio),
6068c2ecf20Sopenharmony_ci				    false, 0, &io->stats_aux);
6078c2ecf20Sopenharmony_ci}
6088c2ecf20Sopenharmony_ci
/*
 * Finish accounting for @bio on @md.
 *
 * @start_time is the jiffies-based value recorded when accounting began
 * and @stats_aux the per-I/O dm statistics scratch area. Statistics (when
 * in use) are updated first, then the block-layer accounting is ended,
 * then any sleeper on md->wait is woken.
 */
static void end_io_acct(struct mapped_device *md, struct bio *bio,
			unsigned long start_time, struct dm_stats_aux *stats_aux)
{
	unsigned long duration = jiffies - start_time;

	if (unlikely(dm_stats_used(&md->stats)))
		dm_stats_account_io(&md->stats, bio_data_dir(bio),
				    bio->bi_iter.bi_sector, bio_sectors(bio),
				    true, duration, stats_aux);

	/*
	 * NOTE(review): write barrier between the statistics update and the
	 * end of block-layer accounting; the pairing side is not visible
	 * here - confirm before touching the ordering.
	 */
	smp_wmb();

	bio_end_io_acct(bio, start_time);

	/* nudge anyone waiting on suspend queue */
	if (unlikely(wq_has_sleeper(&md->wait)))
		wake_up(&md->wait);
}
6278c2ecf20Sopenharmony_ci
6288c2ecf20Sopenharmony_cistatic struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
6298c2ecf20Sopenharmony_ci{
6308c2ecf20Sopenharmony_ci	struct dm_io *io;
6318c2ecf20Sopenharmony_ci	struct dm_target_io *tio;
6328c2ecf20Sopenharmony_ci	struct bio *clone;
6338c2ecf20Sopenharmony_ci
6348c2ecf20Sopenharmony_ci	clone = bio_alloc_bioset(GFP_NOIO, 0, &md->io_bs);
6358c2ecf20Sopenharmony_ci	if (!clone)
6368c2ecf20Sopenharmony_ci		return NULL;
6378c2ecf20Sopenharmony_ci
6388c2ecf20Sopenharmony_ci	tio = container_of(clone, struct dm_target_io, clone);
6398c2ecf20Sopenharmony_ci	tio->inside_dm_io = true;
6408c2ecf20Sopenharmony_ci	tio->io = NULL;
6418c2ecf20Sopenharmony_ci
6428c2ecf20Sopenharmony_ci	io = container_of(tio, struct dm_io, tio);
6438c2ecf20Sopenharmony_ci	io->magic = DM_IO_MAGIC;
6448c2ecf20Sopenharmony_ci	io->status = 0;
6458c2ecf20Sopenharmony_ci	atomic_set(&io->io_count, 1);
6468c2ecf20Sopenharmony_ci	io->orig_bio = bio;
6478c2ecf20Sopenharmony_ci	io->md = md;
6488c2ecf20Sopenharmony_ci	spin_lock_init(&io->endio_lock);
6498c2ecf20Sopenharmony_ci
6508c2ecf20Sopenharmony_ci	start_io_acct(io);
6518c2ecf20Sopenharmony_ci
6528c2ecf20Sopenharmony_ci	return io;
6538c2ecf20Sopenharmony_ci}
6548c2ecf20Sopenharmony_ci
/*
 * Release @io by dropping the reference on its embedded clone bio, the
 * bio the dm_io was allocated around in alloc_io(). @md is unused.
 */
static void free_io(struct mapped_device *md, struct dm_io *io)
{
	bio_put(&io->tio.clone);
}
6598c2ecf20Sopenharmony_ci
6608c2ecf20Sopenharmony_cistatic struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *ti,
6618c2ecf20Sopenharmony_ci				      unsigned target_bio_nr, gfp_t gfp_mask)
6628c2ecf20Sopenharmony_ci{
6638c2ecf20Sopenharmony_ci	struct dm_target_io *tio;
6648c2ecf20Sopenharmony_ci
6658c2ecf20Sopenharmony_ci	if (!ci->io->tio.io) {
6668c2ecf20Sopenharmony_ci		/* the dm_target_io embedded in ci->io is available */
6678c2ecf20Sopenharmony_ci		tio = &ci->io->tio;
6688c2ecf20Sopenharmony_ci	} else {
6698c2ecf20Sopenharmony_ci		struct bio *clone = bio_alloc_bioset(gfp_mask, 0, &ci->io->md->bs);
6708c2ecf20Sopenharmony_ci		if (!clone)
6718c2ecf20Sopenharmony_ci			return NULL;
6728c2ecf20Sopenharmony_ci
6738c2ecf20Sopenharmony_ci		tio = container_of(clone, struct dm_target_io, clone);
6748c2ecf20Sopenharmony_ci		tio->inside_dm_io = false;
6758c2ecf20Sopenharmony_ci	}
6768c2ecf20Sopenharmony_ci
6778c2ecf20Sopenharmony_ci	tio->magic = DM_TIO_MAGIC;
6788c2ecf20Sopenharmony_ci	tio->io = ci->io;
6798c2ecf20Sopenharmony_ci	tio->ti = ti;
6808c2ecf20Sopenharmony_ci	tio->target_bio_nr = target_bio_nr;
6818c2ecf20Sopenharmony_ci
6828c2ecf20Sopenharmony_ci	return tio;
6838c2ecf20Sopenharmony_ci}
6848c2ecf20Sopenharmony_ci
6858c2ecf20Sopenharmony_cistatic void free_tio(struct dm_target_io *tio)
6868c2ecf20Sopenharmony_ci{
6878c2ecf20Sopenharmony_ci	if (tio->inside_dm_io)
6888c2ecf20Sopenharmony_ci		return;
6898c2ecf20Sopenharmony_ci	bio_put(&tio->clone);
6908c2ecf20Sopenharmony_ci}
6918c2ecf20Sopenharmony_ci
6928c2ecf20Sopenharmony_ci/*
6938c2ecf20Sopenharmony_ci * Add the bio to the list of deferred io.
6948c2ecf20Sopenharmony_ci */
6958c2ecf20Sopenharmony_cistatic void queue_io(struct mapped_device *md, struct bio *bio)
6968c2ecf20Sopenharmony_ci{
6978c2ecf20Sopenharmony_ci	unsigned long flags;
6988c2ecf20Sopenharmony_ci
6998c2ecf20Sopenharmony_ci	spin_lock_irqsave(&md->deferred_lock, flags);
7008c2ecf20Sopenharmony_ci	bio_list_add(&md->deferred, bio);
7018c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&md->deferred_lock, flags);
7028c2ecf20Sopenharmony_ci	queue_work(md->wq, &md->work);
7038c2ecf20Sopenharmony_ci}
7048c2ecf20Sopenharmony_ci
7058c2ecf20Sopenharmony_ci/*
7068c2ecf20Sopenharmony_ci * Everyone (including functions in this file), should use this
7078c2ecf20Sopenharmony_ci * function to access the md->map field, and make sure they call
7088c2ecf20Sopenharmony_ci * dm_put_live_table() when finished.
7098c2ecf20Sopenharmony_ci */
7108c2ecf20Sopenharmony_cistruct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx) __acquires(md->io_barrier)
7118c2ecf20Sopenharmony_ci{
7128c2ecf20Sopenharmony_ci	*srcu_idx = srcu_read_lock(&md->io_barrier);
7138c2ecf20Sopenharmony_ci
7148c2ecf20Sopenharmony_ci	return srcu_dereference(md->map, &md->io_barrier);
7158c2ecf20Sopenharmony_ci}
7168c2ecf20Sopenharmony_ci
7178c2ecf20Sopenharmony_civoid dm_put_live_table(struct mapped_device *md, int srcu_idx) __releases(md->io_barrier)
7188c2ecf20Sopenharmony_ci{
7198c2ecf20Sopenharmony_ci	srcu_read_unlock(&md->io_barrier, srcu_idx);
7208c2ecf20Sopenharmony_ci}
7218c2ecf20Sopenharmony_ci
7228c2ecf20Sopenharmony_civoid dm_sync_table(struct mapped_device *md)
7238c2ecf20Sopenharmony_ci{
7248c2ecf20Sopenharmony_ci	synchronize_srcu(&md->io_barrier);
7258c2ecf20Sopenharmony_ci	synchronize_rcu_expedited();
7268c2ecf20Sopenharmony_ci}
7278c2ecf20Sopenharmony_ci
7288c2ecf20Sopenharmony_ci/*
7298c2ecf20Sopenharmony_ci * A fast alternative to dm_get_live_table/dm_put_live_table.
7308c2ecf20Sopenharmony_ci * The caller must not block between these two functions.
7318c2ecf20Sopenharmony_ci */
7328c2ecf20Sopenharmony_cistatic struct dm_table *dm_get_live_table_fast(struct mapped_device *md) __acquires(RCU)
7338c2ecf20Sopenharmony_ci{
7348c2ecf20Sopenharmony_ci	rcu_read_lock();
7358c2ecf20Sopenharmony_ci	return rcu_dereference(md->map);
7368c2ecf20Sopenharmony_ci}
7378c2ecf20Sopenharmony_ci
7388c2ecf20Sopenharmony_cistatic void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
7398c2ecf20Sopenharmony_ci{
7408c2ecf20Sopenharmony_ci	rcu_read_unlock();
7418c2ecf20Sopenharmony_ci}
7428c2ecf20Sopenharmony_ci
7438c2ecf20Sopenharmony_cistatic char *_dm_claim_ptr = "I belong to device-mapper";
7448c2ecf20Sopenharmony_ci
7458c2ecf20Sopenharmony_ci/*
7468c2ecf20Sopenharmony_ci * Open a table device so we can use it as a map destination.
7478c2ecf20Sopenharmony_ci */
7488c2ecf20Sopenharmony_cistatic int open_table_device(struct table_device *td, dev_t dev,
7498c2ecf20Sopenharmony_ci			     struct mapped_device *md)
7508c2ecf20Sopenharmony_ci{
7518c2ecf20Sopenharmony_ci	struct block_device *bdev;
7528c2ecf20Sopenharmony_ci
7538c2ecf20Sopenharmony_ci	int r;
7548c2ecf20Sopenharmony_ci
7558c2ecf20Sopenharmony_ci	BUG_ON(td->dm_dev.bdev);
7568c2ecf20Sopenharmony_ci
7578c2ecf20Sopenharmony_ci	bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr);
7588c2ecf20Sopenharmony_ci	if (IS_ERR(bdev))
7598c2ecf20Sopenharmony_ci		return PTR_ERR(bdev);
7608c2ecf20Sopenharmony_ci
7618c2ecf20Sopenharmony_ci	r = bd_link_disk_holder(bdev, dm_disk(md));
7628c2ecf20Sopenharmony_ci	if (r) {
7638c2ecf20Sopenharmony_ci		blkdev_put(bdev, td->dm_dev.mode | FMODE_EXCL);
7648c2ecf20Sopenharmony_ci		return r;
7658c2ecf20Sopenharmony_ci	}
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_ci	td->dm_dev.bdev = bdev;
7688c2ecf20Sopenharmony_ci	td->dm_dev.dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
7698c2ecf20Sopenharmony_ci	return 0;
7708c2ecf20Sopenharmony_ci}
7718c2ecf20Sopenharmony_ci
7728c2ecf20Sopenharmony_ci/*
7738c2ecf20Sopenharmony_ci * Close a table device that we've been using.
7748c2ecf20Sopenharmony_ci */
7758c2ecf20Sopenharmony_cistatic void close_table_device(struct table_device *td, struct mapped_device *md)
7768c2ecf20Sopenharmony_ci{
7778c2ecf20Sopenharmony_ci	if (!td->dm_dev.bdev)
7788c2ecf20Sopenharmony_ci		return;
7798c2ecf20Sopenharmony_ci
7808c2ecf20Sopenharmony_ci	bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
7818c2ecf20Sopenharmony_ci	blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
7828c2ecf20Sopenharmony_ci	put_dax(td->dm_dev.dax_dev);
7838c2ecf20Sopenharmony_ci	td->dm_dev.bdev = NULL;
7848c2ecf20Sopenharmony_ci	td->dm_dev.dax_dev = NULL;
7858c2ecf20Sopenharmony_ci}
7868c2ecf20Sopenharmony_ci
7878c2ecf20Sopenharmony_cistatic struct table_device *find_table_device(struct list_head *l, dev_t dev,
7888c2ecf20Sopenharmony_ci					      fmode_t mode)
7898c2ecf20Sopenharmony_ci{
7908c2ecf20Sopenharmony_ci	struct table_device *td;
7918c2ecf20Sopenharmony_ci
7928c2ecf20Sopenharmony_ci	list_for_each_entry(td, l, list)
7938c2ecf20Sopenharmony_ci		if (td->dm_dev.bdev->bd_dev == dev && td->dm_dev.mode == mode)
7948c2ecf20Sopenharmony_ci			return td;
7958c2ecf20Sopenharmony_ci
7968c2ecf20Sopenharmony_ci	return NULL;
7978c2ecf20Sopenharmony_ci}
7988c2ecf20Sopenharmony_ci
7998c2ecf20Sopenharmony_ciint dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
8008c2ecf20Sopenharmony_ci			struct dm_dev **result)
8018c2ecf20Sopenharmony_ci{
8028c2ecf20Sopenharmony_ci	int r;
8038c2ecf20Sopenharmony_ci	struct table_device *td;
8048c2ecf20Sopenharmony_ci
8058c2ecf20Sopenharmony_ci	mutex_lock(&md->table_devices_lock);
8068c2ecf20Sopenharmony_ci	td = find_table_device(&md->table_devices, dev, mode);
8078c2ecf20Sopenharmony_ci	if (!td) {
8088c2ecf20Sopenharmony_ci		td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id);
8098c2ecf20Sopenharmony_ci		if (!td) {
8108c2ecf20Sopenharmony_ci			mutex_unlock(&md->table_devices_lock);
8118c2ecf20Sopenharmony_ci			return -ENOMEM;
8128c2ecf20Sopenharmony_ci		}
8138c2ecf20Sopenharmony_ci
8148c2ecf20Sopenharmony_ci		td->dm_dev.mode = mode;
8158c2ecf20Sopenharmony_ci		td->dm_dev.bdev = NULL;
8168c2ecf20Sopenharmony_ci
8178c2ecf20Sopenharmony_ci		if ((r = open_table_device(td, dev, md))) {
8188c2ecf20Sopenharmony_ci			mutex_unlock(&md->table_devices_lock);
8198c2ecf20Sopenharmony_ci			kfree(td);
8208c2ecf20Sopenharmony_ci			return r;
8218c2ecf20Sopenharmony_ci		}
8228c2ecf20Sopenharmony_ci
8238c2ecf20Sopenharmony_ci		format_dev_t(td->dm_dev.name, dev);
8248c2ecf20Sopenharmony_ci
8258c2ecf20Sopenharmony_ci		refcount_set(&td->count, 1);
8268c2ecf20Sopenharmony_ci		list_add(&td->list, &md->table_devices);
8278c2ecf20Sopenharmony_ci	} else {
8288c2ecf20Sopenharmony_ci		refcount_inc(&td->count);
8298c2ecf20Sopenharmony_ci	}
8308c2ecf20Sopenharmony_ci	mutex_unlock(&md->table_devices_lock);
8318c2ecf20Sopenharmony_ci
8328c2ecf20Sopenharmony_ci	*result = &td->dm_dev;
8338c2ecf20Sopenharmony_ci	return 0;
8348c2ecf20Sopenharmony_ci}
8358c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_get_table_device);
8368c2ecf20Sopenharmony_ci
8378c2ecf20Sopenharmony_civoid dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
8388c2ecf20Sopenharmony_ci{
8398c2ecf20Sopenharmony_ci	struct table_device *td = container_of(d, struct table_device, dm_dev);
8408c2ecf20Sopenharmony_ci
8418c2ecf20Sopenharmony_ci	mutex_lock(&md->table_devices_lock);
8428c2ecf20Sopenharmony_ci	if (refcount_dec_and_test(&td->count)) {
8438c2ecf20Sopenharmony_ci		close_table_device(td, md);
8448c2ecf20Sopenharmony_ci		list_del(&td->list);
8458c2ecf20Sopenharmony_ci		kfree(td);
8468c2ecf20Sopenharmony_ci	}
8478c2ecf20Sopenharmony_ci	mutex_unlock(&md->table_devices_lock);
8488c2ecf20Sopenharmony_ci}
8498c2ecf20Sopenharmony_ciEXPORT_SYMBOL(dm_put_table_device);
8508c2ecf20Sopenharmony_ci
8518c2ecf20Sopenharmony_cistatic void free_table_devices(struct list_head *devices)
8528c2ecf20Sopenharmony_ci{
8538c2ecf20Sopenharmony_ci	struct list_head *tmp, *next;
8548c2ecf20Sopenharmony_ci
8558c2ecf20Sopenharmony_ci	list_for_each_safe(tmp, next, devices) {
8568c2ecf20Sopenharmony_ci		struct table_device *td = list_entry(tmp, struct table_device, list);
8578c2ecf20Sopenharmony_ci
8588c2ecf20Sopenharmony_ci		DMWARN("dm_destroy: %s still exists with %d references",
8598c2ecf20Sopenharmony_ci		       td->dm_dev.name, refcount_read(&td->count));
8608c2ecf20Sopenharmony_ci		kfree(td);
8618c2ecf20Sopenharmony_ci	}
8628c2ecf20Sopenharmony_ci}
8638c2ecf20Sopenharmony_ci
8648c2ecf20Sopenharmony_ci/*
8658c2ecf20Sopenharmony_ci * Get the geometry associated with a dm device
8668c2ecf20Sopenharmony_ci */
8678c2ecf20Sopenharmony_ciint dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
8688c2ecf20Sopenharmony_ci{
8698c2ecf20Sopenharmony_ci	*geo = md->geometry;
8708c2ecf20Sopenharmony_ci
8718c2ecf20Sopenharmony_ci	return 0;
8728c2ecf20Sopenharmony_ci}
8738c2ecf20Sopenharmony_ci
8748c2ecf20Sopenharmony_ci/*
8758c2ecf20Sopenharmony_ci * Set the geometry of a device.
8768c2ecf20Sopenharmony_ci */
8778c2ecf20Sopenharmony_ciint dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
8788c2ecf20Sopenharmony_ci{
8798c2ecf20Sopenharmony_ci	sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;
8808c2ecf20Sopenharmony_ci
8818c2ecf20Sopenharmony_ci	if (geo->start > sz) {
8828c2ecf20Sopenharmony_ci		DMWARN("Start sector is beyond the geometry limits.");
8838c2ecf20Sopenharmony_ci		return -EINVAL;
8848c2ecf20Sopenharmony_ci	}
8858c2ecf20Sopenharmony_ci
8868c2ecf20Sopenharmony_ci	md->geometry = *geo;
8878c2ecf20Sopenharmony_ci
8888c2ecf20Sopenharmony_ci	return 0;
8898c2ecf20Sopenharmony_ci}
8908c2ecf20Sopenharmony_ci
8918c2ecf20Sopenharmony_cistatic int __noflush_suspending(struct mapped_device *md)
8928c2ecf20Sopenharmony_ci{
8938c2ecf20Sopenharmony_ci	return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
8948c2ecf20Sopenharmony_ci}
8958c2ecf20Sopenharmony_ci
8968c2ecf20Sopenharmony_ci/*
8978c2ecf20Sopenharmony_ci * Decrements the number of outstanding ios that a bio has been
8988c2ecf20Sopenharmony_ci * cloned into, completing the original io if necc.
8998c2ecf20Sopenharmony_ci */
9008c2ecf20Sopenharmony_cistatic void dec_pending(struct dm_io *io, blk_status_t error)
9018c2ecf20Sopenharmony_ci{
9028c2ecf20Sopenharmony_ci	unsigned long flags;
9038c2ecf20Sopenharmony_ci	blk_status_t io_error;
9048c2ecf20Sopenharmony_ci	struct bio *bio;
9058c2ecf20Sopenharmony_ci	struct mapped_device *md = io->md;
9068c2ecf20Sopenharmony_ci	unsigned long start_time = 0;
9078c2ecf20Sopenharmony_ci	struct dm_stats_aux stats_aux;
9088c2ecf20Sopenharmony_ci
9098c2ecf20Sopenharmony_ci	/* Push-back supersedes any I/O errors */
9108c2ecf20Sopenharmony_ci	if (unlikely(error)) {
9118c2ecf20Sopenharmony_ci		spin_lock_irqsave(&io->endio_lock, flags);
9128c2ecf20Sopenharmony_ci		if (!(io->status == BLK_STS_DM_REQUEUE && __noflush_suspending(md)))
9138c2ecf20Sopenharmony_ci			io->status = error;
9148c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&io->endio_lock, flags);
9158c2ecf20Sopenharmony_ci	}
9168c2ecf20Sopenharmony_ci
9178c2ecf20Sopenharmony_ci	if (atomic_dec_and_test(&io->io_count)) {
9188c2ecf20Sopenharmony_ci		if (io->status == BLK_STS_DM_REQUEUE) {
9198c2ecf20Sopenharmony_ci			/*
9208c2ecf20Sopenharmony_ci			 * Target requested pushing back the I/O.
9218c2ecf20Sopenharmony_ci			 */
9228c2ecf20Sopenharmony_ci			spin_lock_irqsave(&md->deferred_lock, flags);
9238c2ecf20Sopenharmony_ci			if (__noflush_suspending(md))
9248c2ecf20Sopenharmony_ci				/* NOTE early return due to BLK_STS_DM_REQUEUE below */
9258c2ecf20Sopenharmony_ci				bio_list_add_head(&md->deferred, io->orig_bio);
9268c2ecf20Sopenharmony_ci			else
9278c2ecf20Sopenharmony_ci				/* noflush suspend was interrupted. */
9288c2ecf20Sopenharmony_ci				io->status = BLK_STS_IOERR;
9298c2ecf20Sopenharmony_ci			spin_unlock_irqrestore(&md->deferred_lock, flags);
9308c2ecf20Sopenharmony_ci		}
9318c2ecf20Sopenharmony_ci
9328c2ecf20Sopenharmony_ci		io_error = io->status;
9338c2ecf20Sopenharmony_ci		bio = io->orig_bio;
9348c2ecf20Sopenharmony_ci		start_time = io->start_time;
9358c2ecf20Sopenharmony_ci		stats_aux = io->stats_aux;
9368c2ecf20Sopenharmony_ci		free_io(md, io);
9378c2ecf20Sopenharmony_ci		end_io_acct(md, bio, start_time, &stats_aux);
9388c2ecf20Sopenharmony_ci
9398c2ecf20Sopenharmony_ci		if (io_error == BLK_STS_DM_REQUEUE)
9408c2ecf20Sopenharmony_ci			return;
9418c2ecf20Sopenharmony_ci
9428c2ecf20Sopenharmony_ci		if ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
9438c2ecf20Sopenharmony_ci			/*
9448c2ecf20Sopenharmony_ci			 * Preflush done for flush with data, reissue
9458c2ecf20Sopenharmony_ci			 * without REQ_PREFLUSH.
9468c2ecf20Sopenharmony_ci			 */
9478c2ecf20Sopenharmony_ci			bio->bi_opf &= ~REQ_PREFLUSH;
9488c2ecf20Sopenharmony_ci			queue_io(md, bio);
9498c2ecf20Sopenharmony_ci		} else {
9508c2ecf20Sopenharmony_ci			/* done with normal IO or empty flush */
9518c2ecf20Sopenharmony_ci			if (io_error)
9528c2ecf20Sopenharmony_ci				bio->bi_status = io_error;
9538c2ecf20Sopenharmony_ci			bio_endio(bio);
9548c2ecf20Sopenharmony_ci		}
9558c2ecf20Sopenharmony_ci	}
9568c2ecf20Sopenharmony_ci}
9578c2ecf20Sopenharmony_ci
9588c2ecf20Sopenharmony_civoid disable_discard(struct mapped_device *md)
9598c2ecf20Sopenharmony_ci{
9608c2ecf20Sopenharmony_ci	struct queue_limits *limits = dm_get_queue_limits(md);
9618c2ecf20Sopenharmony_ci
9628c2ecf20Sopenharmony_ci	/* device doesn't really support DISCARD, disable it */
9638c2ecf20Sopenharmony_ci	limits->max_discard_sectors = 0;
9648c2ecf20Sopenharmony_ci	blk_queue_flag_clear(QUEUE_FLAG_DISCARD, md->queue);
9658c2ecf20Sopenharmony_ci}
9668c2ecf20Sopenharmony_ci
9678c2ecf20Sopenharmony_civoid disable_write_same(struct mapped_device *md)
9688c2ecf20Sopenharmony_ci{
9698c2ecf20Sopenharmony_ci	struct queue_limits *limits = dm_get_queue_limits(md);
9708c2ecf20Sopenharmony_ci
9718c2ecf20Sopenharmony_ci	/* device doesn't really support WRITE SAME, disable it */
9728c2ecf20Sopenharmony_ci	limits->max_write_same_sectors = 0;
9738c2ecf20Sopenharmony_ci}
9748c2ecf20Sopenharmony_ci
9758c2ecf20Sopenharmony_civoid disable_write_zeroes(struct mapped_device *md)
9768c2ecf20Sopenharmony_ci{
9778c2ecf20Sopenharmony_ci	struct queue_limits *limits = dm_get_queue_limits(md);
9788c2ecf20Sopenharmony_ci
9798c2ecf20Sopenharmony_ci	/* device doesn't really support WRITE ZEROES, disable it */
9808c2ecf20Sopenharmony_ci	limits->max_write_zeroes_sectors = 0;
9818c2ecf20Sopenharmony_ci}
9828c2ecf20Sopenharmony_ci
9838c2ecf20Sopenharmony_cistatic bool swap_bios_limit(struct dm_target *ti, struct bio *bio)
9848c2ecf20Sopenharmony_ci{
9858c2ecf20Sopenharmony_ci	return unlikely((bio->bi_opf & REQ_SWAP) != 0) && unlikely(ti->limit_swap_bios);
9868c2ecf20Sopenharmony_ci}
9878c2ecf20Sopenharmony_ci
9888c2ecf20Sopenharmony_cistatic void clone_endio(struct bio *bio)
9898c2ecf20Sopenharmony_ci{
9908c2ecf20Sopenharmony_ci	blk_status_t error = bio->bi_status;
9918c2ecf20Sopenharmony_ci	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
9928c2ecf20Sopenharmony_ci	struct dm_io *io = tio->io;
9938c2ecf20Sopenharmony_ci	struct mapped_device *md = tio->io->md;
9948c2ecf20Sopenharmony_ci	dm_endio_fn endio = tio->ti->type->end_io;
9958c2ecf20Sopenharmony_ci	struct bio *orig_bio = io->orig_bio;
9968c2ecf20Sopenharmony_ci
9978c2ecf20Sopenharmony_ci	if (unlikely(error == BLK_STS_TARGET)) {
9988c2ecf20Sopenharmony_ci		if (bio_op(bio) == REQ_OP_DISCARD &&
9998c2ecf20Sopenharmony_ci		    !bio->bi_disk->queue->limits.max_discard_sectors)
10008c2ecf20Sopenharmony_ci			disable_discard(md);
10018c2ecf20Sopenharmony_ci		else if (bio_op(bio) == REQ_OP_WRITE_SAME &&
10028c2ecf20Sopenharmony_ci			 !bio->bi_disk->queue->limits.max_write_same_sectors)
10038c2ecf20Sopenharmony_ci			disable_write_same(md);
10048c2ecf20Sopenharmony_ci		else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
10058c2ecf20Sopenharmony_ci			 !bio->bi_disk->queue->limits.max_write_zeroes_sectors)
10068c2ecf20Sopenharmony_ci			disable_write_zeroes(md);
10078c2ecf20Sopenharmony_ci	}
10088c2ecf20Sopenharmony_ci
10098c2ecf20Sopenharmony_ci	/*
10108c2ecf20Sopenharmony_ci	 * For zone-append bios get offset in zone of the written
10118c2ecf20Sopenharmony_ci	 * sector and add that to the original bio sector pos.
10128c2ecf20Sopenharmony_ci	 */
10138c2ecf20Sopenharmony_ci	if (bio_op(orig_bio) == REQ_OP_ZONE_APPEND) {
10148c2ecf20Sopenharmony_ci		sector_t written_sector = bio->bi_iter.bi_sector;
10158c2ecf20Sopenharmony_ci		struct request_queue *q = orig_bio->bi_disk->queue;
10168c2ecf20Sopenharmony_ci		u64 mask = (u64)blk_queue_zone_sectors(q) - 1;
10178c2ecf20Sopenharmony_ci
10188c2ecf20Sopenharmony_ci		orig_bio->bi_iter.bi_sector += written_sector & mask;
10198c2ecf20Sopenharmony_ci	}
10208c2ecf20Sopenharmony_ci
10218c2ecf20Sopenharmony_ci	if (endio) {
10228c2ecf20Sopenharmony_ci		int r = endio(tio->ti, bio, &error);
10238c2ecf20Sopenharmony_ci		switch (r) {
10248c2ecf20Sopenharmony_ci		case DM_ENDIO_REQUEUE:
10258c2ecf20Sopenharmony_ci			error = BLK_STS_DM_REQUEUE;
10268c2ecf20Sopenharmony_ci			fallthrough;
10278c2ecf20Sopenharmony_ci		case DM_ENDIO_DONE:
10288c2ecf20Sopenharmony_ci			break;
10298c2ecf20Sopenharmony_ci		case DM_ENDIO_INCOMPLETE:
10308c2ecf20Sopenharmony_ci			/* The target will handle the io */
10318c2ecf20Sopenharmony_ci			return;
10328c2ecf20Sopenharmony_ci		default:
10338c2ecf20Sopenharmony_ci			DMWARN("unimplemented target endio return value: %d", r);
10348c2ecf20Sopenharmony_ci			BUG();
10358c2ecf20Sopenharmony_ci		}
10368c2ecf20Sopenharmony_ci	}
10378c2ecf20Sopenharmony_ci
10388c2ecf20Sopenharmony_ci	if (unlikely(swap_bios_limit(tio->ti, bio))) {
10398c2ecf20Sopenharmony_ci		struct mapped_device *md = io->md;
10408c2ecf20Sopenharmony_ci		up(&md->swap_bios_semaphore);
10418c2ecf20Sopenharmony_ci	}
10428c2ecf20Sopenharmony_ci
10438c2ecf20Sopenharmony_ci	free_tio(tio);
10448c2ecf20Sopenharmony_ci	dec_pending(io, error);
10458c2ecf20Sopenharmony_ci}
10468c2ecf20Sopenharmony_ci
10478c2ecf20Sopenharmony_ci/*
10488c2ecf20Sopenharmony_ci * Return maximum size of I/O possible at the supplied sector up to the current
10498c2ecf20Sopenharmony_ci * target boundary.
10508c2ecf20Sopenharmony_ci */
10518c2ecf20Sopenharmony_cistatic inline sector_t max_io_len_target_boundary(struct dm_target *ti,
10528c2ecf20Sopenharmony_ci						  sector_t target_offset)
10538c2ecf20Sopenharmony_ci{
10548c2ecf20Sopenharmony_ci	return ti->len - target_offset;
10558c2ecf20Sopenharmony_ci}
10568c2ecf20Sopenharmony_ci
10578c2ecf20Sopenharmony_cistatic sector_t max_io_len(struct dm_target *ti, sector_t sector)
10588c2ecf20Sopenharmony_ci{
10598c2ecf20Sopenharmony_ci	sector_t target_offset = dm_target_offset(ti, sector);
10608c2ecf20Sopenharmony_ci	sector_t len = max_io_len_target_boundary(ti, target_offset);
10618c2ecf20Sopenharmony_ci	sector_t max_len;
10628c2ecf20Sopenharmony_ci
10638c2ecf20Sopenharmony_ci	/*
10648c2ecf20Sopenharmony_ci	 * Does the target need to split IO even further?
10658c2ecf20Sopenharmony_ci	 * - varied (per target) IO splitting is a tenet of DM; this
10668c2ecf20Sopenharmony_ci	 *   explains why stacked chunk_sectors based splitting via
10678c2ecf20Sopenharmony_ci	 *   blk_max_size_offset() isn't possible here. So pass in
10688c2ecf20Sopenharmony_ci	 *   ti->max_io_len to override stacked chunk_sectors.
10698c2ecf20Sopenharmony_ci	 */
10708c2ecf20Sopenharmony_ci	if (ti->max_io_len) {
10718c2ecf20Sopenharmony_ci		max_len = blk_max_size_offset(ti->table->md->queue,
10728c2ecf20Sopenharmony_ci					      target_offset, ti->max_io_len);
10738c2ecf20Sopenharmony_ci		if (len > max_len)
10748c2ecf20Sopenharmony_ci			len = max_len;
10758c2ecf20Sopenharmony_ci	}
10768c2ecf20Sopenharmony_ci
10778c2ecf20Sopenharmony_ci	return len;
10788c2ecf20Sopenharmony_ci}
10798c2ecf20Sopenharmony_ci
10808c2ecf20Sopenharmony_ciint dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
10818c2ecf20Sopenharmony_ci{
10828c2ecf20Sopenharmony_ci	if (len > UINT_MAX) {
10838c2ecf20Sopenharmony_ci		DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)",
10848c2ecf20Sopenharmony_ci		      (unsigned long long)len, UINT_MAX);
10858c2ecf20Sopenharmony_ci		ti->error = "Maximum size of target IO is too large";
10868c2ecf20Sopenharmony_ci		return -EINVAL;
10878c2ecf20Sopenharmony_ci	}
10888c2ecf20Sopenharmony_ci
10898c2ecf20Sopenharmony_ci	ti->max_io_len = (uint32_t) len;
10908c2ecf20Sopenharmony_ci
10918c2ecf20Sopenharmony_ci	return 0;
10928c2ecf20Sopenharmony_ci}
10938c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
10948c2ecf20Sopenharmony_ci
10958c2ecf20Sopenharmony_cistatic struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
10968c2ecf20Sopenharmony_ci						sector_t sector, int *srcu_idx)
10978c2ecf20Sopenharmony_ci	__acquires(md->io_barrier)
10988c2ecf20Sopenharmony_ci{
10998c2ecf20Sopenharmony_ci	struct dm_table *map;
11008c2ecf20Sopenharmony_ci	struct dm_target *ti;
11018c2ecf20Sopenharmony_ci
11028c2ecf20Sopenharmony_ci	map = dm_get_live_table(md, srcu_idx);
11038c2ecf20Sopenharmony_ci	if (!map)
11048c2ecf20Sopenharmony_ci		return NULL;
11058c2ecf20Sopenharmony_ci
11068c2ecf20Sopenharmony_ci	ti = dm_table_find_target(map, sector);
11078c2ecf20Sopenharmony_ci	if (!ti)
11088c2ecf20Sopenharmony_ci		return NULL;
11098c2ecf20Sopenharmony_ci
11108c2ecf20Sopenharmony_ci	return ti;
11118c2ecf20Sopenharmony_ci}
11128c2ecf20Sopenharmony_ci
11138c2ecf20Sopenharmony_cistatic long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
11148c2ecf20Sopenharmony_ci				 long nr_pages, void **kaddr, pfn_t *pfn)
11158c2ecf20Sopenharmony_ci{
11168c2ecf20Sopenharmony_ci	struct mapped_device *md = dax_get_private(dax_dev);
11178c2ecf20Sopenharmony_ci	sector_t sector = pgoff * PAGE_SECTORS;
11188c2ecf20Sopenharmony_ci	struct dm_target *ti;
11198c2ecf20Sopenharmony_ci	long len, ret = -EIO;
11208c2ecf20Sopenharmony_ci	int srcu_idx;
11218c2ecf20Sopenharmony_ci
11228c2ecf20Sopenharmony_ci	ti = dm_dax_get_live_target(md, sector, &srcu_idx);
11238c2ecf20Sopenharmony_ci
11248c2ecf20Sopenharmony_ci	if (!ti)
11258c2ecf20Sopenharmony_ci		goto out;
11268c2ecf20Sopenharmony_ci	if (!ti->type->direct_access)
11278c2ecf20Sopenharmony_ci		goto out;
11288c2ecf20Sopenharmony_ci	len = max_io_len(ti, sector) / PAGE_SECTORS;
11298c2ecf20Sopenharmony_ci	if (len < 1)
11308c2ecf20Sopenharmony_ci		goto out;
11318c2ecf20Sopenharmony_ci	nr_pages = min(len, nr_pages);
11328c2ecf20Sopenharmony_ci	ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn);
11338c2ecf20Sopenharmony_ci
11348c2ecf20Sopenharmony_ci out:
11358c2ecf20Sopenharmony_ci	dm_put_live_table(md, srcu_idx);
11368c2ecf20Sopenharmony_ci
11378c2ecf20Sopenharmony_ci	return ret;
11388c2ecf20Sopenharmony_ci}
11398c2ecf20Sopenharmony_ci
11408c2ecf20Sopenharmony_cistatic bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
11418c2ecf20Sopenharmony_ci		int blocksize, sector_t start, sector_t len)
11428c2ecf20Sopenharmony_ci{
11438c2ecf20Sopenharmony_ci	struct mapped_device *md = dax_get_private(dax_dev);
11448c2ecf20Sopenharmony_ci	struct dm_table *map;
11458c2ecf20Sopenharmony_ci	bool ret = false;
11468c2ecf20Sopenharmony_ci	int srcu_idx;
11478c2ecf20Sopenharmony_ci
11488c2ecf20Sopenharmony_ci	map = dm_get_live_table(md, &srcu_idx);
11498c2ecf20Sopenharmony_ci	if (!map)
11508c2ecf20Sopenharmony_ci		goto out;
11518c2ecf20Sopenharmony_ci
11528c2ecf20Sopenharmony_ci	ret = dm_table_supports_dax(map, device_not_dax_capable, &blocksize);
11538c2ecf20Sopenharmony_ci
11548c2ecf20Sopenharmony_ciout:
11558c2ecf20Sopenharmony_ci	dm_put_live_table(md, srcu_idx);
11568c2ecf20Sopenharmony_ci
11578c2ecf20Sopenharmony_ci	return ret;
11588c2ecf20Sopenharmony_ci}
11598c2ecf20Sopenharmony_ci
11608c2ecf20Sopenharmony_cistatic size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
11618c2ecf20Sopenharmony_ci				    void *addr, size_t bytes, struct iov_iter *i)
11628c2ecf20Sopenharmony_ci{
11638c2ecf20Sopenharmony_ci	struct mapped_device *md = dax_get_private(dax_dev);
11648c2ecf20Sopenharmony_ci	sector_t sector = pgoff * PAGE_SECTORS;
11658c2ecf20Sopenharmony_ci	struct dm_target *ti;
11668c2ecf20Sopenharmony_ci	long ret = 0;
11678c2ecf20Sopenharmony_ci	int srcu_idx;
11688c2ecf20Sopenharmony_ci
11698c2ecf20Sopenharmony_ci	ti = dm_dax_get_live_target(md, sector, &srcu_idx);
11708c2ecf20Sopenharmony_ci
11718c2ecf20Sopenharmony_ci	if (!ti)
11728c2ecf20Sopenharmony_ci		goto out;
11738c2ecf20Sopenharmony_ci	if (!ti->type->dax_copy_from_iter) {
11748c2ecf20Sopenharmony_ci		ret = copy_from_iter(addr, bytes, i);
11758c2ecf20Sopenharmony_ci		goto out;
11768c2ecf20Sopenharmony_ci	}
11778c2ecf20Sopenharmony_ci	ret = ti->type->dax_copy_from_iter(ti, pgoff, addr, bytes, i);
11788c2ecf20Sopenharmony_ci out:
11798c2ecf20Sopenharmony_ci	dm_put_live_table(md, srcu_idx);
11808c2ecf20Sopenharmony_ci
11818c2ecf20Sopenharmony_ci	return ret;
11828c2ecf20Sopenharmony_ci}
11838c2ecf20Sopenharmony_ci
11848c2ecf20Sopenharmony_cistatic size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
11858c2ecf20Sopenharmony_ci		void *addr, size_t bytes, struct iov_iter *i)
11868c2ecf20Sopenharmony_ci{
11878c2ecf20Sopenharmony_ci	struct mapped_device *md = dax_get_private(dax_dev);
11888c2ecf20Sopenharmony_ci	sector_t sector = pgoff * PAGE_SECTORS;
11898c2ecf20Sopenharmony_ci	struct dm_target *ti;
11908c2ecf20Sopenharmony_ci	long ret = 0;
11918c2ecf20Sopenharmony_ci	int srcu_idx;
11928c2ecf20Sopenharmony_ci
11938c2ecf20Sopenharmony_ci	ti = dm_dax_get_live_target(md, sector, &srcu_idx);
11948c2ecf20Sopenharmony_ci
11958c2ecf20Sopenharmony_ci	if (!ti)
11968c2ecf20Sopenharmony_ci		goto out;
11978c2ecf20Sopenharmony_ci	if (!ti->type->dax_copy_to_iter) {
11988c2ecf20Sopenharmony_ci		ret = copy_to_iter(addr, bytes, i);
11998c2ecf20Sopenharmony_ci		goto out;
12008c2ecf20Sopenharmony_ci	}
12018c2ecf20Sopenharmony_ci	ret = ti->type->dax_copy_to_iter(ti, pgoff, addr, bytes, i);
12028c2ecf20Sopenharmony_ci out:
12038c2ecf20Sopenharmony_ci	dm_put_live_table(md, srcu_idx);
12048c2ecf20Sopenharmony_ci
12058c2ecf20Sopenharmony_ci	return ret;
12068c2ecf20Sopenharmony_ci}
12078c2ecf20Sopenharmony_ci
12088c2ecf20Sopenharmony_cistatic int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
12098c2ecf20Sopenharmony_ci				  size_t nr_pages)
12108c2ecf20Sopenharmony_ci{
12118c2ecf20Sopenharmony_ci	struct mapped_device *md = dax_get_private(dax_dev);
12128c2ecf20Sopenharmony_ci	sector_t sector = pgoff * PAGE_SECTORS;
12138c2ecf20Sopenharmony_ci	struct dm_target *ti;
12148c2ecf20Sopenharmony_ci	int ret = -EIO;
12158c2ecf20Sopenharmony_ci	int srcu_idx;
12168c2ecf20Sopenharmony_ci
12178c2ecf20Sopenharmony_ci	ti = dm_dax_get_live_target(md, sector, &srcu_idx);
12188c2ecf20Sopenharmony_ci
12198c2ecf20Sopenharmony_ci	if (!ti)
12208c2ecf20Sopenharmony_ci		goto out;
12218c2ecf20Sopenharmony_ci	if (WARN_ON(!ti->type->dax_zero_page_range)) {
12228c2ecf20Sopenharmony_ci		/*
12238c2ecf20Sopenharmony_ci		 * ->zero_page_range() is mandatory dax operation. If we are
12248c2ecf20Sopenharmony_ci		 *  here, something is wrong.
12258c2ecf20Sopenharmony_ci		 */
12268c2ecf20Sopenharmony_ci		goto out;
12278c2ecf20Sopenharmony_ci	}
12288c2ecf20Sopenharmony_ci	ret = ti->type->dax_zero_page_range(ti, pgoff, nr_pages);
12298c2ecf20Sopenharmony_ci out:
12308c2ecf20Sopenharmony_ci	dm_put_live_table(md, srcu_idx);
12318c2ecf20Sopenharmony_ci
12328c2ecf20Sopenharmony_ci	return ret;
12338c2ecf20Sopenharmony_ci}
12348c2ecf20Sopenharmony_ci
12358c2ecf20Sopenharmony_ci/*
12368c2ecf20Sopenharmony_ci * A target may call dm_accept_partial_bio only from the map routine.  It is
12378c2ecf20Sopenharmony_ci * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_* zone management
12388c2ecf20Sopenharmony_ci * operations and REQ_OP_ZONE_APPEND (zone append writes).
12398c2ecf20Sopenharmony_ci *
12408c2ecf20Sopenharmony_ci * dm_accept_partial_bio informs the dm that the target only wants to process
12418c2ecf20Sopenharmony_ci * additional n_sectors sectors of the bio and the rest of the data should be
12428c2ecf20Sopenharmony_ci * sent in a next bio.
12438c2ecf20Sopenharmony_ci *
12448c2ecf20Sopenharmony_ci * A diagram that explains the arithmetics:
12458c2ecf20Sopenharmony_ci * +--------------------+---------------+-------+
12468c2ecf20Sopenharmony_ci * |         1          |       2       |   3   |
12478c2ecf20Sopenharmony_ci * +--------------------+---------------+-------+
12488c2ecf20Sopenharmony_ci *
12498c2ecf20Sopenharmony_ci * <-------------- *tio->len_ptr --------------->
12508c2ecf20Sopenharmony_ci *                      <------- bi_size ------->
12518c2ecf20Sopenharmony_ci *                      <-- n_sectors -->
12528c2ecf20Sopenharmony_ci *
12538c2ecf20Sopenharmony_ci * Region 1 was already iterated over with bio_advance or similar function.
12548c2ecf20Sopenharmony_ci *	(it may be empty if the target doesn't use bio_advance)
12558c2ecf20Sopenharmony_ci * Region 2 is the remaining bio size that the target wants to process.
12568c2ecf20Sopenharmony_ci *	(it may be empty if region 1 is non-empty, although there is no reason
12578c2ecf20Sopenharmony_ci *	 to make it empty)
12588c2ecf20Sopenharmony_ci * The target requires that region 3 is to be sent in the next bio.
12598c2ecf20Sopenharmony_ci *
12608c2ecf20Sopenharmony_ci * If the target wants to receive multiple copies of the bio (via num_*bios, etc),
12618c2ecf20Sopenharmony_ci * the partially processed part (the sum of regions 1+2) must be the same for all
12628c2ecf20Sopenharmony_ci * copies of the bio.
12638c2ecf20Sopenharmony_ci */
12648c2ecf20Sopenharmony_civoid dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
12658c2ecf20Sopenharmony_ci{
12668c2ecf20Sopenharmony_ci	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
12678c2ecf20Sopenharmony_ci	unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
12688c2ecf20Sopenharmony_ci
12698c2ecf20Sopenharmony_ci	BUG_ON(bio->bi_opf & REQ_PREFLUSH);
12708c2ecf20Sopenharmony_ci	BUG_ON(op_is_zone_mgmt(bio_op(bio)));
12718c2ecf20Sopenharmony_ci	BUG_ON(bio_op(bio) == REQ_OP_ZONE_APPEND);
12728c2ecf20Sopenharmony_ci	BUG_ON(bi_size > *tio->len_ptr);
12738c2ecf20Sopenharmony_ci	BUG_ON(n_sectors > bi_size);
12748c2ecf20Sopenharmony_ci
12758c2ecf20Sopenharmony_ci	*tio->len_ptr -= bi_size - n_sectors;
12768c2ecf20Sopenharmony_ci	bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
12778c2ecf20Sopenharmony_ci}
12788c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_accept_partial_bio);
12798c2ecf20Sopenharmony_ci
12808c2ecf20Sopenharmony_cistatic noinline void __set_swap_bios_limit(struct mapped_device *md, int latch)
12818c2ecf20Sopenharmony_ci{
12828c2ecf20Sopenharmony_ci	mutex_lock(&md->swap_bios_lock);
12838c2ecf20Sopenharmony_ci	while (latch < md->swap_bios) {
12848c2ecf20Sopenharmony_ci		cond_resched();
12858c2ecf20Sopenharmony_ci		down(&md->swap_bios_semaphore);
12868c2ecf20Sopenharmony_ci		md->swap_bios--;
12878c2ecf20Sopenharmony_ci	}
12888c2ecf20Sopenharmony_ci	while (latch > md->swap_bios) {
12898c2ecf20Sopenharmony_ci		cond_resched();
12908c2ecf20Sopenharmony_ci		up(&md->swap_bios_semaphore);
12918c2ecf20Sopenharmony_ci		md->swap_bios++;
12928c2ecf20Sopenharmony_ci	}
12938c2ecf20Sopenharmony_ci	mutex_unlock(&md->swap_bios_lock);
12948c2ecf20Sopenharmony_ci}
12958c2ecf20Sopenharmony_ci
12968c2ecf20Sopenharmony_cistatic blk_qc_t __map_bio(struct dm_target_io *tio)
12978c2ecf20Sopenharmony_ci{
12988c2ecf20Sopenharmony_ci	int r;
12998c2ecf20Sopenharmony_ci	sector_t sector;
13008c2ecf20Sopenharmony_ci	struct bio *clone = &tio->clone;
13018c2ecf20Sopenharmony_ci	struct dm_io *io = tio->io;
13028c2ecf20Sopenharmony_ci	struct dm_target *ti = tio->ti;
13038c2ecf20Sopenharmony_ci	blk_qc_t ret = BLK_QC_T_NONE;
13048c2ecf20Sopenharmony_ci
13058c2ecf20Sopenharmony_ci	clone->bi_end_io = clone_endio;
13068c2ecf20Sopenharmony_ci
13078c2ecf20Sopenharmony_ci	/*
13088c2ecf20Sopenharmony_ci	 * Map the clone.  If r == 0 we don't need to do
13098c2ecf20Sopenharmony_ci	 * anything, the target has assumed ownership of
13108c2ecf20Sopenharmony_ci	 * this io.
13118c2ecf20Sopenharmony_ci	 */
13128c2ecf20Sopenharmony_ci	atomic_inc(&io->io_count);
13138c2ecf20Sopenharmony_ci	sector = clone->bi_iter.bi_sector;
13148c2ecf20Sopenharmony_ci
13158c2ecf20Sopenharmony_ci	if (unlikely(swap_bios_limit(ti, clone))) {
13168c2ecf20Sopenharmony_ci		struct mapped_device *md = io->md;
13178c2ecf20Sopenharmony_ci		int latch = get_swap_bios();
13188c2ecf20Sopenharmony_ci		if (unlikely(latch != md->swap_bios))
13198c2ecf20Sopenharmony_ci			__set_swap_bios_limit(md, latch);
13208c2ecf20Sopenharmony_ci		down(&md->swap_bios_semaphore);
13218c2ecf20Sopenharmony_ci	}
13228c2ecf20Sopenharmony_ci
13238c2ecf20Sopenharmony_ci	r = ti->type->map(ti, clone);
13248c2ecf20Sopenharmony_ci	switch (r) {
13258c2ecf20Sopenharmony_ci	case DM_MAPIO_SUBMITTED:
13268c2ecf20Sopenharmony_ci		break;
13278c2ecf20Sopenharmony_ci	case DM_MAPIO_REMAPPED:
13288c2ecf20Sopenharmony_ci		/* the bio has been remapped so dispatch it */
13298c2ecf20Sopenharmony_ci		trace_block_bio_remap(clone->bi_disk->queue, clone,
13308c2ecf20Sopenharmony_ci				      bio_dev(io->orig_bio), sector);
13318c2ecf20Sopenharmony_ci		ret = submit_bio_noacct(clone);
13328c2ecf20Sopenharmony_ci		break;
13338c2ecf20Sopenharmony_ci	case DM_MAPIO_KILL:
13348c2ecf20Sopenharmony_ci		if (unlikely(swap_bios_limit(ti, clone))) {
13358c2ecf20Sopenharmony_ci			struct mapped_device *md = io->md;
13368c2ecf20Sopenharmony_ci			up(&md->swap_bios_semaphore);
13378c2ecf20Sopenharmony_ci		}
13388c2ecf20Sopenharmony_ci		free_tio(tio);
13398c2ecf20Sopenharmony_ci		dec_pending(io, BLK_STS_IOERR);
13408c2ecf20Sopenharmony_ci		break;
13418c2ecf20Sopenharmony_ci	case DM_MAPIO_REQUEUE:
13428c2ecf20Sopenharmony_ci		if (unlikely(swap_bios_limit(ti, clone))) {
13438c2ecf20Sopenharmony_ci			struct mapped_device *md = io->md;
13448c2ecf20Sopenharmony_ci			up(&md->swap_bios_semaphore);
13458c2ecf20Sopenharmony_ci		}
13468c2ecf20Sopenharmony_ci		free_tio(tio);
13478c2ecf20Sopenharmony_ci		dec_pending(io, BLK_STS_DM_REQUEUE);
13488c2ecf20Sopenharmony_ci		break;
13498c2ecf20Sopenharmony_ci	default:
13508c2ecf20Sopenharmony_ci		DMWARN("unimplemented target map return value: %d", r);
13518c2ecf20Sopenharmony_ci		BUG();
13528c2ecf20Sopenharmony_ci	}
13538c2ecf20Sopenharmony_ci
13548c2ecf20Sopenharmony_ci	return ret;
13558c2ecf20Sopenharmony_ci}
13568c2ecf20Sopenharmony_ci
13578c2ecf20Sopenharmony_cistatic void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
13588c2ecf20Sopenharmony_ci{
13598c2ecf20Sopenharmony_ci	bio->bi_iter.bi_sector = sector;
13608c2ecf20Sopenharmony_ci	bio->bi_iter.bi_size = to_bytes(len);
13618c2ecf20Sopenharmony_ci}
13628c2ecf20Sopenharmony_ci
13638c2ecf20Sopenharmony_ci/*
13648c2ecf20Sopenharmony_ci * Creates a bio that consists of range of complete bvecs.
13658c2ecf20Sopenharmony_ci */
13668c2ecf20Sopenharmony_cistatic int clone_bio(struct dm_target_io *tio, struct bio *bio,
13678c2ecf20Sopenharmony_ci		     sector_t sector, unsigned len)
13688c2ecf20Sopenharmony_ci{
13698c2ecf20Sopenharmony_ci	struct bio *clone = &tio->clone;
13708c2ecf20Sopenharmony_ci	int r;
13718c2ecf20Sopenharmony_ci
13728c2ecf20Sopenharmony_ci	__bio_clone_fast(clone, bio);
13738c2ecf20Sopenharmony_ci
13748c2ecf20Sopenharmony_ci	r = bio_crypt_clone(clone, bio, GFP_NOIO);
13758c2ecf20Sopenharmony_ci	if (r < 0)
13768c2ecf20Sopenharmony_ci		return r;
13778c2ecf20Sopenharmony_ci
13788c2ecf20Sopenharmony_ci	if (bio_integrity(bio)) {
13798c2ecf20Sopenharmony_ci		if (unlikely(!dm_target_has_integrity(tio->ti->type) &&
13808c2ecf20Sopenharmony_ci			     !dm_target_passes_integrity(tio->ti->type))) {
13818c2ecf20Sopenharmony_ci			DMWARN("%s: the target %s doesn't support integrity data.",
13828c2ecf20Sopenharmony_ci				dm_device_name(tio->io->md),
13838c2ecf20Sopenharmony_ci				tio->ti->type->name);
13848c2ecf20Sopenharmony_ci			return -EIO;
13858c2ecf20Sopenharmony_ci		}
13868c2ecf20Sopenharmony_ci
13878c2ecf20Sopenharmony_ci		r = bio_integrity_clone(clone, bio, GFP_NOIO);
13888c2ecf20Sopenharmony_ci		if (r < 0)
13898c2ecf20Sopenharmony_ci			return r;
13908c2ecf20Sopenharmony_ci	}
13918c2ecf20Sopenharmony_ci
13928c2ecf20Sopenharmony_ci	bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
13938c2ecf20Sopenharmony_ci	clone->bi_iter.bi_size = to_bytes(len);
13948c2ecf20Sopenharmony_ci
13958c2ecf20Sopenharmony_ci	if (bio_integrity(bio))
13968c2ecf20Sopenharmony_ci		bio_integrity_trim(clone);
13978c2ecf20Sopenharmony_ci
13988c2ecf20Sopenharmony_ci	return 0;
13998c2ecf20Sopenharmony_ci}
14008c2ecf20Sopenharmony_ci
14018c2ecf20Sopenharmony_cistatic void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
14028c2ecf20Sopenharmony_ci				struct dm_target *ti, unsigned num_bios)
14038c2ecf20Sopenharmony_ci{
14048c2ecf20Sopenharmony_ci	struct dm_target_io *tio;
14058c2ecf20Sopenharmony_ci	int try;
14068c2ecf20Sopenharmony_ci
14078c2ecf20Sopenharmony_ci	if (!num_bios)
14088c2ecf20Sopenharmony_ci		return;
14098c2ecf20Sopenharmony_ci
14108c2ecf20Sopenharmony_ci	if (num_bios == 1) {
14118c2ecf20Sopenharmony_ci		tio = alloc_tio(ci, ti, 0, GFP_NOIO);
14128c2ecf20Sopenharmony_ci		bio_list_add(blist, &tio->clone);
14138c2ecf20Sopenharmony_ci		return;
14148c2ecf20Sopenharmony_ci	}
14158c2ecf20Sopenharmony_ci
14168c2ecf20Sopenharmony_ci	for (try = 0; try < 2; try++) {
14178c2ecf20Sopenharmony_ci		int bio_nr;
14188c2ecf20Sopenharmony_ci		struct bio *bio;
14198c2ecf20Sopenharmony_ci
14208c2ecf20Sopenharmony_ci		if (try)
14218c2ecf20Sopenharmony_ci			mutex_lock(&ci->io->md->table_devices_lock);
14228c2ecf20Sopenharmony_ci		for (bio_nr = 0; bio_nr < num_bios; bio_nr++) {
14238c2ecf20Sopenharmony_ci			tio = alloc_tio(ci, ti, bio_nr, try ? GFP_NOIO : GFP_NOWAIT);
14248c2ecf20Sopenharmony_ci			if (!tio)
14258c2ecf20Sopenharmony_ci				break;
14268c2ecf20Sopenharmony_ci
14278c2ecf20Sopenharmony_ci			bio_list_add(blist, &tio->clone);
14288c2ecf20Sopenharmony_ci		}
14298c2ecf20Sopenharmony_ci		if (try)
14308c2ecf20Sopenharmony_ci			mutex_unlock(&ci->io->md->table_devices_lock);
14318c2ecf20Sopenharmony_ci		if (bio_nr == num_bios)
14328c2ecf20Sopenharmony_ci			return;
14338c2ecf20Sopenharmony_ci
14348c2ecf20Sopenharmony_ci		while ((bio = bio_list_pop(blist))) {
14358c2ecf20Sopenharmony_ci			tio = container_of(bio, struct dm_target_io, clone);
14368c2ecf20Sopenharmony_ci			free_tio(tio);
14378c2ecf20Sopenharmony_ci		}
14388c2ecf20Sopenharmony_ci	}
14398c2ecf20Sopenharmony_ci}
14408c2ecf20Sopenharmony_ci
14418c2ecf20Sopenharmony_cistatic blk_qc_t __clone_and_map_simple_bio(struct clone_info *ci,
14428c2ecf20Sopenharmony_ci					   struct dm_target_io *tio, unsigned *len)
14438c2ecf20Sopenharmony_ci{
14448c2ecf20Sopenharmony_ci	struct bio *clone = &tio->clone;
14458c2ecf20Sopenharmony_ci
14468c2ecf20Sopenharmony_ci	tio->len_ptr = len;
14478c2ecf20Sopenharmony_ci
14488c2ecf20Sopenharmony_ci	__bio_clone_fast(clone, ci->bio);
14498c2ecf20Sopenharmony_ci	if (len)
14508c2ecf20Sopenharmony_ci		bio_setup_sector(clone, ci->sector, *len);
14518c2ecf20Sopenharmony_ci
14528c2ecf20Sopenharmony_ci	return __map_bio(tio);
14538c2ecf20Sopenharmony_ci}
14548c2ecf20Sopenharmony_ci
14558c2ecf20Sopenharmony_cistatic void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
14568c2ecf20Sopenharmony_ci				  unsigned num_bios, unsigned *len)
14578c2ecf20Sopenharmony_ci{
14588c2ecf20Sopenharmony_ci	struct bio_list blist = BIO_EMPTY_LIST;
14598c2ecf20Sopenharmony_ci	struct bio *bio;
14608c2ecf20Sopenharmony_ci	struct dm_target_io *tio;
14618c2ecf20Sopenharmony_ci
14628c2ecf20Sopenharmony_ci	alloc_multiple_bios(&blist, ci, ti, num_bios);
14638c2ecf20Sopenharmony_ci
14648c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(&blist))) {
14658c2ecf20Sopenharmony_ci		tio = container_of(bio, struct dm_target_io, clone);
14668c2ecf20Sopenharmony_ci		(void) __clone_and_map_simple_bio(ci, tio, len);
14678c2ecf20Sopenharmony_ci	}
14688c2ecf20Sopenharmony_ci}
14698c2ecf20Sopenharmony_ci
14708c2ecf20Sopenharmony_cistatic int __send_empty_flush(struct clone_info *ci)
14718c2ecf20Sopenharmony_ci{
14728c2ecf20Sopenharmony_ci	unsigned target_nr = 0;
14738c2ecf20Sopenharmony_ci	struct dm_target *ti;
14748c2ecf20Sopenharmony_ci	struct bio flush_bio;
14758c2ecf20Sopenharmony_ci
14768c2ecf20Sopenharmony_ci	/*
14778c2ecf20Sopenharmony_ci	 * Use an on-stack bio for this, it's safe since we don't
14788c2ecf20Sopenharmony_ci	 * need to reference it after submit. It's just used as
14798c2ecf20Sopenharmony_ci	 * the basis for the clone(s).
14808c2ecf20Sopenharmony_ci	 */
14818c2ecf20Sopenharmony_ci	bio_init(&flush_bio, NULL, 0);
14828c2ecf20Sopenharmony_ci	flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
14838c2ecf20Sopenharmony_ci	ci->bio = &flush_bio;
14848c2ecf20Sopenharmony_ci	ci->sector_count = 0;
14858c2ecf20Sopenharmony_ci
14868c2ecf20Sopenharmony_ci	/*
14878c2ecf20Sopenharmony_ci	 * Empty flush uses a statically initialized bio, as the base for
14888c2ecf20Sopenharmony_ci	 * cloning.  However, blkg association requires that a bdev is
14898c2ecf20Sopenharmony_ci	 * associated with a gendisk, which doesn't happen until the bdev is
14908c2ecf20Sopenharmony_ci	 * opened.  So, blkg association is done at issue time of the flush
14918c2ecf20Sopenharmony_ci	 * rather than when the device is created in alloc_dev().
14928c2ecf20Sopenharmony_ci	 */
14938c2ecf20Sopenharmony_ci	bio_set_dev(ci->bio, ci->io->md->bdev);
14948c2ecf20Sopenharmony_ci
14958c2ecf20Sopenharmony_ci	BUG_ON(bio_has_data(ci->bio));
14968c2ecf20Sopenharmony_ci	while ((ti = dm_table_get_target(ci->map, target_nr++)))
14978c2ecf20Sopenharmony_ci		__send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
14988c2ecf20Sopenharmony_ci
14998c2ecf20Sopenharmony_ci	bio_uninit(ci->bio);
15008c2ecf20Sopenharmony_ci	return 0;
15018c2ecf20Sopenharmony_ci}
15028c2ecf20Sopenharmony_ci
15038c2ecf20Sopenharmony_cistatic int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
15048c2ecf20Sopenharmony_ci				    sector_t sector, unsigned *len)
15058c2ecf20Sopenharmony_ci{
15068c2ecf20Sopenharmony_ci	struct bio *bio = ci->bio;
15078c2ecf20Sopenharmony_ci	struct dm_target_io *tio;
15088c2ecf20Sopenharmony_ci	int r;
15098c2ecf20Sopenharmony_ci
15108c2ecf20Sopenharmony_ci	tio = alloc_tio(ci, ti, 0, GFP_NOIO);
15118c2ecf20Sopenharmony_ci	tio->len_ptr = len;
15128c2ecf20Sopenharmony_ci	r = clone_bio(tio, bio, sector, *len);
15138c2ecf20Sopenharmony_ci	if (r < 0) {
15148c2ecf20Sopenharmony_ci		free_tio(tio);
15158c2ecf20Sopenharmony_ci		return r;
15168c2ecf20Sopenharmony_ci	}
15178c2ecf20Sopenharmony_ci	(void) __map_bio(tio);
15188c2ecf20Sopenharmony_ci
15198c2ecf20Sopenharmony_ci	return 0;
15208c2ecf20Sopenharmony_ci}
15218c2ecf20Sopenharmony_ci
15228c2ecf20Sopenharmony_cistatic int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
15238c2ecf20Sopenharmony_ci				       unsigned num_bios)
15248c2ecf20Sopenharmony_ci{
15258c2ecf20Sopenharmony_ci	unsigned len;
15268c2ecf20Sopenharmony_ci
15278c2ecf20Sopenharmony_ci	/*
15288c2ecf20Sopenharmony_ci	 * Even though the device advertised support for this type of
15298c2ecf20Sopenharmony_ci	 * request, that does not mean every target supports it, and
15308c2ecf20Sopenharmony_ci	 * reconfiguration might also have changed that since the
15318c2ecf20Sopenharmony_ci	 * check was performed.
15328c2ecf20Sopenharmony_ci	 */
15338c2ecf20Sopenharmony_ci	if (!num_bios)
15348c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
15358c2ecf20Sopenharmony_ci
15368c2ecf20Sopenharmony_ci	len = min_t(sector_t, ci->sector_count,
15378c2ecf20Sopenharmony_ci		    max_io_len_target_boundary(ti, dm_target_offset(ti, ci->sector)));
15388c2ecf20Sopenharmony_ci
15398c2ecf20Sopenharmony_ci	__send_duplicate_bios(ci, ti, num_bios, &len);
15408c2ecf20Sopenharmony_ci
15418c2ecf20Sopenharmony_ci	ci->sector += len;
15428c2ecf20Sopenharmony_ci	ci->sector_count -= len;
15438c2ecf20Sopenharmony_ci
15448c2ecf20Sopenharmony_ci	return 0;
15458c2ecf20Sopenharmony_ci}
15468c2ecf20Sopenharmony_ci
15478c2ecf20Sopenharmony_cistatic bool is_abnormal_io(struct bio *bio)
15488c2ecf20Sopenharmony_ci{
15498c2ecf20Sopenharmony_ci	bool r = false;
15508c2ecf20Sopenharmony_ci
15518c2ecf20Sopenharmony_ci	switch (bio_op(bio)) {
15528c2ecf20Sopenharmony_ci	case REQ_OP_DISCARD:
15538c2ecf20Sopenharmony_ci	case REQ_OP_SECURE_ERASE:
15548c2ecf20Sopenharmony_ci	case REQ_OP_WRITE_SAME:
15558c2ecf20Sopenharmony_ci	case REQ_OP_WRITE_ZEROES:
15568c2ecf20Sopenharmony_ci		r = true;
15578c2ecf20Sopenharmony_ci		break;
15588c2ecf20Sopenharmony_ci	}
15598c2ecf20Sopenharmony_ci
15608c2ecf20Sopenharmony_ci	return r;
15618c2ecf20Sopenharmony_ci}
15628c2ecf20Sopenharmony_ci
15638c2ecf20Sopenharmony_cistatic bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti,
15648c2ecf20Sopenharmony_ci				  int *result)
15658c2ecf20Sopenharmony_ci{
15668c2ecf20Sopenharmony_ci	struct bio *bio = ci->bio;
15678c2ecf20Sopenharmony_ci	unsigned num_bios = 0;
15688c2ecf20Sopenharmony_ci
15698c2ecf20Sopenharmony_ci	switch (bio_op(bio)) {
15708c2ecf20Sopenharmony_ci	case REQ_OP_DISCARD:
15718c2ecf20Sopenharmony_ci		num_bios = ti->num_discard_bios;
15728c2ecf20Sopenharmony_ci		break;
15738c2ecf20Sopenharmony_ci	case REQ_OP_SECURE_ERASE:
15748c2ecf20Sopenharmony_ci		num_bios = ti->num_secure_erase_bios;
15758c2ecf20Sopenharmony_ci		break;
15768c2ecf20Sopenharmony_ci	case REQ_OP_WRITE_SAME:
15778c2ecf20Sopenharmony_ci		num_bios = ti->num_write_same_bios;
15788c2ecf20Sopenharmony_ci		break;
15798c2ecf20Sopenharmony_ci	case REQ_OP_WRITE_ZEROES:
15808c2ecf20Sopenharmony_ci		num_bios = ti->num_write_zeroes_bios;
15818c2ecf20Sopenharmony_ci		break;
15828c2ecf20Sopenharmony_ci	default:
15838c2ecf20Sopenharmony_ci		return false;
15848c2ecf20Sopenharmony_ci	}
15858c2ecf20Sopenharmony_ci
15868c2ecf20Sopenharmony_ci	*result = __send_changing_extent_only(ci, ti, num_bios);
15878c2ecf20Sopenharmony_ci	return true;
15888c2ecf20Sopenharmony_ci}
15898c2ecf20Sopenharmony_ci
15908c2ecf20Sopenharmony_ci/*
15918c2ecf20Sopenharmony_ci * Select the correct strategy for processing a non-flush bio.
15928c2ecf20Sopenharmony_ci */
15938c2ecf20Sopenharmony_cistatic int __split_and_process_non_flush(struct clone_info *ci)
15948c2ecf20Sopenharmony_ci{
15958c2ecf20Sopenharmony_ci	struct dm_target *ti;
15968c2ecf20Sopenharmony_ci	unsigned len;
15978c2ecf20Sopenharmony_ci	int r;
15988c2ecf20Sopenharmony_ci
15998c2ecf20Sopenharmony_ci	ti = dm_table_find_target(ci->map, ci->sector);
16008c2ecf20Sopenharmony_ci	if (!ti)
16018c2ecf20Sopenharmony_ci		return -EIO;
16028c2ecf20Sopenharmony_ci
16038c2ecf20Sopenharmony_ci	if (__process_abnormal_io(ci, ti, &r))
16048c2ecf20Sopenharmony_ci		return r;
16058c2ecf20Sopenharmony_ci
16068c2ecf20Sopenharmony_ci	len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count);
16078c2ecf20Sopenharmony_ci
16088c2ecf20Sopenharmony_ci	r = __clone_and_map_data_bio(ci, ti, ci->sector, &len);
16098c2ecf20Sopenharmony_ci	if (r < 0)
16108c2ecf20Sopenharmony_ci		return r;
16118c2ecf20Sopenharmony_ci
16128c2ecf20Sopenharmony_ci	ci->sector += len;
16138c2ecf20Sopenharmony_ci	ci->sector_count -= len;
16148c2ecf20Sopenharmony_ci
16158c2ecf20Sopenharmony_ci	return 0;
16168c2ecf20Sopenharmony_ci}
16178c2ecf20Sopenharmony_ci
16188c2ecf20Sopenharmony_cistatic void init_clone_info(struct clone_info *ci, struct mapped_device *md,
16198c2ecf20Sopenharmony_ci			    struct dm_table *map, struct bio *bio)
16208c2ecf20Sopenharmony_ci{
16218c2ecf20Sopenharmony_ci	ci->map = map;
16228c2ecf20Sopenharmony_ci	ci->io = alloc_io(md, bio);
16238c2ecf20Sopenharmony_ci	ci->sector = bio->bi_iter.bi_sector;
16248c2ecf20Sopenharmony_ci}
16258c2ecf20Sopenharmony_ci
16268c2ecf20Sopenharmony_ci#define __dm_part_stat_sub(part, field, subnd)	\
16278c2ecf20Sopenharmony_ci	(part_stat_get(part, field) -= (subnd))
16288c2ecf20Sopenharmony_ci
16298c2ecf20Sopenharmony_ci/*
16308c2ecf20Sopenharmony_ci * Entry point to split a bio into clones and submit them to the targets.
16318c2ecf20Sopenharmony_ci */
16328c2ecf20Sopenharmony_cistatic blk_qc_t __split_and_process_bio(struct mapped_device *md,
16338c2ecf20Sopenharmony_ci					struct dm_table *map, struct bio *bio)
16348c2ecf20Sopenharmony_ci{
16358c2ecf20Sopenharmony_ci	struct clone_info ci;
16368c2ecf20Sopenharmony_ci	blk_qc_t ret = BLK_QC_T_NONE;
16378c2ecf20Sopenharmony_ci	int error = 0;
16388c2ecf20Sopenharmony_ci
16398c2ecf20Sopenharmony_ci	init_clone_info(&ci, md, map, bio);
16408c2ecf20Sopenharmony_ci
16418c2ecf20Sopenharmony_ci	if (bio->bi_opf & REQ_PREFLUSH) {
16428c2ecf20Sopenharmony_ci		error = __send_empty_flush(&ci);
16438c2ecf20Sopenharmony_ci		/* dec_pending submits any data associated with flush */
16448c2ecf20Sopenharmony_ci	} else if (op_is_zone_mgmt(bio_op(bio))) {
16458c2ecf20Sopenharmony_ci		ci.bio = bio;
16468c2ecf20Sopenharmony_ci		ci.sector_count = 0;
16478c2ecf20Sopenharmony_ci		error = __split_and_process_non_flush(&ci);
16488c2ecf20Sopenharmony_ci	} else {
16498c2ecf20Sopenharmony_ci		ci.bio = bio;
16508c2ecf20Sopenharmony_ci		ci.sector_count = bio_sectors(bio);
16518c2ecf20Sopenharmony_ci		while (ci.sector_count && !error) {
16528c2ecf20Sopenharmony_ci			error = __split_and_process_non_flush(&ci);
16538c2ecf20Sopenharmony_ci			if (current->bio_list && ci.sector_count && !error) {
16548c2ecf20Sopenharmony_ci				/*
16558c2ecf20Sopenharmony_ci				 * Remainder must be passed to submit_bio_noacct()
16568c2ecf20Sopenharmony_ci				 * so that it gets handled *after* bios already submitted
16578c2ecf20Sopenharmony_ci				 * have been completely processed.
16588c2ecf20Sopenharmony_ci				 * We take a clone of the original to store in
16598c2ecf20Sopenharmony_ci				 * ci.io->orig_bio to be used by end_io_acct() and
16608c2ecf20Sopenharmony_ci				 * for dec_pending to use for completion handling.
16618c2ecf20Sopenharmony_ci				 */
16628c2ecf20Sopenharmony_ci				struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count,
16638c2ecf20Sopenharmony_ci							  GFP_NOIO, &md->queue->bio_split);
16648c2ecf20Sopenharmony_ci				ci.io->orig_bio = b;
16658c2ecf20Sopenharmony_ci
16668c2ecf20Sopenharmony_ci				/*
16678c2ecf20Sopenharmony_ci				 * Adjust IO stats for each split, otherwise upon queue
16688c2ecf20Sopenharmony_ci				 * reentry there will be redundant IO accounting.
16698c2ecf20Sopenharmony_ci				 * NOTE: this is a stop-gap fix, a proper fix involves
16708c2ecf20Sopenharmony_ci				 * significant refactoring of DM core's bio splitting
16718c2ecf20Sopenharmony_ci				 * (by eliminating DM's splitting and just using bio_split)
16728c2ecf20Sopenharmony_ci				 */
16738c2ecf20Sopenharmony_ci				part_stat_lock();
16748c2ecf20Sopenharmony_ci				__dm_part_stat_sub(&dm_disk(md)->part0,
16758c2ecf20Sopenharmony_ci						   sectors[op_stat_group(bio_op(bio))], ci.sector_count);
16768c2ecf20Sopenharmony_ci				part_stat_unlock();
16778c2ecf20Sopenharmony_ci
16788c2ecf20Sopenharmony_ci				bio_chain(b, bio);
16798c2ecf20Sopenharmony_ci				trace_block_split(md->queue, b, bio->bi_iter.bi_sector);
16808c2ecf20Sopenharmony_ci				ret = submit_bio_noacct(bio);
16818c2ecf20Sopenharmony_ci				break;
16828c2ecf20Sopenharmony_ci			}
16838c2ecf20Sopenharmony_ci		}
16848c2ecf20Sopenharmony_ci	}
16858c2ecf20Sopenharmony_ci
16868c2ecf20Sopenharmony_ci	/* drop the extra reference count */
16878c2ecf20Sopenharmony_ci	dec_pending(ci.io, errno_to_blk_status(error));
16888c2ecf20Sopenharmony_ci	return ret;
16898c2ecf20Sopenharmony_ci}
16908c2ecf20Sopenharmony_ci
16918c2ecf20Sopenharmony_cistatic blk_qc_t dm_submit_bio(struct bio *bio)
16928c2ecf20Sopenharmony_ci{
16938c2ecf20Sopenharmony_ci	struct mapped_device *md = bio->bi_disk->private_data;
16948c2ecf20Sopenharmony_ci	blk_qc_t ret = BLK_QC_T_NONE;
16958c2ecf20Sopenharmony_ci	int srcu_idx;
16968c2ecf20Sopenharmony_ci	struct dm_table *map;
16978c2ecf20Sopenharmony_ci
16988c2ecf20Sopenharmony_ci	map = dm_get_live_table(md, &srcu_idx);
16998c2ecf20Sopenharmony_ci
17008c2ecf20Sopenharmony_ci	/* If suspended, or map not yet available, queue this IO for later */
17018c2ecf20Sopenharmony_ci	if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) ||
17028c2ecf20Sopenharmony_ci	    unlikely(!map)) {
17038c2ecf20Sopenharmony_ci		if (bio->bi_opf & REQ_NOWAIT)
17048c2ecf20Sopenharmony_ci			bio_wouldblock_error(bio);
17058c2ecf20Sopenharmony_ci		else if (bio->bi_opf & REQ_RAHEAD)
17068c2ecf20Sopenharmony_ci			bio_io_error(bio);
17078c2ecf20Sopenharmony_ci		else
17088c2ecf20Sopenharmony_ci			queue_io(md, bio);
17098c2ecf20Sopenharmony_ci		goto out;
17108c2ecf20Sopenharmony_ci	}
17118c2ecf20Sopenharmony_ci
17128c2ecf20Sopenharmony_ci	/*
17138c2ecf20Sopenharmony_ci	 * Use blk_queue_split() for abnormal IO (e.g. discard, writesame, etc)
17148c2ecf20Sopenharmony_ci	 * otherwise associated queue_limits won't be imposed.
17158c2ecf20Sopenharmony_ci	 */
17168c2ecf20Sopenharmony_ci	if (is_abnormal_io(bio))
17178c2ecf20Sopenharmony_ci		blk_queue_split(&bio);
17188c2ecf20Sopenharmony_ci
17198c2ecf20Sopenharmony_ci	ret = __split_and_process_bio(md, map, bio);
17208c2ecf20Sopenharmony_ciout:
17218c2ecf20Sopenharmony_ci	dm_put_live_table(md, srcu_idx);
17228c2ecf20Sopenharmony_ci	return ret;
17238c2ecf20Sopenharmony_ci}
17248c2ecf20Sopenharmony_ci
17258c2ecf20Sopenharmony_ci/*-----------------------------------------------------------------
17268c2ecf20Sopenharmony_ci * An IDR is used to keep track of allocated minor numbers.
17278c2ecf20Sopenharmony_ci *---------------------------------------------------------------*/
17288c2ecf20Sopenharmony_cistatic void free_minor(int minor)
17298c2ecf20Sopenharmony_ci{
17308c2ecf20Sopenharmony_ci	spin_lock(&_minor_lock);
17318c2ecf20Sopenharmony_ci	idr_remove(&_minor_idr, minor);
17328c2ecf20Sopenharmony_ci	spin_unlock(&_minor_lock);
17338c2ecf20Sopenharmony_ci}
17348c2ecf20Sopenharmony_ci
17358c2ecf20Sopenharmony_ci/*
17368c2ecf20Sopenharmony_ci * See if the device with a specific minor # is free.
17378c2ecf20Sopenharmony_ci */
17388c2ecf20Sopenharmony_cistatic int specific_minor(int minor)
17398c2ecf20Sopenharmony_ci{
17408c2ecf20Sopenharmony_ci	int r;
17418c2ecf20Sopenharmony_ci
17428c2ecf20Sopenharmony_ci	if (minor >= (1 << MINORBITS))
17438c2ecf20Sopenharmony_ci		return -EINVAL;
17448c2ecf20Sopenharmony_ci
17458c2ecf20Sopenharmony_ci	idr_preload(GFP_KERNEL);
17468c2ecf20Sopenharmony_ci	spin_lock(&_minor_lock);
17478c2ecf20Sopenharmony_ci
17488c2ecf20Sopenharmony_ci	r = idr_alloc(&_minor_idr, MINOR_ALLOCED, minor, minor + 1, GFP_NOWAIT);
17498c2ecf20Sopenharmony_ci
17508c2ecf20Sopenharmony_ci	spin_unlock(&_minor_lock);
17518c2ecf20Sopenharmony_ci	idr_preload_end();
17528c2ecf20Sopenharmony_ci	if (r < 0)
17538c2ecf20Sopenharmony_ci		return r == -ENOSPC ? -EBUSY : r;
17548c2ecf20Sopenharmony_ci	return 0;
17558c2ecf20Sopenharmony_ci}
17568c2ecf20Sopenharmony_ci
17578c2ecf20Sopenharmony_cistatic int next_free_minor(int *minor)
17588c2ecf20Sopenharmony_ci{
17598c2ecf20Sopenharmony_ci	int r;
17608c2ecf20Sopenharmony_ci
17618c2ecf20Sopenharmony_ci	idr_preload(GFP_KERNEL);
17628c2ecf20Sopenharmony_ci	spin_lock(&_minor_lock);
17638c2ecf20Sopenharmony_ci
17648c2ecf20Sopenharmony_ci	r = idr_alloc(&_minor_idr, MINOR_ALLOCED, 0, 1 << MINORBITS, GFP_NOWAIT);
17658c2ecf20Sopenharmony_ci
17668c2ecf20Sopenharmony_ci	spin_unlock(&_minor_lock);
17678c2ecf20Sopenharmony_ci	idr_preload_end();
17688c2ecf20Sopenharmony_ci	if (r < 0)
17698c2ecf20Sopenharmony_ci		return r;
17708c2ecf20Sopenharmony_ci	*minor = r;
17718c2ecf20Sopenharmony_ci	return 0;
17728c2ecf20Sopenharmony_ci}
17738c2ecf20Sopenharmony_ci
17748c2ecf20Sopenharmony_cistatic const struct block_device_operations dm_blk_dops;
17758c2ecf20Sopenharmony_cistatic const struct block_device_operations dm_rq_blk_dops;
17768c2ecf20Sopenharmony_cistatic const struct dax_operations dm_dax_ops;
17778c2ecf20Sopenharmony_ci
17788c2ecf20Sopenharmony_cistatic void dm_wq_work(struct work_struct *work);
17798c2ecf20Sopenharmony_ci
17808c2ecf20Sopenharmony_cistatic void cleanup_mapped_device(struct mapped_device *md)
17818c2ecf20Sopenharmony_ci{
17828c2ecf20Sopenharmony_ci	if (md->wq)
17838c2ecf20Sopenharmony_ci		destroy_workqueue(md->wq);
17848c2ecf20Sopenharmony_ci	bioset_exit(&md->bs);
17858c2ecf20Sopenharmony_ci	bioset_exit(&md->io_bs);
17868c2ecf20Sopenharmony_ci
17878c2ecf20Sopenharmony_ci	if (md->dax_dev) {
17888c2ecf20Sopenharmony_ci		kill_dax(md->dax_dev);
17898c2ecf20Sopenharmony_ci		put_dax(md->dax_dev);
17908c2ecf20Sopenharmony_ci		md->dax_dev = NULL;
17918c2ecf20Sopenharmony_ci	}
17928c2ecf20Sopenharmony_ci
17938c2ecf20Sopenharmony_ci	if (md->disk) {
17948c2ecf20Sopenharmony_ci		spin_lock(&_minor_lock);
17958c2ecf20Sopenharmony_ci		md->disk->private_data = NULL;
17968c2ecf20Sopenharmony_ci		spin_unlock(&_minor_lock);
17978c2ecf20Sopenharmony_ci		del_gendisk(md->disk);
17988c2ecf20Sopenharmony_ci		put_disk(md->disk);
17998c2ecf20Sopenharmony_ci	}
18008c2ecf20Sopenharmony_ci
18018c2ecf20Sopenharmony_ci	if (md->queue)
18028c2ecf20Sopenharmony_ci		blk_cleanup_queue(md->queue);
18038c2ecf20Sopenharmony_ci
18048c2ecf20Sopenharmony_ci	cleanup_srcu_struct(&md->io_barrier);
18058c2ecf20Sopenharmony_ci
18068c2ecf20Sopenharmony_ci	if (md->bdev) {
18078c2ecf20Sopenharmony_ci		bdput(md->bdev);
18088c2ecf20Sopenharmony_ci		md->bdev = NULL;
18098c2ecf20Sopenharmony_ci	}
18108c2ecf20Sopenharmony_ci
18118c2ecf20Sopenharmony_ci	mutex_destroy(&md->suspend_lock);
18128c2ecf20Sopenharmony_ci	mutex_destroy(&md->type_lock);
18138c2ecf20Sopenharmony_ci	mutex_destroy(&md->table_devices_lock);
18148c2ecf20Sopenharmony_ci	mutex_destroy(&md->swap_bios_lock);
18158c2ecf20Sopenharmony_ci
18168c2ecf20Sopenharmony_ci	dm_mq_cleanup_mapped_device(md);
18178c2ecf20Sopenharmony_ci}
18188c2ecf20Sopenharmony_ci
18198c2ecf20Sopenharmony_ci/*
18208c2ecf20Sopenharmony_ci * Allocate and initialise a blank device with a given minor.
18218c2ecf20Sopenharmony_ci */
18228c2ecf20Sopenharmony_cistatic struct mapped_device *alloc_dev(int minor)
18238c2ecf20Sopenharmony_ci{
18248c2ecf20Sopenharmony_ci	int r, numa_node_id = dm_get_numa_node();
18258c2ecf20Sopenharmony_ci	struct mapped_device *md;
18268c2ecf20Sopenharmony_ci	void *old_md;
18278c2ecf20Sopenharmony_ci
18288c2ecf20Sopenharmony_ci	md = kvzalloc_node(sizeof(*md), GFP_KERNEL, numa_node_id);
18298c2ecf20Sopenharmony_ci	if (!md) {
18308c2ecf20Sopenharmony_ci		DMWARN("unable to allocate device, out of memory.");
18318c2ecf20Sopenharmony_ci		return NULL;
18328c2ecf20Sopenharmony_ci	}
18338c2ecf20Sopenharmony_ci
18348c2ecf20Sopenharmony_ci	if (!try_module_get(THIS_MODULE))
18358c2ecf20Sopenharmony_ci		goto bad_module_get;
18368c2ecf20Sopenharmony_ci
18378c2ecf20Sopenharmony_ci	/* get a minor number for the dev */
18388c2ecf20Sopenharmony_ci	if (minor == DM_ANY_MINOR)
18398c2ecf20Sopenharmony_ci		r = next_free_minor(&minor);
18408c2ecf20Sopenharmony_ci	else
18418c2ecf20Sopenharmony_ci		r = specific_minor(minor);
18428c2ecf20Sopenharmony_ci	if (r < 0)
18438c2ecf20Sopenharmony_ci		goto bad_minor;
18448c2ecf20Sopenharmony_ci
18458c2ecf20Sopenharmony_ci	r = init_srcu_struct(&md->io_barrier);
18468c2ecf20Sopenharmony_ci	if (r < 0)
18478c2ecf20Sopenharmony_ci		goto bad_io_barrier;
18488c2ecf20Sopenharmony_ci
18498c2ecf20Sopenharmony_ci	md->numa_node_id = numa_node_id;
18508c2ecf20Sopenharmony_ci	md->init_tio_pdu = false;
18518c2ecf20Sopenharmony_ci	md->type = DM_TYPE_NONE;
18528c2ecf20Sopenharmony_ci	mutex_init(&md->suspend_lock);
18538c2ecf20Sopenharmony_ci	mutex_init(&md->type_lock);
18548c2ecf20Sopenharmony_ci	mutex_init(&md->table_devices_lock);
18558c2ecf20Sopenharmony_ci	spin_lock_init(&md->deferred_lock);
18568c2ecf20Sopenharmony_ci	atomic_set(&md->holders, 1);
18578c2ecf20Sopenharmony_ci	atomic_set(&md->open_count, 0);
18588c2ecf20Sopenharmony_ci	atomic_set(&md->event_nr, 0);
18598c2ecf20Sopenharmony_ci	atomic_set(&md->uevent_seq, 0);
18608c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&md->uevent_list);
18618c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&md->table_devices);
18628c2ecf20Sopenharmony_ci	spin_lock_init(&md->uevent_lock);
18638c2ecf20Sopenharmony_ci
18648c2ecf20Sopenharmony_ci	/*
18658c2ecf20Sopenharmony_ci	 * default to bio-based until DM table is loaded and md->type
18668c2ecf20Sopenharmony_ci	 * established. If request-based table is loaded: blk-mq will
18678c2ecf20Sopenharmony_ci	 * override accordingly.
18688c2ecf20Sopenharmony_ci	 */
18698c2ecf20Sopenharmony_ci	md->queue = blk_alloc_queue(numa_node_id);
18708c2ecf20Sopenharmony_ci	if (!md->queue)
18718c2ecf20Sopenharmony_ci		goto bad;
18728c2ecf20Sopenharmony_ci
18738c2ecf20Sopenharmony_ci	md->disk = alloc_disk_node(1, md->numa_node_id);
18748c2ecf20Sopenharmony_ci	if (!md->disk)
18758c2ecf20Sopenharmony_ci		goto bad;
18768c2ecf20Sopenharmony_ci
18778c2ecf20Sopenharmony_ci	init_waitqueue_head(&md->wait);
18788c2ecf20Sopenharmony_ci	INIT_WORK(&md->work, dm_wq_work);
18798c2ecf20Sopenharmony_ci	init_waitqueue_head(&md->eventq);
18808c2ecf20Sopenharmony_ci	init_completion(&md->kobj_holder.completion);
18818c2ecf20Sopenharmony_ci
18828c2ecf20Sopenharmony_ci	md->swap_bios = get_swap_bios();
18838c2ecf20Sopenharmony_ci	sema_init(&md->swap_bios_semaphore, md->swap_bios);
18848c2ecf20Sopenharmony_ci	mutex_init(&md->swap_bios_lock);
18858c2ecf20Sopenharmony_ci
18868c2ecf20Sopenharmony_ci	md->disk->major = _major;
18878c2ecf20Sopenharmony_ci	md->disk->first_minor = minor;
18888c2ecf20Sopenharmony_ci	md->disk->fops = &dm_blk_dops;
18898c2ecf20Sopenharmony_ci	md->disk->queue = md->queue;
18908c2ecf20Sopenharmony_ci	md->disk->private_data = md;
18918c2ecf20Sopenharmony_ci	sprintf(md->disk->disk_name, "dm-%d", minor);
18928c2ecf20Sopenharmony_ci
18938c2ecf20Sopenharmony_ci	if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
18948c2ecf20Sopenharmony_ci		md->dax_dev = alloc_dax(md, md->disk->disk_name,
18958c2ecf20Sopenharmony_ci					&dm_dax_ops, 0);
18968c2ecf20Sopenharmony_ci		if (IS_ERR(md->dax_dev)) {
18978c2ecf20Sopenharmony_ci			md->dax_dev = NULL;
18988c2ecf20Sopenharmony_ci			goto bad;
18998c2ecf20Sopenharmony_ci		}
19008c2ecf20Sopenharmony_ci	}
19018c2ecf20Sopenharmony_ci
19028c2ecf20Sopenharmony_ci	add_disk_no_queue_reg(md->disk);
19038c2ecf20Sopenharmony_ci	format_dev_t(md->name, MKDEV(_major, minor));
19048c2ecf20Sopenharmony_ci
19058c2ecf20Sopenharmony_ci	md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0);
19068c2ecf20Sopenharmony_ci	if (!md->wq)
19078c2ecf20Sopenharmony_ci		goto bad;
19088c2ecf20Sopenharmony_ci
19098c2ecf20Sopenharmony_ci	md->bdev = bdget_disk(md->disk, 0);
19108c2ecf20Sopenharmony_ci	if (!md->bdev)
19118c2ecf20Sopenharmony_ci		goto bad;
19128c2ecf20Sopenharmony_ci
19138c2ecf20Sopenharmony_ci	r = dm_stats_init(&md->stats);
19148c2ecf20Sopenharmony_ci	if (r < 0)
19158c2ecf20Sopenharmony_ci		goto bad;
19168c2ecf20Sopenharmony_ci
19178c2ecf20Sopenharmony_ci	/* Populate the mapping, nobody knows we exist yet */
19188c2ecf20Sopenharmony_ci	spin_lock(&_minor_lock);
19198c2ecf20Sopenharmony_ci	old_md = idr_replace(&_minor_idr, md, minor);
19208c2ecf20Sopenharmony_ci	spin_unlock(&_minor_lock);
19218c2ecf20Sopenharmony_ci
19228c2ecf20Sopenharmony_ci	BUG_ON(old_md != MINOR_ALLOCED);
19238c2ecf20Sopenharmony_ci
19248c2ecf20Sopenharmony_ci	return md;
19258c2ecf20Sopenharmony_ci
19268c2ecf20Sopenharmony_cibad:
19278c2ecf20Sopenharmony_ci	cleanup_mapped_device(md);
19288c2ecf20Sopenharmony_cibad_io_barrier:
19298c2ecf20Sopenharmony_ci	free_minor(minor);
19308c2ecf20Sopenharmony_cibad_minor:
19318c2ecf20Sopenharmony_ci	module_put(THIS_MODULE);
19328c2ecf20Sopenharmony_cibad_module_get:
19338c2ecf20Sopenharmony_ci	kvfree(md);
19348c2ecf20Sopenharmony_ci	return NULL;
19358c2ecf20Sopenharmony_ci}
19368c2ecf20Sopenharmony_ci
19378c2ecf20Sopenharmony_cistatic void unlock_fs(struct mapped_device *md);
19388c2ecf20Sopenharmony_ci
19398c2ecf20Sopenharmony_cistatic void free_dev(struct mapped_device *md)
19408c2ecf20Sopenharmony_ci{
19418c2ecf20Sopenharmony_ci	int minor = MINOR(disk_devt(md->disk));
19428c2ecf20Sopenharmony_ci
19438c2ecf20Sopenharmony_ci	unlock_fs(md);
19448c2ecf20Sopenharmony_ci
19458c2ecf20Sopenharmony_ci	cleanup_mapped_device(md);
19468c2ecf20Sopenharmony_ci
19478c2ecf20Sopenharmony_ci	free_table_devices(&md->table_devices);
19488c2ecf20Sopenharmony_ci	dm_stats_cleanup(&md->stats);
19498c2ecf20Sopenharmony_ci	free_minor(minor);
19508c2ecf20Sopenharmony_ci
19518c2ecf20Sopenharmony_ci	module_put(THIS_MODULE);
19528c2ecf20Sopenharmony_ci	kvfree(md);
19538c2ecf20Sopenharmony_ci}
19548c2ecf20Sopenharmony_ci
19558c2ecf20Sopenharmony_cistatic int __bind_mempools(struct mapped_device *md, struct dm_table *t)
19568c2ecf20Sopenharmony_ci{
19578c2ecf20Sopenharmony_ci	struct dm_md_mempools *p = dm_table_get_md_mempools(t);
19588c2ecf20Sopenharmony_ci	int ret = 0;
19598c2ecf20Sopenharmony_ci
19608c2ecf20Sopenharmony_ci	if (dm_table_bio_based(t)) {
19618c2ecf20Sopenharmony_ci		/*
19628c2ecf20Sopenharmony_ci		 * The md may already have mempools that need changing.
19638c2ecf20Sopenharmony_ci		 * If so, reload bioset because front_pad may have changed
19648c2ecf20Sopenharmony_ci		 * because a different table was loaded.
19658c2ecf20Sopenharmony_ci		 */
19668c2ecf20Sopenharmony_ci		bioset_exit(&md->bs);
19678c2ecf20Sopenharmony_ci		bioset_exit(&md->io_bs);
19688c2ecf20Sopenharmony_ci
19698c2ecf20Sopenharmony_ci	} else if (bioset_initialized(&md->bs)) {
19708c2ecf20Sopenharmony_ci		/*
19718c2ecf20Sopenharmony_ci		 * There's no need to reload with request-based dm
19728c2ecf20Sopenharmony_ci		 * because the size of front_pad doesn't change.
19738c2ecf20Sopenharmony_ci		 * Note for future: If you are to reload bioset,
19748c2ecf20Sopenharmony_ci		 * prep-ed requests in the queue may refer
19758c2ecf20Sopenharmony_ci		 * to bio from the old bioset, so you must walk
19768c2ecf20Sopenharmony_ci		 * through the queue to unprep.
19778c2ecf20Sopenharmony_ci		 */
19788c2ecf20Sopenharmony_ci		goto out;
19798c2ecf20Sopenharmony_ci	}
19808c2ecf20Sopenharmony_ci
19818c2ecf20Sopenharmony_ci	BUG_ON(!p ||
19828c2ecf20Sopenharmony_ci	       bioset_initialized(&md->bs) ||
19838c2ecf20Sopenharmony_ci	       bioset_initialized(&md->io_bs));
19848c2ecf20Sopenharmony_ci
19858c2ecf20Sopenharmony_ci	ret = bioset_init_from_src(&md->bs, &p->bs);
19868c2ecf20Sopenharmony_ci	if (ret)
19878c2ecf20Sopenharmony_ci		goto out;
19888c2ecf20Sopenharmony_ci	ret = bioset_init_from_src(&md->io_bs, &p->io_bs);
19898c2ecf20Sopenharmony_ci	if (ret)
19908c2ecf20Sopenharmony_ci		bioset_exit(&md->bs);
19918c2ecf20Sopenharmony_ciout:
19928c2ecf20Sopenharmony_ci	/* mempool bind completed, no longer need any mempools in the table */
19938c2ecf20Sopenharmony_ci	dm_table_free_md_mempools(t);
19948c2ecf20Sopenharmony_ci	return ret;
19958c2ecf20Sopenharmony_ci}
19968c2ecf20Sopenharmony_ci
19978c2ecf20Sopenharmony_ci/*
19988c2ecf20Sopenharmony_ci * Bind a table to the device.
19998c2ecf20Sopenharmony_ci */
20008c2ecf20Sopenharmony_cistatic void event_callback(void *context)
20018c2ecf20Sopenharmony_ci{
20028c2ecf20Sopenharmony_ci	unsigned long flags;
20038c2ecf20Sopenharmony_ci	LIST_HEAD(uevents);
20048c2ecf20Sopenharmony_ci	struct mapped_device *md = (struct mapped_device *) context;
20058c2ecf20Sopenharmony_ci
20068c2ecf20Sopenharmony_ci	spin_lock_irqsave(&md->uevent_lock, flags);
20078c2ecf20Sopenharmony_ci	list_splice_init(&md->uevent_list, &uevents);
20088c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&md->uevent_lock, flags);
20098c2ecf20Sopenharmony_ci
20108c2ecf20Sopenharmony_ci	dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj);
20118c2ecf20Sopenharmony_ci
20128c2ecf20Sopenharmony_ci	atomic_inc(&md->event_nr);
20138c2ecf20Sopenharmony_ci	wake_up(&md->eventq);
20148c2ecf20Sopenharmony_ci	dm_issue_global_event();
20158c2ecf20Sopenharmony_ci}
20168c2ecf20Sopenharmony_ci
20178c2ecf20Sopenharmony_ci/*
20188c2ecf20Sopenharmony_ci * Returns old map, which caller must destroy.
20198c2ecf20Sopenharmony_ci */
20208c2ecf20Sopenharmony_cistatic struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
20218c2ecf20Sopenharmony_ci			       struct queue_limits *limits)
20228c2ecf20Sopenharmony_ci{
20238c2ecf20Sopenharmony_ci	struct dm_table *old_map;
20248c2ecf20Sopenharmony_ci	struct request_queue *q = md->queue;
20258c2ecf20Sopenharmony_ci	bool request_based = dm_table_request_based(t);
20268c2ecf20Sopenharmony_ci	sector_t size;
20278c2ecf20Sopenharmony_ci	int ret;
20288c2ecf20Sopenharmony_ci
20298c2ecf20Sopenharmony_ci	lockdep_assert_held(&md->suspend_lock);
20308c2ecf20Sopenharmony_ci
20318c2ecf20Sopenharmony_ci	size = dm_table_get_size(t);
20328c2ecf20Sopenharmony_ci
20338c2ecf20Sopenharmony_ci	/*
20348c2ecf20Sopenharmony_ci	 * Wipe any geometry if the size of the table changed.
20358c2ecf20Sopenharmony_ci	 */
20368c2ecf20Sopenharmony_ci	if (size != dm_get_size(md))
20378c2ecf20Sopenharmony_ci		memset(&md->geometry, 0, sizeof(md->geometry));
20388c2ecf20Sopenharmony_ci
20398c2ecf20Sopenharmony_ci	set_capacity(md->disk, size);
20408c2ecf20Sopenharmony_ci	bd_set_nr_sectors(md->bdev, size);
20418c2ecf20Sopenharmony_ci
20428c2ecf20Sopenharmony_ci	dm_table_event_callback(t, event_callback, md);
20438c2ecf20Sopenharmony_ci
20448c2ecf20Sopenharmony_ci	if (request_based) {
20458c2ecf20Sopenharmony_ci		/*
20468c2ecf20Sopenharmony_ci		 * Leverage the fact that request-based DM targets are
20478c2ecf20Sopenharmony_ci		 * immutable singletons - used to optimize dm_mq_queue_rq.
20488c2ecf20Sopenharmony_ci		 */
20498c2ecf20Sopenharmony_ci		md->immutable_target = dm_table_get_immutable_target(t);
20508c2ecf20Sopenharmony_ci	}
20518c2ecf20Sopenharmony_ci
20528c2ecf20Sopenharmony_ci	ret = __bind_mempools(md, t);
20538c2ecf20Sopenharmony_ci	if (ret) {
20548c2ecf20Sopenharmony_ci		old_map = ERR_PTR(ret);
20558c2ecf20Sopenharmony_ci		goto out;
20568c2ecf20Sopenharmony_ci	}
20578c2ecf20Sopenharmony_ci
20588c2ecf20Sopenharmony_ci	old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
20598c2ecf20Sopenharmony_ci	rcu_assign_pointer(md->map, (void *)t);
20608c2ecf20Sopenharmony_ci	md->immutable_target_type = dm_table_get_immutable_target_type(t);
20618c2ecf20Sopenharmony_ci
20628c2ecf20Sopenharmony_ci	dm_table_set_restrictions(t, q, limits);
20638c2ecf20Sopenharmony_ci	if (old_map)
20648c2ecf20Sopenharmony_ci		dm_sync_table(md);
20658c2ecf20Sopenharmony_ci
20668c2ecf20Sopenharmony_ciout:
20678c2ecf20Sopenharmony_ci	return old_map;
20688c2ecf20Sopenharmony_ci}
20698c2ecf20Sopenharmony_ci
20708c2ecf20Sopenharmony_ci/*
20718c2ecf20Sopenharmony_ci * Returns unbound table for the caller to free.
20728c2ecf20Sopenharmony_ci */
20738c2ecf20Sopenharmony_cistatic struct dm_table *__unbind(struct mapped_device *md)
20748c2ecf20Sopenharmony_ci{
20758c2ecf20Sopenharmony_ci	struct dm_table *map = rcu_dereference_protected(md->map, 1);
20768c2ecf20Sopenharmony_ci
20778c2ecf20Sopenharmony_ci	if (!map)
20788c2ecf20Sopenharmony_ci		return NULL;
20798c2ecf20Sopenharmony_ci
20808c2ecf20Sopenharmony_ci	dm_table_event_callback(map, NULL, NULL);
20818c2ecf20Sopenharmony_ci	RCU_INIT_POINTER(md->map, NULL);
20828c2ecf20Sopenharmony_ci	dm_sync_table(md);
20838c2ecf20Sopenharmony_ci
20848c2ecf20Sopenharmony_ci	return map;
20858c2ecf20Sopenharmony_ci}
20868c2ecf20Sopenharmony_ci
20878c2ecf20Sopenharmony_ci/*
20888c2ecf20Sopenharmony_ci * Constructor for a new device.
20898c2ecf20Sopenharmony_ci */
20908c2ecf20Sopenharmony_ciint dm_create(int minor, struct mapped_device **result)
20918c2ecf20Sopenharmony_ci{
20928c2ecf20Sopenharmony_ci	int r;
20938c2ecf20Sopenharmony_ci	struct mapped_device *md;
20948c2ecf20Sopenharmony_ci
20958c2ecf20Sopenharmony_ci	md = alloc_dev(minor);
20968c2ecf20Sopenharmony_ci	if (!md)
20978c2ecf20Sopenharmony_ci		return -ENXIO;
20988c2ecf20Sopenharmony_ci
20998c2ecf20Sopenharmony_ci	r = dm_sysfs_init(md);
21008c2ecf20Sopenharmony_ci	if (r) {
21018c2ecf20Sopenharmony_ci		free_dev(md);
21028c2ecf20Sopenharmony_ci		return r;
21038c2ecf20Sopenharmony_ci	}
21048c2ecf20Sopenharmony_ci
21058c2ecf20Sopenharmony_ci	*result = md;
21068c2ecf20Sopenharmony_ci	return 0;
21078c2ecf20Sopenharmony_ci}
21088c2ecf20Sopenharmony_ci
21098c2ecf20Sopenharmony_ci/*
21108c2ecf20Sopenharmony_ci * Functions to manage md->type.
21118c2ecf20Sopenharmony_ci * All are required to hold md->type_lock.
21128c2ecf20Sopenharmony_ci */
21138c2ecf20Sopenharmony_civoid dm_lock_md_type(struct mapped_device *md)
21148c2ecf20Sopenharmony_ci{
21158c2ecf20Sopenharmony_ci	mutex_lock(&md->type_lock);
21168c2ecf20Sopenharmony_ci}
21178c2ecf20Sopenharmony_ci
21188c2ecf20Sopenharmony_civoid dm_unlock_md_type(struct mapped_device *md)
21198c2ecf20Sopenharmony_ci{
21208c2ecf20Sopenharmony_ci	mutex_unlock(&md->type_lock);
21218c2ecf20Sopenharmony_ci}
21228c2ecf20Sopenharmony_ci
21238c2ecf20Sopenharmony_civoid dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type)
21248c2ecf20Sopenharmony_ci{
21258c2ecf20Sopenharmony_ci	BUG_ON(!mutex_is_locked(&md->type_lock));
21268c2ecf20Sopenharmony_ci	md->type = type;
21278c2ecf20Sopenharmony_ci}
21288c2ecf20Sopenharmony_ci
21298c2ecf20Sopenharmony_cienum dm_queue_mode dm_get_md_type(struct mapped_device *md)
21308c2ecf20Sopenharmony_ci{
21318c2ecf20Sopenharmony_ci	return md->type;
21328c2ecf20Sopenharmony_ci}
21338c2ecf20Sopenharmony_ci
21348c2ecf20Sopenharmony_cistruct target_type *dm_get_immutable_target_type(struct mapped_device *md)
21358c2ecf20Sopenharmony_ci{
21368c2ecf20Sopenharmony_ci	return md->immutable_target_type;
21378c2ecf20Sopenharmony_ci}
21388c2ecf20Sopenharmony_ci
21398c2ecf20Sopenharmony_ci/*
21408c2ecf20Sopenharmony_ci * The queue_limits are only valid as long as you have a reference
21418c2ecf20Sopenharmony_ci * count on 'md'.
21428c2ecf20Sopenharmony_ci */
21438c2ecf20Sopenharmony_cistruct queue_limits *dm_get_queue_limits(struct mapped_device *md)
21448c2ecf20Sopenharmony_ci{
21458c2ecf20Sopenharmony_ci	BUG_ON(!atomic_read(&md->holders));
21468c2ecf20Sopenharmony_ci	return &md->queue->limits;
21478c2ecf20Sopenharmony_ci}
21488c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_get_queue_limits);
21498c2ecf20Sopenharmony_ci
21508c2ecf20Sopenharmony_ci/*
21518c2ecf20Sopenharmony_ci * Setup the DM device's queue based on md's type
21528c2ecf20Sopenharmony_ci */
21538c2ecf20Sopenharmony_ciint dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
21548c2ecf20Sopenharmony_ci{
21558c2ecf20Sopenharmony_ci	int r;
21568c2ecf20Sopenharmony_ci	struct queue_limits limits;
21578c2ecf20Sopenharmony_ci	enum dm_queue_mode type = dm_get_md_type(md);
21588c2ecf20Sopenharmony_ci
21598c2ecf20Sopenharmony_ci	switch (type) {
21608c2ecf20Sopenharmony_ci	case DM_TYPE_REQUEST_BASED:
21618c2ecf20Sopenharmony_ci		md->disk->fops = &dm_rq_blk_dops;
21628c2ecf20Sopenharmony_ci		r = dm_mq_init_request_queue(md, t);
21638c2ecf20Sopenharmony_ci		if (r) {
21648c2ecf20Sopenharmony_ci			DMERR("Cannot initialize queue for request-based dm mapped device");
21658c2ecf20Sopenharmony_ci			return r;
21668c2ecf20Sopenharmony_ci		}
21678c2ecf20Sopenharmony_ci		break;
21688c2ecf20Sopenharmony_ci	case DM_TYPE_BIO_BASED:
21698c2ecf20Sopenharmony_ci	case DM_TYPE_DAX_BIO_BASED:
21708c2ecf20Sopenharmony_ci		break;
21718c2ecf20Sopenharmony_ci	case DM_TYPE_NONE:
21728c2ecf20Sopenharmony_ci		WARN_ON_ONCE(true);
21738c2ecf20Sopenharmony_ci		break;
21748c2ecf20Sopenharmony_ci	}
21758c2ecf20Sopenharmony_ci
21768c2ecf20Sopenharmony_ci	r = dm_calculate_queue_limits(t, &limits);
21778c2ecf20Sopenharmony_ci	if (r) {
21788c2ecf20Sopenharmony_ci		DMERR("Cannot calculate initial queue limits");
21798c2ecf20Sopenharmony_ci		return r;
21808c2ecf20Sopenharmony_ci	}
21818c2ecf20Sopenharmony_ci	dm_table_set_restrictions(t, md->queue, &limits);
21828c2ecf20Sopenharmony_ci	blk_register_queue(md->disk);
21838c2ecf20Sopenharmony_ci
21848c2ecf20Sopenharmony_ci	return 0;
21858c2ecf20Sopenharmony_ci}
21868c2ecf20Sopenharmony_ci
21878c2ecf20Sopenharmony_cistruct mapped_device *dm_get_md(dev_t dev)
21888c2ecf20Sopenharmony_ci{
21898c2ecf20Sopenharmony_ci	struct mapped_device *md;
21908c2ecf20Sopenharmony_ci	unsigned minor = MINOR(dev);
21918c2ecf20Sopenharmony_ci
21928c2ecf20Sopenharmony_ci	if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
21938c2ecf20Sopenharmony_ci		return NULL;
21948c2ecf20Sopenharmony_ci
21958c2ecf20Sopenharmony_ci	spin_lock(&_minor_lock);
21968c2ecf20Sopenharmony_ci
21978c2ecf20Sopenharmony_ci	md = idr_find(&_minor_idr, minor);
21988c2ecf20Sopenharmony_ci	if (!md || md == MINOR_ALLOCED || (MINOR(disk_devt(dm_disk(md))) != minor) ||
21998c2ecf20Sopenharmony_ci	    test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) {
22008c2ecf20Sopenharmony_ci		md = NULL;
22018c2ecf20Sopenharmony_ci		goto out;
22028c2ecf20Sopenharmony_ci	}
22038c2ecf20Sopenharmony_ci	dm_get(md);
22048c2ecf20Sopenharmony_ciout:
22058c2ecf20Sopenharmony_ci	spin_unlock(&_minor_lock);
22068c2ecf20Sopenharmony_ci
22078c2ecf20Sopenharmony_ci	return md;
22088c2ecf20Sopenharmony_ci}
22098c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_get_md);
22108c2ecf20Sopenharmony_ci
22118c2ecf20Sopenharmony_civoid *dm_get_mdptr(struct mapped_device *md)
22128c2ecf20Sopenharmony_ci{
22138c2ecf20Sopenharmony_ci	return md->interface_ptr;
22148c2ecf20Sopenharmony_ci}
22158c2ecf20Sopenharmony_ci
22168c2ecf20Sopenharmony_civoid dm_set_mdptr(struct mapped_device *md, void *ptr)
22178c2ecf20Sopenharmony_ci{
22188c2ecf20Sopenharmony_ci	md->interface_ptr = ptr;
22198c2ecf20Sopenharmony_ci}
22208c2ecf20Sopenharmony_ci
22218c2ecf20Sopenharmony_civoid dm_get(struct mapped_device *md)
22228c2ecf20Sopenharmony_ci{
22238c2ecf20Sopenharmony_ci	atomic_inc(&md->holders);
22248c2ecf20Sopenharmony_ci	BUG_ON(test_bit(DMF_FREEING, &md->flags));
22258c2ecf20Sopenharmony_ci}
22268c2ecf20Sopenharmony_ci
22278c2ecf20Sopenharmony_ciint dm_hold(struct mapped_device *md)
22288c2ecf20Sopenharmony_ci{
22298c2ecf20Sopenharmony_ci	spin_lock(&_minor_lock);
22308c2ecf20Sopenharmony_ci	if (test_bit(DMF_FREEING, &md->flags)) {
22318c2ecf20Sopenharmony_ci		spin_unlock(&_minor_lock);
22328c2ecf20Sopenharmony_ci		return -EBUSY;
22338c2ecf20Sopenharmony_ci	}
22348c2ecf20Sopenharmony_ci	dm_get(md);
22358c2ecf20Sopenharmony_ci	spin_unlock(&_minor_lock);
22368c2ecf20Sopenharmony_ci	return 0;
22378c2ecf20Sopenharmony_ci}
22388c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_hold);
22398c2ecf20Sopenharmony_ci
22408c2ecf20Sopenharmony_ciconst char *dm_device_name(struct mapped_device *md)
22418c2ecf20Sopenharmony_ci{
22428c2ecf20Sopenharmony_ci	return md->name;
22438c2ecf20Sopenharmony_ci}
22448c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_device_name);
22458c2ecf20Sopenharmony_ci
22468c2ecf20Sopenharmony_cistatic void __dm_destroy(struct mapped_device *md, bool wait)
22478c2ecf20Sopenharmony_ci{
22488c2ecf20Sopenharmony_ci	struct dm_table *map;
22498c2ecf20Sopenharmony_ci	int srcu_idx;
22508c2ecf20Sopenharmony_ci
22518c2ecf20Sopenharmony_ci	might_sleep();
22528c2ecf20Sopenharmony_ci
22538c2ecf20Sopenharmony_ci	spin_lock(&_minor_lock);
22548c2ecf20Sopenharmony_ci	idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
22558c2ecf20Sopenharmony_ci	set_bit(DMF_FREEING, &md->flags);
22568c2ecf20Sopenharmony_ci	spin_unlock(&_minor_lock);
22578c2ecf20Sopenharmony_ci
22588c2ecf20Sopenharmony_ci	blk_set_queue_dying(md->queue);
22598c2ecf20Sopenharmony_ci
22608c2ecf20Sopenharmony_ci	/*
22618c2ecf20Sopenharmony_ci	 * Take suspend_lock so that presuspend and postsuspend methods
22628c2ecf20Sopenharmony_ci	 * do not race with internal suspend.
22638c2ecf20Sopenharmony_ci	 */
22648c2ecf20Sopenharmony_ci	mutex_lock(&md->suspend_lock);
22658c2ecf20Sopenharmony_ci	map = dm_get_live_table(md, &srcu_idx);
22668c2ecf20Sopenharmony_ci	if (!dm_suspended_md(md)) {
22678c2ecf20Sopenharmony_ci		dm_table_presuspend_targets(map);
22688c2ecf20Sopenharmony_ci		set_bit(DMF_SUSPENDED, &md->flags);
22698c2ecf20Sopenharmony_ci		set_bit(DMF_POST_SUSPENDING, &md->flags);
22708c2ecf20Sopenharmony_ci		dm_table_postsuspend_targets(map);
22718c2ecf20Sopenharmony_ci	}
22728c2ecf20Sopenharmony_ci	/* dm_put_live_table must be before msleep, otherwise deadlock is possible */
22738c2ecf20Sopenharmony_ci	dm_put_live_table(md, srcu_idx);
22748c2ecf20Sopenharmony_ci	mutex_unlock(&md->suspend_lock);
22758c2ecf20Sopenharmony_ci
22768c2ecf20Sopenharmony_ci	/*
22778c2ecf20Sopenharmony_ci	 * Rare, but there may be I/O requests still going to complete,
22788c2ecf20Sopenharmony_ci	 * for example.  Wait for all references to disappear.
22798c2ecf20Sopenharmony_ci	 * No one should increment the reference count of the mapped_device,
22808c2ecf20Sopenharmony_ci	 * after the mapped_device state becomes DMF_FREEING.
22818c2ecf20Sopenharmony_ci	 */
22828c2ecf20Sopenharmony_ci	if (wait)
22838c2ecf20Sopenharmony_ci		while (atomic_read(&md->holders))
22848c2ecf20Sopenharmony_ci			msleep(1);
22858c2ecf20Sopenharmony_ci	else if (atomic_read(&md->holders))
22868c2ecf20Sopenharmony_ci		DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
22878c2ecf20Sopenharmony_ci		       dm_device_name(md), atomic_read(&md->holders));
22888c2ecf20Sopenharmony_ci
22898c2ecf20Sopenharmony_ci	dm_sysfs_exit(md);
22908c2ecf20Sopenharmony_ci	dm_table_destroy(__unbind(md));
22918c2ecf20Sopenharmony_ci	free_dev(md);
22928c2ecf20Sopenharmony_ci}
22938c2ecf20Sopenharmony_ci
22948c2ecf20Sopenharmony_civoid dm_destroy(struct mapped_device *md)
22958c2ecf20Sopenharmony_ci{
22968c2ecf20Sopenharmony_ci	__dm_destroy(md, true);
22978c2ecf20Sopenharmony_ci}
22988c2ecf20Sopenharmony_ci
22998c2ecf20Sopenharmony_civoid dm_destroy_immediate(struct mapped_device *md)
23008c2ecf20Sopenharmony_ci{
23018c2ecf20Sopenharmony_ci	__dm_destroy(md, false);
23028c2ecf20Sopenharmony_ci}
23038c2ecf20Sopenharmony_ci
23048c2ecf20Sopenharmony_civoid dm_put(struct mapped_device *md)
23058c2ecf20Sopenharmony_ci{
23068c2ecf20Sopenharmony_ci	atomic_dec(&md->holders);
23078c2ecf20Sopenharmony_ci}
23088c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_put);
23098c2ecf20Sopenharmony_ci
23108c2ecf20Sopenharmony_cistatic bool md_in_flight_bios(struct mapped_device *md)
23118c2ecf20Sopenharmony_ci{
23128c2ecf20Sopenharmony_ci	int cpu;
23138c2ecf20Sopenharmony_ci	struct hd_struct *part = &dm_disk(md)->part0;
23148c2ecf20Sopenharmony_ci	long sum = 0;
23158c2ecf20Sopenharmony_ci
23168c2ecf20Sopenharmony_ci	for_each_possible_cpu(cpu) {
23178c2ecf20Sopenharmony_ci		sum += part_stat_local_read_cpu(part, in_flight[0], cpu);
23188c2ecf20Sopenharmony_ci		sum += part_stat_local_read_cpu(part, in_flight[1], cpu);
23198c2ecf20Sopenharmony_ci	}
23208c2ecf20Sopenharmony_ci
23218c2ecf20Sopenharmony_ci	return sum != 0;
23228c2ecf20Sopenharmony_ci}
23238c2ecf20Sopenharmony_ci
23248c2ecf20Sopenharmony_cistatic int dm_wait_for_bios_completion(struct mapped_device *md, long task_state)
23258c2ecf20Sopenharmony_ci{
23268c2ecf20Sopenharmony_ci	int r = 0;
23278c2ecf20Sopenharmony_ci	DEFINE_WAIT(wait);
23288c2ecf20Sopenharmony_ci
23298c2ecf20Sopenharmony_ci	while (true) {
23308c2ecf20Sopenharmony_ci		prepare_to_wait(&md->wait, &wait, task_state);
23318c2ecf20Sopenharmony_ci
23328c2ecf20Sopenharmony_ci		if (!md_in_flight_bios(md))
23338c2ecf20Sopenharmony_ci			break;
23348c2ecf20Sopenharmony_ci
23358c2ecf20Sopenharmony_ci		if (signal_pending_state(task_state, current)) {
23368c2ecf20Sopenharmony_ci			r = -EINTR;
23378c2ecf20Sopenharmony_ci			break;
23388c2ecf20Sopenharmony_ci		}
23398c2ecf20Sopenharmony_ci
23408c2ecf20Sopenharmony_ci		io_schedule();
23418c2ecf20Sopenharmony_ci	}
23428c2ecf20Sopenharmony_ci	finish_wait(&md->wait, &wait);
23438c2ecf20Sopenharmony_ci
23448c2ecf20Sopenharmony_ci	smp_rmb();
23458c2ecf20Sopenharmony_ci
23468c2ecf20Sopenharmony_ci	return r;
23478c2ecf20Sopenharmony_ci}
23488c2ecf20Sopenharmony_ci
23498c2ecf20Sopenharmony_cistatic int dm_wait_for_completion(struct mapped_device *md, long task_state)
23508c2ecf20Sopenharmony_ci{
23518c2ecf20Sopenharmony_ci	int r = 0;
23528c2ecf20Sopenharmony_ci
23538c2ecf20Sopenharmony_ci	if (!queue_is_mq(md->queue))
23548c2ecf20Sopenharmony_ci		return dm_wait_for_bios_completion(md, task_state);
23558c2ecf20Sopenharmony_ci
23568c2ecf20Sopenharmony_ci	while (true) {
23578c2ecf20Sopenharmony_ci		if (!blk_mq_queue_inflight(md->queue))
23588c2ecf20Sopenharmony_ci			break;
23598c2ecf20Sopenharmony_ci
23608c2ecf20Sopenharmony_ci		if (signal_pending_state(task_state, current)) {
23618c2ecf20Sopenharmony_ci			r = -EINTR;
23628c2ecf20Sopenharmony_ci			break;
23638c2ecf20Sopenharmony_ci		}
23648c2ecf20Sopenharmony_ci
23658c2ecf20Sopenharmony_ci		msleep(5);
23668c2ecf20Sopenharmony_ci	}
23678c2ecf20Sopenharmony_ci
23688c2ecf20Sopenharmony_ci	return r;
23698c2ecf20Sopenharmony_ci}
23708c2ecf20Sopenharmony_ci
23718c2ecf20Sopenharmony_ci/*
23728c2ecf20Sopenharmony_ci * Process the deferred bios
23738c2ecf20Sopenharmony_ci */
23748c2ecf20Sopenharmony_cistatic void dm_wq_work(struct work_struct *work)
23758c2ecf20Sopenharmony_ci{
23768c2ecf20Sopenharmony_ci	struct mapped_device *md = container_of(work, struct mapped_device, work);
23778c2ecf20Sopenharmony_ci	struct bio *bio;
23788c2ecf20Sopenharmony_ci
23798c2ecf20Sopenharmony_ci	while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
23808c2ecf20Sopenharmony_ci		spin_lock_irq(&md->deferred_lock);
23818c2ecf20Sopenharmony_ci		bio = bio_list_pop(&md->deferred);
23828c2ecf20Sopenharmony_ci		spin_unlock_irq(&md->deferred_lock);
23838c2ecf20Sopenharmony_ci
23848c2ecf20Sopenharmony_ci		if (!bio)
23858c2ecf20Sopenharmony_ci			break;
23868c2ecf20Sopenharmony_ci
23878c2ecf20Sopenharmony_ci		submit_bio_noacct(bio);
23888c2ecf20Sopenharmony_ci		cond_resched();
23898c2ecf20Sopenharmony_ci	}
23908c2ecf20Sopenharmony_ci}
23918c2ecf20Sopenharmony_ci
23928c2ecf20Sopenharmony_cistatic void dm_queue_flush(struct mapped_device *md)
23938c2ecf20Sopenharmony_ci{
23948c2ecf20Sopenharmony_ci	clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
23958c2ecf20Sopenharmony_ci	smp_mb__after_atomic();
23968c2ecf20Sopenharmony_ci	queue_work(md->wq, &md->work);
23978c2ecf20Sopenharmony_ci}
23988c2ecf20Sopenharmony_ci
23998c2ecf20Sopenharmony_ci/*
24008c2ecf20Sopenharmony_ci * Swap in a new table, returning the old one for the caller to destroy.
24018c2ecf20Sopenharmony_ci */
24028c2ecf20Sopenharmony_cistruct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
24038c2ecf20Sopenharmony_ci{
24048c2ecf20Sopenharmony_ci	struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL);
24058c2ecf20Sopenharmony_ci	struct queue_limits limits;
24068c2ecf20Sopenharmony_ci	int r;
24078c2ecf20Sopenharmony_ci
24088c2ecf20Sopenharmony_ci	mutex_lock(&md->suspend_lock);
24098c2ecf20Sopenharmony_ci
24108c2ecf20Sopenharmony_ci	/* device must be suspended */
24118c2ecf20Sopenharmony_ci	if (!dm_suspended_md(md))
24128c2ecf20Sopenharmony_ci		goto out;
24138c2ecf20Sopenharmony_ci
24148c2ecf20Sopenharmony_ci	/*
24158c2ecf20Sopenharmony_ci	 * If the new table has no data devices, retain the existing limits.
24168c2ecf20Sopenharmony_ci	 * This helps multipath with queue_if_no_path if all paths disappear,
24178c2ecf20Sopenharmony_ci	 * then new I/O is queued based on these limits, and then some paths
24188c2ecf20Sopenharmony_ci	 * reappear.
24198c2ecf20Sopenharmony_ci	 */
24208c2ecf20Sopenharmony_ci	if (dm_table_has_no_data_devices(table)) {
24218c2ecf20Sopenharmony_ci		live_map = dm_get_live_table_fast(md);
24228c2ecf20Sopenharmony_ci		if (live_map)
24238c2ecf20Sopenharmony_ci			limits = md->queue->limits;
24248c2ecf20Sopenharmony_ci		dm_put_live_table_fast(md);
24258c2ecf20Sopenharmony_ci	}
24268c2ecf20Sopenharmony_ci
24278c2ecf20Sopenharmony_ci	if (!live_map) {
24288c2ecf20Sopenharmony_ci		r = dm_calculate_queue_limits(table, &limits);
24298c2ecf20Sopenharmony_ci		if (r) {
24308c2ecf20Sopenharmony_ci			map = ERR_PTR(r);
24318c2ecf20Sopenharmony_ci			goto out;
24328c2ecf20Sopenharmony_ci		}
24338c2ecf20Sopenharmony_ci	}
24348c2ecf20Sopenharmony_ci
24358c2ecf20Sopenharmony_ci	map = __bind(md, table, &limits);
24368c2ecf20Sopenharmony_ci	dm_issue_global_event();
24378c2ecf20Sopenharmony_ci
24388c2ecf20Sopenharmony_ciout:
24398c2ecf20Sopenharmony_ci	mutex_unlock(&md->suspend_lock);
24408c2ecf20Sopenharmony_ci	return map;
24418c2ecf20Sopenharmony_ci}
24428c2ecf20Sopenharmony_ci
24438c2ecf20Sopenharmony_ci/*
24448c2ecf20Sopenharmony_ci * Functions to lock and unlock any filesystem running on the
24458c2ecf20Sopenharmony_ci * device.
24468c2ecf20Sopenharmony_ci */
24478c2ecf20Sopenharmony_cistatic int lock_fs(struct mapped_device *md)
24488c2ecf20Sopenharmony_ci{
24498c2ecf20Sopenharmony_ci	int r;
24508c2ecf20Sopenharmony_ci
24518c2ecf20Sopenharmony_ci	WARN_ON(md->frozen_sb);
24528c2ecf20Sopenharmony_ci
24538c2ecf20Sopenharmony_ci	md->frozen_sb = freeze_bdev(md->bdev);
24548c2ecf20Sopenharmony_ci	if (IS_ERR(md->frozen_sb)) {
24558c2ecf20Sopenharmony_ci		r = PTR_ERR(md->frozen_sb);
24568c2ecf20Sopenharmony_ci		md->frozen_sb = NULL;
24578c2ecf20Sopenharmony_ci		return r;
24588c2ecf20Sopenharmony_ci	}
24598c2ecf20Sopenharmony_ci
24608c2ecf20Sopenharmony_ci	set_bit(DMF_FROZEN, &md->flags);
24618c2ecf20Sopenharmony_ci
24628c2ecf20Sopenharmony_ci	return 0;
24638c2ecf20Sopenharmony_ci}
24648c2ecf20Sopenharmony_ci
24658c2ecf20Sopenharmony_cistatic void unlock_fs(struct mapped_device *md)
24668c2ecf20Sopenharmony_ci{
24678c2ecf20Sopenharmony_ci	if (!test_bit(DMF_FROZEN, &md->flags))
24688c2ecf20Sopenharmony_ci		return;
24698c2ecf20Sopenharmony_ci
24708c2ecf20Sopenharmony_ci	thaw_bdev(md->bdev, md->frozen_sb);
24718c2ecf20Sopenharmony_ci	md->frozen_sb = NULL;
24728c2ecf20Sopenharmony_ci	clear_bit(DMF_FROZEN, &md->flags);
24738c2ecf20Sopenharmony_ci}
24748c2ecf20Sopenharmony_ci
24758c2ecf20Sopenharmony_ci/*
24768c2ecf20Sopenharmony_ci * @suspend_flags: DM_SUSPEND_LOCKFS_FLAG and/or DM_SUSPEND_NOFLUSH_FLAG
24778c2ecf20Sopenharmony_ci * @task_state: e.g. TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE
24788c2ecf20Sopenharmony_ci * @dmf_suspended_flag: DMF_SUSPENDED or DMF_SUSPENDED_INTERNALLY
24798c2ecf20Sopenharmony_ci *
24808c2ecf20Sopenharmony_ci * If __dm_suspend returns 0, the device is completely quiescent
24818c2ecf20Sopenharmony_ci * now. There is no request-processing activity. All new requests
24828c2ecf20Sopenharmony_ci * are being added to md->deferred list.
24838c2ecf20Sopenharmony_ci */
24848c2ecf20Sopenharmony_cistatic int __dm_suspend(struct mapped_device *md, struct dm_table *map,
24858c2ecf20Sopenharmony_ci			unsigned suspend_flags, long task_state,
24868c2ecf20Sopenharmony_ci			int dmf_suspended_flag)
24878c2ecf20Sopenharmony_ci{
24888c2ecf20Sopenharmony_ci	bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
24898c2ecf20Sopenharmony_ci	bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
24908c2ecf20Sopenharmony_ci	int r;
24918c2ecf20Sopenharmony_ci
24928c2ecf20Sopenharmony_ci	lockdep_assert_held(&md->suspend_lock);
24938c2ecf20Sopenharmony_ci
24948c2ecf20Sopenharmony_ci	/*
24958c2ecf20Sopenharmony_ci	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
24968c2ecf20Sopenharmony_ci	 * This flag is cleared before dm_suspend returns.
24978c2ecf20Sopenharmony_ci	 */
24988c2ecf20Sopenharmony_ci	if (noflush)
24998c2ecf20Sopenharmony_ci		set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
25008c2ecf20Sopenharmony_ci	else
25018c2ecf20Sopenharmony_ci		DMDEBUG("%s: suspending with flush", dm_device_name(md));
25028c2ecf20Sopenharmony_ci
25038c2ecf20Sopenharmony_ci	/*
25048c2ecf20Sopenharmony_ci	 * This gets reverted if there's an error later and the targets
25058c2ecf20Sopenharmony_ci	 * provide the .presuspend_undo hook.
25068c2ecf20Sopenharmony_ci	 */
25078c2ecf20Sopenharmony_ci	dm_table_presuspend_targets(map);
25088c2ecf20Sopenharmony_ci
25098c2ecf20Sopenharmony_ci	/*
25108c2ecf20Sopenharmony_ci	 * Flush I/O to the device.
25118c2ecf20Sopenharmony_ci	 * Any I/O submitted after lock_fs() may not be flushed.
25128c2ecf20Sopenharmony_ci	 * noflush takes precedence over do_lockfs.
25138c2ecf20Sopenharmony_ci	 * (lock_fs() flushes I/Os and waits for them to complete.)
25148c2ecf20Sopenharmony_ci	 */
25158c2ecf20Sopenharmony_ci	if (!noflush && do_lockfs) {
25168c2ecf20Sopenharmony_ci		r = lock_fs(md);
25178c2ecf20Sopenharmony_ci		if (r) {
25188c2ecf20Sopenharmony_ci			dm_table_presuspend_undo_targets(map);
25198c2ecf20Sopenharmony_ci			return r;
25208c2ecf20Sopenharmony_ci		}
25218c2ecf20Sopenharmony_ci	}
25228c2ecf20Sopenharmony_ci
25238c2ecf20Sopenharmony_ci	/*
25248c2ecf20Sopenharmony_ci	 * Here we must make sure that no processes are submitting requests
25258c2ecf20Sopenharmony_ci	 * to target drivers i.e. no one may be executing
25268c2ecf20Sopenharmony_ci	 * __split_and_process_bio from dm_submit_bio.
25278c2ecf20Sopenharmony_ci	 *
25288c2ecf20Sopenharmony_ci	 * To get all processes out of __split_and_process_bio in dm_submit_bio,
25298c2ecf20Sopenharmony_ci	 * we take the write lock. To prevent any process from reentering
25308c2ecf20Sopenharmony_ci	 * __split_and_process_bio from dm_submit_bio and quiesce the thread
25318c2ecf20Sopenharmony_ci	 * (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND and call
25328c2ecf20Sopenharmony_ci	 * flush_workqueue(md->wq).
25338c2ecf20Sopenharmony_ci	 */
25348c2ecf20Sopenharmony_ci	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
25358c2ecf20Sopenharmony_ci	if (map)
25368c2ecf20Sopenharmony_ci		synchronize_srcu(&md->io_barrier);
25378c2ecf20Sopenharmony_ci
25388c2ecf20Sopenharmony_ci	/*
25398c2ecf20Sopenharmony_ci	 * Stop md->queue before flushing md->wq in case request-based
25408c2ecf20Sopenharmony_ci	 * dm defers requests to md->wq from md->queue.
25418c2ecf20Sopenharmony_ci	 */
25428c2ecf20Sopenharmony_ci	if (dm_request_based(md))
25438c2ecf20Sopenharmony_ci		dm_stop_queue(md->queue);
25448c2ecf20Sopenharmony_ci
25458c2ecf20Sopenharmony_ci	flush_workqueue(md->wq);
25468c2ecf20Sopenharmony_ci
25478c2ecf20Sopenharmony_ci	/*
25488c2ecf20Sopenharmony_ci	 * At this point no more requests are entering target request routines.
25498c2ecf20Sopenharmony_ci	 * We call dm_wait_for_completion to wait for all existing requests
25508c2ecf20Sopenharmony_ci	 * to finish.
25518c2ecf20Sopenharmony_ci	 */
25528c2ecf20Sopenharmony_ci	r = dm_wait_for_completion(md, task_state);
25538c2ecf20Sopenharmony_ci	if (!r)
25548c2ecf20Sopenharmony_ci		set_bit(dmf_suspended_flag, &md->flags);
25558c2ecf20Sopenharmony_ci
25568c2ecf20Sopenharmony_ci	if (noflush)
25578c2ecf20Sopenharmony_ci		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
25588c2ecf20Sopenharmony_ci	if (map)
25598c2ecf20Sopenharmony_ci		synchronize_srcu(&md->io_barrier);
25608c2ecf20Sopenharmony_ci
25618c2ecf20Sopenharmony_ci	/* were we interrupted ? */
25628c2ecf20Sopenharmony_ci	if (r < 0) {
25638c2ecf20Sopenharmony_ci		dm_queue_flush(md);
25648c2ecf20Sopenharmony_ci
25658c2ecf20Sopenharmony_ci		if (dm_request_based(md))
25668c2ecf20Sopenharmony_ci			dm_start_queue(md->queue);
25678c2ecf20Sopenharmony_ci
25688c2ecf20Sopenharmony_ci		unlock_fs(md);
25698c2ecf20Sopenharmony_ci		dm_table_presuspend_undo_targets(map);
25708c2ecf20Sopenharmony_ci		/* pushback list is already flushed, so skip flush */
25718c2ecf20Sopenharmony_ci	}
25728c2ecf20Sopenharmony_ci
25738c2ecf20Sopenharmony_ci	return r;
25748c2ecf20Sopenharmony_ci}
25758c2ecf20Sopenharmony_ci
/*
 * We need to be able to change a mapping table under a mounted
 * filesystem.  For example we might want to move some data in
 * the background.  Before the table can be swapped with
 * dm_swap_table, dm_suspend must be called to flush any
 * in-flight bios and ensure that any further I/O gets deferred.
 */
25838c2ecf20Sopenharmony_ci/*
25848c2ecf20Sopenharmony_ci * Suspend mechanism in request-based dm.
25858c2ecf20Sopenharmony_ci *
25868c2ecf20Sopenharmony_ci * 1. Flush all I/Os by lock_fs() if needed.
25878c2ecf20Sopenharmony_ci * 2. Stop dispatching any I/O by stopping the request_queue.
25888c2ecf20Sopenharmony_ci * 3. Wait for all in-flight I/Os to be completed or requeued.
25898c2ecf20Sopenharmony_ci *
25908c2ecf20Sopenharmony_ci * To abort suspend, start the request_queue.
25918c2ecf20Sopenharmony_ci */
25928c2ecf20Sopenharmony_ciint dm_suspend(struct mapped_device *md, unsigned suspend_flags)
25938c2ecf20Sopenharmony_ci{
25948c2ecf20Sopenharmony_ci	struct dm_table *map = NULL;
25958c2ecf20Sopenharmony_ci	int r = 0;
25968c2ecf20Sopenharmony_ci
25978c2ecf20Sopenharmony_ciretry:
25988c2ecf20Sopenharmony_ci	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
25998c2ecf20Sopenharmony_ci
26008c2ecf20Sopenharmony_ci	if (dm_suspended_md(md)) {
26018c2ecf20Sopenharmony_ci		r = -EINVAL;
26028c2ecf20Sopenharmony_ci		goto out_unlock;
26038c2ecf20Sopenharmony_ci	}
26048c2ecf20Sopenharmony_ci
26058c2ecf20Sopenharmony_ci	if (dm_suspended_internally_md(md)) {
26068c2ecf20Sopenharmony_ci		/* already internally suspended, wait for internal resume */
26078c2ecf20Sopenharmony_ci		mutex_unlock(&md->suspend_lock);
26088c2ecf20Sopenharmony_ci		r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
26098c2ecf20Sopenharmony_ci		if (r)
26108c2ecf20Sopenharmony_ci			return r;
26118c2ecf20Sopenharmony_ci		goto retry;
26128c2ecf20Sopenharmony_ci	}
26138c2ecf20Sopenharmony_ci
26148c2ecf20Sopenharmony_ci	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
26158c2ecf20Sopenharmony_ci
26168c2ecf20Sopenharmony_ci	r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED);
26178c2ecf20Sopenharmony_ci	if (r)
26188c2ecf20Sopenharmony_ci		goto out_unlock;
26198c2ecf20Sopenharmony_ci
26208c2ecf20Sopenharmony_ci	set_bit(DMF_POST_SUSPENDING, &md->flags);
26218c2ecf20Sopenharmony_ci	dm_table_postsuspend_targets(map);
26228c2ecf20Sopenharmony_ci	clear_bit(DMF_POST_SUSPENDING, &md->flags);
26238c2ecf20Sopenharmony_ci
26248c2ecf20Sopenharmony_ciout_unlock:
26258c2ecf20Sopenharmony_ci	mutex_unlock(&md->suspend_lock);
26268c2ecf20Sopenharmony_ci	return r;
26278c2ecf20Sopenharmony_ci}
26288c2ecf20Sopenharmony_ci
26298c2ecf20Sopenharmony_cistatic int __dm_resume(struct mapped_device *md, struct dm_table *map)
26308c2ecf20Sopenharmony_ci{
26318c2ecf20Sopenharmony_ci	if (map) {
26328c2ecf20Sopenharmony_ci		int r = dm_table_resume_targets(map);
26338c2ecf20Sopenharmony_ci		if (r)
26348c2ecf20Sopenharmony_ci			return r;
26358c2ecf20Sopenharmony_ci	}
26368c2ecf20Sopenharmony_ci
26378c2ecf20Sopenharmony_ci	dm_queue_flush(md);
26388c2ecf20Sopenharmony_ci
26398c2ecf20Sopenharmony_ci	/*
26408c2ecf20Sopenharmony_ci	 * Flushing deferred I/Os must be done after targets are resumed
26418c2ecf20Sopenharmony_ci	 * so that mapping of targets can work correctly.
26428c2ecf20Sopenharmony_ci	 * Request-based dm is queueing the deferred I/Os in its request_queue.
26438c2ecf20Sopenharmony_ci	 */
26448c2ecf20Sopenharmony_ci	if (dm_request_based(md))
26458c2ecf20Sopenharmony_ci		dm_start_queue(md->queue);
26468c2ecf20Sopenharmony_ci
26478c2ecf20Sopenharmony_ci	unlock_fs(md);
26488c2ecf20Sopenharmony_ci
26498c2ecf20Sopenharmony_ci	return 0;
26508c2ecf20Sopenharmony_ci}
26518c2ecf20Sopenharmony_ci
26528c2ecf20Sopenharmony_ciint dm_resume(struct mapped_device *md)
26538c2ecf20Sopenharmony_ci{
26548c2ecf20Sopenharmony_ci	int r;
26558c2ecf20Sopenharmony_ci	struct dm_table *map = NULL;
26568c2ecf20Sopenharmony_ci
26578c2ecf20Sopenharmony_ciretry:
26588c2ecf20Sopenharmony_ci	r = -EINVAL;
26598c2ecf20Sopenharmony_ci	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);
26608c2ecf20Sopenharmony_ci
26618c2ecf20Sopenharmony_ci	if (!dm_suspended_md(md))
26628c2ecf20Sopenharmony_ci		goto out;
26638c2ecf20Sopenharmony_ci
26648c2ecf20Sopenharmony_ci	if (dm_suspended_internally_md(md)) {
26658c2ecf20Sopenharmony_ci		/* already internally suspended, wait for internal resume */
26668c2ecf20Sopenharmony_ci		mutex_unlock(&md->suspend_lock);
26678c2ecf20Sopenharmony_ci		r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
26688c2ecf20Sopenharmony_ci		if (r)
26698c2ecf20Sopenharmony_ci			return r;
26708c2ecf20Sopenharmony_ci		goto retry;
26718c2ecf20Sopenharmony_ci	}
26728c2ecf20Sopenharmony_ci
26738c2ecf20Sopenharmony_ci	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
26748c2ecf20Sopenharmony_ci	if (!map || !dm_table_get_size(map))
26758c2ecf20Sopenharmony_ci		goto out;
26768c2ecf20Sopenharmony_ci
26778c2ecf20Sopenharmony_ci	r = __dm_resume(md, map);
26788c2ecf20Sopenharmony_ci	if (r)
26798c2ecf20Sopenharmony_ci		goto out;
26808c2ecf20Sopenharmony_ci
26818c2ecf20Sopenharmony_ci	clear_bit(DMF_SUSPENDED, &md->flags);
26828c2ecf20Sopenharmony_ciout:
26838c2ecf20Sopenharmony_ci	mutex_unlock(&md->suspend_lock);
26848c2ecf20Sopenharmony_ci
26858c2ecf20Sopenharmony_ci	return r;
26868c2ecf20Sopenharmony_ci}
26878c2ecf20Sopenharmony_ci
26888c2ecf20Sopenharmony_ci/*
26898c2ecf20Sopenharmony_ci * Internal suspend/resume works like userspace-driven suspend. It waits
26908c2ecf20Sopenharmony_ci * until all bios finish and prevents issuing new bios to the target drivers.
26918c2ecf20Sopenharmony_ci * It may be used only from the kernel.
26928c2ecf20Sopenharmony_ci */
26938c2ecf20Sopenharmony_ci
26948c2ecf20Sopenharmony_cistatic void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags)
26958c2ecf20Sopenharmony_ci{
26968c2ecf20Sopenharmony_ci	struct dm_table *map = NULL;
26978c2ecf20Sopenharmony_ci
26988c2ecf20Sopenharmony_ci	lockdep_assert_held(&md->suspend_lock);
26998c2ecf20Sopenharmony_ci
27008c2ecf20Sopenharmony_ci	if (md->internal_suspend_count++)
27018c2ecf20Sopenharmony_ci		return; /* nested internal suspend */
27028c2ecf20Sopenharmony_ci
27038c2ecf20Sopenharmony_ci	if (dm_suspended_md(md)) {
27048c2ecf20Sopenharmony_ci		set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
27058c2ecf20Sopenharmony_ci		return; /* nest suspend */
27068c2ecf20Sopenharmony_ci	}
27078c2ecf20Sopenharmony_ci
27088c2ecf20Sopenharmony_ci	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
27098c2ecf20Sopenharmony_ci
27108c2ecf20Sopenharmony_ci	/*
27118c2ecf20Sopenharmony_ci	 * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is
27128c2ecf20Sopenharmony_ci	 * supported.  Properly supporting a TASK_INTERRUPTIBLE internal suspend
27138c2ecf20Sopenharmony_ci	 * would require changing .presuspend to return an error -- avoid this
27148c2ecf20Sopenharmony_ci	 * until there is a need for more elaborate variants of internal suspend.
27158c2ecf20Sopenharmony_ci	 */
27168c2ecf20Sopenharmony_ci	(void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE,
27178c2ecf20Sopenharmony_ci			    DMF_SUSPENDED_INTERNALLY);
27188c2ecf20Sopenharmony_ci
27198c2ecf20Sopenharmony_ci	set_bit(DMF_POST_SUSPENDING, &md->flags);
27208c2ecf20Sopenharmony_ci	dm_table_postsuspend_targets(map);
27218c2ecf20Sopenharmony_ci	clear_bit(DMF_POST_SUSPENDING, &md->flags);
27228c2ecf20Sopenharmony_ci}
27238c2ecf20Sopenharmony_ci
27248c2ecf20Sopenharmony_cistatic void __dm_internal_resume(struct mapped_device *md)
27258c2ecf20Sopenharmony_ci{
27268c2ecf20Sopenharmony_ci	BUG_ON(!md->internal_suspend_count);
27278c2ecf20Sopenharmony_ci
27288c2ecf20Sopenharmony_ci	if (--md->internal_suspend_count)
27298c2ecf20Sopenharmony_ci		return; /* resume from nested internal suspend */
27308c2ecf20Sopenharmony_ci
27318c2ecf20Sopenharmony_ci	if (dm_suspended_md(md))
27328c2ecf20Sopenharmony_ci		goto done; /* resume from nested suspend */
27338c2ecf20Sopenharmony_ci
27348c2ecf20Sopenharmony_ci	/*
27358c2ecf20Sopenharmony_ci	 * NOTE: existing callers don't need to call dm_table_resume_targets
27368c2ecf20Sopenharmony_ci	 * (which may fail -- so best to avoid it for now by passing NULL map)
27378c2ecf20Sopenharmony_ci	 */
27388c2ecf20Sopenharmony_ci	(void) __dm_resume(md, NULL);
27398c2ecf20Sopenharmony_ci
27408c2ecf20Sopenharmony_cidone:
27418c2ecf20Sopenharmony_ci	clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
27428c2ecf20Sopenharmony_ci	smp_mb__after_atomic();
27438c2ecf20Sopenharmony_ci	wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY);
27448c2ecf20Sopenharmony_ci}
27458c2ecf20Sopenharmony_ci
27468c2ecf20Sopenharmony_civoid dm_internal_suspend_noflush(struct mapped_device *md)
27478c2ecf20Sopenharmony_ci{
27488c2ecf20Sopenharmony_ci	mutex_lock(&md->suspend_lock);
27498c2ecf20Sopenharmony_ci	__dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG);
27508c2ecf20Sopenharmony_ci	mutex_unlock(&md->suspend_lock);
27518c2ecf20Sopenharmony_ci}
27528c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_internal_suspend_noflush);
27538c2ecf20Sopenharmony_ci
27548c2ecf20Sopenharmony_civoid dm_internal_resume(struct mapped_device *md)
27558c2ecf20Sopenharmony_ci{
27568c2ecf20Sopenharmony_ci	mutex_lock(&md->suspend_lock);
27578c2ecf20Sopenharmony_ci	__dm_internal_resume(md);
27588c2ecf20Sopenharmony_ci	mutex_unlock(&md->suspend_lock);
27598c2ecf20Sopenharmony_ci}
27608c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_internal_resume);
27618c2ecf20Sopenharmony_ci
27628c2ecf20Sopenharmony_ci/*
27638c2ecf20Sopenharmony_ci * Fast variants of internal suspend/resume hold md->suspend_lock,
27648c2ecf20Sopenharmony_ci * which prevents interaction with userspace-driven suspend.
27658c2ecf20Sopenharmony_ci */
27668c2ecf20Sopenharmony_ci
27678c2ecf20Sopenharmony_civoid dm_internal_suspend_fast(struct mapped_device *md)
27688c2ecf20Sopenharmony_ci{
27698c2ecf20Sopenharmony_ci	mutex_lock(&md->suspend_lock);
27708c2ecf20Sopenharmony_ci	if (dm_suspended_md(md) || dm_suspended_internally_md(md))
27718c2ecf20Sopenharmony_ci		return;
27728c2ecf20Sopenharmony_ci
27738c2ecf20Sopenharmony_ci	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
27748c2ecf20Sopenharmony_ci	synchronize_srcu(&md->io_barrier);
27758c2ecf20Sopenharmony_ci	flush_workqueue(md->wq);
27768c2ecf20Sopenharmony_ci	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
27778c2ecf20Sopenharmony_ci}
27788c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_internal_suspend_fast);
27798c2ecf20Sopenharmony_ci
27808c2ecf20Sopenharmony_civoid dm_internal_resume_fast(struct mapped_device *md)
27818c2ecf20Sopenharmony_ci{
27828c2ecf20Sopenharmony_ci	if (dm_suspended_md(md) || dm_suspended_internally_md(md))
27838c2ecf20Sopenharmony_ci		goto done;
27848c2ecf20Sopenharmony_ci
27858c2ecf20Sopenharmony_ci	dm_queue_flush(md);
27868c2ecf20Sopenharmony_ci
27878c2ecf20Sopenharmony_cidone:
27888c2ecf20Sopenharmony_ci	mutex_unlock(&md->suspend_lock);
27898c2ecf20Sopenharmony_ci}
27908c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_internal_resume_fast);
27918c2ecf20Sopenharmony_ci
27928c2ecf20Sopenharmony_ci/*-----------------------------------------------------------------
27938c2ecf20Sopenharmony_ci * Event notification.
27948c2ecf20Sopenharmony_ci *---------------------------------------------------------------*/
27958c2ecf20Sopenharmony_ciint dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
27968c2ecf20Sopenharmony_ci		       unsigned cookie)
27978c2ecf20Sopenharmony_ci{
27988c2ecf20Sopenharmony_ci	int r;
27998c2ecf20Sopenharmony_ci	unsigned noio_flag;
28008c2ecf20Sopenharmony_ci	char udev_cookie[DM_COOKIE_LENGTH];
28018c2ecf20Sopenharmony_ci	char *envp[] = { udev_cookie, NULL };
28028c2ecf20Sopenharmony_ci
28038c2ecf20Sopenharmony_ci	noio_flag = memalloc_noio_save();
28048c2ecf20Sopenharmony_ci
28058c2ecf20Sopenharmony_ci	if (!cookie)
28068c2ecf20Sopenharmony_ci		r = kobject_uevent(&disk_to_dev(md->disk)->kobj, action);
28078c2ecf20Sopenharmony_ci	else {
28088c2ecf20Sopenharmony_ci		snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
28098c2ecf20Sopenharmony_ci			 DM_COOKIE_ENV_VAR_NAME, cookie);
28108c2ecf20Sopenharmony_ci		r = kobject_uevent_env(&disk_to_dev(md->disk)->kobj,
28118c2ecf20Sopenharmony_ci				       action, envp);
28128c2ecf20Sopenharmony_ci	}
28138c2ecf20Sopenharmony_ci
28148c2ecf20Sopenharmony_ci	memalloc_noio_restore(noio_flag);
28158c2ecf20Sopenharmony_ci
28168c2ecf20Sopenharmony_ci	return r;
28178c2ecf20Sopenharmony_ci}
28188c2ecf20Sopenharmony_ci
28198c2ecf20Sopenharmony_ciuint32_t dm_next_uevent_seq(struct mapped_device *md)
28208c2ecf20Sopenharmony_ci{
28218c2ecf20Sopenharmony_ci	return atomic_add_return(1, &md->uevent_seq);
28228c2ecf20Sopenharmony_ci}
28238c2ecf20Sopenharmony_ci
28248c2ecf20Sopenharmony_ciuint32_t dm_get_event_nr(struct mapped_device *md)
28258c2ecf20Sopenharmony_ci{
28268c2ecf20Sopenharmony_ci	return atomic_read(&md->event_nr);
28278c2ecf20Sopenharmony_ci}
28288c2ecf20Sopenharmony_ci
28298c2ecf20Sopenharmony_ciint dm_wait_event(struct mapped_device *md, int event_nr)
28308c2ecf20Sopenharmony_ci{
28318c2ecf20Sopenharmony_ci	return wait_event_interruptible(md->eventq,
28328c2ecf20Sopenharmony_ci			(event_nr != atomic_read(&md->event_nr)));
28338c2ecf20Sopenharmony_ci}
28348c2ecf20Sopenharmony_ci
28358c2ecf20Sopenharmony_civoid dm_uevent_add(struct mapped_device *md, struct list_head *elist)
28368c2ecf20Sopenharmony_ci{
28378c2ecf20Sopenharmony_ci	unsigned long flags;
28388c2ecf20Sopenharmony_ci
28398c2ecf20Sopenharmony_ci	spin_lock_irqsave(&md->uevent_lock, flags);
28408c2ecf20Sopenharmony_ci	list_add(elist, &md->uevent_list);
28418c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&md->uevent_lock, flags);
28428c2ecf20Sopenharmony_ci}
28438c2ecf20Sopenharmony_ci
28448c2ecf20Sopenharmony_ci/*
28458c2ecf20Sopenharmony_ci * The gendisk is only valid as long as you have a reference
28468c2ecf20Sopenharmony_ci * count on 'md'.
28478c2ecf20Sopenharmony_ci */
28488c2ecf20Sopenharmony_cistruct gendisk *dm_disk(struct mapped_device *md)
28498c2ecf20Sopenharmony_ci{
28508c2ecf20Sopenharmony_ci	return md->disk;
28518c2ecf20Sopenharmony_ci}
28528c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_disk);
28538c2ecf20Sopenharmony_ci
28548c2ecf20Sopenharmony_cistruct kobject *dm_kobject(struct mapped_device *md)
28558c2ecf20Sopenharmony_ci{
28568c2ecf20Sopenharmony_ci	return &md->kobj_holder.kobj;
28578c2ecf20Sopenharmony_ci}
28588c2ecf20Sopenharmony_ci
28598c2ecf20Sopenharmony_cistruct mapped_device *dm_get_from_kobject(struct kobject *kobj)
28608c2ecf20Sopenharmony_ci{
28618c2ecf20Sopenharmony_ci	struct mapped_device *md;
28628c2ecf20Sopenharmony_ci
28638c2ecf20Sopenharmony_ci	md = container_of(kobj, struct mapped_device, kobj_holder.kobj);
28648c2ecf20Sopenharmony_ci
28658c2ecf20Sopenharmony_ci	spin_lock(&_minor_lock);
28668c2ecf20Sopenharmony_ci	if (test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) {
28678c2ecf20Sopenharmony_ci		md = NULL;
28688c2ecf20Sopenharmony_ci		goto out;
28698c2ecf20Sopenharmony_ci	}
28708c2ecf20Sopenharmony_ci	dm_get(md);
28718c2ecf20Sopenharmony_ciout:
28728c2ecf20Sopenharmony_ci	spin_unlock(&_minor_lock);
28738c2ecf20Sopenharmony_ci
28748c2ecf20Sopenharmony_ci	return md;
28758c2ecf20Sopenharmony_ci}
28768c2ecf20Sopenharmony_ci
28778c2ecf20Sopenharmony_ciint dm_suspended_md(struct mapped_device *md)
28788c2ecf20Sopenharmony_ci{
28798c2ecf20Sopenharmony_ci	return test_bit(DMF_SUSPENDED, &md->flags);
28808c2ecf20Sopenharmony_ci}
28818c2ecf20Sopenharmony_ci
28828c2ecf20Sopenharmony_cistatic int dm_post_suspending_md(struct mapped_device *md)
28838c2ecf20Sopenharmony_ci{
28848c2ecf20Sopenharmony_ci	return test_bit(DMF_POST_SUSPENDING, &md->flags);
28858c2ecf20Sopenharmony_ci}
28868c2ecf20Sopenharmony_ci
28878c2ecf20Sopenharmony_ciint dm_suspended_internally_md(struct mapped_device *md)
28888c2ecf20Sopenharmony_ci{
28898c2ecf20Sopenharmony_ci	return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
28908c2ecf20Sopenharmony_ci}
28918c2ecf20Sopenharmony_ci
28928c2ecf20Sopenharmony_ciint dm_test_deferred_remove_flag(struct mapped_device *md)
28938c2ecf20Sopenharmony_ci{
28948c2ecf20Sopenharmony_ci	return test_bit(DMF_DEFERRED_REMOVE, &md->flags);
28958c2ecf20Sopenharmony_ci}
28968c2ecf20Sopenharmony_ci
28978c2ecf20Sopenharmony_ciint dm_suspended(struct dm_target *ti)
28988c2ecf20Sopenharmony_ci{
28998c2ecf20Sopenharmony_ci	return dm_suspended_md(ti->table->md);
29008c2ecf20Sopenharmony_ci}
29018c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_suspended);
29028c2ecf20Sopenharmony_ci
29038c2ecf20Sopenharmony_ciint dm_post_suspending(struct dm_target *ti)
29048c2ecf20Sopenharmony_ci{
29058c2ecf20Sopenharmony_ci	return dm_post_suspending_md(ti->table->md);
29068c2ecf20Sopenharmony_ci}
29078c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_post_suspending);
29088c2ecf20Sopenharmony_ci
29098c2ecf20Sopenharmony_ciint dm_noflush_suspending(struct dm_target *ti)
29108c2ecf20Sopenharmony_ci{
29118c2ecf20Sopenharmony_ci	return __noflush_suspending(ti->table->md);
29128c2ecf20Sopenharmony_ci}
29138c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_noflush_suspending);
29148c2ecf20Sopenharmony_ci
29158c2ecf20Sopenharmony_cistruct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_queue_mode type,
29168c2ecf20Sopenharmony_ci					    unsigned integrity, unsigned per_io_data_size,
29178c2ecf20Sopenharmony_ci					    unsigned min_pool_size)
29188c2ecf20Sopenharmony_ci{
29198c2ecf20Sopenharmony_ci	struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
29208c2ecf20Sopenharmony_ci	unsigned int pool_size = 0;
29218c2ecf20Sopenharmony_ci	unsigned int front_pad, io_front_pad;
29228c2ecf20Sopenharmony_ci	int ret;
29238c2ecf20Sopenharmony_ci
29248c2ecf20Sopenharmony_ci	if (!pools)
29258c2ecf20Sopenharmony_ci		return NULL;
29268c2ecf20Sopenharmony_ci
29278c2ecf20Sopenharmony_ci	switch (type) {
29288c2ecf20Sopenharmony_ci	case DM_TYPE_BIO_BASED:
29298c2ecf20Sopenharmony_ci	case DM_TYPE_DAX_BIO_BASED:
29308c2ecf20Sopenharmony_ci		pool_size = max(dm_get_reserved_bio_based_ios(), min_pool_size);
29318c2ecf20Sopenharmony_ci		front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
29328c2ecf20Sopenharmony_ci		io_front_pad = roundup(front_pad,  __alignof__(struct dm_io)) + offsetof(struct dm_io, tio);
29338c2ecf20Sopenharmony_ci		ret = bioset_init(&pools->io_bs, pool_size, io_front_pad, 0);
29348c2ecf20Sopenharmony_ci		if (ret)
29358c2ecf20Sopenharmony_ci			goto out;
29368c2ecf20Sopenharmony_ci		if (integrity && bioset_integrity_create(&pools->io_bs, pool_size))
29378c2ecf20Sopenharmony_ci			goto out;
29388c2ecf20Sopenharmony_ci		break;
29398c2ecf20Sopenharmony_ci	case DM_TYPE_REQUEST_BASED:
29408c2ecf20Sopenharmony_ci		pool_size = max(dm_get_reserved_rq_based_ios(), min_pool_size);
29418c2ecf20Sopenharmony_ci		front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
29428c2ecf20Sopenharmony_ci		/* per_io_data_size is used for blk-mq pdu at queue allocation */
29438c2ecf20Sopenharmony_ci		break;
29448c2ecf20Sopenharmony_ci	default:
29458c2ecf20Sopenharmony_ci		BUG();
29468c2ecf20Sopenharmony_ci	}
29478c2ecf20Sopenharmony_ci
29488c2ecf20Sopenharmony_ci	ret = bioset_init(&pools->bs, pool_size, front_pad, 0);
29498c2ecf20Sopenharmony_ci	if (ret)
29508c2ecf20Sopenharmony_ci		goto out;
29518c2ecf20Sopenharmony_ci
29528c2ecf20Sopenharmony_ci	if (integrity && bioset_integrity_create(&pools->bs, pool_size))
29538c2ecf20Sopenharmony_ci		goto out;
29548c2ecf20Sopenharmony_ci
29558c2ecf20Sopenharmony_ci	return pools;
29568c2ecf20Sopenharmony_ci
29578c2ecf20Sopenharmony_ciout:
29588c2ecf20Sopenharmony_ci	dm_free_md_mempools(pools);
29598c2ecf20Sopenharmony_ci
29608c2ecf20Sopenharmony_ci	return NULL;
29618c2ecf20Sopenharmony_ci}
29628c2ecf20Sopenharmony_ci
29638c2ecf20Sopenharmony_civoid dm_free_md_mempools(struct dm_md_mempools *pools)
29648c2ecf20Sopenharmony_ci{
29658c2ecf20Sopenharmony_ci	if (!pools)
29668c2ecf20Sopenharmony_ci		return;
29678c2ecf20Sopenharmony_ci
29688c2ecf20Sopenharmony_ci	bioset_exit(&pools->bs);
29698c2ecf20Sopenharmony_ci	bioset_exit(&pools->io_bs);
29708c2ecf20Sopenharmony_ci
29718c2ecf20Sopenharmony_ci	kfree(pools);
29728c2ecf20Sopenharmony_ci}
29738c2ecf20Sopenharmony_ci
29748c2ecf20Sopenharmony_cistruct dm_pr {
29758c2ecf20Sopenharmony_ci	u64	old_key;
29768c2ecf20Sopenharmony_ci	u64	new_key;
29778c2ecf20Sopenharmony_ci	u32	flags;
29788c2ecf20Sopenharmony_ci	bool	fail_early;
29798c2ecf20Sopenharmony_ci};
29808c2ecf20Sopenharmony_ci
29818c2ecf20Sopenharmony_cistatic int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn,
29828c2ecf20Sopenharmony_ci		      void *data)
29838c2ecf20Sopenharmony_ci{
29848c2ecf20Sopenharmony_ci	struct mapped_device *md = bdev->bd_disk->private_data;
29858c2ecf20Sopenharmony_ci	struct dm_table *table;
29868c2ecf20Sopenharmony_ci	struct dm_target *ti;
29878c2ecf20Sopenharmony_ci	int ret = -ENOTTY, srcu_idx;
29888c2ecf20Sopenharmony_ci
29898c2ecf20Sopenharmony_ci	table = dm_get_live_table(md, &srcu_idx);
29908c2ecf20Sopenharmony_ci	if (!table || !dm_table_get_size(table))
29918c2ecf20Sopenharmony_ci		goto out;
29928c2ecf20Sopenharmony_ci
29938c2ecf20Sopenharmony_ci	/* We only support devices that have a single target */
29948c2ecf20Sopenharmony_ci	if (dm_table_get_num_targets(table) != 1)
29958c2ecf20Sopenharmony_ci		goto out;
29968c2ecf20Sopenharmony_ci	ti = dm_table_get_target(table, 0);
29978c2ecf20Sopenharmony_ci
29988c2ecf20Sopenharmony_ci	if (dm_suspended_md(md)) {
29998c2ecf20Sopenharmony_ci		ret = -EAGAIN;
30008c2ecf20Sopenharmony_ci		goto out;
30018c2ecf20Sopenharmony_ci	}
30028c2ecf20Sopenharmony_ci
30038c2ecf20Sopenharmony_ci	ret = -EINVAL;
30048c2ecf20Sopenharmony_ci	if (!ti->type->iterate_devices)
30058c2ecf20Sopenharmony_ci		goto out;
30068c2ecf20Sopenharmony_ci
30078c2ecf20Sopenharmony_ci	ret = ti->type->iterate_devices(ti, fn, data);
30088c2ecf20Sopenharmony_ciout:
30098c2ecf20Sopenharmony_ci	dm_put_live_table(md, srcu_idx);
30108c2ecf20Sopenharmony_ci	return ret;
30118c2ecf20Sopenharmony_ci}
30128c2ecf20Sopenharmony_ci
30138c2ecf20Sopenharmony_ci/*
30148c2ecf20Sopenharmony_ci * For register / unregister we need to manually call out to every path.
30158c2ecf20Sopenharmony_ci */
30168c2ecf20Sopenharmony_cistatic int __dm_pr_register(struct dm_target *ti, struct dm_dev *dev,
30178c2ecf20Sopenharmony_ci			    sector_t start, sector_t len, void *data)
30188c2ecf20Sopenharmony_ci{
30198c2ecf20Sopenharmony_ci	struct dm_pr *pr = data;
30208c2ecf20Sopenharmony_ci	const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
30218c2ecf20Sopenharmony_ci
30228c2ecf20Sopenharmony_ci	if (!ops || !ops->pr_register)
30238c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
30248c2ecf20Sopenharmony_ci	return ops->pr_register(dev->bdev, pr->old_key, pr->new_key, pr->flags);
30258c2ecf20Sopenharmony_ci}
30268c2ecf20Sopenharmony_ci
30278c2ecf20Sopenharmony_cistatic int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
30288c2ecf20Sopenharmony_ci			  u32 flags)
30298c2ecf20Sopenharmony_ci{
30308c2ecf20Sopenharmony_ci	struct dm_pr pr = {
30318c2ecf20Sopenharmony_ci		.old_key	= old_key,
30328c2ecf20Sopenharmony_ci		.new_key	= new_key,
30338c2ecf20Sopenharmony_ci		.flags		= flags,
30348c2ecf20Sopenharmony_ci		.fail_early	= true,
30358c2ecf20Sopenharmony_ci	};
30368c2ecf20Sopenharmony_ci	int ret;
30378c2ecf20Sopenharmony_ci
30388c2ecf20Sopenharmony_ci	ret = dm_call_pr(bdev, __dm_pr_register, &pr);
30398c2ecf20Sopenharmony_ci	if (ret && new_key) {
30408c2ecf20Sopenharmony_ci		/* unregister all paths if we failed to register any path */
30418c2ecf20Sopenharmony_ci		pr.old_key = new_key;
30428c2ecf20Sopenharmony_ci		pr.new_key = 0;
30438c2ecf20Sopenharmony_ci		pr.flags = 0;
30448c2ecf20Sopenharmony_ci		pr.fail_early = false;
30458c2ecf20Sopenharmony_ci		dm_call_pr(bdev, __dm_pr_register, &pr);
30468c2ecf20Sopenharmony_ci	}
30478c2ecf20Sopenharmony_ci
30488c2ecf20Sopenharmony_ci	return ret;
30498c2ecf20Sopenharmony_ci}
30508c2ecf20Sopenharmony_ci
30518c2ecf20Sopenharmony_cistatic int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
30528c2ecf20Sopenharmony_ci			 u32 flags)
30538c2ecf20Sopenharmony_ci{
30548c2ecf20Sopenharmony_ci	struct mapped_device *md = bdev->bd_disk->private_data;
30558c2ecf20Sopenharmony_ci	const struct pr_ops *ops;
30568c2ecf20Sopenharmony_ci	int r, srcu_idx;
30578c2ecf20Sopenharmony_ci
30588c2ecf20Sopenharmony_ci	r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
30598c2ecf20Sopenharmony_ci	if (r < 0)
30608c2ecf20Sopenharmony_ci		goto out;
30618c2ecf20Sopenharmony_ci
30628c2ecf20Sopenharmony_ci	ops = bdev->bd_disk->fops->pr_ops;
30638c2ecf20Sopenharmony_ci	if (ops && ops->pr_reserve)
30648c2ecf20Sopenharmony_ci		r = ops->pr_reserve(bdev, key, type, flags);
30658c2ecf20Sopenharmony_ci	else
30668c2ecf20Sopenharmony_ci		r = -EOPNOTSUPP;
30678c2ecf20Sopenharmony_ciout:
30688c2ecf20Sopenharmony_ci	dm_unprepare_ioctl(md, srcu_idx);
30698c2ecf20Sopenharmony_ci	return r;
30708c2ecf20Sopenharmony_ci}
30718c2ecf20Sopenharmony_ci
30728c2ecf20Sopenharmony_cistatic int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
30738c2ecf20Sopenharmony_ci{
30748c2ecf20Sopenharmony_ci	struct mapped_device *md = bdev->bd_disk->private_data;
30758c2ecf20Sopenharmony_ci	const struct pr_ops *ops;
30768c2ecf20Sopenharmony_ci	int r, srcu_idx;
30778c2ecf20Sopenharmony_ci
30788c2ecf20Sopenharmony_ci	r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
30798c2ecf20Sopenharmony_ci	if (r < 0)
30808c2ecf20Sopenharmony_ci		goto out;
30818c2ecf20Sopenharmony_ci
30828c2ecf20Sopenharmony_ci	ops = bdev->bd_disk->fops->pr_ops;
30838c2ecf20Sopenharmony_ci	if (ops && ops->pr_release)
30848c2ecf20Sopenharmony_ci		r = ops->pr_release(bdev, key, type);
30858c2ecf20Sopenharmony_ci	else
30868c2ecf20Sopenharmony_ci		r = -EOPNOTSUPP;
30878c2ecf20Sopenharmony_ciout:
30888c2ecf20Sopenharmony_ci	dm_unprepare_ioctl(md, srcu_idx);
30898c2ecf20Sopenharmony_ci	return r;
30908c2ecf20Sopenharmony_ci}
30918c2ecf20Sopenharmony_ci
30928c2ecf20Sopenharmony_cistatic int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
30938c2ecf20Sopenharmony_ci			 enum pr_type type, bool abort)
30948c2ecf20Sopenharmony_ci{
30958c2ecf20Sopenharmony_ci	struct mapped_device *md = bdev->bd_disk->private_data;
30968c2ecf20Sopenharmony_ci	const struct pr_ops *ops;
30978c2ecf20Sopenharmony_ci	int r, srcu_idx;
30988c2ecf20Sopenharmony_ci
30998c2ecf20Sopenharmony_ci	r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
31008c2ecf20Sopenharmony_ci	if (r < 0)
31018c2ecf20Sopenharmony_ci		goto out;
31028c2ecf20Sopenharmony_ci
31038c2ecf20Sopenharmony_ci	ops = bdev->bd_disk->fops->pr_ops;
31048c2ecf20Sopenharmony_ci	if (ops && ops->pr_preempt)
31058c2ecf20Sopenharmony_ci		r = ops->pr_preempt(bdev, old_key, new_key, type, abort);
31068c2ecf20Sopenharmony_ci	else
31078c2ecf20Sopenharmony_ci		r = -EOPNOTSUPP;
31088c2ecf20Sopenharmony_ciout:
31098c2ecf20Sopenharmony_ci	dm_unprepare_ioctl(md, srcu_idx);
31108c2ecf20Sopenharmony_ci	return r;
31118c2ecf20Sopenharmony_ci}
31128c2ecf20Sopenharmony_ci
31138c2ecf20Sopenharmony_cistatic int dm_pr_clear(struct block_device *bdev, u64 key)
31148c2ecf20Sopenharmony_ci{
31158c2ecf20Sopenharmony_ci	struct mapped_device *md = bdev->bd_disk->private_data;
31168c2ecf20Sopenharmony_ci	const struct pr_ops *ops;
31178c2ecf20Sopenharmony_ci	int r, srcu_idx;
31188c2ecf20Sopenharmony_ci
31198c2ecf20Sopenharmony_ci	r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
31208c2ecf20Sopenharmony_ci	if (r < 0)
31218c2ecf20Sopenharmony_ci		goto out;
31228c2ecf20Sopenharmony_ci
31238c2ecf20Sopenharmony_ci	ops = bdev->bd_disk->fops->pr_ops;
31248c2ecf20Sopenharmony_ci	if (ops && ops->pr_clear)
31258c2ecf20Sopenharmony_ci		r = ops->pr_clear(bdev, key);
31268c2ecf20Sopenharmony_ci	else
31278c2ecf20Sopenharmony_ci		r = -EOPNOTSUPP;
31288c2ecf20Sopenharmony_ciout:
31298c2ecf20Sopenharmony_ci	dm_unprepare_ioctl(md, srcu_idx);
31308c2ecf20Sopenharmony_ci	return r;
31318c2ecf20Sopenharmony_ci}
31328c2ecf20Sopenharmony_ci
31338c2ecf20Sopenharmony_cistatic const struct pr_ops dm_pr_ops = {
31348c2ecf20Sopenharmony_ci	.pr_register	= dm_pr_register,
31358c2ecf20Sopenharmony_ci	.pr_reserve	= dm_pr_reserve,
31368c2ecf20Sopenharmony_ci	.pr_release	= dm_pr_release,
31378c2ecf20Sopenharmony_ci	.pr_preempt	= dm_pr_preempt,
31388c2ecf20Sopenharmony_ci	.pr_clear	= dm_pr_clear,
31398c2ecf20Sopenharmony_ci};
31408c2ecf20Sopenharmony_ci
31418c2ecf20Sopenharmony_cistatic const struct block_device_operations dm_blk_dops = {
31428c2ecf20Sopenharmony_ci	.submit_bio = dm_submit_bio,
31438c2ecf20Sopenharmony_ci	.open = dm_blk_open,
31448c2ecf20Sopenharmony_ci	.release = dm_blk_close,
31458c2ecf20Sopenharmony_ci	.ioctl = dm_blk_ioctl,
31468c2ecf20Sopenharmony_ci	.getgeo = dm_blk_getgeo,
31478c2ecf20Sopenharmony_ci	.report_zones = dm_blk_report_zones,
31488c2ecf20Sopenharmony_ci	.pr_ops = &dm_pr_ops,
31498c2ecf20Sopenharmony_ci	.owner = THIS_MODULE
31508c2ecf20Sopenharmony_ci};
31518c2ecf20Sopenharmony_ci
31528c2ecf20Sopenharmony_cistatic const struct block_device_operations dm_rq_blk_dops = {
31538c2ecf20Sopenharmony_ci	.open = dm_blk_open,
31548c2ecf20Sopenharmony_ci	.release = dm_blk_close,
31558c2ecf20Sopenharmony_ci	.ioctl = dm_blk_ioctl,
31568c2ecf20Sopenharmony_ci	.getgeo = dm_blk_getgeo,
31578c2ecf20Sopenharmony_ci	.pr_ops = &dm_pr_ops,
31588c2ecf20Sopenharmony_ci	.owner = THIS_MODULE
31598c2ecf20Sopenharmony_ci};
31608c2ecf20Sopenharmony_ci
31618c2ecf20Sopenharmony_cistatic const struct dax_operations dm_dax_ops = {
31628c2ecf20Sopenharmony_ci	.direct_access = dm_dax_direct_access,
31638c2ecf20Sopenharmony_ci	.dax_supported = dm_dax_supported,
31648c2ecf20Sopenharmony_ci	.copy_from_iter = dm_dax_copy_from_iter,
31658c2ecf20Sopenharmony_ci	.copy_to_iter = dm_dax_copy_to_iter,
31668c2ecf20Sopenharmony_ci	.zero_page_range = dm_dax_zero_page_range,
31678c2ecf20Sopenharmony_ci};
31688c2ecf20Sopenharmony_ci
31698c2ecf20Sopenharmony_ci/*
31708c2ecf20Sopenharmony_ci * module hooks
31718c2ecf20Sopenharmony_ci */
31728c2ecf20Sopenharmony_cimodule_init(dm_init);
31738c2ecf20Sopenharmony_cimodule_exit(dm_exit);
31748c2ecf20Sopenharmony_ci
31758c2ecf20Sopenharmony_cimodule_param(major, uint, 0);
31768c2ecf20Sopenharmony_ciMODULE_PARM_DESC(major, "The major number of the device mapper");
31778c2ecf20Sopenharmony_ci
31788c2ecf20Sopenharmony_cimodule_param(reserved_bio_based_ios, uint, S_IRUGO | S_IWUSR);
31798c2ecf20Sopenharmony_ciMODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");
31808c2ecf20Sopenharmony_ci
31818c2ecf20Sopenharmony_cimodule_param(dm_numa_node, int, S_IRUGO | S_IWUSR);
31828c2ecf20Sopenharmony_ciMODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations");
31838c2ecf20Sopenharmony_ci
31848c2ecf20Sopenharmony_cimodule_param(swap_bios, int, S_IRUGO | S_IWUSR);
31858c2ecf20Sopenharmony_ciMODULE_PARM_DESC(swap_bios, "Maximum allowed inflight swap IOs");
31868c2ecf20Sopenharmony_ci
31878c2ecf20Sopenharmony_ciMODULE_DESCRIPTION(DM_NAME " driver");
31888c2ecf20Sopenharmony_ciMODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
31898c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
3190