162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. 462306a36Sopenharmony_ci * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * This file is released under the GPL. 762306a36Sopenharmony_ci */ 862306a36Sopenharmony_ci 962306a36Sopenharmony_ci#include "dm-core.h" 1062306a36Sopenharmony_ci#include "dm-rq.h" 1162306a36Sopenharmony_ci#include "dm-uevent.h" 1262306a36Sopenharmony_ci#include "dm-ima.h" 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#include <linux/init.h> 1562306a36Sopenharmony_ci#include <linux/module.h> 1662306a36Sopenharmony_ci#include <linux/mutex.h> 1762306a36Sopenharmony_ci#include <linux/sched/mm.h> 1862306a36Sopenharmony_ci#include <linux/sched/signal.h> 1962306a36Sopenharmony_ci#include <linux/blkpg.h> 2062306a36Sopenharmony_ci#include <linux/bio.h> 2162306a36Sopenharmony_ci#include <linux/mempool.h> 2262306a36Sopenharmony_ci#include <linux/dax.h> 2362306a36Sopenharmony_ci#include <linux/slab.h> 2462306a36Sopenharmony_ci#include <linux/idr.h> 2562306a36Sopenharmony_ci#include <linux/uio.h> 2662306a36Sopenharmony_ci#include <linux/hdreg.h> 2762306a36Sopenharmony_ci#include <linux/delay.h> 2862306a36Sopenharmony_ci#include <linux/wait.h> 2962306a36Sopenharmony_ci#include <linux/pr.h> 3062306a36Sopenharmony_ci#include <linux/refcount.h> 3162306a36Sopenharmony_ci#include <linux/part_stat.h> 3262306a36Sopenharmony_ci#include <linux/blk-crypto.h> 3362306a36Sopenharmony_ci#include <linux/blk-crypto-profile.h> 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci#define DM_MSG_PREFIX "core" 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci/* 3862306a36Sopenharmony_ci * Cookies are numeric values sent with CHANGE and REMOVE 3962306a36Sopenharmony_ci * uevents while resuming, removing or renaming the device. 
 */
#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
#define DM_COOKIE_LENGTH 24

/*
 * For REQ_POLLED fs bio, this flag is set if we link mapped underlying
 * dm_io into one list, and reuse bio->bi_private as the list head. Before
 * ending this fs bio, we will recover its ->bi_private.
 */
#define REQ_DM_POLL_LIST REQ_DRV

static const char *_name = DM_NAME;

/* "major" is the requested major; "_major" is the one actually registered. */
static unsigned int major;
static unsigned int _major;

static DEFINE_IDR(_minor_idr);

/* Protects _minor_idr and the open/freeing/deleting state checks below. */
static DEFINE_SPINLOCK(_minor_lock);

static void do_deferred_remove(struct work_struct *w);

static DECLARE_WORK(deferred_remove_work, do_deferred_remove);

static struct workqueue_struct *deferred_remove_workqueue;

atomic_t dm_global_event_nr = ATOMIC_INIT(0);
DECLARE_WAIT_QUEUE_HEAD(dm_global_eventq);

/*
 * Bump the global event counter and wake any waiters on dm_global_eventq.
 */
void dm_issue_global_event(void)
{
	atomic_inc(&dm_global_event_nr);
	wake_up(&dm_global_eventq);
}

DEFINE_STATIC_KEY_FALSE(stats_enabled);
DEFINE_STATIC_KEY_FALSE(swap_bios_enabled);
DEFINE_STATIC_KEY_FALSE(zoned_enabled);

/*
 * One of these is allocated (on-stack) per original bio.
 */
struct clone_info {
	struct dm_table *map;
	struct bio *bio;
	struct dm_io *io;
	sector_t sector;
	unsigned int sector_count;
	bool is_abnormal_io:1;
	bool submit_as_polled:1;
};

/* Map a clone bio back to its enclosing dm_target_io. */
static inline struct dm_target_io *clone_to_tio(struct bio *clone)
{
	return container_of(clone, struct dm_target_io, clone);
}

/*
 * Return the per-bio data area that precedes the clone bio.  The offset
 * depends on whether the tio is the one embedded inside a dm_io
 * (DM_TIO_INSIDE_DM_IO) or a separately allocated clone.
 */
void *dm_per_bio_data(struct bio *bio, size_t data_size)
{
	if (!dm_tio_flagged(clone_to_tio(bio), DM_TIO_INSIDE_DM_IO))
		return (char *)bio - DM_TARGET_IO_BIO_OFFSET - data_size;
	return (char *)bio - DM_IO_BIO_OFFSET - data_size;
}
EXPORT_SYMBOL_GPL(dm_per_bio_data);

/*
 * Inverse of dm_per_bio_data(): recover the clone bio from its per-bio
 * data area, using the magic value after the data to pick the layout.
 */
struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size)
{
	struct dm_io *io = (struct dm_io *)((char *)data + data_size);

	if (io->magic == DM_IO_MAGIC)
		return (struct bio *)((char *)io + DM_IO_BIO_OFFSET);
	BUG_ON(io->magic != DM_TIO_MAGIC);
	return (struct bio *)((char *)io + DM_TARGET_IO_BIO_OFFSET);
}
EXPORT_SYMBOL_GPL(dm_bio_from_per_bio_data);

unsigned int dm_bio_get_target_bio_nr(const struct bio *bio)
{
	return container_of(bio, struct dm_target_io, clone)->target_bio_nr;
}
EXPORT_SYMBOL_GPL(dm_bio_get_target_bio_nr);

#define MINOR_ALLOCED ((void *)-1)

#define DM_NUMA_NODE NUMA_NO_NODE
static int dm_numa_node = DM_NUMA_NODE;

#define DEFAULT_SWAP_BIOS	(8 * 1048576 / PAGE_SIZE)
static int swap_bios = DEFAULT_SWAP_BIOS;
/* Read the swap_bios module param, falling back to the default if <= 0. */
static int get_swap_bios(void)
{
	int latch = READ_ONCE(swap_bios);

	if (unlikely(latch <= 0))
		latch = DEFAULT_SWAP_BIOS;
	return latch;
}

struct table_device {
	struct list_head list;
	refcount_t count;
	struct dm_dev dm_dev;
};

/*
 * Bio-based DM's mempools' reserved IOs set by the user.
 */
#define RESERVED_BIO_BASED_IOS		16
static unsigned int reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;

/*
 * Clamp *module_param into [min, max] and return the effective value.
 * The stored parameter is corrected with cmpxchg so a concurrent writer
 * is not blindly overwritten.
 */
static int __dm_get_module_param_int(int *module_param, int min, int max)
{
	int param = READ_ONCE(*module_param);
	int modified_param = 0;
	bool modified = true;

	if (param < min)
		modified_param = min;
	else if (param > max)
		modified_param = max;
	else
		modified = false;

	if (modified) {
		(void)cmpxchg(module_param, param, modified_param);
		param = modified_param;
	}

	return param;
}

/*
 * Unsigned variant: a value of 0 selects "def"; values above "max" are
 * clamped to "max".  Same cmpxchg write-back scheme as the int variant.
 */
unsigned int __dm_get_module_param(unsigned int *module_param, unsigned int def, unsigned int max)
{
	unsigned int param = READ_ONCE(*module_param);
	unsigned int modified_param = 0;

	if (!param)
		modified_param = def;
	else if (param > max)
		modified_param = max;

	if (modified_param) {
		(void)cmpxchg(module_param, param, modified_param);
		param = modified_param;
	}

	return param;
}

unsigned int dm_get_reserved_bio_based_ios(void)
{
	return __dm_get_module_param(&reserved_bio_based_ios,
				     RESERVED_BIO_BASED_IOS, DM_RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);

/*
 * Effective NUMA node for allocations; dm_numa_node is clamped to
 * [NUMA_NO_NODE, num_online_nodes() - 1].
 */
static unsigned int dm_get_numa_node(void)
{
	return __dm_get_module_param_int(&dm_numa_node,
					 DM_NUMA_NODE, num_online_nodes() - 1);
}

/*
 * Module init for this file: uevent support, the deferred-remove
 * workqueue, and the block major.  Teardown on error is in reverse
 * order of setup.
 */
static int __init local_init(void)
{
	int r;

	r = dm_uevent_init();
	if (r)
		return r;

	deferred_remove_workqueue = alloc_ordered_workqueue("kdmremove", 0);
	if (!deferred_remove_workqueue) {
		r = -ENOMEM;
		goto out_uevent_exit;
	}

	_major = major;
	r = register_blkdev(_major, _name);
	if (r < 0)
		goto out_free_workqueue;

	/* major == 0 asks the block layer to pick one; it is returned in r. */
	if (!_major)
		_major = r;

	return 0;

out_free_workqueue:
	destroy_workqueue(deferred_remove_workqueue);
out_uevent_exit:
	dm_uevent_exit();

	return r;
}

/* Undo local_init(): tear down in reverse order of setup. */
static void local_exit(void)
{
	destroy_workqueue(deferred_remove_workqueue);

	unregister_blkdev(_major, _name);
	dm_uevent_exit();

	_major = 0;

	DMINFO("cleaned up");
}

/*
 * Subsystem init/exit tables.  _exits[i] undoes _inits[i], so partial
 * initialization can be rolled back by walking _exits backwards.
 */
static int (*_inits[])(void) __initdata = {
	local_init,
	dm_target_init,
	dm_linear_init,
	dm_stripe_init,
	dm_io_init,
	dm_kcopyd_init,
	dm_interface_init,
	dm_statistics_init,
};

static void (*_exits[])(void) = {
	local_exit,
	dm_target_exit,
	dm_linear_exit,
	dm_stripe_exit,
	dm_io_exit,
	dm_kcopyd_exit,
	dm_interface_exit,
	dm_statistics_exit,
};

static int __init dm_init(void)
{
	const int count = ARRAY_SIZE(_inits);
	int r, i;

#if (IS_ENABLED(CONFIG_IMA) && !IS_ENABLED(CONFIG_IMA_DISABLE_HTABLE))
	DMWARN("CONFIG_IMA_DISABLE_HTABLE is disabled."
	       " Duplicate IMA measurements will not be recorded in the IMA log.");
#endif

	for (i = 0; i < count; i++) {
		r = _inits[i]();
		if (r)
			goto bad;
	}

	return 0;
bad:
	/* Unwind only the subsystems that were successfully initialized. */
	while (i--)
		_exits[i]();

	return r;
}

static void __exit dm_exit(void)
{
	int i = ARRAY_SIZE(_exits);

	while (i--)
		_exits[i]();

	/*
	 * Should be empty by this point.
	 */
	idr_destroy(&_minor_idr);
}

/*
 * Block device functions
 */
int dm_deleting_md(struct mapped_device *md)
{
	return test_bit(DMF_DELETING, &md->flags);
}

/*
 * block_device_operations ->open: take a reference on the mapped device
 * unless it is being freed or deleted.  Returns -ENXIO in that case.
 */
static int dm_blk_open(struct gendisk *disk, blk_mode_t mode)
{
	struct mapped_device *md;

	spin_lock(&_minor_lock);

	md = disk->private_data;
	if (!md)
		goto out;

	if (test_bit(DMF_FREEING, &md->flags) ||
	    dm_deleting_md(md)) {
		md = NULL;
		goto out;
	}

	dm_get(md);
	atomic_inc(&md->open_count);
out:
	spin_unlock(&_minor_lock);

	return md ? 0 : -ENXIO;
}

/*
 * block_device_operations ->release: drop the open count and, if this
 * was the last opener of a device marked for deferred removal, kick the
 * deferred-remove work.
 */
static void dm_blk_close(struct gendisk *disk)
{
	struct mapped_device *md;

	spin_lock(&_minor_lock);

	md = disk->private_data;
	if (WARN_ON(!md))
		goto out;

	if (atomic_dec_and_test(&md->open_count) &&
	    (test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
		queue_work(deferred_remove_workqueue, &deferred_remove_work);

	dm_put(md);
out:
	spin_unlock(&_minor_lock);
}

int dm_open_count(struct mapped_device *md)
{
	return atomic_read(&md->open_count);
}

/*
 * Guarantees nothing is using the device before it's deleted.
 */
int dm_lock_for_deletion(struct mapped_device *md, bool mark_deferred, bool only_deferred)
{
	int r = 0;

	spin_lock(&_minor_lock);

	if (dm_open_count(md)) {
		/* Still open: refuse, optionally arming deferred removal. */
		r = -EBUSY;
		if (mark_deferred)
			set_bit(DMF_DEFERRED_REMOVE, &md->flags);
	} else if (only_deferred && !test_bit(DMF_DEFERRED_REMOVE, &md->flags))
		r = -EEXIST;
	else
		set_bit(DMF_DELETING, &md->flags);

	spin_unlock(&_minor_lock);

	return r;
}

/*
 * Clear a pending deferred removal; fails with -EBUSY if deletion is
 * already in progress.
 */
int dm_cancel_deferred_remove(struct mapped_device *md)
{
	int r = 0;

	spin_lock(&_minor_lock);

	if (test_bit(DMF_DELETING, &md->flags))
		r = -EBUSY;
	else
		clear_bit(DMF_DEFERRED_REMOVE, &md->flags);

	spin_unlock(&_minor_lock);

	return r;
}

static void do_deferred_remove(struct work_struct *w)
{
	dm_deferred_remove();
}

static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	struct mapped_device *md = bdev->bd_disk->private_data;

	return dm_get_geometry(md, geo);
}

/*
 * Resolve the single underlying block device an ioctl should be routed
 * to.  Takes the live table SRCU lock via dm_get_live_table(); callers
 * must pair with dm_unprepare_ioctl() regardless of the return value.
 * Retries while the target reports -ENOTCONN (unless a fatal signal is
 * pending).
 */
static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
			    struct block_device **bdev)
{
	struct dm_target *ti;
	struct dm_table *map;
	int r;

retry:
	r = -ENOTTY;
	map = dm_get_live_table(md, srcu_idx);
	if (!map || !dm_table_get_size(map))
		return r;

	/* We only support devices that have a single target */
	if (map->num_targets != 1)
		return r;

	ti = dm_table_get_target(map, 0);
	if (!ti->type->prepare_ioctl)
		return r;

	if (dm_suspended_md(md))
		return -EAGAIN;

	r = ti->type->prepare_ioctl(ti, bdev);
	if (r == -ENOTCONN && !fatal_signal_pending(current)) {
		/* Drop the table reference before sleeping and retrying. */
		dm_put_live_table(md, *srcu_idx);
		fsleep(10000);
		goto retry;
	}

	return r;
}

static void dm_unprepare_ioctl(struct mapped_device *md, int srcu_idx)
{
	dm_put_live_table(md, srcu_idx);
}

45162306a36Sopenharmony_cistatic int dm_blk_ioctl(struct block_device *bdev, blk_mode_t mode, 45262306a36Sopenharmony_ci unsigned int cmd, unsigned long arg) 45362306a36Sopenharmony_ci{ 45462306a36Sopenharmony_ci struct mapped_device *md = bdev->bd_disk->private_data; 45562306a36Sopenharmony_ci int r, srcu_idx; 45662306a36Sopenharmony_ci 45762306a36Sopenharmony_ci r = dm_prepare_ioctl(md, &srcu_idx, &bdev); 45862306a36Sopenharmony_ci if (r < 0) 45962306a36Sopenharmony_ci goto out; 46062306a36Sopenharmony_ci 46162306a36Sopenharmony_ci if (r > 0) { 46262306a36Sopenharmony_ci /* 46362306a36Sopenharmony_ci * Target determined this ioctl is being issued against a 46462306a36Sopenharmony_ci * subset of the parent bdev; require extra privileges. 46562306a36Sopenharmony_ci */ 46662306a36Sopenharmony_ci if (!capable(CAP_SYS_RAWIO)) { 46762306a36Sopenharmony_ci DMDEBUG_LIMIT( 46862306a36Sopenharmony_ci "%s: sending ioctl %x to DM device without required privilege.", 46962306a36Sopenharmony_ci current->comm, cmd); 47062306a36Sopenharmony_ci r = -ENOIOCTLCMD; 47162306a36Sopenharmony_ci goto out; 47262306a36Sopenharmony_ci } 47362306a36Sopenharmony_ci } 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci if (!bdev->bd_disk->fops->ioctl) 47662306a36Sopenharmony_ci r = -ENOTTY; 47762306a36Sopenharmony_ci else 47862306a36Sopenharmony_ci r = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); 47962306a36Sopenharmony_ciout: 48062306a36Sopenharmony_ci dm_unprepare_ioctl(md, srcu_idx); 48162306a36Sopenharmony_ci return r; 48262306a36Sopenharmony_ci} 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ciu64 dm_start_time_ns_from_clone(struct bio *bio) 48562306a36Sopenharmony_ci{ 48662306a36Sopenharmony_ci return jiffies_to_nsecs(clone_to_tio(bio)->io->start_time); 48762306a36Sopenharmony_ci} 48862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone); 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_cistatic inline bool bio_is_flush_with_data(struct bio *bio) 
49162306a36Sopenharmony_ci{ 49262306a36Sopenharmony_ci return ((bio->bi_opf & REQ_PREFLUSH) && bio->bi_iter.bi_size); 49362306a36Sopenharmony_ci} 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_cistatic inline unsigned int dm_io_sectors(struct dm_io *io, struct bio *bio) 49662306a36Sopenharmony_ci{ 49762306a36Sopenharmony_ci /* 49862306a36Sopenharmony_ci * If REQ_PREFLUSH set, don't account payload, it will be 49962306a36Sopenharmony_ci * submitted (and accounted) after this flush completes. 50062306a36Sopenharmony_ci */ 50162306a36Sopenharmony_ci if (bio_is_flush_with_data(bio)) 50262306a36Sopenharmony_ci return 0; 50362306a36Sopenharmony_ci if (unlikely(dm_io_flagged(io, DM_IO_WAS_SPLIT))) 50462306a36Sopenharmony_ci return io->sectors; 50562306a36Sopenharmony_ci return bio_sectors(bio); 50662306a36Sopenharmony_ci} 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_cistatic void dm_io_acct(struct dm_io *io, bool end) 50962306a36Sopenharmony_ci{ 51062306a36Sopenharmony_ci struct bio *bio = io->orig_bio; 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci if (dm_io_flagged(io, DM_IO_BLK_STAT)) { 51362306a36Sopenharmony_ci if (!end) 51462306a36Sopenharmony_ci bdev_start_io_acct(bio->bi_bdev, bio_op(bio), 51562306a36Sopenharmony_ci io->start_time); 51662306a36Sopenharmony_ci else 51762306a36Sopenharmony_ci bdev_end_io_acct(bio->bi_bdev, bio_op(bio), 51862306a36Sopenharmony_ci dm_io_sectors(io, bio), 51962306a36Sopenharmony_ci io->start_time); 52062306a36Sopenharmony_ci } 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci if (static_branch_unlikely(&stats_enabled) && 52362306a36Sopenharmony_ci unlikely(dm_stats_used(&io->md->stats))) { 52462306a36Sopenharmony_ci sector_t sector; 52562306a36Sopenharmony_ci 52662306a36Sopenharmony_ci if (unlikely(dm_io_flagged(io, DM_IO_WAS_SPLIT))) 52762306a36Sopenharmony_ci sector = bio_end_sector(bio) - io->sector_offset; 52862306a36Sopenharmony_ci else 52962306a36Sopenharmony_ci sector = bio->bi_iter.bi_sector; 
53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci dm_stats_account_io(&io->md->stats, bio_data_dir(bio), 53262306a36Sopenharmony_ci sector, dm_io_sectors(io, bio), 53362306a36Sopenharmony_ci end, io->start_time, &io->stats_aux); 53462306a36Sopenharmony_ci } 53562306a36Sopenharmony_ci} 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_cistatic void __dm_start_io_acct(struct dm_io *io) 53862306a36Sopenharmony_ci{ 53962306a36Sopenharmony_ci dm_io_acct(io, false); 54062306a36Sopenharmony_ci} 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_cistatic void dm_start_io_acct(struct dm_io *io, struct bio *clone) 54362306a36Sopenharmony_ci{ 54462306a36Sopenharmony_ci /* 54562306a36Sopenharmony_ci * Ensure IO accounting is only ever started once. 54662306a36Sopenharmony_ci */ 54762306a36Sopenharmony_ci if (dm_io_flagged(io, DM_IO_ACCOUNTED)) 54862306a36Sopenharmony_ci return; 54962306a36Sopenharmony_ci 55062306a36Sopenharmony_ci /* Expect no possibility for race unless DM_TIO_IS_DUPLICATE_BIO. 
*/ 55162306a36Sopenharmony_ci if (!clone || likely(dm_tio_is_normal(clone_to_tio(clone)))) { 55262306a36Sopenharmony_ci dm_io_set_flag(io, DM_IO_ACCOUNTED); 55362306a36Sopenharmony_ci } else { 55462306a36Sopenharmony_ci unsigned long flags; 55562306a36Sopenharmony_ci /* Can afford locking given DM_TIO_IS_DUPLICATE_BIO */ 55662306a36Sopenharmony_ci spin_lock_irqsave(&io->lock, flags); 55762306a36Sopenharmony_ci if (dm_io_flagged(io, DM_IO_ACCOUNTED)) { 55862306a36Sopenharmony_ci spin_unlock_irqrestore(&io->lock, flags); 55962306a36Sopenharmony_ci return; 56062306a36Sopenharmony_ci } 56162306a36Sopenharmony_ci dm_io_set_flag(io, DM_IO_ACCOUNTED); 56262306a36Sopenharmony_ci spin_unlock_irqrestore(&io->lock, flags); 56362306a36Sopenharmony_ci } 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_ci __dm_start_io_acct(io); 56662306a36Sopenharmony_ci} 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_cistatic void dm_end_io_acct(struct dm_io *io) 56962306a36Sopenharmony_ci{ 57062306a36Sopenharmony_ci dm_io_acct(io, true); 57162306a36Sopenharmony_ci} 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_cistatic struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) 57462306a36Sopenharmony_ci{ 57562306a36Sopenharmony_ci struct dm_io *io; 57662306a36Sopenharmony_ci struct dm_target_io *tio; 57762306a36Sopenharmony_ci struct bio *clone; 57862306a36Sopenharmony_ci 57962306a36Sopenharmony_ci clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs); 58062306a36Sopenharmony_ci tio = clone_to_tio(clone); 58162306a36Sopenharmony_ci tio->flags = 0; 58262306a36Sopenharmony_ci dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO); 58362306a36Sopenharmony_ci tio->io = NULL; 58462306a36Sopenharmony_ci 58562306a36Sopenharmony_ci io = container_of(tio, struct dm_io, tio); 58662306a36Sopenharmony_ci io->magic = DM_IO_MAGIC; 58762306a36Sopenharmony_ci io->status = BLK_STS_OK; 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci /* one ref is for submission, the other is for 
completion */ 59062306a36Sopenharmony_ci atomic_set(&io->io_count, 2); 59162306a36Sopenharmony_ci this_cpu_inc(*md->pending_io); 59262306a36Sopenharmony_ci io->orig_bio = bio; 59362306a36Sopenharmony_ci io->md = md; 59462306a36Sopenharmony_ci spin_lock_init(&io->lock); 59562306a36Sopenharmony_ci io->start_time = jiffies; 59662306a36Sopenharmony_ci io->flags = 0; 59762306a36Sopenharmony_ci if (blk_queue_io_stat(md->queue)) 59862306a36Sopenharmony_ci dm_io_set_flag(io, DM_IO_BLK_STAT); 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_ci if (static_branch_unlikely(&stats_enabled) && 60162306a36Sopenharmony_ci unlikely(dm_stats_used(&md->stats))) 60262306a36Sopenharmony_ci dm_stats_record_start(&md->stats, &io->stats_aux); 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_ci return io; 60562306a36Sopenharmony_ci} 60662306a36Sopenharmony_ci 60762306a36Sopenharmony_cistatic void free_io(struct dm_io *io) 60862306a36Sopenharmony_ci{ 60962306a36Sopenharmony_ci bio_put(&io->tio.clone); 61062306a36Sopenharmony_ci} 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_cistatic struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, 61362306a36Sopenharmony_ci unsigned int target_bio_nr, unsigned int *len, gfp_t gfp_mask) 61462306a36Sopenharmony_ci{ 61562306a36Sopenharmony_ci struct mapped_device *md = ci->io->md; 61662306a36Sopenharmony_ci struct dm_target_io *tio; 61762306a36Sopenharmony_ci struct bio *clone; 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_ci if (!ci->io->tio.io) { 62062306a36Sopenharmony_ci /* the dm_target_io embedded in ci->io is available */ 62162306a36Sopenharmony_ci tio = &ci->io->tio; 62262306a36Sopenharmony_ci /* alloc_io() already initialized embedded clone */ 62362306a36Sopenharmony_ci clone = &tio->clone; 62462306a36Sopenharmony_ci } else { 62562306a36Sopenharmony_ci clone = bio_alloc_clone(NULL, ci->bio, gfp_mask, 62662306a36Sopenharmony_ci &md->mempools->bs); 62762306a36Sopenharmony_ci if (!clone) 62862306a36Sopenharmony_ci return 
NULL; 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci /* REQ_DM_POLL_LIST shouldn't be inherited */ 63162306a36Sopenharmony_ci clone->bi_opf &= ~REQ_DM_POLL_LIST; 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_ci tio = clone_to_tio(clone); 63462306a36Sopenharmony_ci tio->flags = 0; /* also clears DM_TIO_INSIDE_DM_IO */ 63562306a36Sopenharmony_ci } 63662306a36Sopenharmony_ci 63762306a36Sopenharmony_ci tio->magic = DM_TIO_MAGIC; 63862306a36Sopenharmony_ci tio->io = ci->io; 63962306a36Sopenharmony_ci tio->ti = ti; 64062306a36Sopenharmony_ci tio->target_bio_nr = target_bio_nr; 64162306a36Sopenharmony_ci tio->len_ptr = len; 64262306a36Sopenharmony_ci tio->old_sector = 0; 64362306a36Sopenharmony_ci 64462306a36Sopenharmony_ci /* Set default bdev, but target must bio_set_dev() before issuing IO */ 64562306a36Sopenharmony_ci clone->bi_bdev = md->disk->part0; 64662306a36Sopenharmony_ci if (unlikely(ti->needs_bio_set_dev)) 64762306a36Sopenharmony_ci bio_set_dev(clone, md->disk->part0); 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_ci if (len) { 65062306a36Sopenharmony_ci clone->bi_iter.bi_size = to_bytes(*len); 65162306a36Sopenharmony_ci if (bio_integrity(clone)) 65262306a36Sopenharmony_ci bio_integrity_trim(clone); 65362306a36Sopenharmony_ci } 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_ci return clone; 65662306a36Sopenharmony_ci} 65762306a36Sopenharmony_ci 65862306a36Sopenharmony_cistatic void free_tio(struct bio *clone) 65962306a36Sopenharmony_ci{ 66062306a36Sopenharmony_ci if (dm_tio_flagged(clone_to_tio(clone), DM_TIO_INSIDE_DM_IO)) 66162306a36Sopenharmony_ci return; 66262306a36Sopenharmony_ci bio_put(clone); 66362306a36Sopenharmony_ci} 66462306a36Sopenharmony_ci 66562306a36Sopenharmony_ci/* 66662306a36Sopenharmony_ci * Add the bio to the list of deferred io. 
66762306a36Sopenharmony_ci */ 66862306a36Sopenharmony_cistatic void queue_io(struct mapped_device *md, struct bio *bio) 66962306a36Sopenharmony_ci{ 67062306a36Sopenharmony_ci unsigned long flags; 67162306a36Sopenharmony_ci 67262306a36Sopenharmony_ci spin_lock_irqsave(&md->deferred_lock, flags); 67362306a36Sopenharmony_ci bio_list_add(&md->deferred, bio); 67462306a36Sopenharmony_ci spin_unlock_irqrestore(&md->deferred_lock, flags); 67562306a36Sopenharmony_ci queue_work(md->wq, &md->work); 67662306a36Sopenharmony_ci} 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci/* 67962306a36Sopenharmony_ci * Everyone (including functions in this file), should use this 68062306a36Sopenharmony_ci * function to access the md->map field, and make sure they call 68162306a36Sopenharmony_ci * dm_put_live_table() when finished. 68262306a36Sopenharmony_ci */ 68362306a36Sopenharmony_cistruct dm_table *dm_get_live_table(struct mapped_device *md, 68462306a36Sopenharmony_ci int *srcu_idx) __acquires(md->io_barrier) 68562306a36Sopenharmony_ci{ 68662306a36Sopenharmony_ci *srcu_idx = srcu_read_lock(&md->io_barrier); 68762306a36Sopenharmony_ci 68862306a36Sopenharmony_ci return srcu_dereference(md->map, &md->io_barrier); 68962306a36Sopenharmony_ci} 69062306a36Sopenharmony_ci 69162306a36Sopenharmony_civoid dm_put_live_table(struct mapped_device *md, 69262306a36Sopenharmony_ci int srcu_idx) __releases(md->io_barrier) 69362306a36Sopenharmony_ci{ 69462306a36Sopenharmony_ci srcu_read_unlock(&md->io_barrier, srcu_idx); 69562306a36Sopenharmony_ci} 69662306a36Sopenharmony_ci 69762306a36Sopenharmony_civoid dm_sync_table(struct mapped_device *md) 69862306a36Sopenharmony_ci{ 69962306a36Sopenharmony_ci synchronize_srcu(&md->io_barrier); 70062306a36Sopenharmony_ci synchronize_rcu_expedited(); 70162306a36Sopenharmony_ci} 70262306a36Sopenharmony_ci 70362306a36Sopenharmony_ci/* 70462306a36Sopenharmony_ci * A fast alternative to dm_get_live_table/dm_put_live_table. 
 * The caller must not block between these two functions.
 */
static struct dm_table *dm_get_live_table_fast(struct mapped_device *md) __acquires(RCU)
{
	rcu_read_lock();
	return rcu_dereference(md->map);
}

static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
{
	rcu_read_unlock();
}

/* Holder string passed to blkdev_get_by_dev() to claim underlying devices */
static char *_dm_claim_ptr = "I belong to device-mapper";

/*
 * Open a table device so we can use it as a map destination.
 *
 * Allocates a table_device, opens the block device identified by @dev,
 * links the holder relation (if the dm disk is already added) and adds
 * the entry to md->table_devices.  Returns ERR_PTR() on failure.
 */
static struct table_device *open_table_device(struct mapped_device *md,
		dev_t dev, blk_mode_t mode)
{
	struct table_device *td;
	struct block_device *bdev;
	u64 part_off;
	int r;

	td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id);
	if (!td)
		return ERR_PTR(-ENOMEM);
	refcount_set(&td->count, 1);

	bdev = blkdev_get_by_dev(dev, mode, _dm_claim_ptr, NULL);
	if (IS_ERR(bdev)) {
		r = PTR_ERR(bdev);
		goto out_free_td;
	}

	/*
	 * We can be called before the dm disk is added.  In that case we can't
	 * register the holder relation here.  It will be done once add_disk was
	 * called.
	 */
	if (md->disk->slave_dir) {
		r = bd_link_disk_holder(bdev, md->disk);
		if (r)
			goto out_blkdev_put;
	}

	td->dm_dev.mode = mode;
	td->dm_dev.bdev = bdev;
	td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev, &part_off, NULL, NULL);
	format_dev_t(td->dm_dev.name, dev);
	list_add(&td->list, &md->table_devices);
	return td;

out_blkdev_put:
	blkdev_put(bdev, _dm_claim_ptr);
out_free_td:
	kfree(td);
	return ERR_PTR(r);
}

/*
 * Close a table device that we've been using.
 */
static void close_table_device(struct table_device *td, struct mapped_device *md)
{
	if (md->disk->slave_dir)
		bd_unlink_disk_holder(td->dm_dev.bdev, md->disk);
	blkdev_put(td->dm_dev.bdev, _dm_claim_ptr);
	put_dax(td->dm_dev.dax_dev);
	list_del(&td->list);
	kfree(td);
}

/* Look up an already-open table device by dev_t and open mode */
static struct table_device *find_table_device(struct list_head *l, dev_t dev,
					      blk_mode_t mode)
{
	struct table_device *td;

	list_for_each_entry(td, l, list)
		if (td->dm_dev.bdev->bd_dev == dev && td->dm_dev.mode == mode)
			return td;

	return NULL;
}

/*
 * Take a reference on the table device for (dev, mode), opening it first
 * if it is not already open.  Callers pair with dm_put_table_device().
 */
int dm_get_table_device(struct mapped_device *md, dev_t dev, blk_mode_t mode,
			struct dm_dev **result)
{
	struct table_device *td;

	mutex_lock(&md->table_devices_lock);
	td = find_table_device(&md->table_devices, dev, mode);
	if (!td) {
		td = open_table_device(md, dev, mode);
		if (IS_ERR(td)) {
			mutex_unlock(&md->table_devices_lock);
			return PTR_ERR(td);
		}
	} else {
		refcount_inc(&td->count);
	}
	mutex_unlock(&md->table_devices_lock);

	*result = &td->dm_dev;
	return 0;
}

void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
{
	struct table_device *td = container_of(d, struct table_device, dm_dev);

	mutex_lock(&md->table_devices_lock);
	if (refcount_dec_and_test(&td->count))
		close_table_device(td, md);
	mutex_unlock(&md->table_devices_lock);
}

/*
 * Get the geometry associated with a dm device
 */
int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	*geo = md->geometry;

	return 0;
}

/*
 * Set the geometry of a device.
 */
int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
	sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;

	/* the start sector must lie within the computed device size */
	if (geo->start > sz) {
		DMERR("Start sector is beyond the geometry limits.");
		return -EINVAL;
	}

	md->geometry = *geo;

	return 0;
}

static int __noflush_suspending(struct mapped_device *md)
{
	return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}

/*
 * Queue a dm_io for requeue.  First-stage requeues are chained onto
 * md->requeue_list (drained by dm_wq_requeue_work); otherwise the
 * original bio is pushed back to the head of md->deferred.
 * Caller holds md->deferred_lock.
 */
static void dm_requeue_add_io(struct dm_io *io, bool first_stage)
{
	struct mapped_device *md = io->md;

	if (first_stage) {
		struct dm_io *next = md->requeue_list;

		md->requeue_list = io;
		io->next = next;
	} else {
		bio_list_add_head(&md->deferred, io->orig_bio);
	}
}

/* Kick the work item matching the requeue stage chosen above */
static void dm_kick_requeue(struct mapped_device *md, bool first_stage)
{
	if (first_stage)
		queue_work(md->wq, &md->requeue_work);
	else
		queue_work(md->wq, &md->work);
}

/*
 * Return true if the dm_io's original bio is requeued.
 * io->status is updated with error if requeue disallowed.
 */
static bool dm_handle_requeue(struct dm_io *io, bool first_stage)
{
	struct bio *bio = io->orig_bio;
	bool handle_requeue = (io->status == BLK_STS_DM_REQUEUE);
	bool handle_polled_eagain = ((io->status == BLK_STS_AGAIN) &&
				     (bio->bi_opf & REQ_POLLED));
	struct mapped_device *md = io->md;
	bool requeued = false;

	if (handle_requeue || handle_polled_eagain) {
		unsigned long flags;

		if (bio->bi_opf & REQ_POLLED) {
			/*
			 * Upper layer won't help us poll split bio
			 * (io->orig_bio may only reflect a subset of the
			 * pre-split original) so clear REQ_POLLED.
			 */
			bio_clear_polled(bio);
		}

		/*
		 * Target requested pushing back the I/O or
		 * polled IO hit BLK_STS_AGAIN.
		 */
		spin_lock_irqsave(&md->deferred_lock, flags);
		if ((__noflush_suspending(md) &&
		     !WARN_ON_ONCE(dm_is_zone_write(md, bio))) ||
		    handle_polled_eagain || first_stage) {
			dm_requeue_add_io(io, first_stage);
			requeued = true;
		} else {
			/*
			 * noflush suspend was interrupted or this is
			 * a write to a zoned target.
			 */
			io->status = BLK_STS_IOERR;
		}
		spin_unlock_irqrestore(&md->deferred_lock, flags);
	}

	if (requeued)
		dm_kick_requeue(md, first_stage);

	return requeued;
}

/*
 * Final completion of a dm_io: handle requeue, finish accounting,
 * drop md->pending_io and end (or reissue) the original bio.
 */
static void __dm_io_complete(struct dm_io *io, bool first_stage)
{
	struct bio *bio = io->orig_bio;
	struct mapped_device *md = io->md;
	blk_status_t io_error;
	bool requeued;

	requeued = dm_handle_requeue(io, first_stage);
	/* a first-stage requeue defers everything to dm_wq_requeue_work() */
	if (requeued && first_stage)
		return;

	io_error = io->status;
	if (dm_io_flagged(io, DM_IO_ACCOUNTED))
		dm_end_io_acct(io);
	else if (!io_error) {
		/*
		 * Must handle target that DM_MAPIO_SUBMITTED only to
		 * then bio_endio() rather than dm_submit_bio_remap()
		 */
		__dm_start_io_acct(io);
		dm_end_io_acct(io);
	}
	free_io(io);
	/* order the stores above before the pending_io decrement seen by waiters */
	smp_wmb();
	this_cpu_dec(*md->pending_io);

	/* nudge anyone waiting on suspend queue */
	if (unlikely(wq_has_sleeper(&md->wait)))
		wake_up(&md->wait);

	/* Return early if the original bio was requeued */
	if (requeued)
		return;

	if (bio_is_flush_with_data(bio)) {
		/*
		 * Preflush done for flush with data, reissue
		 * without REQ_PREFLUSH.
		 */
		bio->bi_opf &= ~REQ_PREFLUSH;
		queue_io(md, bio);
	} else {
		/* done with normal IO or empty flush */
		if (io_error)
			bio->bi_status = io_error;
		bio_endio(bio);
	}
}

/* Work function: drain md->requeue_list and complete each dm_io */
static void dm_wq_requeue_work(struct work_struct *work)
{
	struct mapped_device *md = container_of(work, struct mapped_device,
						requeue_work);
	unsigned long flags;
	struct dm_io *io;

	/* reuse deferred lock to simplify dm_handle_requeue */
	spin_lock_irqsave(&md->deferred_lock, flags);
	io = md->requeue_list;
	md->requeue_list = NULL;
	spin_unlock_irqrestore(&md->deferred_lock, flags);

	while (io) {
		struct dm_io *next = io->next;

		/* restore the clone's iterator before re-completing */
		dm_io_rewind(io, &md->disk->bio_split);

		io->next = NULL;
		__dm_io_complete(io, false);
		io = next;
		cond_resched();
	}
}

/*
 * Two staged requeue:
 *
 * 1) io->orig_bio points to the real original bio, and the part mapped to
 *    this io must be requeued, instead of other parts of the original bio.
 *
 * 2) io->orig_bio points to new cloned bio which matches the requeued dm_io.
 */
static void dm_io_complete(struct dm_io *io)
{
	bool first_requeue;

	/*
	 * Only dm_io that has been split needs two stage requeue, otherwise
	 * we may run into long bio clone chain during suspend and OOM could
	 * be triggered.
	 *
	 * Also flush data dm_io won't be marked as DM_IO_WAS_SPLIT, so they
	 * also aren't handled via the first stage requeue.
	 */
	if (dm_io_flagged(io, DM_IO_WAS_SPLIT))
		first_requeue = true;
	else
		first_requeue = false;

	__dm_io_complete(io, first_requeue);
}

/*
 * Decrements the number of outstanding ios that a bio has been
 * cloned into, completing the original io if necc.
 */
static inline void __dm_io_dec_pending(struct dm_io *io)
{
	if (atomic_dec_and_test(&io->io_count))
		dm_io_complete(io);
}

static void dm_io_set_error(struct dm_io *io, blk_status_t error)
{
	unsigned long flags;

	/* Push-back supersedes any I/O errors */
	spin_lock_irqsave(&io->lock, flags);
	if (!(io->status == BLK_STS_DM_REQUEUE &&
	      __noflush_suspending(io->md))) {
		io->status = error;
	}
	spin_unlock_irqrestore(&io->lock, flags);
}

static void dm_io_dec_pending(struct dm_io *io, blk_status_t error)
{
	if (unlikely(error))
		dm_io_set_error(io, error);

	__dm_io_dec_pending(io);
}

/*
 *
The queue_limits are only valid as long as you have a reference
 * count on 'md'. But _not_ imposing verification to avoid atomic_read(),
 */
static inline struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
{
	return &md->queue->limits;
}

void disable_discard(struct mapped_device *md)
{
	struct queue_limits *limits = dm_get_queue_limits(md);

	/* device doesn't really support DISCARD, disable it */
	limits->max_discard_sectors = 0;
}

void disable_write_zeroes(struct mapped_device *md)
{
	struct queue_limits *limits = dm_get_queue_limits(md);

	/* device doesn't really support WRITE ZEROES, disable it */
	limits->max_write_zeroes_sectors = 0;
}

/* True if this is swap IO and the target throttles swap bios */
static bool swap_bios_limit(struct dm_target *ti, struct bio *bio)
{
	return unlikely((bio->bi_opf & REQ_SWAP) != 0) && unlikely(ti->limit_swap_bios);
}

/*
 * Completion handler for every clone bio issued by DM core: demotes
 * unsupported-op errors, runs the target's end_io hook, releases the
 * swap-bio throttle and drops the parent dm_io reference.
 */
static void clone_endio(struct bio *bio)
{
	blk_status_t error = bio->bi_status;
	struct dm_target_io *tio = clone_to_tio(bio);
	struct dm_target *ti = tio->ti;
	dm_endio_fn endio = ti->type->end_io;
	struct dm_io *io = tio->io;
	struct mapped_device *md = io->md;

	if (unlikely(error == BLK_STS_TARGET)) {
		if (bio_op(bio) == REQ_OP_DISCARD &&
		    !bdev_max_discard_sectors(bio->bi_bdev))
			disable_discard(md);
		else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
			 !bdev_write_zeroes_sectors(bio->bi_bdev))
			disable_write_zeroes(md);
	}

	if (static_branch_unlikely(&zoned_enabled) &&
	    unlikely(bdev_is_zoned(bio->bi_bdev)))
		dm_zone_endio(io, bio);

	if (endio) {
		int r = endio(ti, bio, &error);

		switch (r) {
		case DM_ENDIO_REQUEUE:
			if (static_branch_unlikely(&zoned_enabled)) {
				/*
				 * Requeuing writes to a sequential zone of a zoned
				 * target will break the sequential write pattern:
				 * fail such IO.
				 */
				if (WARN_ON_ONCE(dm_is_zone_write(md, bio)))
					error = BLK_STS_IOERR;
				else
					error = BLK_STS_DM_REQUEUE;
			} else
				error = BLK_STS_DM_REQUEUE;
			fallthrough;
		case DM_ENDIO_DONE:
			break;
		case DM_ENDIO_INCOMPLETE:
			/* The target will handle the io */
			return;
		default:
			DMCRIT("unimplemented target endio return value: %d", r);
			BUG();
		}
	}

	if (static_branch_unlikely(&swap_bios_enabled) &&
	    unlikely(swap_bios_limit(ti, bio)))
		up(&md->swap_bios_semaphore);

	free_tio(bio);
	dm_io_dec_pending(io, error);
}

/*
 * Return maximum size of I/O possible at the supplied sector up to the current
 * target boundary.
 */
static inline sector_t max_io_len_target_boundary(struct dm_target *ti,
						  sector_t target_offset)
{
	return ti->len - target_offset;
}

/*
 * Compute the maximum IO length at @sector, additionally clamped by the
 * target's granularity/chunk boundary and an optional sector cap
 * (0 means "use the queue's max_sectors").
 */
static sector_t __max_io_len(struct dm_target *ti, sector_t sector,
			     unsigned int max_granularity,
			     unsigned int max_sectors)
{
	sector_t target_offset = dm_target_offset(ti, sector);
	sector_t len = max_io_len_target_boundary(ti, target_offset);

	/*
	 * Does the target need to split IO even further?
	 * - varied (per target) IO splitting is a tenet of DM; this
	 *   explains why stacked chunk_sectors based splitting via
	 *   bio_split_to_limits() isn't possible here.
	 */
	if (!max_granularity)
		return len;
	return min_t(sector_t, len,
		min(max_sectors ? : queue_max_sectors(ti->table->md->queue),
		    blk_chunk_sectors_left(target_offset, max_granularity)));
}

static inline sector_t max_io_len(struct dm_target *ti, sector_t sector)
{
	return __max_io_len(ti, sector, ti->max_io_len, 0);
}

/*
 * Set the per-target maximum IO length; the value must fit in 32 bits.
 */
int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
{
	if (len > UINT_MAX) {
		DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)",
		      (unsigned long long)len, UINT_MAX);
		ti->error = "Maximum size of target IO is too large";
		return -EINVAL;
	}

	ti->max_io_len = (uint32_t) len;

	return 0;
}
EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);

/*
 * Look up the live target covering @sector, holding the SRCU read lock.
 * Caller must call dm_put_live_table(md, *srcu_idx) even on NULL return.
 */
static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
						sector_t sector, int *srcu_idx)
	__acquires(md->io_barrier)
{
	struct dm_table *map;
	struct dm_target *ti;

	map = dm_get_live_table(md, srcu_idx);
	if (!map)
		return NULL;

	ti = dm_table_find_target(map, sector);
	if (!ti)
		return NULL;

	return ti;
}

static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
		long nr_pages, enum dax_access_mode mode, void **kaddr,
		pfn_t *pfn)
{
	struct mapped_device *md = dax_get_private(dax_dev);
	sector_t sector = pgoff * PAGE_SECTORS;
	struct dm_target *ti;
	long len, ret = -EIO;
	int srcu_idx;

	ti = dm_dax_get_live_target(md, sector, &srcu_idx);

	if (!ti)
		goto out;
	if (!ti->type->direct_access)
		goto out;
	/* clamp nr_pages so the access stays within this target */
	len = max_io_len(ti, sector) / PAGE_SECTORS;
	if (len < 1)
		goto out;
	nr_pages = min(len, nr_pages);
	ret = ti->type->direct_access(ti, pgoff, nr_pages, mode, kaddr, pfn);

 out:
	dm_put_live_table(md, srcu_idx);

	return ret;
}

static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
				  size_t nr_pages)
{
	struct mapped_device *md = dax_get_private(dax_dev);
	sector_t sector = pgoff * PAGE_SECTORS;
	struct dm_target *ti;
	int ret = -EIO;
	int srcu_idx;

	ti = dm_dax_get_live_target(md, sector, &srcu_idx);

	if (!ti)
		goto out;
	if (WARN_ON(!ti->type->dax_zero_page_range)) {
		/*
		 * ->zero_page_range() is mandatory dax operation. If we are
		 *  here, something is wrong.
		 */
		goto out;
	}
	ret = ti->type->dax_zero_page_range(ti, pgoff, nr_pages);
 out:
	dm_put_live_table(md, srcu_idx);

	return ret;
}

static size_t dm_dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
		void *addr, size_t bytes, struct iov_iter *i)
{
	struct mapped_device *md = dax_get_private(dax_dev);
	sector_t sector = pgoff * PAGE_SECTORS;
	struct dm_target *ti;
	int srcu_idx;
	long ret = 0;

	ti = dm_dax_get_live_target(md, sector, &srcu_idx);
	if (!ti || !ti->type->dax_recovery_write)
		goto out;

	ret = ti->type->dax_recovery_write(ti, pgoff, addr, bytes, i);
out:
	dm_put_live_table(md, srcu_idx);
	return ret;
}

/*
 * A target may call dm_accept_partial_bio only from the map routine.
It is 129562306a36Sopenharmony_ci * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_* zone management 129662306a36Sopenharmony_ci * operations, REQ_OP_ZONE_APPEND (zone append writes) and any bio serviced by 129762306a36Sopenharmony_ci * __send_duplicate_bios(). 129862306a36Sopenharmony_ci * 129962306a36Sopenharmony_ci * dm_accept_partial_bio informs the dm that the target only wants to process 130062306a36Sopenharmony_ci * additional n_sectors sectors of the bio and the rest of the data should be 130162306a36Sopenharmony_ci * sent in a next bio. 130262306a36Sopenharmony_ci * 130362306a36Sopenharmony_ci * A diagram that explains the arithmetics: 130462306a36Sopenharmony_ci * +--------------------+---------------+-------+ 130562306a36Sopenharmony_ci * | 1 | 2 | 3 | 130662306a36Sopenharmony_ci * +--------------------+---------------+-------+ 130762306a36Sopenharmony_ci * 130862306a36Sopenharmony_ci * <-------------- *tio->len_ptr ---------------> 130962306a36Sopenharmony_ci * <----- bio_sectors -----> 131062306a36Sopenharmony_ci * <-- n_sectors --> 131162306a36Sopenharmony_ci * 131262306a36Sopenharmony_ci * Region 1 was already iterated over with bio_advance or similar function. 131362306a36Sopenharmony_ci * (it may be empty if the target doesn't use bio_advance) 131462306a36Sopenharmony_ci * Region 2 is the remaining bio size that the target wants to process. 131562306a36Sopenharmony_ci * (it may be empty if region 1 is non-empty, although there is no reason 131662306a36Sopenharmony_ci * to make it empty) 131762306a36Sopenharmony_ci * The target requires that region 3 is to be sent in the next bio. 131862306a36Sopenharmony_ci * 131962306a36Sopenharmony_ci * If the target wants to receive multiple copies of the bio (via num_*bios, etc), 132062306a36Sopenharmony_ci * the partially processed part (the sum of regions 1+2) must be the same for all 132162306a36Sopenharmony_ci * copies of the bio. 
132262306a36Sopenharmony_ci */ 132362306a36Sopenharmony_civoid dm_accept_partial_bio(struct bio *bio, unsigned int n_sectors) 132462306a36Sopenharmony_ci{ 132562306a36Sopenharmony_ci struct dm_target_io *tio = clone_to_tio(bio); 132662306a36Sopenharmony_ci struct dm_io *io = tio->io; 132762306a36Sopenharmony_ci unsigned int bio_sectors = bio_sectors(bio); 132862306a36Sopenharmony_ci 132962306a36Sopenharmony_ci BUG_ON(dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO)); 133062306a36Sopenharmony_ci BUG_ON(op_is_zone_mgmt(bio_op(bio))); 133162306a36Sopenharmony_ci BUG_ON(bio_op(bio) == REQ_OP_ZONE_APPEND); 133262306a36Sopenharmony_ci BUG_ON(bio_sectors > *tio->len_ptr); 133362306a36Sopenharmony_ci BUG_ON(n_sectors > bio_sectors); 133462306a36Sopenharmony_ci 133562306a36Sopenharmony_ci *tio->len_ptr -= bio_sectors - n_sectors; 133662306a36Sopenharmony_ci bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT; 133762306a36Sopenharmony_ci 133862306a36Sopenharmony_ci /* 133962306a36Sopenharmony_ci * __split_and_process_bio() may have already saved mapped part 134062306a36Sopenharmony_ci * for accounting but it is being reduced so update accordingly. 134162306a36Sopenharmony_ci */ 134262306a36Sopenharmony_ci dm_io_set_flag(io, DM_IO_WAS_SPLIT); 134362306a36Sopenharmony_ci io->sectors = n_sectors; 134462306a36Sopenharmony_ci io->sector_offset = bio_sectors(io->orig_bio); 134562306a36Sopenharmony_ci} 134662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_accept_partial_bio); 134762306a36Sopenharmony_ci 134862306a36Sopenharmony_ci/* 134962306a36Sopenharmony_ci * @clone: clone bio that DM core passed to target's .map function 135062306a36Sopenharmony_ci * @tgt_clone: clone of @clone bio that target needs submitted 135162306a36Sopenharmony_ci * 135262306a36Sopenharmony_ci * Targets should use this interface to submit bios they take 135362306a36Sopenharmony_ci * ownership of when returning DM_MAPIO_SUBMITTED. 
 *
 * Target should also enable ti->accounts_remapped_io
 */
void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone)
{
	struct dm_target_io *tio = clone_to_tio(clone);
	struct dm_io *io = tio->io;

	/* establish bio that will get submitted */
	if (!tgt_clone)
		tgt_clone = clone;

	/*
	 * Account io->orig_bio to DM dev on behalf of target
	 * that took ownership of IO with DM_MAPIO_SUBMITTED.
	 */
	dm_start_io_acct(io, clone);

	trace_block_bio_remap(tgt_clone, disk_devt(io->md->disk),
			      tio->old_sector);
	submit_bio_noacct(tgt_clone);
}
EXPORT_SYMBOL_GPL(dm_submit_bio_remap);

/*
 * Resize md->swap_bios_semaphore to the new limit @latch, transferring
 * tokens one at a time under md->swap_bios_lock.
 */
static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch)
{
	mutex_lock(&md->swap_bios_lock);
	while (latch < md->swap_bios) {
		cond_resched();
		down(&md->swap_bios_semaphore);
		md->swap_bios--;
	}
	while (latch > md->swap_bios) {
		cond_resched();
		up(&md->swap_bios_semaphore);
		md->swap_bios++;
	}
	mutex_unlock(&md->swap_bios_lock);
}

/*
 * Hand @clone to its target's ->map method and act on the result
 * (submit, account, free, or fail/requeue the owning dm_io).
 */
static void __map_bio(struct bio *clone)
{
	struct dm_target_io *tio = clone_to_tio(clone);
	struct dm_target *ti = tio->ti;
	struct dm_io *io = tio->io;
	struct mapped_device *md = io->md;
	int r;

	clone->bi_end_io = clone_endio;

	/*
	 * Map the clone.
	 */
	tio->old_sector = clone->bi_iter.bi_sector;

	if (static_branch_unlikely(&swap_bios_enabled) &&
	    unlikely(swap_bios_limit(ti, clone))) {
		int latch = get_swap_bios();

		if (unlikely(latch != md->swap_bios))
			__set_swap_bios_limit(md, latch);
		down(&md->swap_bios_semaphore);
	}

	if (static_branch_unlikely(&zoned_enabled)) {
		/*
		 * Check if the IO needs a special mapping due to zone append
		 * emulation on zoned target. In this case, dm_zone_map_bio()
		 * calls the target map operation.
		 */
		if (unlikely(dm_emulate_zone_append(md)))
			r = dm_zone_map_bio(tio);
		else
			r = ti->type->map(ti, clone);
	} else
		r = ti->type->map(ti, clone);

	switch (r) {
	case DM_MAPIO_SUBMITTED:
		/* target has assumed ownership of this io */
		if (!ti->accounts_remapped_io)
			dm_start_io_acct(io, clone);
		break;
	case DM_MAPIO_REMAPPED:
		dm_submit_bio_remap(clone, NULL);
		break;
	case DM_MAPIO_KILL:
	case DM_MAPIO_REQUEUE:
		/* Release the swap-bios token taken above before freeing the clone. */
		if (static_branch_unlikely(&swap_bios_enabled) &&
		    unlikely(swap_bios_limit(ti, clone)))
			up(&md->swap_bios_semaphore);
		free_tio(clone);
		if (r == DM_MAPIO_KILL)
			dm_io_dec_pending(io, BLK_STS_IOERR);
		else
			dm_io_dec_pending(io, BLK_STS_DM_REQUEUE);
		break;
	default:
		DMCRIT("unimplemented target map return value: %d", r);
		BUG();
	}
}

/*
 * If the io will be split (mapped part @len is less than the remaining
 * sector count), record the mapped part for accounting.
 */
static void setup_split_accounting(struct clone_info *ci, unsigned int len)
{
	struct dm_io *io = ci->io;

	if (ci->sector_count > len) {
		/*
		 * Split needed, save the mapped part for accounting.
		 * NOTE: dm_accept_partial_bio() will update accordingly.
		 */
		dm_io_set_flag(io, DM_IO_WAS_SPLIT);
		io->sectors = len;
		io->sector_offset = bio_sectors(ci->bio);
	}
}

/*
 * Allocate @num_bios clone tios onto @blist: first pass is opportunistic
 * (GFP_NOWAIT); on any failure all clones are freed and a second pass
 * retries with GFP_NOIO under md->table_devices_lock.
 */
static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
				struct dm_target *ti, unsigned int num_bios,
				unsigned *len)
{
	struct bio *bio;
	int try;

	for (try = 0; try < 2; try++) {
		int bio_nr;

		if (try)
			mutex_lock(&ci->io->md->table_devices_lock);
		for (bio_nr = 0; bio_nr < num_bios; bio_nr++) {
			bio = alloc_tio(ci, ti, bio_nr, len,
					try ? GFP_NOIO : GFP_NOWAIT);
			if (!bio)
				break;

			bio_list_add(blist, bio);
		}
		if (try)
			mutex_unlock(&ci->io->md->table_devices_lock);
		if (bio_nr == num_bios)
			return;

		/* Partial allocation: release what we got and maybe retry. */
		while ((bio = bio_list_pop(blist)))
			free_tio(bio);
	}
}

/*
 * Send @num_bios duplicate clones to @ti and return the number of clones
 * actually allocated and mapped.
 */
static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
				 unsigned int num_bios, unsigned int *len)
{
	struct bio_list blist = BIO_EMPTY_LIST;
	struct bio *clone;
	unsigned int ret = 0;

	switch (num_bios) {
	case 0:
		break;
	case 1:
		if (len)
			setup_split_accounting(ci, *len);
		clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
		__map_bio(clone);
		ret = 1;
		break;
	default:
		if (len)
			setup_split_accounting(ci, *len);
		/* dm_accept_partial_bio() is not supported with shared tio->len_ptr */
		alloc_multiple_bios(&blist, ci, ti, num_bios, len);
		while ((clone = bio_list_pop(&blist))) {
			dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO);
			__map_bio(clone);
			ret += 1;
		}
		break;
	}

	return ret;
}

/*
 * Service a REQ_PREFLUSH bio by sending an empty flush clone to every
 * target of the live table, adjusting io_count for clones not sent.
 */
static void __send_empty_flush(struct clone_info *ci)
{
	struct dm_table *t = ci->map;
	struct bio flush_bio;

	/*
	 * Use an on-stack bio for this, it's safe since we don't
	 * need to reference it after submit. It's just used as
	 * the basis for the clone(s).
	 */
	bio_init(&flush_bio, ci->io->md->disk->part0, NULL, 0,
		 REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC);

	ci->bio = &flush_bio;
	ci->sector_count = 0;
	ci->io->tio.clone.bi_iter.bi_size = 0;

	for (unsigned int i = 0; i < t->num_targets; i++) {
		unsigned int bios;
		struct dm_target *ti = dm_table_get_target(t, i);

		/* Reserve a reference per expected clone, then give back the unsent ones. */
		atomic_add(ti->num_flush_bios, &ci->io->io_count);
		bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
		atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
	}

	/*
	 * alloc_io() takes one extra reference for submission, so the
	 * reference won't reach 0 without the following subtraction
	 */
	atomic_sub(1, &ci->io->io_count);

	bio_uninit(ci->bio);
}

/*
 * Service an abnormal (discard/secure-erase/write-zeroes) extent by sending
 * @num_bios duplicate clones covering the next chunk, then advance ci.
 */
static void __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
					unsigned int num_bios,
					unsigned int max_granularity,
					unsigned int max_sectors)
{
	unsigned int len, bios;

	len = min_t(sector_t, ci->sector_count,
		    __max_io_len(ti, ci->sector, max_granularity, max_sectors));

	atomic_add(num_bios, &ci->io->io_count);
	bios = __send_duplicate_bios(ci, ti, num_bios, &len);
	/*
	 * alloc_io() takes one extra reference for submission, so the
	 * reference won't reach 0 without the following (+1) subtraction
	 */
	atomic_sub(num_bios - bios + 1, &ci->io->io_count);

	ci->sector += len;
	ci->sector_count -= len;
}

/*
 * An "abnormal" bio is a discard, secure erase or write-zeroes operation;
 * these take the changing-extent-only path instead of normal splitting.
 */
static bool is_abnormal_io(struct bio *bio)
{
	enum req_op op = bio_op(bio);

	if (op != REQ_OP_READ && op != REQ_OP_WRITE && op != REQ_OP_FLUSH) {
		switch (op) {
		case REQ_OP_DISCARD:
		case REQ_OP_SECURE_ERASE:
		case REQ_OP_WRITE_ZEROES:
			return true;
		default:
			break;
		}
	}

	return false;
}

/*
 * Dispatch an abnormal bio to @ti, picking the per-op clone count and
 * limits. Returns BLK_STS_NOTSUPP when the target sends no such bios.
 */
static blk_status_t __process_abnormal_io(struct clone_info *ci,
					  struct dm_target *ti)
{
	unsigned int num_bios = 0;
	unsigned int max_granularity = 0;
	unsigned int max_sectors = 0;
	struct queue_limits *limits = dm_get_queue_limits(ti->table->md);

	switch (bio_op(ci->bio)) {
	case REQ_OP_DISCARD:
		num_bios = ti->num_discard_bios;
		max_sectors = limits->max_discard_sectors;
		if (ti->max_discard_granularity)
			max_granularity = max_sectors;
		break;
	case REQ_OP_SECURE_ERASE:
		num_bios = ti->num_secure_erase_bios;
		max_sectors = limits->max_secure_erase_sectors;
		if (ti->max_secure_erase_granularity)
			max_granularity = max_sectors;
		break;
	case REQ_OP_WRITE_ZEROES:
		num_bios = ti->num_write_zeroes_bios;
		max_sectors = limits->max_write_zeroes_sectors;
		if (ti->max_write_zeroes_granularity)
			max_granularity = max_sectors;
		break;
	default:
		break;
	}

	/*
	 * Even though the device advertised support for this type of
	 * request, that does not mean every target supports it, and
	 * reconfiguration might also have changed that since the
	 * check was performed.
	 */
	if (unlikely(!num_bios))
		return BLK_STS_NOTSUPP;

	__send_changing_extent_only(ci, ti, num_bios,
				    max_granularity, max_sectors);
	return BLK_STS_OK;
}

/*
 * Reuse ->bi_private as dm_io list head for storing all dm_io instances
 * associated with this bio, and this bio's bi_private needs to be
 * stored in dm_io->data before the reuse.
 *
 * bio->bi_private is owned by fs or upper layer, so block layer won't
 * touch it after splitting. Meantime it won't be changed by anyone after
 * bio is submitted. So this reuse is safe.
 */
static inline struct dm_io **dm_poll_list_head(struct bio *bio)
{
	return (struct dm_io **)&bio->bi_private;
}

/*
 * Push @io onto the dm_io poll list that lives in bio->bi_private
 * (see comment above).  The first push saves the original bi_private
 * in io->data and marks the bio pollable.
 */
static void dm_queue_poll_io(struct bio *bio, struct dm_io *io)
{
	struct dm_io **head = dm_poll_list_head(bio);

	if (!(bio->bi_opf & REQ_DM_POLL_LIST)) {
		bio->bi_opf |= REQ_DM_POLL_LIST;
		/*
		 * Save .bi_private into dm_io, so that we can reuse
		 * .bi_private as dm_io list head for storing dm_io list
		 */
		io->data = bio->bi_private;

		/* tell block layer to poll for completion */
		bio->bi_cookie = ~BLK_QC_T_NONE;

		io->next = NULL;
	} else {
		/*
		 * bio recursed due to split, reuse original poll list,
		 * and save bio->bi_private too.
		 */
		io->data = (*head)->data;
		io->next = *head;
	}

	*head = io;
}

/*
 * Select the correct strategy for processing a non-flush bio.
 */
static blk_status_t __split_and_process_bio(struct clone_info *ci)
{
	struct bio *clone;
	struct dm_target *ti;
	unsigned int len;

	ti = dm_table_find_target(ci->map, ci->sector);
	if (unlikely(!ti))
		return BLK_STS_IOERR;

	if (unlikely((ci->bio->bi_opf & REQ_NOWAIT) != 0) &&
	    unlikely(!dm_target_supports_nowait(ti->type)))
		return BLK_STS_NOTSUPP;

	if (unlikely(ci->is_abnormal_io))
		return __process_abnormal_io(ci, ti);

	/*
	 * Only support bio polling for normal IO, and the target io is
	 * exactly inside the dm_io instance (verified in dm_poll_dm_io)
	 */
	ci->submit_as_polled = !!(ci->bio->bi_opf & REQ_POLLED);

	/* Map as much as the target allows from the current position. */
	len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count);
	setup_split_accounting(ci, len);
	clone = alloc_tio(ci, ti, 0, &len, GFP_NOIO);
	__map_bio(clone);

	ci->sector += len;
	ci->sector_count -= len;

	return BLK_STS_OK;
}

/*
 * Initialise the per-submission clone_info state for @bio.
 */
static void init_clone_info(struct clone_info *ci, struct mapped_device *md,
			    struct dm_table *map, struct bio *bio, bool is_abnormal)
{
	ci->map = map;
	ci->io = alloc_io(md, bio);
	ci->bio = bio;
	ci->is_abnormal_io = is_abnormal;
	ci->submit_as_polled = false;
	ci->sector = bio->bi_iter.bi_sector;
	ci->sector_count = bio_sectors(bio);

	/* Shouldn't happen but sector_count was being set to 0 so... */
	if (static_branch_unlikely(&zoned_enabled) &&
	    WARN_ON_ONCE(op_is_zone_mgmt(bio_op(bio)) && ci->sector_count))
		ci->sector_count = 0;
}

/*
 * Entry point to split a bio into clones and submit them to the targets.
 */
static void dm_split_and_process_bio(struct mapped_device *md,
				     struct dm_table *map, struct bio *bio)
{
	struct clone_info ci;
	struct dm_io *io;
	blk_status_t error = BLK_STS_OK;
	bool is_abnormal;

	is_abnormal = is_abnormal_io(bio);
	if (unlikely(is_abnormal)) {
		/*
		 * Use bio_split_to_limits() for abnormal IO (e.g. discard, etc)
		 * otherwise associated queue_limits won't be imposed.
		 */
		bio = bio_split_to_limits(bio);
		if (!bio)
			return;
	}

	init_clone_info(&ci, md, map, bio, is_abnormal);
	io = ci.io;

	if (bio->bi_opf & REQ_PREFLUSH) {
		__send_empty_flush(&ci);
		/* dm_io_complete submits any data associated with flush */
		goto out;
	}

	error = __split_and_process_bio(&ci);
	if (error || !ci.sector_count)
		goto out;
	/*
	 * Remainder must be passed to submit_bio_noacct() so it gets handled
	 * *after* bios already submitted have been completely processed.
	 */
	bio_trim(bio, io->sectors, ci.sector_count);
	trace_block_split(bio, bio->bi_iter.bi_sector);
	bio_inc_remaining(bio);
	submit_bio_noacct(bio);
out:
	/*
	 * Drop the extra reference count for non-POLLED bio, and hold one
	 * reference for POLLED bio, which will be released in dm_poll_bio
	 *
	 * Add every dm_io instance into the dm_io list head which is stored
	 * in bio->bi_private, so that dm_poll_bio can poll them all.
	 */
	if (error || !ci.submit_as_polled) {
		/*
		 * In case of submission failure, the extra reference for
		 * submitting io isn't consumed yet
		 */
		if (error)
			atomic_dec(&io->io_count);
		dm_io_dec_pending(io, error);
	} else
		dm_queue_poll_io(bio, io);
}

/*
 * block_device_operations ->submit_bio: route a bio to the live table,
 * or defer/fail it while suspended or before a table is loaded.
 */
static void dm_submit_bio(struct bio *bio)
{
	struct mapped_device *md = bio->bi_bdev->bd_disk->private_data;
	int srcu_idx;
	struct dm_table *map;

	map = dm_get_live_table(md, &srcu_idx);

	/* If suspended, or map not yet available, queue this IO for later */
	if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) ||
	    unlikely(!map)) {
		if (bio->bi_opf & REQ_NOWAIT)
			bio_wouldblock_error(bio);
		else if (bio->bi_opf & REQ_RAHEAD)
			bio_io_error(bio);
		else
			queue_io(md, bio);
		goto out;
	}

	dm_split_and_process_bio(md, map, bio);
out:
	dm_put_live_table(md, srcu_idx);
}

/*
 * Poll one dm_io's clone; returns true when only the poll reference
 * remains, i.e. the mapped io has completed.
 */
static bool dm_poll_dm_io(struct dm_io *io, struct io_comp_batch *iob,
			  unsigned int flags)
{
	WARN_ON_ONCE(!dm_tio_is_normal(&io->tio));

	/* don't poll if the mapped io is done */
	if (atomic_read(&io->io_count) > 1)
		bio_poll(&io->tio.clone, iob, flags);

	/* bio_poll holds the last reference */
	return atomic_read(&io->io_count) == 1;
}

/*
 * ->poll_bio: walk the dm_io list stored in bio->bi_private, completing
 * finished ios and re-queueing the rest.  Returns 1 when all are done.
 */
static int dm_poll_bio(struct bio *bio, struct io_comp_batch *iob,
		       unsigned int flags)
{
	struct dm_io **head = dm_poll_list_head(bio);
	struct dm_io *list = *head;
	struct dm_io *tmp = NULL;
	struct dm_io *curr, *next;

	/* Only poll normal bio which was marked as REQ_DM_POLL_LIST */
	if (!(bio->bi_opf & REQ_DM_POLL_LIST))
		return 0;

	WARN_ON_ONCE(!list);

	/*
	 * Restore .bi_private before possibly completing dm_io.
	 *
	 * bio_poll() is only possible once @bio has been completely
	 * submitted via submit_bio_noacct()'s depth-first submission.
	 * So there is no dm_queue_poll_io() race associated with
	 * clearing REQ_DM_POLL_LIST here.
	 */
	bio->bi_opf &= ~REQ_DM_POLL_LIST;
	bio->bi_private = list->data;

	for (curr = list, next = curr->next; curr; curr = next, next =
			curr ? curr->next : NULL) {
		if (dm_poll_dm_io(curr, iob, flags)) {
			/*
			 * clone_endio() has already occurred, so no
			 * error handling is needed here.
			 */
			__dm_io_dec_pending(curr);
		} else {
			/* Still in flight: keep it on the rebuilt list. */
			curr->next = tmp;
			tmp = curr;
		}
	}

	/* Not done? */
	if (tmp) {
		bio->bi_opf |= REQ_DM_POLL_LIST;
		/* Reset bio->bi_private to dm_io list head */
		*head = tmp;
		return 0;
	}
	return 1;
}

/*
 *---------------------------------------------------------------
 * An IDR is used to keep track of allocated minor numbers.
 *---------------------------------------------------------------
 */

/* Return @minor to the IDR under _minor_lock. */
static void free_minor(int minor)
{
	spin_lock(&_minor_lock);
	idr_remove(&_minor_idr, minor);
	spin_unlock(&_minor_lock);
}

/*
 * See if the device with a specific minor # is free.
 */
static int specific_minor(int minor)
{
	int r;

	if (minor >= (1 << MINORBITS))
		return -EINVAL;

	idr_preload(GFP_KERNEL);
	spin_lock(&_minor_lock);

	r = idr_alloc(&_minor_idr, MINOR_ALLOCED, minor, minor + 1, GFP_NOWAIT);

	spin_unlock(&_minor_lock);
	idr_preload_end();
	if (r < 0)
		/* -ENOSPC from idr_alloc means the minor is already taken. */
		return r == -ENOSPC ? -EBUSY : r;
	return 0;
}

/* Allocate the lowest available minor number and store it in *minor. */
static int next_free_minor(int *minor)
{
	int r;

	idr_preload(GFP_KERNEL);
	spin_lock(&_minor_lock);

	r = idr_alloc(&_minor_idr, MINOR_ALLOCED, 0, 1 << MINORBITS, GFP_NOWAIT);

	spin_unlock(&_minor_lock);
	idr_preload_end();
	if (r < 0)
		return r;
	*minor = r;
	return 0;
}

static const struct block_device_operations dm_blk_dops;
static const struct block_device_operations dm_rq_blk_dops;
static const struct dax_operations dm_dax_ops;

static void dm_wq_work(struct work_struct *work);

#ifdef CONFIG_BLK_INLINE_ENCRYPTION
static void dm_queue_destroy_crypto_profile(struct request_queue *q)
{
	dm_destroy_crypto_profile(q->crypto_profile);
}

#else /* CONFIG_BLK_INLINE_ENCRYPTION */

/* No-op when inline encryption is not configured. */
static inline void dm_queue_destroy_crypto_profile(struct request_queue *q)
{
}
#endif /* !CONFIG_BLK_INLINE_ENCRYPTION */

/*
 * Tear down everything a mapped_device owns: workqueue, mempools, dax
 * host, gendisk (and its sysfs/holder links), percpu counters, srcu and
 * mutexes.  Safe against fields that were never set up (NULL checks).
 */
static void cleanup_mapped_device(struct mapped_device *md)
{
	if (md->wq)
		destroy_workqueue(md->wq);
	dm_free_md_mempools(md->mempools);

	if (md->dax_dev) {
		dax_remove_host(md->disk);
		kill_dax(md->dax_dev);
		put_dax(md->dax_dev);
		md->dax_dev = NULL;
	}

	dm_cleanup_zoned_dev(md);
	if (md->disk) {
		/* Detach the disk from md before anyone can look it up. */
		spin_lock(&_minor_lock);
		md->disk->private_data = NULL;
		spin_unlock(&_minor_lock);
		if (dm_get_md_type(md) != DM_TYPE_NONE) {
			struct table_device *td;

			dm_sysfs_exit(md);
			list_for_each_entry(td, &md->table_devices, list) {
				bd_unlink_disk_holder(td->dm_dev.bdev,
						      md->disk);
			}

			/*
			 * Hold lock to make sure del_gendisk() won't concurrent
			 * with open/close_table_device().
			 */
			mutex_lock(&md->table_devices_lock);
			del_gendisk(md->disk);
			mutex_unlock(&md->table_devices_lock);
		}
		dm_queue_destroy_crypto_profile(md->queue);
		put_disk(md->disk);
	}

	if (md->pending_io) {
		free_percpu(md->pending_io);
		md->pending_io = NULL;
	}

	cleanup_srcu_struct(&md->io_barrier);

	mutex_destroy(&md->suspend_lock);
	mutex_destroy(&md->type_lock);
	mutex_destroy(&md->table_devices_lock);
	mutex_destroy(&md->swap_bios_lock);

	dm_mq_cleanup_mapped_device(md);
}

/*
 * Allocate and initialise a blank device with a given minor.
 */
static struct mapped_device *alloc_dev(int minor)
{
	int r, numa_node_id = dm_get_numa_node();
	struct mapped_device *md;
	void *old_md;

	md = kvzalloc_node(sizeof(*md), GFP_KERNEL, numa_node_id);
	if (!md) {
		DMERR("unable to allocate device, out of memory.");
		return NULL;
	}

	/* Pin this module while any mapped_device exists. */
	if (!try_module_get(THIS_MODULE))
		goto bad_module_get;

	/* get a minor number for the dev */
	if (minor == DM_ANY_MINOR)
		r = next_free_minor(&minor);
	else
		r = specific_minor(minor);
	if (r < 0)
		goto bad_minor;

	r = init_srcu_struct(&md->io_barrier);
	if (r < 0)
		goto bad_io_barrier;

	md->numa_node_id = numa_node_id;
	md->init_tio_pdu = false;
	md->type = DM_TYPE_NONE;
	mutex_init(&md->suspend_lock);
	mutex_init(&md->type_lock);
	mutex_init(&md->table_devices_lock);
	spin_lock_init(&md->deferred_lock);
	/* The caller owns the initial reference; dropped via dm_put(). */
	atomic_set(&md->holders, 1);
	atomic_set(&md->open_count, 0);
	atomic_set(&md->event_nr, 0);
	atomic_set(&md->uevent_seq, 0);
	INIT_LIST_HEAD(&md->uevent_list);
	INIT_LIST_HEAD(&md->table_devices);
	spin_lock_init(&md->uevent_lock);

	/*
	 * default to bio-based until DM table is loaded and md->type
	 * established. If request-based table is loaded: blk-mq will
	 * override accordingly.
	 */
	md->disk = blk_alloc_disk(md->numa_node_id);
	if (!md->disk)
		goto bad;
	md->queue = md->disk->queue;

	init_waitqueue_head(&md->wait);
	INIT_WORK(&md->work, dm_wq_work);
	INIT_WORK(&md->requeue_work, dm_wq_requeue_work);
	init_waitqueue_head(&md->eventq);
	init_completion(&md->kobj_holder.completion);

	md->requeue_list = NULL;
	md->swap_bios = get_swap_bios();
	sema_init(&md->swap_bios_semaphore, md->swap_bios);
	mutex_init(&md->swap_bios_lock);

	md->disk->major = _major;
	md->disk->first_minor = minor;
	md->disk->minors = 1;
	md->disk->flags |= GENHD_FL_NO_PART;
	md->disk->fops = &dm_blk_dops;
	md->disk->private_data = md;
	sprintf(md->disk->disk_name, "dm-%d", minor);

	if (IS_ENABLED(CONFIG_FS_DAX)) {
		md->dax_dev = alloc_dax(md, &dm_dax_ops);
		if (IS_ERR(md->dax_dev)) {
			/* NULL it out so cleanup_mapped_device() skips DAX teardown. */
			md->dax_dev = NULL;
			goto bad;
		}
		set_dax_nocache(md->dax_dev);
		set_dax_nomc(md->dax_dev);
		if (dax_add_host(md->dax_dev, md->disk))
			goto bad;
	}

	format_dev_t(md->name, MKDEV(_major, minor));

	md->wq = alloc_workqueue("kdmflush/%s", WQ_MEM_RECLAIM, 0, md->name);
	if (!md->wq)
		goto bad;

	md->pending_io = alloc_percpu(unsigned long);
	if (!md->pending_io)
		goto bad;

	r = dm_stats_init(&md->stats);
	if (r < 0)
		goto bad;

	/* Populate the mapping, nobody knows we exist yet */
	spin_lock(&_minor_lock);
	old_md = idr_replace(&_minor_idr, md, minor);
	spin_unlock(&_minor_lock);

	/* The slot must still hold the sentinel set by the minor allocators. */
	BUG_ON(old_md != MINOR_ALLOCED);

	return md;

bad:
	cleanup_mapped_device(md);
bad_io_barrier:
	free_minor(minor);
bad_minor:
	module_put(THIS_MODULE);
bad_module_get:
	kvfree(md);
	return NULL;
}

214462306a36Sopenharmony_cistatic void unlock_fs(struct mapped_device *md); 214562306a36Sopenharmony_ci 214662306a36Sopenharmony_cistatic void free_dev(struct mapped_device *md) 214762306a36Sopenharmony_ci{ 214862306a36Sopenharmony_ci int minor = MINOR(disk_devt(md->disk)); 214962306a36Sopenharmony_ci 215062306a36Sopenharmony_ci unlock_fs(md); 215162306a36Sopenharmony_ci 215262306a36Sopenharmony_ci cleanup_mapped_device(md); 215362306a36Sopenharmony_ci 215462306a36Sopenharmony_ci WARN_ON_ONCE(!list_empty(&md->table_devices)); 215562306a36Sopenharmony_ci dm_stats_cleanup(&md->stats); 215662306a36Sopenharmony_ci free_minor(minor); 215762306a36Sopenharmony_ci 215862306a36Sopenharmony_ci module_put(THIS_MODULE); 215962306a36Sopenharmony_ci kvfree(md); 216062306a36Sopenharmony_ci} 216162306a36Sopenharmony_ci 216262306a36Sopenharmony_ci/* 216362306a36Sopenharmony_ci * Bind a table to the device. 216462306a36Sopenharmony_ci */ 216562306a36Sopenharmony_cistatic void event_callback(void *context) 216662306a36Sopenharmony_ci{ 216762306a36Sopenharmony_ci unsigned long flags; 216862306a36Sopenharmony_ci LIST_HEAD(uevents); 216962306a36Sopenharmony_ci struct mapped_device *md = context; 217062306a36Sopenharmony_ci 217162306a36Sopenharmony_ci spin_lock_irqsave(&md->uevent_lock, flags); 217262306a36Sopenharmony_ci list_splice_init(&md->uevent_list, &uevents); 217362306a36Sopenharmony_ci spin_unlock_irqrestore(&md->uevent_lock, flags); 217462306a36Sopenharmony_ci 217562306a36Sopenharmony_ci dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); 217662306a36Sopenharmony_ci 217762306a36Sopenharmony_ci atomic_inc(&md->event_nr); 217862306a36Sopenharmony_ci wake_up(&md->eventq); 217962306a36Sopenharmony_ci dm_issue_global_event(); 218062306a36Sopenharmony_ci} 218162306a36Sopenharmony_ci 218262306a36Sopenharmony_ci/* 218362306a36Sopenharmony_ci * Returns old map, which caller must destroy. 
 */
static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
			       struct queue_limits *limits)
{
	struct dm_table *old_map;
	sector_t size;
	int ret;

	lockdep_assert_held(&md->suspend_lock);

	size = dm_table_get_size(t);

	/*
	 * Wipe any geometry if the size of the table changed.
	 */
	if (size != dm_get_size(md))
		memset(&md->geometry, 0, sizeof(md->geometry));

	set_capacity(md->disk, size);

	dm_table_event_callback(t, event_callback, md);

	if (dm_table_request_based(t)) {
		/*
		 * Leverage the fact that request-based DM targets are
		 * immutable singletons - used to optimize dm_mq_queue_rq.
		 */
		md->immutable_target = dm_table_get_immutable_target(t);

		/*
		 * There is no need to reload with request-based dm because the
		 * size of front_pad doesn't change.
		 *
		 * Note for future: If you are to reload bioset, prep-ed
		 * requests in the queue may refer to bio from the old bioset,
		 * so you must walk through the queue to unprep.
		 */
		if (!md->mempools) {
			/* Take ownership of the table's mempools. */
			md->mempools = t->mempools;
			t->mempools = NULL;
		}
	} else {
		/*
		 * The md may already have mempools that need changing.
		 * If so, reload bioset because front_pad may have changed
		 * because a different table was loaded.
		 */
		dm_free_md_mempools(md->mempools);
		md->mempools = t->mempools;
		t->mempools = NULL;
	}

	ret = dm_table_set_restrictions(t, md->queue, limits);
	if (ret) {
		old_map = ERR_PTR(ret);
		goto out;
	}

	/* Publish the new table; readers use SRCU via dm_get_live_table(). */
	old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
	rcu_assign_pointer(md->map, (void *)t);
	md->immutable_target_type = dm_table_get_immutable_target_type(t);

	/* Wait for in-flight readers of the old table before handing it back. */
	if (old_map)
		dm_sync_table(md);
out:
	return old_map;
}

/*
 * Returns unbound table for the caller to free.
225462306a36Sopenharmony_ci */ 225562306a36Sopenharmony_cistatic struct dm_table *__unbind(struct mapped_device *md) 225662306a36Sopenharmony_ci{ 225762306a36Sopenharmony_ci struct dm_table *map = rcu_dereference_protected(md->map, 1); 225862306a36Sopenharmony_ci 225962306a36Sopenharmony_ci if (!map) 226062306a36Sopenharmony_ci return NULL; 226162306a36Sopenharmony_ci 226262306a36Sopenharmony_ci dm_table_event_callback(map, NULL, NULL); 226362306a36Sopenharmony_ci RCU_INIT_POINTER(md->map, NULL); 226462306a36Sopenharmony_ci dm_sync_table(md); 226562306a36Sopenharmony_ci 226662306a36Sopenharmony_ci return map; 226762306a36Sopenharmony_ci} 226862306a36Sopenharmony_ci 226962306a36Sopenharmony_ci/* 227062306a36Sopenharmony_ci * Constructor for a new device. 227162306a36Sopenharmony_ci */ 227262306a36Sopenharmony_ciint dm_create(int minor, struct mapped_device **result) 227362306a36Sopenharmony_ci{ 227462306a36Sopenharmony_ci struct mapped_device *md; 227562306a36Sopenharmony_ci 227662306a36Sopenharmony_ci md = alloc_dev(minor); 227762306a36Sopenharmony_ci if (!md) 227862306a36Sopenharmony_ci return -ENXIO; 227962306a36Sopenharmony_ci 228062306a36Sopenharmony_ci dm_ima_reset_data(md); 228162306a36Sopenharmony_ci 228262306a36Sopenharmony_ci *result = md; 228362306a36Sopenharmony_ci return 0; 228462306a36Sopenharmony_ci} 228562306a36Sopenharmony_ci 228662306a36Sopenharmony_ci/* 228762306a36Sopenharmony_ci * Functions to manage md->type. 228862306a36Sopenharmony_ci * All are required to hold md->type_lock. 
 */
void dm_lock_md_type(struct mapped_device *md)
{
	mutex_lock(&md->type_lock);
}

void dm_unlock_md_type(struct mapped_device *md)
{
	mutex_unlock(&md->type_lock);
}

void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type)
{
	BUG_ON(!mutex_is_locked(&md->type_lock));
	md->type = type;
}

enum dm_queue_mode dm_get_md_type(struct mapped_device *md)
{
	return md->type;
}

struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
{
	return md->immutable_target_type;
}

/*
 * Setup the DM device's queue based on md's type
 */
int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
{
	enum dm_queue_mode type = dm_table_get_type(t);
	struct queue_limits limits;
	struct table_device *td;
	int r;

	switch (type) {
	case DM_TYPE_REQUEST_BASED:
		md->disk->fops = &dm_rq_blk_dops;
		r = dm_mq_init_request_queue(md, t);
		if (r) {
			DMERR("Cannot initialize queue for request-based dm mapped device");
			return r;
		}
		break;
	case DM_TYPE_BIO_BASED:
	case DM_TYPE_DAX_BIO_BASED:
		blk_queue_flag_set(QUEUE_FLAG_IO_STAT, md->queue);
		break;
	case DM_TYPE_NONE:
		/* Callers must have resolved the type before reaching here. */
		WARN_ON_ONCE(true);
		break;
	}

	r = dm_calculate_queue_limits(t, &limits);
	if (r) {
		DMERR("Cannot calculate initial queue limits");
		return r;
	}
	r = dm_table_set_restrictions(t, md->queue, &limits);
	if (r)
		return r;

	/*
	 * Hold lock to make sure add_disk() and del_gendisk() won't concurrent
	 * with open_table_device() and close_table_device().
	 */
	mutex_lock(&md->table_devices_lock);
	r = add_disk(md->disk);
	mutex_unlock(&md->table_devices_lock);
	if (r)
		return r;

	/*
	 * Register the holder relationship for devices added before the disk
	 * was live.
	 */
	list_for_each_entry(td, &md->table_devices, list) {
		r = bd_link_disk_holder(td->dm_dev.bdev, md->disk);
		if (r)
			goto out_undo_holders;
	}

	r = dm_sysfs_init(md);
	if (r)
		goto out_undo_holders;

	md->type = type;
	return 0;

out_undo_holders:
	/* Unlink only the holders registered above, in reverse order. */
	list_for_each_entry_continue_reverse(td, &md->table_devices, list)
		bd_unlink_disk_holder(td->dm_dev.bdev, md->disk);
	mutex_lock(&md->table_devices_lock);
	del_gendisk(md->disk);
	mutex_unlock(&md->table_devices_lock);
	return r;
}

/*
 * Look up a mapped_device by dev_t and take a reference on it.
 * Returns NULL if the minor is unknown, still mid-allocation
 * (MINOR_ALLOCED sentinel), or the device is being freed/deleted.
 */
struct mapped_device *dm_get_md(dev_t dev)
{
	struct mapped_device *md;
	unsigned int minor = MINOR(dev);

	if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
		return NULL;

	spin_lock(&_minor_lock);

	md = idr_find(&_minor_idr, minor);
	if (!md || md == MINOR_ALLOCED || (MINOR(disk_devt(dm_disk(md))) != minor) ||
	    test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) {
		md = NULL;
		goto out;
	}
	dm_get(md);
240662306a36Sopenharmony_ciout: 240762306a36Sopenharmony_ci spin_unlock(&_minor_lock); 240862306a36Sopenharmony_ci 240962306a36Sopenharmony_ci return md; 241062306a36Sopenharmony_ci} 241162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_get_md); 241262306a36Sopenharmony_ci 241362306a36Sopenharmony_civoid *dm_get_mdptr(struct mapped_device *md) 241462306a36Sopenharmony_ci{ 241562306a36Sopenharmony_ci return md->interface_ptr; 241662306a36Sopenharmony_ci} 241762306a36Sopenharmony_ci 241862306a36Sopenharmony_civoid dm_set_mdptr(struct mapped_device *md, void *ptr) 241962306a36Sopenharmony_ci{ 242062306a36Sopenharmony_ci md->interface_ptr = ptr; 242162306a36Sopenharmony_ci} 242262306a36Sopenharmony_ci 242362306a36Sopenharmony_civoid dm_get(struct mapped_device *md) 242462306a36Sopenharmony_ci{ 242562306a36Sopenharmony_ci atomic_inc(&md->holders); 242662306a36Sopenharmony_ci BUG_ON(test_bit(DMF_FREEING, &md->flags)); 242762306a36Sopenharmony_ci} 242862306a36Sopenharmony_ci 242962306a36Sopenharmony_ciint dm_hold(struct mapped_device *md) 243062306a36Sopenharmony_ci{ 243162306a36Sopenharmony_ci spin_lock(&_minor_lock); 243262306a36Sopenharmony_ci if (test_bit(DMF_FREEING, &md->flags)) { 243362306a36Sopenharmony_ci spin_unlock(&_minor_lock); 243462306a36Sopenharmony_ci return -EBUSY; 243562306a36Sopenharmony_ci } 243662306a36Sopenharmony_ci dm_get(md); 243762306a36Sopenharmony_ci spin_unlock(&_minor_lock); 243862306a36Sopenharmony_ci return 0; 243962306a36Sopenharmony_ci} 244062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_hold); 244162306a36Sopenharmony_ci 244262306a36Sopenharmony_ciconst char *dm_device_name(struct mapped_device *md) 244362306a36Sopenharmony_ci{ 244462306a36Sopenharmony_ci return md->name; 244562306a36Sopenharmony_ci} 244662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(dm_device_name); 244762306a36Sopenharmony_ci 244862306a36Sopenharmony_cistatic void __dm_destroy(struct mapped_device *md, bool wait) 244962306a36Sopenharmony_ci{ 245062306a36Sopenharmony_ci struct 
dm_table *map;
	int srcu_idx;

	might_sleep();

	/*
	 * Put the MINOR_ALLOCED sentinel back and mark DMF_FREEING under
	 * _minor_lock so dm_get_md()/dm_hold() can no longer obtain new
	 * references.
	 */
	spin_lock(&_minor_lock);
	idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
	set_bit(DMF_FREEING, &md->flags);
	spin_unlock(&_minor_lock);

	blk_mark_disk_dead(md->disk);

	/*
	 * Take suspend_lock so that presuspend and postsuspend methods
	 * do not race with internal suspend.
	 */
	mutex_lock(&md->suspend_lock);
	map = dm_get_live_table(md, &srcu_idx);
	if (!dm_suspended_md(md)) {
		dm_table_presuspend_targets(map);
		set_bit(DMF_SUSPENDED, &md->flags);
		set_bit(DMF_POST_SUSPENDING, &md->flags);
		dm_table_postsuspend_targets(map);
	}
	/* dm_put_live_table must be before fsleep, otherwise deadlock is possible */
	dm_put_live_table(md, srcu_idx);
	mutex_unlock(&md->suspend_lock);

	/*
	 * Rare, but there may be I/O requests still going to complete,
	 * for example.  Wait for all references to disappear.
	 * No one should increment the reference count of the mapped_device,
	 * after the mapped_device state becomes DMF_FREEING.
	 */
	if (wait)
		while (atomic_read(&md->holders))
			fsleep(1000);
	else if (atomic_read(&md->holders))
		DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
		       dm_device_name(md), atomic_read(&md->holders));

	dm_table_destroy(__unbind(md));
	free_dev(md);
}

/* Destroy, waiting for all holders to drop their references first. */
void dm_destroy(struct mapped_device *md)
{
	__dm_destroy(md, true);
}

/* Destroy without waiting; warns if references are still held. */
void dm_destroy_immediate(struct mapped_device *md)
{
	__dm_destroy(md, false);
}

void dm_put(struct mapped_device *md)
{
	atomic_dec(&md->holders);
}
EXPORT_SYMBOL_GPL(dm_put);

/*
 * Sum the per-CPU in-flight bio counters; non-zero means I/O is still
 * outstanding against this device.
 */
static bool dm_in_flight_bios(struct mapped_device *md)
{
	int cpu;
	unsigned long sum = 0;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(md->pending_io, cpu);

	return sum != 0;
}

static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int task_state)
{
	int r = 0;
	DEFINE_WAIT(wait);
252662306a36Sopenharmony_ci 252762306a36Sopenharmony_ci while (true) { 252862306a36Sopenharmony_ci prepare_to_wait(&md->wait, &wait, task_state); 252962306a36Sopenharmony_ci 253062306a36Sopenharmony_ci if (!dm_in_flight_bios(md)) 253162306a36Sopenharmony_ci break; 253262306a36Sopenharmony_ci 253362306a36Sopenharmony_ci if (signal_pending_state(task_state, current)) { 253462306a36Sopenharmony_ci r = -EINTR; 253562306a36Sopenharmony_ci break; 253662306a36Sopenharmony_ci } 253762306a36Sopenharmony_ci 253862306a36Sopenharmony_ci io_schedule(); 253962306a36Sopenharmony_ci } 254062306a36Sopenharmony_ci finish_wait(&md->wait, &wait); 254162306a36Sopenharmony_ci 254262306a36Sopenharmony_ci smp_rmb(); 254362306a36Sopenharmony_ci 254462306a36Sopenharmony_ci return r; 254562306a36Sopenharmony_ci} 254662306a36Sopenharmony_ci 254762306a36Sopenharmony_cistatic int dm_wait_for_completion(struct mapped_device *md, unsigned int task_state) 254862306a36Sopenharmony_ci{ 254962306a36Sopenharmony_ci int r = 0; 255062306a36Sopenharmony_ci 255162306a36Sopenharmony_ci if (!queue_is_mq(md->queue)) 255262306a36Sopenharmony_ci return dm_wait_for_bios_completion(md, task_state); 255362306a36Sopenharmony_ci 255462306a36Sopenharmony_ci while (true) { 255562306a36Sopenharmony_ci if (!blk_mq_queue_inflight(md->queue)) 255662306a36Sopenharmony_ci break; 255762306a36Sopenharmony_ci 255862306a36Sopenharmony_ci if (signal_pending_state(task_state, current)) { 255962306a36Sopenharmony_ci r = -EINTR; 256062306a36Sopenharmony_ci break; 256162306a36Sopenharmony_ci } 256262306a36Sopenharmony_ci 256362306a36Sopenharmony_ci fsleep(5000); 256462306a36Sopenharmony_ci } 256562306a36Sopenharmony_ci 256662306a36Sopenharmony_ci return r; 256762306a36Sopenharmony_ci} 256862306a36Sopenharmony_ci 256962306a36Sopenharmony_ci/* 257062306a36Sopenharmony_ci * Process the deferred bios 257162306a36Sopenharmony_ci */ 257262306a36Sopenharmony_cistatic void dm_wq_work(struct work_struct *work) 257362306a36Sopenharmony_ci{ 
257462306a36Sopenharmony_ci struct mapped_device *md = container_of(work, struct mapped_device, work); 257562306a36Sopenharmony_ci struct bio *bio; 257662306a36Sopenharmony_ci 257762306a36Sopenharmony_ci while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { 257862306a36Sopenharmony_ci spin_lock_irq(&md->deferred_lock); 257962306a36Sopenharmony_ci bio = bio_list_pop(&md->deferred); 258062306a36Sopenharmony_ci spin_unlock_irq(&md->deferred_lock); 258162306a36Sopenharmony_ci 258262306a36Sopenharmony_ci if (!bio) 258362306a36Sopenharmony_ci break; 258462306a36Sopenharmony_ci 258562306a36Sopenharmony_ci submit_bio_noacct(bio); 258662306a36Sopenharmony_ci cond_resched(); 258762306a36Sopenharmony_ci } 258862306a36Sopenharmony_ci} 258962306a36Sopenharmony_ci 259062306a36Sopenharmony_cistatic void dm_queue_flush(struct mapped_device *md) 259162306a36Sopenharmony_ci{ 259262306a36Sopenharmony_ci clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); 259362306a36Sopenharmony_ci smp_mb__after_atomic(); 259462306a36Sopenharmony_ci queue_work(md->wq, &md->work); 259562306a36Sopenharmony_ci} 259662306a36Sopenharmony_ci 259762306a36Sopenharmony_ci/* 259862306a36Sopenharmony_ci * Swap in a new table, returning the old one for the caller to destroy. 
 */
struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
{
	struct dm_table *live_map = NULL, *map = ERR_PTR(-EINVAL);
	struct queue_limits limits;
	int r;

	mutex_lock(&md->suspend_lock);

	/* device must be suspended */
	if (!dm_suspended_md(md))
		goto out;

	/*
	 * If the new table has no data devices, retain the existing limits.
	 * This helps multipath with queue_if_no_path if all paths disappear,
	 * then new I/O is queued based on these limits, and then some paths
	 * reappear.
	 */
	if (dm_table_has_no_data_devices(table)) {
		live_map = dm_get_live_table_fast(md);
		if (live_map)
			limits = md->queue->limits;
		dm_put_live_table_fast(md);
	}

	/* No live limits borrowed above: compute fresh limits from @table. */
	if (!live_map) {
		r = dm_calculate_queue_limits(table, &limits);
		if (r) {
			map = ERR_PTR(r);
			goto out;
		}
	}

	/* NOTE(review): __bind() presumably returns the replaced table — confirm at its definition. */
	map = __bind(md, table, &limits);
	dm_issue_global_event();

out:
	mutex_unlock(&md->suspend_lock);
	return map;
}

/*
 * Functions to lock and unlock any filesystem running on the
 * device.
 */
static int lock_fs(struct mapped_device *md)
{
	int r;

	WARN_ON(test_bit(DMF_FROZEN, &md->flags));

	/* DMF_FROZEN records that a matching unlock_fs() must thaw. */
	r = freeze_bdev(md->disk->part0);
	if (!r)
		set_bit(DMF_FROZEN, &md->flags);
	return r;
}

static void unlock_fs(struct mapped_device *md)
{
	/* Only thaw if lock_fs() actually froze the device. */
	if (!test_bit(DMF_FROZEN, &md->flags))
		return;
	thaw_bdev(md->disk->part0);
	clear_bit(DMF_FROZEN, &md->flags);
}

/*
 * @suspend_flags: DM_SUSPEND_LOCKFS_FLAG and/or DM_SUSPEND_NOFLUSH_FLAG
 * @task_state: e.g. TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE
 * @dmf_suspended_flag: DMF_SUSPENDED or DMF_SUSPENDED_INTERNALLY
 *
 * If __dm_suspend returns 0, the device is completely quiescent
 * now. There is no request-processing activity. All new requests
 * are being added to md->deferred list.
 *
 * Returns 0 on success or a negative errno; on error the presuspend
 * work is undone via dm_table_presuspend_undo_targets().
 */
static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
			unsigned int suspend_flags, unsigned int task_state,
			int dmf_suspended_flag)
{
	bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG;
	bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG;
	int r;

	lockdep_assert_held(&md->suspend_lock);

	/*
	 * DMF_NOFLUSH_SUSPENDING must be set before presuspend.
	 * This flag is cleared before dm_suspend returns.
	 */
	if (noflush)
		set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
	else
		DMDEBUG("%s: suspending with flush", dm_device_name(md));

	/*
	 * This gets reverted if there's an error later and the targets
	 * provide the .presuspend_undo hook.
	 */
	dm_table_presuspend_targets(map);

	/*
	 * Flush I/O to the device.
	 * Any I/O submitted after lock_fs() may not be flushed.
	 * noflush takes precedence over do_lockfs.
	 * (lock_fs() flushes I/Os and waits for them to complete.)
	 */
	if (!noflush && do_lockfs) {
		r = lock_fs(md);
		if (r) {
			dm_table_presuspend_undo_targets(map);
			return r;
		}
	}

	/*
	 * Here we must make sure that no processes are submitting requests
	 * to target drivers i.e. no one may be executing
	 * dm_split_and_process_bio from dm_submit_bio.
	 *
	 * To get all processes out of dm_split_and_process_bio in dm_submit_bio,
	 * we take the write lock. To prevent any process from reentering
	 * dm_split_and_process_bio from dm_submit_bio and quiesce the thread
	 * (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND and call
	 * flush_workqueue(md->wq).
	 */
	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	if (map)
		synchronize_srcu(&md->io_barrier);

	/*
	 * Stop md->queue before flushing md->wq in case request-based
	 * dm defers requests to md->wq from md->queue.
	 */
	if (dm_request_based(md))
		dm_stop_queue(md->queue);

	flush_workqueue(md->wq);

	/*
	 * At this point no more requests are entering target request routines.
	 * We call dm_wait_for_completion to wait for all existing requests
	 * to finish.
	 */
	r = dm_wait_for_completion(md, task_state);
	if (!r)
		set_bit(dmf_suspended_flag, &md->flags);

	if (noflush)
		clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
	if (map)
		synchronize_srcu(&md->io_barrier);

	/* were we interrupted ? */
	if (r < 0) {
		/* Undo the suspend: re-issue deferred I/O and restart the queue. */
		dm_queue_flush(md);

		if (dm_request_based(md))
			dm_start_queue(md->queue);

		unlock_fs(md);
		dm_table_presuspend_undo_targets(map);
		/* pushback list is already flushed, so skip flush */
	}

	return r;
}

/*
 * We need to be able to change a mapping table under a mounted
 * filesystem. For example we might want to move some data in
 * the background. Before the table can be swapped with
 * dm_bind_table, dm_suspend must be called to flush any in
 * flight bios and ensure that any further io gets deferred.
 */
/*
 * Suspend mechanism in request-based dm.
 *
 * 1. Flush all I/Os by lock_fs() if needed.
 * 2. Stop dispatching any I/O by stopping the request_queue.
 * 3. Wait for all in-flight I/Os to be completed or requeued.
 *
 * To abort suspend, start the request_queue.
 */
int dm_suspend(struct mapped_device *md, unsigned int suspend_flags)
{
	struct dm_table *map = NULL;
	int r = 0;

retry:
	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);

	/* Suspending twice is an error. */
	if (dm_suspended_md(md)) {
		r = -EINVAL;
		goto out_unlock;
	}

	if (dm_suspended_internally_md(md)) {
		/* already internally suspended, wait for internal resume */
		mutex_unlock(&md->suspend_lock);
		r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
		if (r)
			return r;
		goto retry;
	}

	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
	if (!map) {
		/* avoid deadlock with fs/namespace.c:do_mount() */
		suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG;
	}

	r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE, DMF_SUSPENDED);
	if (r)
		goto out_unlock;

	/* Let the targets know the suspend has completed. */
	set_bit(DMF_POST_SUSPENDING, &md->flags);
	dm_table_postsuspend_targets(map);
	clear_bit(DMF_POST_SUSPENDING, &md->flags);

out_unlock:
	mutex_unlock(&md->suspend_lock);
	return r;
}

/*
 * Resume helper shared by dm_resume() and __dm_internal_resume():
 * resume the targets first, then release any I/O deferred while the
 * device was suspended.
 */
static int __dm_resume(struct mapped_device *md, struct dm_table *map)
{
	if (map) {
		int r = dm_table_resume_targets(map);

		if (r)
			return r;
	}

	dm_queue_flush(md);

	/*
	 * Flushing deferred I/Os must be done after targets are resumed
	 * so that mapping of targets can work correctly.
	 * Request-based dm is queueing the deferred I/Os in its request_queue.
	 */
	if (dm_request_based(md))
		dm_start_queue(md->queue);

	unlock_fs(md);

	return 0;
}

int dm_resume(struct mapped_device *md)
{
	int r;
	struct dm_table *map = NULL;

retry:
	r = -EINVAL;
	mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING);

	/* Resuming a device that is not suspended fails with -EINVAL. */
	if (!dm_suspended_md(md))
		goto out;

	if (dm_suspended_internally_md(md)) {
		/* already internally suspended, wait for internal resume */
		mutex_unlock(&md->suspend_lock);
		r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE);
		if (r)
			return r;
		goto retry;
	}

	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
	if (!map || !dm_table_get_size(map))
		goto out;

	r = __dm_resume(md, map);
	if (r)
		goto out;

	clear_bit(DMF_SUSPENDED, &md->flags);
out:
	mutex_unlock(&md->suspend_lock);

	return r;
}

/*
 * Internal suspend/resume works like userspace-driven suspend. It waits
 * until all bios finish and prevents issuing new bios to the target drivers.
 * It may be used only from the kernel.
 */

static void __dm_internal_suspend(struct mapped_device *md, unsigned int suspend_flags)
{
	struct dm_table *map = NULL;

	lockdep_assert_held(&md->suspend_lock);

	/* Internal suspends nest; only the outermost one does real work. */
	if (md->internal_suspend_count++)
		return; /* nested internal suspend */

	if (dm_suspended_md(md)) {
		/* Already suspended by userspace: only record the nesting. */
		set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
		return; /* nest suspend */
	}

	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));

	/*
	 * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is
	 * supported. Properly supporting a TASK_INTERRUPTIBLE internal suspend
	 * would require changing .presuspend to return an error -- avoid this
	 * until there is a need for more elaborate variants of internal suspend.
	 */
	(void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE,
			    DMF_SUSPENDED_INTERNALLY);

	set_bit(DMF_POST_SUSPENDING, &md->flags);
	dm_table_postsuspend_targets(map);
	clear_bit(DMF_POST_SUSPENDING, &md->flags);
}

static void __dm_internal_resume(struct mapped_device *md)
{
	int r;
	struct dm_table *map;

	BUG_ON(!md->internal_suspend_count);

	if (--md->internal_suspend_count)
		return; /* resume from nested internal suspend */

	if (dm_suspended_md(md))
		goto done; /* resume from nested suspend */

	map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
	r = __dm_resume(md, map);
	if (r) {
		/*
		 * If a preresume method of some target failed, we are in a
		 * tricky situation. We can't return an error to the caller. We
		 * can't fake success because then the "resume" and
		 * "postsuspend" methods would not be paired correctly, and it
		 * would break various targets, for example it would cause list
		 * corruption in the "origin" target.
		 *
		 * So, we fake normal suspend here, to make sure that the
		 * "resume" and "postsuspend" methods will be paired correctly.
		 */
		DMERR("Preresume method failed: %d", r);
		set_bit(DMF_SUSPENDED, &md->flags);
	}
done:
	clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
	/* Order the flag clear before waking wait_on_bit() waiters. */
	smp_mb__after_atomic();
	wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY);
}

void dm_internal_suspend_noflush(struct mapped_device *md)
{
	mutex_lock(&md->suspend_lock);
	__dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG);
	mutex_unlock(&md->suspend_lock);
}
EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush);

void dm_internal_resume(struct mapped_device *md)
{
	mutex_lock(&md->suspend_lock);
	__dm_internal_resume(md);
	mutex_unlock(&md->suspend_lock);
}
EXPORT_SYMBOL_GPL(dm_internal_resume);

/*
 * Fast variants of internal suspend/resume hold md->suspend_lock,
 * which prevents interaction with userspace-driven suspend.
 */

void dm_internal_suspend_fast(struct mapped_device *md)
{
	/*
	 * NOTE: suspend_lock is intentionally left held on return (on both
	 * paths); it is released by the matching dm_internal_resume_fast().
	 */
	mutex_lock(&md->suspend_lock);
	if (dm_suspended_md(md) || dm_suspended_internally_md(md))
		return;

	set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
	synchronize_srcu(&md->io_barrier);
	flush_workqueue(md->wq);
	dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
}
EXPORT_SYMBOL_GPL(dm_internal_suspend_fast);

void dm_internal_resume_fast(struct mapped_device *md)
{
	/* Counterpart of dm_internal_suspend_fast(): drops suspend_lock. */
	if (dm_suspended_md(md) || dm_suspended_internally_md(md))
		goto done;

	dm_queue_flush(md);

done:
	mutex_unlock(&md->suspend_lock);
}
EXPORT_SYMBOL_GPL(dm_internal_resume_fast);

/*
 *---------------------------------------------------------------
 * Event notification.
 *---------------------------------------------------------------
 */
/*
 * Build the uevent environment (optional DM_COOKIE and RESIZE=1 entries)
 * and deliver a kobject uevent for md's gendisk.  Returns the result of
 * kobject_uevent_env().
 */
int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
		      unsigned int cookie, bool need_resize_uevent)
{
	int r;
	unsigned int noio_flag;
	char udev_cookie[DM_COOKIE_LENGTH];
	char *envp[3] = { NULL, NULL, NULL };
	char **envpp = envp;

	if (cookie) {
		snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u",
			 DM_COOKIE_ENV_VAR_NAME, cookie);
		*envpp++ = udev_cookie;
	}
	if (need_resize_uevent) {
		*envpp++ = "RESIZE=1";
	}

	/* Forbid I/O-triggering allocations while emitting the event. */
	noio_flag = memalloc_noio_save();

	r = kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp);

	memalloc_noio_restore(noio_flag);

	return r;
}

uint32_t dm_next_uevent_seq(struct mapped_device *md)
{
	return atomic_add_return(1, &md->uevent_seq);
}

uint32_t dm_get_event_nr(struct mapped_device *md)
{
	return atomic_read(&md->event_nr);
}

/* Sleep (interruptibly) until md's event counter differs from @event_nr. */
int dm_wait_event(struct mapped_device *md, int event_nr)
{
	return wait_event_interruptible(md->eventq,
			(event_nr != atomic_read(&md->event_nr)));
}

void dm_uevent_add(struct mapped_device *md, struct list_head *elist)
{
	unsigned long flags;

	spin_lock_irqsave(&md->uevent_lock, flags);
	list_add(elist, &md->uevent_list);
	spin_unlock_irqrestore(&md->uevent_lock, flags);
}

/*
 * The gendisk is only valid as long as you have a reference
 * count on 'md'.
 */
struct gendisk *dm_disk(struct mapped_device *md)
{
	return md->disk;
}
EXPORT_SYMBOL_GPL(dm_disk);

struct kobject *dm_kobject(struct mapped_device *md)
{
	return &md->kobj_holder.kobj;
}

/*
 * Map a kobject back to its mapped_device and take a reference on it.
 * Returns NULL if the device is being freed or deleted.
 */
struct mapped_device *dm_get_from_kobject(struct kobject *kobj)
{
	struct mapped_device *md;

	md = container_of(kobj, struct mapped_device, kobj_holder.kobj);

	spin_lock(&_minor_lock);
	if (test_bit(DMF_FREEING, &md->flags) || dm_deleting_md(md)) {
		md = NULL;
		goto out;
	}
	dm_get(md);
out:
	spin_unlock(&_minor_lock);

	return md;
}

int dm_suspended_md(struct mapped_device *md)
{
	return test_bit(DMF_SUSPENDED, &md->flags);
}

static int dm_post_suspending_md(struct mapped_device *md)
{
	return test_bit(DMF_POST_SUSPENDING, &md->flags);
}

int dm_suspended_internally_md(struct mapped_device *md)
{
	return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags);
}

int dm_test_deferred_remove_flag(struct mapped_device *md)
{
	return test_bit(DMF_DEFERRED_REMOVE, &md->flags);
}

int dm_suspended(struct dm_target *ti)
{
	return dm_suspended_md(ti->table->md);
}
EXPORT_SYMBOL_GPL(dm_suspended);

int dm_post_suspending(struct dm_target *ti)
{
	return dm_post_suspending_md(ti->table->md);
}
EXPORT_SYMBOL_GPL(dm_post_suspending);

int dm_noflush_suspending(struct dm_target *ti)
{
	return __noflush_suspending(ti->table->md);
}
EXPORT_SYMBOL_GPL(dm_noflush_suspending);

void dm_free_md_mempools(struct dm_md_mempools *pools)
{
	/* free(NULL)-style convenience: a NULL pools pointer is a no-op. */
	if (!pools)
		return;

	bioset_exit(&pools->bs);
	bioset_exit(&pools->io_bs);

	kfree(pools);
}

/*
 * Per-call context for the persistent-reservation (PR) helpers below;
 * passed as the opaque data pointer to the iterate_devices callbacks.
 */
struct dm_pr {
	u64 old_key;
	u64 new_key;
	u32 flags;
	bool abort;
	bool fail_early;	/* stop iterating on the first failure */
	int ret;		/* per-device status reported back to the caller */
	enum pr_type type;
	struct pr_keys *read_keys;
	struct pr_held_reservation *rsv;
};

/*
 * Look up the single target of @bdev's live table and run @fn over its
 * devices.  Returns -ENOTTY for an empty or multi-target table, -EAGAIN
 * while the device is suspended, -EINVAL if the target cannot iterate
 * devices, 0 otherwise; per-device status is delivered through pr->ret.
 */
static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn,
		      struct dm_pr *pr)
{
	struct mapped_device *md = bdev->bd_disk->private_data;
	struct dm_table *table;
	struct dm_target *ti;
	int ret = -ENOTTY, srcu_idx;

	table = dm_get_live_table(md, &srcu_idx);
	if (!table || !dm_table_get_size(table))
		goto out;

	/* We only support devices that have a single target */
	if (table->num_targets != 1)
		goto out;
	ti = dm_table_get_target(table, 0);

	if (dm_suspended_md(md)) {
		ret = -EAGAIN;
		goto out;
	}

	ret = -EINVAL;
	if (!ti->type->iterate_devices)
		goto out;

	ti->type->iterate_devices(ti, fn, pr);
	ret = 0;
out:
	dm_put_live_table(md, srcu_idx);
	return ret;
}

/*
 * For register / unregister we need to manually call out to every path.
 */
static int __dm_pr_register(struct dm_target *ti, struct dm_dev *dev,
			    sector_t start, sector_t len, void *data)
{
	struct dm_pr *pr = data;
	const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;
	int ret;

	if (!ops || !ops->pr_register) {
		pr->ret = -EOPNOTSUPP;
		return -1;
	}

	ret = ops->pr_register(dev->bdev, pr->old_key, pr->new_key, pr->flags);
	if (!ret)
		return 0;

	/* Record only the first failure; later results must not clobber it. */
	if (!pr->ret)
		pr->ret = ret;

	/* Returning -1 stops the iterate_devices walk early. */
	if (pr->fail_early)
		return -1;

	return 0;
}

static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
			  u32 flags)
{
	struct dm_pr pr = {
		.old_key = old_key,
		.new_key = new_key,
		.flags = flags,
		.fail_early = true,
		.ret = 0,
	};
	int ret;

	ret = dm_call_pr(bdev, __dm_pr_register, &pr);
	if (ret) {
		/* Didn't even get to register a path */
		return ret;
	}

	if (!pr.ret)
		return 0;
	ret = pr.ret;

	/* new_key == 0 means this already was an unregister: nothing to undo. */
	if (!new_key)
		return ret;

	/* unregister all paths if we failed to register any path */
	pr.old_key = new_key;
	pr.new_key = 0;
	pr.flags = 0;
	pr.fail_early = false;
	(void) dm_call_pr(bdev, __dm_pr_register, &pr);
	return ret;
}


static int __dm_pr_reserve(struct dm_target *ti, struct dm_dev *dev,
			   sector_t start, sector_t len, void *data)
{
	struct dm_pr *pr = data;
	const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;

	if (!ops || !ops->pr_reserve) {
		pr->ret = -EOPNOTSUPP;
		return -1;
	}

	/*
	 * A successful reserve (pr->ret == 0) stops the walk: the
	 * reservation only needs to be taken on one path.  On failure we
	 * return 0 so the next path is tried.
	 */
	pr->ret = ops->pr_reserve(dev->bdev, pr->old_key, pr->type, pr->flags);
	if (!pr->ret)
		return -1;

	return 0;
}

static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
			 u32 flags)
{
	struct dm_pr pr = {
		.old_key = key,
		.flags = flags,
		.type = type,
		.fail_early = false,
		.ret = 0,
	};
	int ret;

	ret = dm_call_pr(bdev, __dm_pr_reserve, &pr);
	if (ret)
		return ret;

	return pr.ret;
}

/*
 * If there is a non-All Registrants type of reservation, the release must be
 * sent down the holding path. For the cases where there is no reservation or
 * the path is not the holder the device will also return success, so we must
 * try each path to make sure we got the correct path.
 */
static int __dm_pr_release(struct dm_target *ti, struct dm_dev *dev,
			   sector_t start, sector_t len, void *data)
{
	struct dm_pr *pr = data;
	const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;

	if (!ops || !ops->pr_release) {
		pr->ret = -EOPNOTSUPP;
		return -1;
	}

	/* Stop the walk only on failure; on success keep trying every path. */
	pr->ret = ops->pr_release(dev->bdev, pr->old_key, pr->type);
	if (pr->ret)
		return -1;

	return 0;
}

static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
{
	struct dm_pr pr = {
		.old_key = key,
		.type = type,
		.fail_early = false,
	};
	int ret;

	ret = dm_call_pr(bdev, __dm_pr_release, &pr);
	if (ret)
		return ret;

	return pr.ret;
}

static int __dm_pr_preempt(struct dm_target *ti, struct dm_dev *dev,
			   sector_t start, sector_t len, void *data)
{
	struct dm_pr *pr = data;
	const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;

	if (!ops || !ops->pr_preempt) {
		pr->ret = -EOPNOTSUPP;
		return -1;
	}

	/* The first path that preempts successfully ends the walk. */
	pr->ret = ops->pr_preempt(dev->bdev, pr->old_key, pr->new_key, pr->type,
				  pr->abort);
	if (!pr->ret)
		return -1;

	return 0;
}

static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
			 enum pr_type type, bool abort)
{
	struct dm_pr pr = {
		.new_key = new_key,
		.old_key = old_key,
		.type = type,
		.fail_early = false,
	};
	int ret;

	ret = dm_call_pr(bdev, __dm_pr_preempt, &pr);
	if (ret)
		return ret;

	return pr.ret;
}

static int dm_pr_clear(struct block_device *bdev, u64 key)
{
	struct mapped_device *md = bdev->bd_disk->private_data;
	const struct pr_ops *ops;
	int r, srcu_idx;

	r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
	if (r < 0)
		goto out;

	/* dm_prepare_ioctl() takes &bdev and may have redirected it. */
	ops = bdev->bd_disk->fops->pr_ops;
	if (ops && ops->pr_clear)
		r = ops->pr_clear(bdev, key);
	else
		r = -EOPNOTSUPP;
out:
	dm_unprepare_ioctl(md, srcu_idx);
	return r;
}

static int __dm_pr_read_keys(struct dm_target *ti, struct dm_dev *dev,
			     sector_t start, sector_t len, void *data)
{
	struct dm_pr *pr = data;
	const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops;

	if (!ops || !ops->pr_read_keys) {
		pr->ret = -EOPNOTSUPP;
		return -1;
	}

	/* Keys from the first path that answers successfully are sufficient. */
	pr->ret = ops->pr_read_keys(dev->bdev, pr->read_keys);
	if (!pr->ret)
		return -1;

	return 0;
}

static int dm_pr_read_keys(struct block_device *bdev, struct pr_keys *keys)
{
	struct dm_pr pr = {
		.read_keys = keys,
	};
	int ret;

	ret = dm_call_pr(bdev, __dm_pr_read_keys, &pr);
	if (ret)
		return ret;

	return pr.ret;
}

static int __dm_pr_read_reservation(struct dm_target *ti, struct dm_dev *dev,
				    sector_t start, sector_t len, void *data)
{
	struct dm_pr *pr = data;
341762306a36Sopenharmony_ci const struct pr_ops *ops = dev->bdev->bd_disk->fops->pr_ops; 341862306a36Sopenharmony_ci 341962306a36Sopenharmony_ci if (!ops || !ops->pr_read_reservation) { 342062306a36Sopenharmony_ci pr->ret = -EOPNOTSUPP; 342162306a36Sopenharmony_ci return -1; 342262306a36Sopenharmony_ci } 342362306a36Sopenharmony_ci 342462306a36Sopenharmony_ci pr->ret = ops->pr_read_reservation(dev->bdev, pr->rsv); 342562306a36Sopenharmony_ci if (!pr->ret) 342662306a36Sopenharmony_ci return -1; 342762306a36Sopenharmony_ci 342862306a36Sopenharmony_ci return 0; 342962306a36Sopenharmony_ci} 343062306a36Sopenharmony_ci 343162306a36Sopenharmony_cistatic int dm_pr_read_reservation(struct block_device *bdev, 343262306a36Sopenharmony_ci struct pr_held_reservation *rsv) 343362306a36Sopenharmony_ci{ 343462306a36Sopenharmony_ci struct dm_pr pr = { 343562306a36Sopenharmony_ci .rsv = rsv, 343662306a36Sopenharmony_ci }; 343762306a36Sopenharmony_ci int ret; 343862306a36Sopenharmony_ci 343962306a36Sopenharmony_ci ret = dm_call_pr(bdev, __dm_pr_read_reservation, &pr); 344062306a36Sopenharmony_ci if (ret) 344162306a36Sopenharmony_ci return ret; 344262306a36Sopenharmony_ci 344362306a36Sopenharmony_ci return pr.ret; 344462306a36Sopenharmony_ci} 344562306a36Sopenharmony_ci 344662306a36Sopenharmony_cistatic const struct pr_ops dm_pr_ops = { 344762306a36Sopenharmony_ci .pr_register = dm_pr_register, 344862306a36Sopenharmony_ci .pr_reserve = dm_pr_reserve, 344962306a36Sopenharmony_ci .pr_release = dm_pr_release, 345062306a36Sopenharmony_ci .pr_preempt = dm_pr_preempt, 345162306a36Sopenharmony_ci .pr_clear = dm_pr_clear, 345262306a36Sopenharmony_ci .pr_read_keys = dm_pr_read_keys, 345362306a36Sopenharmony_ci .pr_read_reservation = dm_pr_read_reservation, 345462306a36Sopenharmony_ci}; 345562306a36Sopenharmony_ci 345662306a36Sopenharmony_cistatic const struct block_device_operations dm_blk_dops = { 345762306a36Sopenharmony_ci .submit_bio = dm_submit_bio, 345862306a36Sopenharmony_ci .poll_bio 
= dm_poll_bio, 345962306a36Sopenharmony_ci .open = dm_blk_open, 346062306a36Sopenharmony_ci .release = dm_blk_close, 346162306a36Sopenharmony_ci .ioctl = dm_blk_ioctl, 346262306a36Sopenharmony_ci .getgeo = dm_blk_getgeo, 346362306a36Sopenharmony_ci .report_zones = dm_blk_report_zones, 346462306a36Sopenharmony_ci .pr_ops = &dm_pr_ops, 346562306a36Sopenharmony_ci .owner = THIS_MODULE 346662306a36Sopenharmony_ci}; 346762306a36Sopenharmony_ci 346862306a36Sopenharmony_cistatic const struct block_device_operations dm_rq_blk_dops = { 346962306a36Sopenharmony_ci .open = dm_blk_open, 347062306a36Sopenharmony_ci .release = dm_blk_close, 347162306a36Sopenharmony_ci .ioctl = dm_blk_ioctl, 347262306a36Sopenharmony_ci .getgeo = dm_blk_getgeo, 347362306a36Sopenharmony_ci .pr_ops = &dm_pr_ops, 347462306a36Sopenharmony_ci .owner = THIS_MODULE 347562306a36Sopenharmony_ci}; 347662306a36Sopenharmony_ci 347762306a36Sopenharmony_cistatic const struct dax_operations dm_dax_ops = { 347862306a36Sopenharmony_ci .direct_access = dm_dax_direct_access, 347962306a36Sopenharmony_ci .zero_page_range = dm_dax_zero_page_range, 348062306a36Sopenharmony_ci .recovery_write = dm_dax_recovery_write, 348162306a36Sopenharmony_ci}; 348262306a36Sopenharmony_ci 348362306a36Sopenharmony_ci/* 348462306a36Sopenharmony_ci * module hooks 348562306a36Sopenharmony_ci */ 348662306a36Sopenharmony_cimodule_init(dm_init); 348762306a36Sopenharmony_cimodule_exit(dm_exit); 348862306a36Sopenharmony_ci 348962306a36Sopenharmony_cimodule_param(major, uint, 0); 349062306a36Sopenharmony_ciMODULE_PARM_DESC(major, "The major number of the device mapper"); 349162306a36Sopenharmony_ci 349262306a36Sopenharmony_cimodule_param(reserved_bio_based_ios, uint, 0644); 349362306a36Sopenharmony_ciMODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools"); 349462306a36Sopenharmony_ci 349562306a36Sopenharmony_cimodule_param(dm_numa_node, int, 0644); 349662306a36Sopenharmony_ciMODULE_PARM_DESC(dm_numa_node, "NUMA node for DM 
device memory allocations"); 349762306a36Sopenharmony_ci 349862306a36Sopenharmony_cimodule_param(swap_bios, int, 0644); 349962306a36Sopenharmony_ciMODULE_PARM_DESC(swap_bios, "Maximum allowed inflight swap IOs"); 350062306a36Sopenharmony_ci 350162306a36Sopenharmony_ciMODULE_DESCRIPTION(DM_NAME " driver"); 350262306a36Sopenharmony_ciMODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); 350362306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 3504