162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved. 462306a36Sopenharmony_ci */ 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#include <linux/mm.h> 762306a36Sopenharmony_ci#include <linux/bio.h> 862306a36Sopenharmony_ci#include <linux/err.h> 962306a36Sopenharmony_ci#include <linux/hash.h> 1062306a36Sopenharmony_ci#include <linux/list.h> 1162306a36Sopenharmony_ci#include <linux/log2.h> 1262306a36Sopenharmony_ci#include <linux/init.h> 1362306a36Sopenharmony_ci#include <linux/slab.h> 1462306a36Sopenharmony_ci#include <linux/wait.h> 1562306a36Sopenharmony_ci#include <linux/dm-io.h> 1662306a36Sopenharmony_ci#include <linux/mutex.h> 1762306a36Sopenharmony_ci#include <linux/atomic.h> 1862306a36Sopenharmony_ci#include <linux/bitops.h> 1962306a36Sopenharmony_ci#include <linux/blkdev.h> 2062306a36Sopenharmony_ci#include <linux/kdev_t.h> 2162306a36Sopenharmony_ci#include <linux/kernel.h> 2262306a36Sopenharmony_ci#include <linux/module.h> 2362306a36Sopenharmony_ci#include <linux/jiffies.h> 2462306a36Sopenharmony_ci#include <linux/mempool.h> 2562306a36Sopenharmony_ci#include <linux/spinlock.h> 2662306a36Sopenharmony_ci#include <linux/blk_types.h> 2762306a36Sopenharmony_ci#include <linux/dm-kcopyd.h> 2862306a36Sopenharmony_ci#include <linux/workqueue.h> 2962306a36Sopenharmony_ci#include <linux/backing-dev.h> 3062306a36Sopenharmony_ci#include <linux/device-mapper.h> 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_ci#include "dm.h" 3362306a36Sopenharmony_ci#include "dm-clone-metadata.h" 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci#define DM_MSG_PREFIX "clone" 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci/* 3862306a36Sopenharmony_ci * Minimum and maximum allowed region sizes 3962306a36Sopenharmony_ci */ 4062306a36Sopenharmony_ci#define MIN_REGION_SIZE (1 << 3) /* 4KB */ 4162306a36Sopenharmony_ci#define MAX_REGION_SIZE (1 << 21) /* 1GB */ 
#define MIN_HYDRATIONS 256 /* Size of hydration mempool */
#define DEFAULT_HYDRATION_THRESHOLD 1 /* 1 region */
#define DEFAULT_HYDRATION_BATCH_SIZE 1 /* Hydrate in batches of 1 region */

#define COMMIT_PERIOD HZ /* 1 sec */

/*
 * Hydration hash table size: 1 << HASH_TABLE_BITS
 */
#define HASH_TABLE_BITS 15

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(clone_hydration_throttle,
	"A percentage of time allocated for hydrating regions");

/* Slab cache for struct dm_clone_region_hydration */
static struct kmem_cache *_hydration_cache;

/*
 * dm-clone metadata modes.
 *
 * The numeric order matters: callers test "mode >= CM_READ_ONLY" to mean
 * "the metadata can no longer be updated".
 */
enum clone_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
	CM_FAIL,		/* all metadata I/O fails */
};

struct hash_table_bucket;

/*
 * State of one dm-clone target (presumably one instance per constructed
 * target -- confirm against the constructor, which is not part of this
 * section).
 */
struct clone {
	/* Owning target; clone_device_name() reads ti->table for log messages */
	struct dm_target *ti;

	/*
	 * Underlying devices. remap_to_source() and remap_to_dest() redirect
	 * bios to source_dev->bdev and dest_dev->bdev respectively.
	 */
	struct dm_dev *metadata_dev;
	struct dm_dev *dest_dev;
	struct dm_dev *source_dev;

	/*
	 * Region geometry. region_size is compared against bio_sectors(), so
	 * it is expressed in sectors; region_shift converts between sectors
	 * and region numbers by shifting (this presumes region_size is
	 * 1 << region_shift -- TODO confirm where the fields are set).
	 */
	unsigned long nr_regions;
	sector_t region_size;
	unsigned int region_shift;

	/*
	 * A metadata commit and the actions taken in case it fails should run
	 * as a single atomic step.
	 */
	struct mutex commit_lock;

	struct dm_clone_metadata *cmd;

	/* Region hydration hash table; allocated by hash_table_init() */
	struct hash_table_bucket *ht;

	atomic_t ios_in_flight;

	/* Wait queue woken by wakeup_hydration_waiters() */
	wait_queue_head_t hydration_stopped;

	/* Backing pool for alloc_hydration() / free_hydration() */
	mempool_t hydration_pool;

	unsigned long last_commit_jiffies;

	/*
	 * We defer incoming WRITE bios for regions that are not hydrated,
	 * until after these regions have been hydrated.
	 *
	 * Also, we defer REQ_FUA and REQ_PREFLUSH bios, until after the
	 * metadata have been committed.
	 *
	 * The spinlock below is taken around every update of the deferred
	 * bio lists that follow it.
	 */
	spinlock_t lock;
	struct bio_list deferred_bios;
	struct bio_list deferred_discard_bios;
	struct bio_list deferred_flush_bios;
	struct bio_list deferred_flush_completions;

	/* Maximum number of regions being copied during background hydration. */
	unsigned int hydration_threshold;

	/* Number of regions to batch together during background hydration. */
	unsigned int hydration_batch_size;

	/* Which region to hydrate next */
	unsigned long hydration_offset;

	atomic_t hydrations_in_flight;

	/*
	 * Save a copy of the table line rather than reconstructing it for the
	 * status.
	 */
	unsigned int nr_ctr_args;
	const char **ctr_args;

	/* wake_worker() queues 'worker' on 'wq' */
	struct workqueue_struct *wq;
	struct work_struct worker;
	struct delayed_work waker;

	struct dm_kcopyd_client *kcopyd_client;

	/* Read with READ_ONCE() in get_clone_mode(), written in __set_clone_mode() */
	enum clone_metadata_mode mode;

	/* Bitmap indexed by the DM_CLONE_* bit numbers defined below */
	unsigned long flags;
};

/*
 * dm-clone flags (bit numbers within clone->flags)
 */
#define DM_CLONE_DISCARD_PASSDOWN 0
#define DM_CLONE_HYDRATION_ENABLED 1
#define DM_CLONE_HYDRATION_SUSPENDED 2

/*---------------------------------------------------------------------------*/

/*
 * Metadata failure handling.
15162306a36Sopenharmony_ci */ 15262306a36Sopenharmony_cistatic enum clone_metadata_mode get_clone_mode(struct clone *clone) 15362306a36Sopenharmony_ci{ 15462306a36Sopenharmony_ci return READ_ONCE(clone->mode); 15562306a36Sopenharmony_ci} 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_cistatic const char *clone_device_name(struct clone *clone) 15862306a36Sopenharmony_ci{ 15962306a36Sopenharmony_ci return dm_table_device_name(clone->ti->table); 16062306a36Sopenharmony_ci} 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_cistatic void __set_clone_mode(struct clone *clone, enum clone_metadata_mode new_mode) 16362306a36Sopenharmony_ci{ 16462306a36Sopenharmony_ci static const char * const descs[] = { 16562306a36Sopenharmony_ci "read-write", 16662306a36Sopenharmony_ci "read-only", 16762306a36Sopenharmony_ci "fail" 16862306a36Sopenharmony_ci }; 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ci enum clone_metadata_mode old_mode = get_clone_mode(clone); 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ci /* Never move out of fail mode */ 17362306a36Sopenharmony_ci if (old_mode == CM_FAIL) 17462306a36Sopenharmony_ci new_mode = CM_FAIL; 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci switch (new_mode) { 17762306a36Sopenharmony_ci case CM_FAIL: 17862306a36Sopenharmony_ci case CM_READ_ONLY: 17962306a36Sopenharmony_ci dm_clone_metadata_set_read_only(clone->cmd); 18062306a36Sopenharmony_ci break; 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci case CM_WRITE: 18362306a36Sopenharmony_ci dm_clone_metadata_set_read_write(clone->cmd); 18462306a36Sopenharmony_ci break; 18562306a36Sopenharmony_ci } 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ci WRITE_ONCE(clone->mode, new_mode); 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci if (new_mode != old_mode) { 19062306a36Sopenharmony_ci dm_table_event(clone->ti->table); 19162306a36Sopenharmony_ci DMINFO("%s: Switching to %s mode", clone_device_name(clone), 19262306a36Sopenharmony_ci descs[(int)new_mode]); 
19362306a36Sopenharmony_ci } 19462306a36Sopenharmony_ci} 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_cistatic void __abort_transaction(struct clone *clone) 19762306a36Sopenharmony_ci{ 19862306a36Sopenharmony_ci const char *dev_name = clone_device_name(clone); 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci if (get_clone_mode(clone) >= CM_READ_ONLY) 20162306a36Sopenharmony_ci return; 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci DMERR("%s: Aborting current metadata transaction", dev_name); 20462306a36Sopenharmony_ci if (dm_clone_metadata_abort(clone->cmd)) { 20562306a36Sopenharmony_ci DMERR("%s: Failed to abort metadata transaction", dev_name); 20662306a36Sopenharmony_ci __set_clone_mode(clone, CM_FAIL); 20762306a36Sopenharmony_ci } 20862306a36Sopenharmony_ci} 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_cistatic void __reload_in_core_bitset(struct clone *clone) 21162306a36Sopenharmony_ci{ 21262306a36Sopenharmony_ci const char *dev_name = clone_device_name(clone); 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ci if (get_clone_mode(clone) == CM_FAIL) 21562306a36Sopenharmony_ci return; 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci /* Reload the on-disk bitset */ 21862306a36Sopenharmony_ci DMINFO("%s: Reloading on-disk bitmap", dev_name); 21962306a36Sopenharmony_ci if (dm_clone_reload_in_core_bitset(clone->cmd)) { 22062306a36Sopenharmony_ci DMERR("%s: Failed to reload on-disk bitmap", dev_name); 22162306a36Sopenharmony_ci __set_clone_mode(clone, CM_FAIL); 22262306a36Sopenharmony_ci } 22362306a36Sopenharmony_ci} 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_cistatic void __metadata_operation_failed(struct clone *clone, const char *op, int r) 22662306a36Sopenharmony_ci{ 22762306a36Sopenharmony_ci DMERR("%s: Metadata operation `%s' failed: error = %d", 22862306a36Sopenharmony_ci clone_device_name(clone), op, r); 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_ci __abort_transaction(clone); 23162306a36Sopenharmony_ci 
__set_clone_mode(clone, CM_READ_ONLY); 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci /* 23462306a36Sopenharmony_ci * dm_clone_reload_in_core_bitset() may run concurrently with either 23562306a36Sopenharmony_ci * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), but 23662306a36Sopenharmony_ci * it's safe as we have already set the metadata to read-only mode. 23762306a36Sopenharmony_ci */ 23862306a36Sopenharmony_ci __reload_in_core_bitset(clone); 23962306a36Sopenharmony_ci} 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/ 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci/* Wake up anyone waiting for region hydrations to stop */ 24462306a36Sopenharmony_cistatic inline void wakeup_hydration_waiters(struct clone *clone) 24562306a36Sopenharmony_ci{ 24662306a36Sopenharmony_ci wake_up_all(&clone->hydration_stopped); 24762306a36Sopenharmony_ci} 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_cistatic inline void wake_worker(struct clone *clone) 25062306a36Sopenharmony_ci{ 25162306a36Sopenharmony_ci queue_work(clone->wq, &clone->worker); 25262306a36Sopenharmony_ci} 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/ 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci/* 25762306a36Sopenharmony_ci * bio helper functions. 
25862306a36Sopenharmony_ci */ 25962306a36Sopenharmony_cistatic inline void remap_to_source(struct clone *clone, struct bio *bio) 26062306a36Sopenharmony_ci{ 26162306a36Sopenharmony_ci bio_set_dev(bio, clone->source_dev->bdev); 26262306a36Sopenharmony_ci} 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_cistatic inline void remap_to_dest(struct clone *clone, struct bio *bio) 26562306a36Sopenharmony_ci{ 26662306a36Sopenharmony_ci bio_set_dev(bio, clone->dest_dev->bdev); 26762306a36Sopenharmony_ci} 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_cistatic bool bio_triggers_commit(struct clone *clone, struct bio *bio) 27062306a36Sopenharmony_ci{ 27162306a36Sopenharmony_ci return op_is_flush(bio->bi_opf) && 27262306a36Sopenharmony_ci dm_clone_changed_this_transaction(clone->cmd); 27362306a36Sopenharmony_ci} 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_ci/* Get the address of the region in sectors */ 27662306a36Sopenharmony_cistatic inline sector_t region_to_sector(struct clone *clone, unsigned long region_nr) 27762306a36Sopenharmony_ci{ 27862306a36Sopenharmony_ci return ((sector_t)region_nr << clone->region_shift); 27962306a36Sopenharmony_ci} 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci/* Get the region number of the bio */ 28262306a36Sopenharmony_cistatic inline unsigned long bio_to_region(struct clone *clone, struct bio *bio) 28362306a36Sopenharmony_ci{ 28462306a36Sopenharmony_ci return (bio->bi_iter.bi_sector >> clone->region_shift); 28562306a36Sopenharmony_ci} 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci/* Get the region range covered by the bio */ 28862306a36Sopenharmony_cistatic void bio_region_range(struct clone *clone, struct bio *bio, 28962306a36Sopenharmony_ci unsigned long *rs, unsigned long *nr_regions) 29062306a36Sopenharmony_ci{ 29162306a36Sopenharmony_ci unsigned long end; 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci *rs = dm_sector_div_up(bio->bi_iter.bi_sector, clone->region_size); 29462306a36Sopenharmony_ci end = 
bio_end_sector(bio) >> clone->region_shift; 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci if (*rs >= end) 29762306a36Sopenharmony_ci *nr_regions = 0; 29862306a36Sopenharmony_ci else 29962306a36Sopenharmony_ci *nr_regions = end - *rs; 30062306a36Sopenharmony_ci} 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci/* Check whether a bio overwrites a region */ 30362306a36Sopenharmony_cistatic inline bool is_overwrite_bio(struct clone *clone, struct bio *bio) 30462306a36Sopenharmony_ci{ 30562306a36Sopenharmony_ci return (bio_data_dir(bio) == WRITE && bio_sectors(bio) == clone->region_size); 30662306a36Sopenharmony_ci} 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_cistatic void fail_bios(struct bio_list *bios, blk_status_t status) 30962306a36Sopenharmony_ci{ 31062306a36Sopenharmony_ci struct bio *bio; 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_ci while ((bio = bio_list_pop(bios))) { 31362306a36Sopenharmony_ci bio->bi_status = status; 31462306a36Sopenharmony_ci bio_endio(bio); 31562306a36Sopenharmony_ci } 31662306a36Sopenharmony_ci} 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_cistatic void submit_bios(struct bio_list *bios) 31962306a36Sopenharmony_ci{ 32062306a36Sopenharmony_ci struct bio *bio; 32162306a36Sopenharmony_ci struct blk_plug plug; 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci blk_start_plug(&plug); 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci while ((bio = bio_list_pop(bios))) 32662306a36Sopenharmony_ci submit_bio_noacct(bio); 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci blk_finish_plug(&plug); 32962306a36Sopenharmony_ci} 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_ci/* 33262306a36Sopenharmony_ci * Submit bio to the underlying device. 33362306a36Sopenharmony_ci * 33462306a36Sopenharmony_ci * If the bio triggers a commit, delay it, until after the metadata have been 33562306a36Sopenharmony_ci * committed. 
33662306a36Sopenharmony_ci * 33762306a36Sopenharmony_ci * NOTE: The bio remapping must be performed by the caller. 33862306a36Sopenharmony_ci */ 33962306a36Sopenharmony_cistatic void issue_bio(struct clone *clone, struct bio *bio) 34062306a36Sopenharmony_ci{ 34162306a36Sopenharmony_ci if (!bio_triggers_commit(clone, bio)) { 34262306a36Sopenharmony_ci submit_bio_noacct(bio); 34362306a36Sopenharmony_ci return; 34462306a36Sopenharmony_ci } 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci /* 34762306a36Sopenharmony_ci * If the metadata mode is RO or FAIL we won't be able to commit the 34862306a36Sopenharmony_ci * metadata, so we complete the bio with an error. 34962306a36Sopenharmony_ci */ 35062306a36Sopenharmony_ci if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) { 35162306a36Sopenharmony_ci bio_io_error(bio); 35262306a36Sopenharmony_ci return; 35362306a36Sopenharmony_ci } 35462306a36Sopenharmony_ci 35562306a36Sopenharmony_ci /* 35662306a36Sopenharmony_ci * Batch together any bios that trigger commits and then issue a single 35762306a36Sopenharmony_ci * commit for them in process_deferred_flush_bios(). 35862306a36Sopenharmony_ci */ 35962306a36Sopenharmony_ci spin_lock_irq(&clone->lock); 36062306a36Sopenharmony_ci bio_list_add(&clone->deferred_flush_bios, bio); 36162306a36Sopenharmony_ci spin_unlock_irq(&clone->lock); 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ci wake_worker(clone); 36462306a36Sopenharmony_ci} 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_ci/* 36762306a36Sopenharmony_ci * Remap bio to the destination device and submit it. 36862306a36Sopenharmony_ci * 36962306a36Sopenharmony_ci * If the bio triggers a commit, delay it, until after the metadata have been 37062306a36Sopenharmony_ci * committed. 
37162306a36Sopenharmony_ci */ 37262306a36Sopenharmony_cistatic void remap_and_issue(struct clone *clone, struct bio *bio) 37362306a36Sopenharmony_ci{ 37462306a36Sopenharmony_ci remap_to_dest(clone, bio); 37562306a36Sopenharmony_ci issue_bio(clone, bio); 37662306a36Sopenharmony_ci} 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci/* 37962306a36Sopenharmony_ci * Issue bios that have been deferred until after their region has finished 38062306a36Sopenharmony_ci * hydrating. 38162306a36Sopenharmony_ci * 38262306a36Sopenharmony_ci * We delegate the bio submission to the worker thread, so this is safe to call 38362306a36Sopenharmony_ci * from interrupt context. 38462306a36Sopenharmony_ci */ 38562306a36Sopenharmony_cistatic void issue_deferred_bios(struct clone *clone, struct bio_list *bios) 38662306a36Sopenharmony_ci{ 38762306a36Sopenharmony_ci struct bio *bio; 38862306a36Sopenharmony_ci unsigned long flags; 38962306a36Sopenharmony_ci struct bio_list flush_bios = BIO_EMPTY_LIST; 39062306a36Sopenharmony_ci struct bio_list normal_bios = BIO_EMPTY_LIST; 39162306a36Sopenharmony_ci 39262306a36Sopenharmony_ci if (bio_list_empty(bios)) 39362306a36Sopenharmony_ci return; 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci while ((bio = bio_list_pop(bios))) { 39662306a36Sopenharmony_ci if (bio_triggers_commit(clone, bio)) 39762306a36Sopenharmony_ci bio_list_add(&flush_bios, bio); 39862306a36Sopenharmony_ci else 39962306a36Sopenharmony_ci bio_list_add(&normal_bios, bio); 40062306a36Sopenharmony_ci } 40162306a36Sopenharmony_ci 40262306a36Sopenharmony_ci spin_lock_irqsave(&clone->lock, flags); 40362306a36Sopenharmony_ci bio_list_merge(&clone->deferred_bios, &normal_bios); 40462306a36Sopenharmony_ci bio_list_merge(&clone->deferred_flush_bios, &flush_bios); 40562306a36Sopenharmony_ci spin_unlock_irqrestore(&clone->lock, flags); 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_ci wake_worker(clone); 40862306a36Sopenharmony_ci} 40962306a36Sopenharmony_ci 
41062306a36Sopenharmony_cistatic void complete_overwrite_bio(struct clone *clone, struct bio *bio) 41162306a36Sopenharmony_ci{ 41262306a36Sopenharmony_ci unsigned long flags; 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_ci /* 41562306a36Sopenharmony_ci * If the bio has the REQ_FUA flag set we must commit the metadata 41662306a36Sopenharmony_ci * before signaling its completion. 41762306a36Sopenharmony_ci * 41862306a36Sopenharmony_ci * complete_overwrite_bio() is only called by hydration_complete(), 41962306a36Sopenharmony_ci * after having successfully updated the metadata. This means we don't 42062306a36Sopenharmony_ci * need to call dm_clone_changed_this_transaction() to check if the 42162306a36Sopenharmony_ci * metadata has changed and thus we can avoid taking the metadata spin 42262306a36Sopenharmony_ci * lock. 42362306a36Sopenharmony_ci */ 42462306a36Sopenharmony_ci if (!(bio->bi_opf & REQ_FUA)) { 42562306a36Sopenharmony_ci bio_endio(bio); 42662306a36Sopenharmony_ci return; 42762306a36Sopenharmony_ci } 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci /* 43062306a36Sopenharmony_ci * If the metadata mode is RO or FAIL we won't be able to commit the 43162306a36Sopenharmony_ci * metadata, so we complete the bio with an error. 43262306a36Sopenharmony_ci */ 43362306a36Sopenharmony_ci if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) { 43462306a36Sopenharmony_ci bio_io_error(bio); 43562306a36Sopenharmony_ci return; 43662306a36Sopenharmony_ci } 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_ci /* 43962306a36Sopenharmony_ci * Batch together any bios that trigger commits and then issue a single 44062306a36Sopenharmony_ci * commit for them in process_deferred_flush_bios(). 
44162306a36Sopenharmony_ci */ 44262306a36Sopenharmony_ci spin_lock_irqsave(&clone->lock, flags); 44362306a36Sopenharmony_ci bio_list_add(&clone->deferred_flush_completions, bio); 44462306a36Sopenharmony_ci spin_unlock_irqrestore(&clone->lock, flags); 44562306a36Sopenharmony_ci 44662306a36Sopenharmony_ci wake_worker(clone); 44762306a36Sopenharmony_ci} 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_cistatic void trim_bio(struct bio *bio, sector_t sector, unsigned int len) 45062306a36Sopenharmony_ci{ 45162306a36Sopenharmony_ci bio->bi_iter.bi_sector = sector; 45262306a36Sopenharmony_ci bio->bi_iter.bi_size = to_bytes(len); 45362306a36Sopenharmony_ci} 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_cistatic void complete_discard_bio(struct clone *clone, struct bio *bio, bool success) 45662306a36Sopenharmony_ci{ 45762306a36Sopenharmony_ci unsigned long rs, nr_regions; 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_ci /* 46062306a36Sopenharmony_ci * If the destination device supports discards, remap and trim the 46162306a36Sopenharmony_ci * discard bio and pass it down. Otherwise complete the bio 46262306a36Sopenharmony_ci * immediately. 
46362306a36Sopenharmony_ci */ 46462306a36Sopenharmony_ci if (test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags) && success) { 46562306a36Sopenharmony_ci remap_to_dest(clone, bio); 46662306a36Sopenharmony_ci bio_region_range(clone, bio, &rs, &nr_regions); 46762306a36Sopenharmony_ci trim_bio(bio, region_to_sector(clone, rs), 46862306a36Sopenharmony_ci nr_regions << clone->region_shift); 46962306a36Sopenharmony_ci submit_bio_noacct(bio); 47062306a36Sopenharmony_ci } else 47162306a36Sopenharmony_ci bio_endio(bio); 47262306a36Sopenharmony_ci} 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_cistatic void process_discard_bio(struct clone *clone, struct bio *bio) 47562306a36Sopenharmony_ci{ 47662306a36Sopenharmony_ci unsigned long rs, nr_regions; 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_ci bio_region_range(clone, bio, &rs, &nr_regions); 47962306a36Sopenharmony_ci if (!nr_regions) { 48062306a36Sopenharmony_ci bio_endio(bio); 48162306a36Sopenharmony_ci return; 48262306a36Sopenharmony_ci } 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ci if (WARN_ON(rs >= clone->nr_regions || (rs + nr_regions) < rs || 48562306a36Sopenharmony_ci (rs + nr_regions) > clone->nr_regions)) { 48662306a36Sopenharmony_ci DMERR("%s: Invalid range (%lu + %lu, total regions %lu) for discard (%llu + %u)", 48762306a36Sopenharmony_ci clone_device_name(clone), rs, nr_regions, 48862306a36Sopenharmony_ci clone->nr_regions, 48962306a36Sopenharmony_ci (unsigned long long)bio->bi_iter.bi_sector, 49062306a36Sopenharmony_ci bio_sectors(bio)); 49162306a36Sopenharmony_ci bio_endio(bio); 49262306a36Sopenharmony_ci return; 49362306a36Sopenharmony_ci } 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_ci /* 49662306a36Sopenharmony_ci * The covered regions are already hydrated so we just need to pass 49762306a36Sopenharmony_ci * down the discard. 
49862306a36Sopenharmony_ci */ 49962306a36Sopenharmony_ci if (dm_clone_is_range_hydrated(clone->cmd, rs, nr_regions)) { 50062306a36Sopenharmony_ci complete_discard_bio(clone, bio, true); 50162306a36Sopenharmony_ci return; 50262306a36Sopenharmony_ci } 50362306a36Sopenharmony_ci 50462306a36Sopenharmony_ci /* 50562306a36Sopenharmony_ci * If the metadata mode is RO or FAIL we won't be able to update the 50662306a36Sopenharmony_ci * metadata for the regions covered by the discard so we just ignore 50762306a36Sopenharmony_ci * it. 50862306a36Sopenharmony_ci */ 50962306a36Sopenharmony_ci if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) { 51062306a36Sopenharmony_ci bio_endio(bio); 51162306a36Sopenharmony_ci return; 51262306a36Sopenharmony_ci } 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_ci /* 51562306a36Sopenharmony_ci * Defer discard processing. 51662306a36Sopenharmony_ci */ 51762306a36Sopenharmony_ci spin_lock_irq(&clone->lock); 51862306a36Sopenharmony_ci bio_list_add(&clone->deferred_discard_bios, bio); 51962306a36Sopenharmony_ci spin_unlock_irq(&clone->lock); 52062306a36Sopenharmony_ci 52162306a36Sopenharmony_ci wake_worker(clone); 52262306a36Sopenharmony_ci} 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/ 52562306a36Sopenharmony_ci 52662306a36Sopenharmony_ci/* 52762306a36Sopenharmony_ci * dm-clone region hydrations. 
*/

/*
 * Descriptor of a single region hydration. Instances come from the target's
 * hydration mempool (see alloc_hydration()) and are set up by
 * hydration_init().
 */
struct dm_clone_region_hydration {
	/* Owning target; set by alloc_hydration() */
	struct clone *clone;

	/* Region being hydrated; also the key used for hash table lookups */
	unsigned long region_nr;

	/*
	 * Reset to NULL by hydration_init(). Presumably the bio that
	 * overwrites the whole region, together with its saved completion
	 * callback -- confirm against the code that sets these fields, which
	 * is not part of this section.
	 */
	struct bio *overwrite_bio;
	bio_end_io_t *overwrite_bio_end_io;

	/* Emptied by hydration_init() */
	struct bio_list deferred_bios;

	/*
	 * Outcome of the hydration; hydration_update_metadata() marks the
	 * region as hydrated only when this is BLK_STS_OK.
	 */
	blk_status_t status;

	/* Used by hydration batching */
	struct list_head list;

	/* Used by hydration hash table */
	struct hlist_node h;
};

/*
 * Hydration hash table implementation.
 *
 * Ideally we would like to use list_bl, which uses bit spin locks and employs
 * the least significant bit of the list head to lock the corresponding bucket,
 * reducing the memory overhead for the locks. But, currently, list_bl and bit
 * spin locks don't support IRQ safe versions. Since we have to take the lock
 * in both process and interrupt context, we must fall back to using regular
 * spin locks; one per hash table bucket.
55662306a36Sopenharmony_ci */ 55762306a36Sopenharmony_cistruct hash_table_bucket { 55862306a36Sopenharmony_ci struct hlist_head head; 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci /* Spinlock protecting the bucket */ 56162306a36Sopenharmony_ci spinlock_t lock; 56262306a36Sopenharmony_ci}; 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci#define bucket_lock_irqsave(bucket, flags) \ 56562306a36Sopenharmony_ci spin_lock_irqsave(&(bucket)->lock, flags) 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_ci#define bucket_unlock_irqrestore(bucket, flags) \ 56862306a36Sopenharmony_ci spin_unlock_irqrestore(&(bucket)->lock, flags) 56962306a36Sopenharmony_ci 57062306a36Sopenharmony_ci#define bucket_lock_irq(bucket) \ 57162306a36Sopenharmony_ci spin_lock_irq(&(bucket)->lock) 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci#define bucket_unlock_irq(bucket) \ 57462306a36Sopenharmony_ci spin_unlock_irq(&(bucket)->lock) 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_cistatic int hash_table_init(struct clone *clone) 57762306a36Sopenharmony_ci{ 57862306a36Sopenharmony_ci unsigned int i, sz; 57962306a36Sopenharmony_ci struct hash_table_bucket *bucket; 58062306a36Sopenharmony_ci 58162306a36Sopenharmony_ci sz = 1 << HASH_TABLE_BITS; 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_ci clone->ht = kvmalloc_array(sz, sizeof(struct hash_table_bucket), GFP_KERNEL); 58462306a36Sopenharmony_ci if (!clone->ht) 58562306a36Sopenharmony_ci return -ENOMEM; 58662306a36Sopenharmony_ci 58762306a36Sopenharmony_ci for (i = 0; i < sz; i++) { 58862306a36Sopenharmony_ci bucket = clone->ht + i; 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_ci INIT_HLIST_HEAD(&bucket->head); 59162306a36Sopenharmony_ci spin_lock_init(&bucket->lock); 59262306a36Sopenharmony_ci } 59362306a36Sopenharmony_ci 59462306a36Sopenharmony_ci return 0; 59562306a36Sopenharmony_ci} 59662306a36Sopenharmony_ci 59762306a36Sopenharmony_cistatic void hash_table_exit(struct clone *clone) 59862306a36Sopenharmony_ci{ 
59962306a36Sopenharmony_ci kvfree(clone->ht); 60062306a36Sopenharmony_ci} 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_cistatic struct hash_table_bucket *get_hash_table_bucket(struct clone *clone, 60362306a36Sopenharmony_ci unsigned long region_nr) 60462306a36Sopenharmony_ci{ 60562306a36Sopenharmony_ci return &clone->ht[hash_long(region_nr, HASH_TABLE_BITS)]; 60662306a36Sopenharmony_ci} 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_ci/* 60962306a36Sopenharmony_ci * Search hash table for a hydration with hd->region_nr == region_nr 61062306a36Sopenharmony_ci * 61162306a36Sopenharmony_ci * NOTE: Must be called with the bucket lock held 61262306a36Sopenharmony_ci */ 61362306a36Sopenharmony_cistatic struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket, 61462306a36Sopenharmony_ci unsigned long region_nr) 61562306a36Sopenharmony_ci{ 61662306a36Sopenharmony_ci struct dm_clone_region_hydration *hd; 61762306a36Sopenharmony_ci 61862306a36Sopenharmony_ci hlist_for_each_entry(hd, &bucket->head, h) { 61962306a36Sopenharmony_ci if (hd->region_nr == region_nr) 62062306a36Sopenharmony_ci return hd; 62162306a36Sopenharmony_ci } 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ci return NULL; 62462306a36Sopenharmony_ci} 62562306a36Sopenharmony_ci 62662306a36Sopenharmony_ci/* 62762306a36Sopenharmony_ci * Insert a hydration into the hash table. 62862306a36Sopenharmony_ci * 62962306a36Sopenharmony_ci * NOTE: Must be called with the bucket lock held. 
63062306a36Sopenharmony_ci */ 63162306a36Sopenharmony_cistatic inline void __insert_region_hydration(struct hash_table_bucket *bucket, 63262306a36Sopenharmony_ci struct dm_clone_region_hydration *hd) 63362306a36Sopenharmony_ci{ 63462306a36Sopenharmony_ci hlist_add_head(&hd->h, &bucket->head); 63562306a36Sopenharmony_ci} 63662306a36Sopenharmony_ci 63762306a36Sopenharmony_ci/* 63862306a36Sopenharmony_ci * This function inserts a hydration into the hash table, unless someone else 63962306a36Sopenharmony_ci * managed to insert a hydration for the same region first. In the latter case 64062306a36Sopenharmony_ci * it returns the existing hydration descriptor for this region. 64162306a36Sopenharmony_ci * 64262306a36Sopenharmony_ci * NOTE: Must be called with the hydration hash table lock held. 64362306a36Sopenharmony_ci */ 64462306a36Sopenharmony_cistatic struct dm_clone_region_hydration * 64562306a36Sopenharmony_ci__find_or_insert_region_hydration(struct hash_table_bucket *bucket, 64662306a36Sopenharmony_ci struct dm_clone_region_hydration *hd) 64762306a36Sopenharmony_ci{ 64862306a36Sopenharmony_ci struct dm_clone_region_hydration *hd2; 64962306a36Sopenharmony_ci 65062306a36Sopenharmony_ci hd2 = __hash_find(bucket, hd->region_nr); 65162306a36Sopenharmony_ci if (hd2) 65262306a36Sopenharmony_ci return hd2; 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_ci __insert_region_hydration(bucket, hd); 65562306a36Sopenharmony_ci 65662306a36Sopenharmony_ci return hd; 65762306a36Sopenharmony_ci} 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/ 66062306a36Sopenharmony_ci 66162306a36Sopenharmony_ci/* Allocate a hydration */ 66262306a36Sopenharmony_cistatic struct dm_clone_region_hydration *alloc_hydration(struct clone *clone) 66362306a36Sopenharmony_ci{ 66462306a36Sopenharmony_ci struct dm_clone_region_hydration *hd; 66562306a36Sopenharmony_ci 66662306a36Sopenharmony_ci /* 66762306a36Sopenharmony_ci 
* Allocate a hydration from the hydration mempool. 66862306a36Sopenharmony_ci * This might block but it can't fail. 66962306a36Sopenharmony_ci */ 67062306a36Sopenharmony_ci hd = mempool_alloc(&clone->hydration_pool, GFP_NOIO); 67162306a36Sopenharmony_ci hd->clone = clone; 67262306a36Sopenharmony_ci 67362306a36Sopenharmony_ci return hd; 67462306a36Sopenharmony_ci} 67562306a36Sopenharmony_ci 67662306a36Sopenharmony_cistatic inline void free_hydration(struct dm_clone_region_hydration *hd) 67762306a36Sopenharmony_ci{ 67862306a36Sopenharmony_ci mempool_free(hd, &hd->clone->hydration_pool); 67962306a36Sopenharmony_ci} 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_ci/* Initialize a hydration */ 68262306a36Sopenharmony_cistatic void hydration_init(struct dm_clone_region_hydration *hd, unsigned long region_nr) 68362306a36Sopenharmony_ci{ 68462306a36Sopenharmony_ci hd->region_nr = region_nr; 68562306a36Sopenharmony_ci hd->overwrite_bio = NULL; 68662306a36Sopenharmony_ci bio_list_init(&hd->deferred_bios); 68762306a36Sopenharmony_ci hd->status = 0; 68862306a36Sopenharmony_ci 68962306a36Sopenharmony_ci INIT_LIST_HEAD(&hd->list); 69062306a36Sopenharmony_ci INIT_HLIST_NODE(&hd->h); 69162306a36Sopenharmony_ci} 69262306a36Sopenharmony_ci 69362306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/ 69462306a36Sopenharmony_ci 69562306a36Sopenharmony_ci/* 69662306a36Sopenharmony_ci * Update dm-clone's metadata after a region has finished hydrating and remove 69762306a36Sopenharmony_ci * hydration from the hash table. 
69862306a36Sopenharmony_ci */ 69962306a36Sopenharmony_cistatic int hydration_update_metadata(struct dm_clone_region_hydration *hd) 70062306a36Sopenharmony_ci{ 70162306a36Sopenharmony_ci int r = 0; 70262306a36Sopenharmony_ci unsigned long flags; 70362306a36Sopenharmony_ci struct hash_table_bucket *bucket; 70462306a36Sopenharmony_ci struct clone *clone = hd->clone; 70562306a36Sopenharmony_ci 70662306a36Sopenharmony_ci if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) 70762306a36Sopenharmony_ci r = -EPERM; 70862306a36Sopenharmony_ci 70962306a36Sopenharmony_ci /* Update the metadata */ 71062306a36Sopenharmony_ci if (likely(!r) && hd->status == BLK_STS_OK) 71162306a36Sopenharmony_ci r = dm_clone_set_region_hydrated(clone->cmd, hd->region_nr); 71262306a36Sopenharmony_ci 71362306a36Sopenharmony_ci bucket = get_hash_table_bucket(clone, hd->region_nr); 71462306a36Sopenharmony_ci 71562306a36Sopenharmony_ci /* Remove hydration from hash table */ 71662306a36Sopenharmony_ci bucket_lock_irqsave(bucket, flags); 71762306a36Sopenharmony_ci hlist_del(&hd->h); 71862306a36Sopenharmony_ci bucket_unlock_irqrestore(bucket, flags); 71962306a36Sopenharmony_ci 72062306a36Sopenharmony_ci return r; 72162306a36Sopenharmony_ci} 72262306a36Sopenharmony_ci 72362306a36Sopenharmony_ci/* 72462306a36Sopenharmony_ci * Complete a region's hydration: 72562306a36Sopenharmony_ci * 72662306a36Sopenharmony_ci * 1. Update dm-clone's metadata. 72762306a36Sopenharmony_ci * 2. Remove hydration from hash table. 72862306a36Sopenharmony_ci * 3. Complete overwrite bio. 72962306a36Sopenharmony_ci * 4. Issue deferred bios. 73062306a36Sopenharmony_ci * 5. If this was the last hydration, wake up anyone waiting for 73162306a36Sopenharmony_ci * hydrations to finish. 
73262306a36Sopenharmony_ci */ 73362306a36Sopenharmony_cistatic void hydration_complete(struct dm_clone_region_hydration *hd) 73462306a36Sopenharmony_ci{ 73562306a36Sopenharmony_ci int r; 73662306a36Sopenharmony_ci blk_status_t status; 73762306a36Sopenharmony_ci struct clone *clone = hd->clone; 73862306a36Sopenharmony_ci 73962306a36Sopenharmony_ci r = hydration_update_metadata(hd); 74062306a36Sopenharmony_ci 74162306a36Sopenharmony_ci if (hd->status == BLK_STS_OK && likely(!r)) { 74262306a36Sopenharmony_ci if (hd->overwrite_bio) 74362306a36Sopenharmony_ci complete_overwrite_bio(clone, hd->overwrite_bio); 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_ci issue_deferred_bios(clone, &hd->deferred_bios); 74662306a36Sopenharmony_ci } else { 74762306a36Sopenharmony_ci status = r ? BLK_STS_IOERR : hd->status; 74862306a36Sopenharmony_ci 74962306a36Sopenharmony_ci if (hd->overwrite_bio) 75062306a36Sopenharmony_ci bio_list_add(&hd->deferred_bios, hd->overwrite_bio); 75162306a36Sopenharmony_ci 75262306a36Sopenharmony_ci fail_bios(&hd->deferred_bios, status); 75362306a36Sopenharmony_ci } 75462306a36Sopenharmony_ci 75562306a36Sopenharmony_ci free_hydration(hd); 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_ci if (atomic_dec_and_test(&clone->hydrations_in_flight)) 75862306a36Sopenharmony_ci wakeup_hydration_waiters(clone); 75962306a36Sopenharmony_ci} 76062306a36Sopenharmony_ci 76162306a36Sopenharmony_cistatic void hydration_kcopyd_callback(int read_err, unsigned long write_err, void *context) 76262306a36Sopenharmony_ci{ 76362306a36Sopenharmony_ci blk_status_t status; 76462306a36Sopenharmony_ci 76562306a36Sopenharmony_ci struct dm_clone_region_hydration *tmp, *hd = context; 76662306a36Sopenharmony_ci struct clone *clone = hd->clone; 76762306a36Sopenharmony_ci 76862306a36Sopenharmony_ci LIST_HEAD(batched_hydrations); 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_ci if (read_err || write_err) { 77162306a36Sopenharmony_ci DMERR_LIMIT("%s: hydration failed", 
clone_device_name(clone)); 77262306a36Sopenharmony_ci status = BLK_STS_IOERR; 77362306a36Sopenharmony_ci } else { 77462306a36Sopenharmony_ci status = BLK_STS_OK; 77562306a36Sopenharmony_ci } 77662306a36Sopenharmony_ci list_splice_tail(&hd->list, &batched_hydrations); 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_ci hd->status = status; 77962306a36Sopenharmony_ci hydration_complete(hd); 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci /* Complete batched hydrations */ 78262306a36Sopenharmony_ci list_for_each_entry_safe(hd, tmp, &batched_hydrations, list) { 78362306a36Sopenharmony_ci hd->status = status; 78462306a36Sopenharmony_ci hydration_complete(hd); 78562306a36Sopenharmony_ci } 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci /* Continue background hydration, if there is no I/O in-flight */ 78862306a36Sopenharmony_ci if (test_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags) && 78962306a36Sopenharmony_ci !atomic_read(&clone->ios_in_flight)) 79062306a36Sopenharmony_ci wake_worker(clone); 79162306a36Sopenharmony_ci} 79262306a36Sopenharmony_ci 79362306a36Sopenharmony_cistatic void hydration_copy(struct dm_clone_region_hydration *hd, unsigned int nr_regions) 79462306a36Sopenharmony_ci{ 79562306a36Sopenharmony_ci unsigned long region_start, region_end; 79662306a36Sopenharmony_ci sector_t tail_size, region_size, total_size; 79762306a36Sopenharmony_ci struct dm_io_region from, to; 79862306a36Sopenharmony_ci struct clone *clone = hd->clone; 79962306a36Sopenharmony_ci 80062306a36Sopenharmony_ci if (WARN_ON(!nr_regions)) 80162306a36Sopenharmony_ci return; 80262306a36Sopenharmony_ci 80362306a36Sopenharmony_ci region_size = clone->region_size; 80462306a36Sopenharmony_ci region_start = hd->region_nr; 80562306a36Sopenharmony_ci region_end = region_start + nr_regions - 1; 80662306a36Sopenharmony_ci 80762306a36Sopenharmony_ci total_size = region_to_sector(clone, nr_regions - 1); 80862306a36Sopenharmony_ci 80962306a36Sopenharmony_ci if (region_end == 
clone->nr_regions - 1) { 81062306a36Sopenharmony_ci /* 81162306a36Sopenharmony_ci * The last region of the target might be smaller than 81262306a36Sopenharmony_ci * region_size. 81362306a36Sopenharmony_ci */ 81462306a36Sopenharmony_ci tail_size = clone->ti->len & (region_size - 1); 81562306a36Sopenharmony_ci if (!tail_size) 81662306a36Sopenharmony_ci tail_size = region_size; 81762306a36Sopenharmony_ci } else { 81862306a36Sopenharmony_ci tail_size = region_size; 81962306a36Sopenharmony_ci } 82062306a36Sopenharmony_ci 82162306a36Sopenharmony_ci total_size += tail_size; 82262306a36Sopenharmony_ci 82362306a36Sopenharmony_ci from.bdev = clone->source_dev->bdev; 82462306a36Sopenharmony_ci from.sector = region_to_sector(clone, region_start); 82562306a36Sopenharmony_ci from.count = total_size; 82662306a36Sopenharmony_ci 82762306a36Sopenharmony_ci to.bdev = clone->dest_dev->bdev; 82862306a36Sopenharmony_ci to.sector = from.sector; 82962306a36Sopenharmony_ci to.count = from.count; 83062306a36Sopenharmony_ci 83162306a36Sopenharmony_ci /* Issue copy */ 83262306a36Sopenharmony_ci atomic_add(nr_regions, &clone->hydrations_in_flight); 83362306a36Sopenharmony_ci dm_kcopyd_copy(clone->kcopyd_client, &from, 1, &to, 0, 83462306a36Sopenharmony_ci hydration_kcopyd_callback, hd); 83562306a36Sopenharmony_ci} 83662306a36Sopenharmony_ci 83762306a36Sopenharmony_cistatic void overwrite_endio(struct bio *bio) 83862306a36Sopenharmony_ci{ 83962306a36Sopenharmony_ci struct dm_clone_region_hydration *hd = bio->bi_private; 84062306a36Sopenharmony_ci 84162306a36Sopenharmony_ci bio->bi_end_io = hd->overwrite_bio_end_io; 84262306a36Sopenharmony_ci hd->status = bio->bi_status; 84362306a36Sopenharmony_ci 84462306a36Sopenharmony_ci hydration_complete(hd); 84562306a36Sopenharmony_ci} 84662306a36Sopenharmony_ci 84762306a36Sopenharmony_cistatic void hydration_overwrite(struct dm_clone_region_hydration *hd, struct bio *bio) 84862306a36Sopenharmony_ci{ 84962306a36Sopenharmony_ci /* 85062306a36Sopenharmony_ci 
* We don't need to save and restore bio->bi_private because device 85162306a36Sopenharmony_ci * mapper core generates a new bio for us to use, with clean 85262306a36Sopenharmony_ci * bi_private. 85362306a36Sopenharmony_ci */ 85462306a36Sopenharmony_ci hd->overwrite_bio = bio; 85562306a36Sopenharmony_ci hd->overwrite_bio_end_io = bio->bi_end_io; 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_ci bio->bi_end_io = overwrite_endio; 85862306a36Sopenharmony_ci bio->bi_private = hd; 85962306a36Sopenharmony_ci 86062306a36Sopenharmony_ci atomic_inc(&hd->clone->hydrations_in_flight); 86162306a36Sopenharmony_ci submit_bio_noacct(bio); 86262306a36Sopenharmony_ci} 86362306a36Sopenharmony_ci 86462306a36Sopenharmony_ci/* 86562306a36Sopenharmony_ci * Hydrate bio's region. 86662306a36Sopenharmony_ci * 86762306a36Sopenharmony_ci * This function starts the hydration of the bio's region and puts the bio in 86862306a36Sopenharmony_ci * the list of deferred bios for this region. In case, by the time this 86962306a36Sopenharmony_ci * function is called, the region has finished hydrating it's submitted to the 87062306a36Sopenharmony_ci * destination device. 87162306a36Sopenharmony_ci * 87262306a36Sopenharmony_ci * NOTE: The bio remapping must be performed by the caller. 
87362306a36Sopenharmony_ci */ 87462306a36Sopenharmony_cistatic void hydrate_bio_region(struct clone *clone, struct bio *bio) 87562306a36Sopenharmony_ci{ 87662306a36Sopenharmony_ci unsigned long region_nr; 87762306a36Sopenharmony_ci struct hash_table_bucket *bucket; 87862306a36Sopenharmony_ci struct dm_clone_region_hydration *hd, *hd2; 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_ci region_nr = bio_to_region(clone, bio); 88162306a36Sopenharmony_ci bucket = get_hash_table_bucket(clone, region_nr); 88262306a36Sopenharmony_ci 88362306a36Sopenharmony_ci bucket_lock_irq(bucket); 88462306a36Sopenharmony_ci 88562306a36Sopenharmony_ci hd = __hash_find(bucket, region_nr); 88662306a36Sopenharmony_ci if (hd) { 88762306a36Sopenharmony_ci /* Someone else is hydrating the region */ 88862306a36Sopenharmony_ci bio_list_add(&hd->deferred_bios, bio); 88962306a36Sopenharmony_ci bucket_unlock_irq(bucket); 89062306a36Sopenharmony_ci return; 89162306a36Sopenharmony_ci } 89262306a36Sopenharmony_ci 89362306a36Sopenharmony_ci if (dm_clone_is_region_hydrated(clone->cmd, region_nr)) { 89462306a36Sopenharmony_ci /* The region has been hydrated */ 89562306a36Sopenharmony_ci bucket_unlock_irq(bucket); 89662306a36Sopenharmony_ci issue_bio(clone, bio); 89762306a36Sopenharmony_ci return; 89862306a36Sopenharmony_ci } 89962306a36Sopenharmony_ci 90062306a36Sopenharmony_ci /* 90162306a36Sopenharmony_ci * We must allocate a hydration descriptor and start the hydration of 90262306a36Sopenharmony_ci * the corresponding region. 90362306a36Sopenharmony_ci */ 90462306a36Sopenharmony_ci bucket_unlock_irq(bucket); 90562306a36Sopenharmony_ci 90662306a36Sopenharmony_ci hd = alloc_hydration(clone); 90762306a36Sopenharmony_ci hydration_init(hd, region_nr); 90862306a36Sopenharmony_ci 90962306a36Sopenharmony_ci bucket_lock_irq(bucket); 91062306a36Sopenharmony_ci 91162306a36Sopenharmony_ci /* Check if the region has been hydrated in the meantime. 
*/ 91262306a36Sopenharmony_ci if (dm_clone_is_region_hydrated(clone->cmd, region_nr)) { 91362306a36Sopenharmony_ci bucket_unlock_irq(bucket); 91462306a36Sopenharmony_ci free_hydration(hd); 91562306a36Sopenharmony_ci issue_bio(clone, bio); 91662306a36Sopenharmony_ci return; 91762306a36Sopenharmony_ci } 91862306a36Sopenharmony_ci 91962306a36Sopenharmony_ci hd2 = __find_or_insert_region_hydration(bucket, hd); 92062306a36Sopenharmony_ci if (hd2 != hd) { 92162306a36Sopenharmony_ci /* Someone else started the region's hydration. */ 92262306a36Sopenharmony_ci bio_list_add(&hd2->deferred_bios, bio); 92362306a36Sopenharmony_ci bucket_unlock_irq(bucket); 92462306a36Sopenharmony_ci free_hydration(hd); 92562306a36Sopenharmony_ci return; 92662306a36Sopenharmony_ci } 92762306a36Sopenharmony_ci 92862306a36Sopenharmony_ci /* 92962306a36Sopenharmony_ci * If the metadata mode is RO or FAIL then there is no point starting a 93062306a36Sopenharmony_ci * hydration, since we will not be able to update the metadata when the 93162306a36Sopenharmony_ci * hydration finishes. 93262306a36Sopenharmony_ci */ 93362306a36Sopenharmony_ci if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) { 93462306a36Sopenharmony_ci hlist_del(&hd->h); 93562306a36Sopenharmony_ci bucket_unlock_irq(bucket); 93662306a36Sopenharmony_ci free_hydration(hd); 93762306a36Sopenharmony_ci bio_io_error(bio); 93862306a36Sopenharmony_ci return; 93962306a36Sopenharmony_ci } 94062306a36Sopenharmony_ci 94162306a36Sopenharmony_ci /* 94262306a36Sopenharmony_ci * Start region hydration. 94362306a36Sopenharmony_ci * 94462306a36Sopenharmony_ci * If a bio overwrites a region, i.e., its size is equal to the 94562306a36Sopenharmony_ci * region's size, then we don't need to copy the region from the source 94662306a36Sopenharmony_ci * to the destination device. 
94762306a36Sopenharmony_ci */ 94862306a36Sopenharmony_ci if (is_overwrite_bio(clone, bio)) { 94962306a36Sopenharmony_ci bucket_unlock_irq(bucket); 95062306a36Sopenharmony_ci hydration_overwrite(hd, bio); 95162306a36Sopenharmony_ci } else { 95262306a36Sopenharmony_ci bio_list_add(&hd->deferred_bios, bio); 95362306a36Sopenharmony_ci bucket_unlock_irq(bucket); 95462306a36Sopenharmony_ci hydration_copy(hd, 1); 95562306a36Sopenharmony_ci } 95662306a36Sopenharmony_ci} 95762306a36Sopenharmony_ci 95862306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/ 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ci/* 96162306a36Sopenharmony_ci * Background hydrations. 96262306a36Sopenharmony_ci */ 96362306a36Sopenharmony_ci 96462306a36Sopenharmony_ci/* 96562306a36Sopenharmony_ci * Batch region hydrations. 96662306a36Sopenharmony_ci * 96762306a36Sopenharmony_ci * To better utilize device bandwidth we batch together the hydration of 96862306a36Sopenharmony_ci * adjacent regions. This allows us to use small region sizes, e.g., 4KB, which 96962306a36Sopenharmony_ci * is good for small, random write performance (because of the overwriting of 97062306a36Sopenharmony_ci * un-hydrated regions) and at the same time issue big copy requests to kcopyd 97162306a36Sopenharmony_ci * to achieve high hydration bandwidth. 
97262306a36Sopenharmony_ci */ 97362306a36Sopenharmony_cistruct batch_info { 97462306a36Sopenharmony_ci struct dm_clone_region_hydration *head; 97562306a36Sopenharmony_ci unsigned int nr_batched_regions; 97662306a36Sopenharmony_ci}; 97762306a36Sopenharmony_ci 97862306a36Sopenharmony_cistatic void __batch_hydration(struct batch_info *batch, 97962306a36Sopenharmony_ci struct dm_clone_region_hydration *hd) 98062306a36Sopenharmony_ci{ 98162306a36Sopenharmony_ci struct clone *clone = hd->clone; 98262306a36Sopenharmony_ci unsigned int max_batch_size = READ_ONCE(clone->hydration_batch_size); 98362306a36Sopenharmony_ci 98462306a36Sopenharmony_ci if (batch->head) { 98562306a36Sopenharmony_ci /* Try to extend the current batch */ 98662306a36Sopenharmony_ci if (batch->nr_batched_regions < max_batch_size && 98762306a36Sopenharmony_ci (batch->head->region_nr + batch->nr_batched_regions) == hd->region_nr) { 98862306a36Sopenharmony_ci list_add_tail(&hd->list, &batch->head->list); 98962306a36Sopenharmony_ci batch->nr_batched_regions++; 99062306a36Sopenharmony_ci hd = NULL; 99162306a36Sopenharmony_ci } 99262306a36Sopenharmony_ci 99362306a36Sopenharmony_ci /* Check if we should issue the current batch */ 99462306a36Sopenharmony_ci if (batch->nr_batched_regions >= max_batch_size || hd) { 99562306a36Sopenharmony_ci hydration_copy(batch->head, batch->nr_batched_regions); 99662306a36Sopenharmony_ci batch->head = NULL; 99762306a36Sopenharmony_ci batch->nr_batched_regions = 0; 99862306a36Sopenharmony_ci } 99962306a36Sopenharmony_ci } 100062306a36Sopenharmony_ci 100162306a36Sopenharmony_ci if (!hd) 100262306a36Sopenharmony_ci return; 100362306a36Sopenharmony_ci 100462306a36Sopenharmony_ci /* We treat max batch sizes of zero and one equivalently */ 100562306a36Sopenharmony_ci if (max_batch_size <= 1) { 100662306a36Sopenharmony_ci hydration_copy(hd, 1); 100762306a36Sopenharmony_ci return; 100862306a36Sopenharmony_ci } 100962306a36Sopenharmony_ci 101062306a36Sopenharmony_ci /* Start a new 
batch */ 101162306a36Sopenharmony_ci BUG_ON(!list_empty(&hd->list)); 101262306a36Sopenharmony_ci batch->head = hd; 101362306a36Sopenharmony_ci batch->nr_batched_regions = 1; 101462306a36Sopenharmony_ci} 101562306a36Sopenharmony_ci 101662306a36Sopenharmony_cistatic unsigned long __start_next_hydration(struct clone *clone, 101762306a36Sopenharmony_ci unsigned long offset, 101862306a36Sopenharmony_ci struct batch_info *batch) 101962306a36Sopenharmony_ci{ 102062306a36Sopenharmony_ci struct hash_table_bucket *bucket; 102162306a36Sopenharmony_ci struct dm_clone_region_hydration *hd; 102262306a36Sopenharmony_ci unsigned long nr_regions = clone->nr_regions; 102362306a36Sopenharmony_ci 102462306a36Sopenharmony_ci hd = alloc_hydration(clone); 102562306a36Sopenharmony_ci 102662306a36Sopenharmony_ci /* Try to find a region to hydrate. */ 102762306a36Sopenharmony_ci do { 102862306a36Sopenharmony_ci offset = dm_clone_find_next_unhydrated_region(clone->cmd, offset); 102962306a36Sopenharmony_ci if (offset == nr_regions) 103062306a36Sopenharmony_ci break; 103162306a36Sopenharmony_ci 103262306a36Sopenharmony_ci bucket = get_hash_table_bucket(clone, offset); 103362306a36Sopenharmony_ci bucket_lock_irq(bucket); 103462306a36Sopenharmony_ci 103562306a36Sopenharmony_ci if (!dm_clone_is_region_hydrated(clone->cmd, offset) && 103662306a36Sopenharmony_ci !__hash_find(bucket, offset)) { 103762306a36Sopenharmony_ci hydration_init(hd, offset); 103862306a36Sopenharmony_ci __insert_region_hydration(bucket, hd); 103962306a36Sopenharmony_ci bucket_unlock_irq(bucket); 104062306a36Sopenharmony_ci 104162306a36Sopenharmony_ci /* Batch hydration */ 104262306a36Sopenharmony_ci __batch_hydration(batch, hd); 104362306a36Sopenharmony_ci 104462306a36Sopenharmony_ci return (offset + 1); 104562306a36Sopenharmony_ci } 104662306a36Sopenharmony_ci 104762306a36Sopenharmony_ci bucket_unlock_irq(bucket); 104862306a36Sopenharmony_ci 104962306a36Sopenharmony_ci } while (++offset < nr_regions); 
105062306a36Sopenharmony_ci 105162306a36Sopenharmony_ci if (hd) 105262306a36Sopenharmony_ci free_hydration(hd); 105362306a36Sopenharmony_ci 105462306a36Sopenharmony_ci return offset; 105562306a36Sopenharmony_ci} 105662306a36Sopenharmony_ci 105762306a36Sopenharmony_ci/* 105862306a36Sopenharmony_ci * This function searches for regions that still reside in the source device 105962306a36Sopenharmony_ci * and starts their hydration. 106062306a36Sopenharmony_ci */ 106162306a36Sopenharmony_cistatic void do_hydration(struct clone *clone) 106262306a36Sopenharmony_ci{ 106362306a36Sopenharmony_ci unsigned int current_volume; 106462306a36Sopenharmony_ci unsigned long offset, nr_regions = clone->nr_regions; 106562306a36Sopenharmony_ci 106662306a36Sopenharmony_ci struct batch_info batch = { 106762306a36Sopenharmony_ci .head = NULL, 106862306a36Sopenharmony_ci .nr_batched_regions = 0, 106962306a36Sopenharmony_ci }; 107062306a36Sopenharmony_ci 107162306a36Sopenharmony_ci if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) 107262306a36Sopenharmony_ci return; 107362306a36Sopenharmony_ci 107462306a36Sopenharmony_ci if (dm_clone_is_hydration_done(clone->cmd)) 107562306a36Sopenharmony_ci return; 107662306a36Sopenharmony_ci 107762306a36Sopenharmony_ci /* 107862306a36Sopenharmony_ci * Avoid race with device suspension. 107962306a36Sopenharmony_ci */ 108062306a36Sopenharmony_ci atomic_inc(&clone->hydrations_in_flight); 108162306a36Sopenharmony_ci 108262306a36Sopenharmony_ci /* 108362306a36Sopenharmony_ci * Make sure atomic_inc() is ordered before test_bit(), otherwise we 108462306a36Sopenharmony_ci * might race with clone_postsuspend() and start a region hydration 108562306a36Sopenharmony_ci * after the target has been suspended. 108662306a36Sopenharmony_ci * 108762306a36Sopenharmony_ci * This is paired with the smp_mb__after_atomic() in 108862306a36Sopenharmony_ci * clone_postsuspend(). 
108962306a36Sopenharmony_ci */ 109062306a36Sopenharmony_ci smp_mb__after_atomic(); 109162306a36Sopenharmony_ci 109262306a36Sopenharmony_ci offset = clone->hydration_offset; 109362306a36Sopenharmony_ci while (likely(!test_bit(DM_CLONE_HYDRATION_SUSPENDED, &clone->flags)) && 109462306a36Sopenharmony_ci !atomic_read(&clone->ios_in_flight) && 109562306a36Sopenharmony_ci test_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags) && 109662306a36Sopenharmony_ci offset < nr_regions) { 109762306a36Sopenharmony_ci current_volume = atomic_read(&clone->hydrations_in_flight); 109862306a36Sopenharmony_ci current_volume += batch.nr_batched_regions; 109962306a36Sopenharmony_ci 110062306a36Sopenharmony_ci if (current_volume > READ_ONCE(clone->hydration_threshold)) 110162306a36Sopenharmony_ci break; 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci offset = __start_next_hydration(clone, offset, &batch); 110462306a36Sopenharmony_ci } 110562306a36Sopenharmony_ci 110662306a36Sopenharmony_ci if (batch.head) 110762306a36Sopenharmony_ci hydration_copy(batch.head, batch.nr_batched_regions); 110862306a36Sopenharmony_ci 110962306a36Sopenharmony_ci if (offset >= nr_regions) 111062306a36Sopenharmony_ci offset = 0; 111162306a36Sopenharmony_ci 111262306a36Sopenharmony_ci clone->hydration_offset = offset; 111362306a36Sopenharmony_ci 111462306a36Sopenharmony_ci if (atomic_dec_and_test(&clone->hydrations_in_flight)) 111562306a36Sopenharmony_ci wakeup_hydration_waiters(clone); 111662306a36Sopenharmony_ci} 111762306a36Sopenharmony_ci 111862306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/ 111962306a36Sopenharmony_ci 112062306a36Sopenharmony_cistatic bool need_commit_due_to_time(struct clone *clone) 112162306a36Sopenharmony_ci{ 112262306a36Sopenharmony_ci return !time_in_range(jiffies, clone->last_commit_jiffies, 112362306a36Sopenharmony_ci clone->last_commit_jiffies + COMMIT_PERIOD); 112462306a36Sopenharmony_ci} 112562306a36Sopenharmony_ci 
112662306a36Sopenharmony_ci/* 112762306a36Sopenharmony_ci * A non-zero return indicates read-only or fail mode. 112862306a36Sopenharmony_ci */ 112962306a36Sopenharmony_cistatic int commit_metadata(struct clone *clone, bool *dest_dev_flushed) 113062306a36Sopenharmony_ci{ 113162306a36Sopenharmony_ci int r = 0; 113262306a36Sopenharmony_ci 113362306a36Sopenharmony_ci if (dest_dev_flushed) 113462306a36Sopenharmony_ci *dest_dev_flushed = false; 113562306a36Sopenharmony_ci 113662306a36Sopenharmony_ci mutex_lock(&clone->commit_lock); 113762306a36Sopenharmony_ci 113862306a36Sopenharmony_ci if (!dm_clone_changed_this_transaction(clone->cmd)) 113962306a36Sopenharmony_ci goto out; 114062306a36Sopenharmony_ci 114162306a36Sopenharmony_ci if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) { 114262306a36Sopenharmony_ci r = -EPERM; 114362306a36Sopenharmony_ci goto out; 114462306a36Sopenharmony_ci } 114562306a36Sopenharmony_ci 114662306a36Sopenharmony_ci r = dm_clone_metadata_pre_commit(clone->cmd); 114762306a36Sopenharmony_ci if (unlikely(r)) { 114862306a36Sopenharmony_ci __metadata_operation_failed(clone, "dm_clone_metadata_pre_commit", r); 114962306a36Sopenharmony_ci goto out; 115062306a36Sopenharmony_ci } 115162306a36Sopenharmony_ci 115262306a36Sopenharmony_ci r = blkdev_issue_flush(clone->dest_dev->bdev); 115362306a36Sopenharmony_ci if (unlikely(r)) { 115462306a36Sopenharmony_ci __metadata_operation_failed(clone, "flush destination device", r); 115562306a36Sopenharmony_ci goto out; 115662306a36Sopenharmony_ci } 115762306a36Sopenharmony_ci 115862306a36Sopenharmony_ci if (dest_dev_flushed) 115962306a36Sopenharmony_ci *dest_dev_flushed = true; 116062306a36Sopenharmony_ci 116162306a36Sopenharmony_ci r = dm_clone_metadata_commit(clone->cmd); 116262306a36Sopenharmony_ci if (unlikely(r)) { 116362306a36Sopenharmony_ci __metadata_operation_failed(clone, "dm_clone_metadata_commit", r); 116462306a36Sopenharmony_ci goto out; 116562306a36Sopenharmony_ci } 116662306a36Sopenharmony_ci 
116762306a36Sopenharmony_ci if (dm_clone_is_hydration_done(clone->cmd)) 116862306a36Sopenharmony_ci dm_table_event(clone->ti->table); 116962306a36Sopenharmony_ciout: 117062306a36Sopenharmony_ci mutex_unlock(&clone->commit_lock); 117162306a36Sopenharmony_ci 117262306a36Sopenharmony_ci return r; 117362306a36Sopenharmony_ci} 117462306a36Sopenharmony_ci 117562306a36Sopenharmony_cistatic void process_deferred_discards(struct clone *clone) 117662306a36Sopenharmony_ci{ 117762306a36Sopenharmony_ci int r = -EPERM; 117862306a36Sopenharmony_ci struct bio *bio; 117962306a36Sopenharmony_ci struct blk_plug plug; 118062306a36Sopenharmony_ci unsigned long rs, nr_regions; 118162306a36Sopenharmony_ci struct bio_list discards = BIO_EMPTY_LIST; 118262306a36Sopenharmony_ci 118362306a36Sopenharmony_ci spin_lock_irq(&clone->lock); 118462306a36Sopenharmony_ci bio_list_merge(&discards, &clone->deferred_discard_bios); 118562306a36Sopenharmony_ci bio_list_init(&clone->deferred_discard_bios); 118662306a36Sopenharmony_ci spin_unlock_irq(&clone->lock); 118762306a36Sopenharmony_ci 118862306a36Sopenharmony_ci if (bio_list_empty(&discards)) 118962306a36Sopenharmony_ci return; 119062306a36Sopenharmony_ci 119162306a36Sopenharmony_ci if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) 119262306a36Sopenharmony_ci goto out; 119362306a36Sopenharmony_ci 119462306a36Sopenharmony_ci /* Update the metadata */ 119562306a36Sopenharmony_ci bio_list_for_each(bio, &discards) { 119662306a36Sopenharmony_ci bio_region_range(clone, bio, &rs, &nr_regions); 119762306a36Sopenharmony_ci /* 119862306a36Sopenharmony_ci * A discard request might cover regions that have been already 119962306a36Sopenharmony_ci * hydrated. There is no need to update the metadata for these 120062306a36Sopenharmony_ci * regions. 
120162306a36Sopenharmony_ci */ 120262306a36Sopenharmony_ci r = dm_clone_cond_set_range(clone->cmd, rs, nr_regions); 120362306a36Sopenharmony_ci if (unlikely(r)) 120462306a36Sopenharmony_ci break; 120562306a36Sopenharmony_ci } 120662306a36Sopenharmony_ciout: 120762306a36Sopenharmony_ci blk_start_plug(&plug); 120862306a36Sopenharmony_ci while ((bio = bio_list_pop(&discards))) 120962306a36Sopenharmony_ci complete_discard_bio(clone, bio, r == 0); 121062306a36Sopenharmony_ci blk_finish_plug(&plug); 121162306a36Sopenharmony_ci} 121262306a36Sopenharmony_ci 121362306a36Sopenharmony_cistatic void process_deferred_bios(struct clone *clone) 121462306a36Sopenharmony_ci{ 121562306a36Sopenharmony_ci struct bio_list bios = BIO_EMPTY_LIST; 121662306a36Sopenharmony_ci 121762306a36Sopenharmony_ci spin_lock_irq(&clone->lock); 121862306a36Sopenharmony_ci bio_list_merge(&bios, &clone->deferred_bios); 121962306a36Sopenharmony_ci bio_list_init(&clone->deferred_bios); 122062306a36Sopenharmony_ci spin_unlock_irq(&clone->lock); 122162306a36Sopenharmony_ci 122262306a36Sopenharmony_ci if (bio_list_empty(&bios)) 122362306a36Sopenharmony_ci return; 122462306a36Sopenharmony_ci 122562306a36Sopenharmony_ci submit_bios(&bios); 122662306a36Sopenharmony_ci} 122762306a36Sopenharmony_ci 122862306a36Sopenharmony_cistatic void process_deferred_flush_bios(struct clone *clone) 122962306a36Sopenharmony_ci{ 123062306a36Sopenharmony_ci struct bio *bio; 123162306a36Sopenharmony_ci bool dest_dev_flushed; 123262306a36Sopenharmony_ci struct bio_list bios = BIO_EMPTY_LIST; 123362306a36Sopenharmony_ci struct bio_list bio_completions = BIO_EMPTY_LIST; 123462306a36Sopenharmony_ci 123562306a36Sopenharmony_ci /* 123662306a36Sopenharmony_ci * If there are any deferred flush bios, we must commit the metadata 123762306a36Sopenharmony_ci * before issuing them or signaling their completion. 
123862306a36Sopenharmony_ci */ 123962306a36Sopenharmony_ci spin_lock_irq(&clone->lock); 124062306a36Sopenharmony_ci bio_list_merge(&bios, &clone->deferred_flush_bios); 124162306a36Sopenharmony_ci bio_list_init(&clone->deferred_flush_bios); 124262306a36Sopenharmony_ci 124362306a36Sopenharmony_ci bio_list_merge(&bio_completions, &clone->deferred_flush_completions); 124462306a36Sopenharmony_ci bio_list_init(&clone->deferred_flush_completions); 124562306a36Sopenharmony_ci spin_unlock_irq(&clone->lock); 124662306a36Sopenharmony_ci 124762306a36Sopenharmony_ci if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) && 124862306a36Sopenharmony_ci !(dm_clone_changed_this_transaction(clone->cmd) && need_commit_due_to_time(clone))) 124962306a36Sopenharmony_ci return; 125062306a36Sopenharmony_ci 125162306a36Sopenharmony_ci if (commit_metadata(clone, &dest_dev_flushed)) { 125262306a36Sopenharmony_ci bio_list_merge(&bios, &bio_completions); 125362306a36Sopenharmony_ci 125462306a36Sopenharmony_ci while ((bio = bio_list_pop(&bios))) 125562306a36Sopenharmony_ci bio_io_error(bio); 125662306a36Sopenharmony_ci 125762306a36Sopenharmony_ci return; 125862306a36Sopenharmony_ci } 125962306a36Sopenharmony_ci 126062306a36Sopenharmony_ci clone->last_commit_jiffies = jiffies; 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci while ((bio = bio_list_pop(&bio_completions))) 126362306a36Sopenharmony_ci bio_endio(bio); 126462306a36Sopenharmony_ci 126562306a36Sopenharmony_ci while ((bio = bio_list_pop(&bios))) { 126662306a36Sopenharmony_ci if ((bio->bi_opf & REQ_PREFLUSH) && dest_dev_flushed) { 126762306a36Sopenharmony_ci /* We just flushed the destination device as part of 126862306a36Sopenharmony_ci * the metadata commit, so there is no reason to send 126962306a36Sopenharmony_ci * another flush. 
127062306a36Sopenharmony_ci */ 127162306a36Sopenharmony_ci bio_endio(bio); 127262306a36Sopenharmony_ci } else { 127362306a36Sopenharmony_ci submit_bio_noacct(bio); 127462306a36Sopenharmony_ci } 127562306a36Sopenharmony_ci } 127662306a36Sopenharmony_ci} 127762306a36Sopenharmony_ci 127862306a36Sopenharmony_cistatic void do_worker(struct work_struct *work) 127962306a36Sopenharmony_ci{ 128062306a36Sopenharmony_ci struct clone *clone = container_of(work, typeof(*clone), worker); 128162306a36Sopenharmony_ci 128262306a36Sopenharmony_ci process_deferred_bios(clone); 128362306a36Sopenharmony_ci process_deferred_discards(clone); 128462306a36Sopenharmony_ci 128562306a36Sopenharmony_ci /* 128662306a36Sopenharmony_ci * process_deferred_flush_bios(): 128762306a36Sopenharmony_ci * 128862306a36Sopenharmony_ci * - Commit metadata 128962306a36Sopenharmony_ci * 129062306a36Sopenharmony_ci * - Process deferred REQ_FUA completions 129162306a36Sopenharmony_ci * 129262306a36Sopenharmony_ci * - Process deferred REQ_PREFLUSH bios 129362306a36Sopenharmony_ci */ 129462306a36Sopenharmony_ci process_deferred_flush_bios(clone); 129562306a36Sopenharmony_ci 129662306a36Sopenharmony_ci /* Background hydration */ 129762306a36Sopenharmony_ci do_hydration(clone); 129862306a36Sopenharmony_ci} 129962306a36Sopenharmony_ci 130062306a36Sopenharmony_ci/* 130162306a36Sopenharmony_ci * Commit periodically so that not too much unwritten data builds up. 130262306a36Sopenharmony_ci * 130362306a36Sopenharmony_ci * Also, restart background hydration, if it has been stopped by in-flight I/O. 
130462306a36Sopenharmony_ci */ 130562306a36Sopenharmony_cistatic void do_waker(struct work_struct *work) 130662306a36Sopenharmony_ci{ 130762306a36Sopenharmony_ci struct clone *clone = container_of(to_delayed_work(work), struct clone, waker); 130862306a36Sopenharmony_ci 130962306a36Sopenharmony_ci wake_worker(clone); 131062306a36Sopenharmony_ci queue_delayed_work(clone->wq, &clone->waker, COMMIT_PERIOD); 131162306a36Sopenharmony_ci} 131262306a36Sopenharmony_ci 131362306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/ 131462306a36Sopenharmony_ci 131562306a36Sopenharmony_ci/* 131662306a36Sopenharmony_ci * Target methods 131762306a36Sopenharmony_ci */ 131862306a36Sopenharmony_cistatic int clone_map(struct dm_target *ti, struct bio *bio) 131962306a36Sopenharmony_ci{ 132062306a36Sopenharmony_ci struct clone *clone = ti->private; 132162306a36Sopenharmony_ci unsigned long region_nr; 132262306a36Sopenharmony_ci 132362306a36Sopenharmony_ci atomic_inc(&clone->ios_in_flight); 132462306a36Sopenharmony_ci 132562306a36Sopenharmony_ci if (unlikely(get_clone_mode(clone) == CM_FAIL)) 132662306a36Sopenharmony_ci return DM_MAPIO_KILL; 132762306a36Sopenharmony_ci 132862306a36Sopenharmony_ci /* 132962306a36Sopenharmony_ci * REQ_PREFLUSH bios carry no data: 133062306a36Sopenharmony_ci * 133162306a36Sopenharmony_ci * - Commit metadata, if changed 133262306a36Sopenharmony_ci * 133362306a36Sopenharmony_ci * - Pass down to destination device 133462306a36Sopenharmony_ci */ 133562306a36Sopenharmony_ci if (bio->bi_opf & REQ_PREFLUSH) { 133662306a36Sopenharmony_ci remap_and_issue(clone, bio); 133762306a36Sopenharmony_ci return DM_MAPIO_SUBMITTED; 133862306a36Sopenharmony_ci } 133962306a36Sopenharmony_ci 134062306a36Sopenharmony_ci bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector); 134162306a36Sopenharmony_ci 134262306a36Sopenharmony_ci /* 134362306a36Sopenharmony_ci * dm-clone interprets discards and performs a fast hydration 
of the 134462306a36Sopenharmony_ci * discarded regions, i.e., we skip the copy from the source device and 134562306a36Sopenharmony_ci * just mark the regions as hydrated. 134662306a36Sopenharmony_ci */ 134762306a36Sopenharmony_ci if (bio_op(bio) == REQ_OP_DISCARD) { 134862306a36Sopenharmony_ci process_discard_bio(clone, bio); 134962306a36Sopenharmony_ci return DM_MAPIO_SUBMITTED; 135062306a36Sopenharmony_ci } 135162306a36Sopenharmony_ci 135262306a36Sopenharmony_ci /* 135362306a36Sopenharmony_ci * If the bio's region is hydrated, redirect it to the destination 135462306a36Sopenharmony_ci * device. 135562306a36Sopenharmony_ci * 135662306a36Sopenharmony_ci * If the region is not hydrated and the bio is a READ, redirect it to 135762306a36Sopenharmony_ci * the source device. 135862306a36Sopenharmony_ci * 135962306a36Sopenharmony_ci * Else, defer WRITE bio until after its region has been hydrated and 136062306a36Sopenharmony_ci * start the region's hydration immediately. 136162306a36Sopenharmony_ci */ 136262306a36Sopenharmony_ci region_nr = bio_to_region(clone, bio); 136362306a36Sopenharmony_ci if (dm_clone_is_region_hydrated(clone->cmd, region_nr)) { 136462306a36Sopenharmony_ci remap_and_issue(clone, bio); 136562306a36Sopenharmony_ci return DM_MAPIO_SUBMITTED; 136662306a36Sopenharmony_ci } else if (bio_data_dir(bio) == READ) { 136762306a36Sopenharmony_ci remap_to_source(clone, bio); 136862306a36Sopenharmony_ci return DM_MAPIO_REMAPPED; 136962306a36Sopenharmony_ci } 137062306a36Sopenharmony_ci 137162306a36Sopenharmony_ci remap_to_dest(clone, bio); 137262306a36Sopenharmony_ci hydrate_bio_region(clone, bio); 137362306a36Sopenharmony_ci 137462306a36Sopenharmony_ci return DM_MAPIO_SUBMITTED; 137562306a36Sopenharmony_ci} 137662306a36Sopenharmony_ci 137762306a36Sopenharmony_cistatic int clone_endio(struct dm_target *ti, struct bio *bio, blk_status_t *error) 137862306a36Sopenharmony_ci{ 137962306a36Sopenharmony_ci struct clone *clone = ti->private; 138062306a36Sopenharmony_ci 
138162306a36Sopenharmony_ci atomic_dec(&clone->ios_in_flight); 138262306a36Sopenharmony_ci 138362306a36Sopenharmony_ci return DM_ENDIO_DONE; 138462306a36Sopenharmony_ci} 138562306a36Sopenharmony_ci 138662306a36Sopenharmony_cistatic void emit_flags(struct clone *clone, char *result, unsigned int maxlen, 138762306a36Sopenharmony_ci ssize_t *sz_ptr) 138862306a36Sopenharmony_ci{ 138962306a36Sopenharmony_ci ssize_t sz = *sz_ptr; 139062306a36Sopenharmony_ci unsigned int count; 139162306a36Sopenharmony_ci 139262306a36Sopenharmony_ci count = !test_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags); 139362306a36Sopenharmony_ci count += !test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags); 139462306a36Sopenharmony_ci 139562306a36Sopenharmony_ci DMEMIT("%u ", count); 139662306a36Sopenharmony_ci 139762306a36Sopenharmony_ci if (!test_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags)) 139862306a36Sopenharmony_ci DMEMIT("no_hydration "); 139962306a36Sopenharmony_ci 140062306a36Sopenharmony_ci if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags)) 140162306a36Sopenharmony_ci DMEMIT("no_discard_passdown "); 140262306a36Sopenharmony_ci 140362306a36Sopenharmony_ci *sz_ptr = sz; 140462306a36Sopenharmony_ci} 140562306a36Sopenharmony_ci 140662306a36Sopenharmony_cistatic void emit_core_args(struct clone *clone, char *result, 140762306a36Sopenharmony_ci unsigned int maxlen, ssize_t *sz_ptr) 140862306a36Sopenharmony_ci{ 140962306a36Sopenharmony_ci ssize_t sz = *sz_ptr; 141062306a36Sopenharmony_ci unsigned int count = 4; 141162306a36Sopenharmony_ci 141262306a36Sopenharmony_ci DMEMIT("%u hydration_threshold %u hydration_batch_size %u ", count, 141362306a36Sopenharmony_ci READ_ONCE(clone->hydration_threshold), 141462306a36Sopenharmony_ci READ_ONCE(clone->hydration_batch_size)); 141562306a36Sopenharmony_ci 141662306a36Sopenharmony_ci *sz_ptr = sz; 141762306a36Sopenharmony_ci} 141862306a36Sopenharmony_ci 141962306a36Sopenharmony_ci/* 142062306a36Sopenharmony_ci * Status format: 
142162306a36Sopenharmony_ci * 142262306a36Sopenharmony_ci * <metadata block size> <#used metadata blocks>/<#total metadata blocks> 142362306a36Sopenharmony_ci * <clone region size> <#hydrated regions>/<#total regions> <#hydrating regions> 142462306a36Sopenharmony_ci * <#features> <features>* <#core args> <core args>* <clone metadata mode> 142562306a36Sopenharmony_ci */ 142662306a36Sopenharmony_cistatic void clone_status(struct dm_target *ti, status_type_t type, 142762306a36Sopenharmony_ci unsigned int status_flags, char *result, 142862306a36Sopenharmony_ci unsigned int maxlen) 142962306a36Sopenharmony_ci{ 143062306a36Sopenharmony_ci int r; 143162306a36Sopenharmony_ci unsigned int i; 143262306a36Sopenharmony_ci ssize_t sz = 0; 143362306a36Sopenharmony_ci dm_block_t nr_free_metadata_blocks = 0; 143462306a36Sopenharmony_ci dm_block_t nr_metadata_blocks = 0; 143562306a36Sopenharmony_ci char buf[BDEVNAME_SIZE]; 143662306a36Sopenharmony_ci struct clone *clone = ti->private; 143762306a36Sopenharmony_ci 143862306a36Sopenharmony_ci switch (type) { 143962306a36Sopenharmony_ci case STATUSTYPE_INFO: 144062306a36Sopenharmony_ci if (get_clone_mode(clone) == CM_FAIL) { 144162306a36Sopenharmony_ci DMEMIT("Fail"); 144262306a36Sopenharmony_ci break; 144362306a36Sopenharmony_ci } 144462306a36Sopenharmony_ci 144562306a36Sopenharmony_ci /* Commit to ensure statistics aren't out-of-date */ 144662306a36Sopenharmony_ci if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) 144762306a36Sopenharmony_ci (void) commit_metadata(clone, NULL); 144862306a36Sopenharmony_ci 144962306a36Sopenharmony_ci r = dm_clone_get_free_metadata_block_count(clone->cmd, &nr_free_metadata_blocks); 145062306a36Sopenharmony_ci 145162306a36Sopenharmony_ci if (r) { 145262306a36Sopenharmony_ci DMERR("%s: dm_clone_get_free_metadata_block_count returned %d", 145362306a36Sopenharmony_ci clone_device_name(clone), r); 145462306a36Sopenharmony_ci goto error; 145562306a36Sopenharmony_ci } 
145662306a36Sopenharmony_ci 145762306a36Sopenharmony_ci r = dm_clone_get_metadata_dev_size(clone->cmd, &nr_metadata_blocks); 145862306a36Sopenharmony_ci 145962306a36Sopenharmony_ci if (r) { 146062306a36Sopenharmony_ci DMERR("%s: dm_clone_get_metadata_dev_size returned %d", 146162306a36Sopenharmony_ci clone_device_name(clone), r); 146262306a36Sopenharmony_ci goto error; 146362306a36Sopenharmony_ci } 146462306a36Sopenharmony_ci 146562306a36Sopenharmony_ci DMEMIT("%u %llu/%llu %llu %u/%lu %u ", 146662306a36Sopenharmony_ci DM_CLONE_METADATA_BLOCK_SIZE, 146762306a36Sopenharmony_ci (unsigned long long)(nr_metadata_blocks - nr_free_metadata_blocks), 146862306a36Sopenharmony_ci (unsigned long long)nr_metadata_blocks, 146962306a36Sopenharmony_ci (unsigned long long)clone->region_size, 147062306a36Sopenharmony_ci dm_clone_nr_of_hydrated_regions(clone->cmd), 147162306a36Sopenharmony_ci clone->nr_regions, 147262306a36Sopenharmony_ci atomic_read(&clone->hydrations_in_flight)); 147362306a36Sopenharmony_ci 147462306a36Sopenharmony_ci emit_flags(clone, result, maxlen, &sz); 147562306a36Sopenharmony_ci emit_core_args(clone, result, maxlen, &sz); 147662306a36Sopenharmony_ci 147762306a36Sopenharmony_ci switch (get_clone_mode(clone)) { 147862306a36Sopenharmony_ci case CM_WRITE: 147962306a36Sopenharmony_ci DMEMIT("rw"); 148062306a36Sopenharmony_ci break; 148162306a36Sopenharmony_ci case CM_READ_ONLY: 148262306a36Sopenharmony_ci DMEMIT("ro"); 148362306a36Sopenharmony_ci break; 148462306a36Sopenharmony_ci case CM_FAIL: 148562306a36Sopenharmony_ci DMEMIT("Fail"); 148662306a36Sopenharmony_ci } 148762306a36Sopenharmony_ci 148862306a36Sopenharmony_ci break; 148962306a36Sopenharmony_ci 149062306a36Sopenharmony_ci case STATUSTYPE_TABLE: 149162306a36Sopenharmony_ci format_dev_t(buf, clone->metadata_dev->bdev->bd_dev); 149262306a36Sopenharmony_ci DMEMIT("%s ", buf); 149362306a36Sopenharmony_ci 149462306a36Sopenharmony_ci format_dev_t(buf, clone->dest_dev->bdev->bd_dev); 
149562306a36Sopenharmony_ci DMEMIT("%s ", buf); 149662306a36Sopenharmony_ci 149762306a36Sopenharmony_ci format_dev_t(buf, clone->source_dev->bdev->bd_dev); 149862306a36Sopenharmony_ci DMEMIT("%s", buf); 149962306a36Sopenharmony_ci 150062306a36Sopenharmony_ci for (i = 0; i < clone->nr_ctr_args; i++) 150162306a36Sopenharmony_ci DMEMIT(" %s", clone->ctr_args[i]); 150262306a36Sopenharmony_ci break; 150362306a36Sopenharmony_ci 150462306a36Sopenharmony_ci case STATUSTYPE_IMA: 150562306a36Sopenharmony_ci *result = '\0'; 150662306a36Sopenharmony_ci break; 150762306a36Sopenharmony_ci } 150862306a36Sopenharmony_ci 150962306a36Sopenharmony_ci return; 151062306a36Sopenharmony_ci 151162306a36Sopenharmony_cierror: 151262306a36Sopenharmony_ci DMEMIT("Error"); 151362306a36Sopenharmony_ci} 151462306a36Sopenharmony_ci 151562306a36Sopenharmony_cistatic sector_t get_dev_size(struct dm_dev *dev) 151662306a36Sopenharmony_ci{ 151762306a36Sopenharmony_ci return bdev_nr_sectors(dev->bdev); 151862306a36Sopenharmony_ci} 151962306a36Sopenharmony_ci 152062306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/ 152162306a36Sopenharmony_ci 152262306a36Sopenharmony_ci/* 152362306a36Sopenharmony_ci * Construct a clone device mapping: 152462306a36Sopenharmony_ci * 152562306a36Sopenharmony_ci * clone <metadata dev> <destination dev> <source dev> <region size> 152662306a36Sopenharmony_ci * [<#feature args> [<feature arg>]* [<#core args> [key value]*]] 152762306a36Sopenharmony_ci * 152862306a36Sopenharmony_ci * metadata dev: Fast device holding the persistent metadata 152962306a36Sopenharmony_ci * destination dev: The destination device, which will become a clone of the 153062306a36Sopenharmony_ci * source device 153162306a36Sopenharmony_ci * source dev: The read-only source device that gets cloned 153262306a36Sopenharmony_ci * region size: dm-clone unit size in sectors 153362306a36Sopenharmony_ci * 153462306a36Sopenharmony_ci * #feature args: Number of 
feature arguments passed 153562306a36Sopenharmony_ci * feature args: E.g. no_hydration, no_discard_passdown 153662306a36Sopenharmony_ci * 153762306a36Sopenharmony_ci * #core arguments: An even number of core arguments 153862306a36Sopenharmony_ci * core arguments: Key/value pairs for tuning the core 153962306a36Sopenharmony_ci * E.g. 'hydration_threshold 256' 154062306a36Sopenharmony_ci */ 154162306a36Sopenharmony_cistatic int parse_feature_args(struct dm_arg_set *as, struct clone *clone) 154262306a36Sopenharmony_ci{ 154362306a36Sopenharmony_ci int r; 154462306a36Sopenharmony_ci unsigned int argc; 154562306a36Sopenharmony_ci const char *arg_name; 154662306a36Sopenharmony_ci struct dm_target *ti = clone->ti; 154762306a36Sopenharmony_ci 154862306a36Sopenharmony_ci const struct dm_arg args = { 154962306a36Sopenharmony_ci .min = 0, 155062306a36Sopenharmony_ci .max = 2, 155162306a36Sopenharmony_ci .error = "Invalid number of feature arguments" 155262306a36Sopenharmony_ci }; 155362306a36Sopenharmony_ci 155462306a36Sopenharmony_ci /* No feature arguments supplied */ 155562306a36Sopenharmony_ci if (!as->argc) 155662306a36Sopenharmony_ci return 0; 155762306a36Sopenharmony_ci 155862306a36Sopenharmony_ci r = dm_read_arg_group(&args, as, &argc, &ti->error); 155962306a36Sopenharmony_ci if (r) 156062306a36Sopenharmony_ci return r; 156162306a36Sopenharmony_ci 156262306a36Sopenharmony_ci while (argc) { 156362306a36Sopenharmony_ci arg_name = dm_shift_arg(as); 156462306a36Sopenharmony_ci argc--; 156562306a36Sopenharmony_ci 156662306a36Sopenharmony_ci if (!strcasecmp(arg_name, "no_hydration")) { 156762306a36Sopenharmony_ci __clear_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags); 156862306a36Sopenharmony_ci } else if (!strcasecmp(arg_name, "no_discard_passdown")) { 156962306a36Sopenharmony_ci __clear_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags); 157062306a36Sopenharmony_ci } else { 157162306a36Sopenharmony_ci ti->error = "Invalid feature argument"; 157262306a36Sopenharmony_ci return 
-EINVAL; 157362306a36Sopenharmony_ci } 157462306a36Sopenharmony_ci } 157562306a36Sopenharmony_ci 157662306a36Sopenharmony_ci return 0; 157762306a36Sopenharmony_ci} 157862306a36Sopenharmony_ci 157962306a36Sopenharmony_cistatic int parse_core_args(struct dm_arg_set *as, struct clone *clone) 158062306a36Sopenharmony_ci{ 158162306a36Sopenharmony_ci int r; 158262306a36Sopenharmony_ci unsigned int argc; 158362306a36Sopenharmony_ci unsigned int value; 158462306a36Sopenharmony_ci const char *arg_name; 158562306a36Sopenharmony_ci struct dm_target *ti = clone->ti; 158662306a36Sopenharmony_ci 158762306a36Sopenharmony_ci const struct dm_arg args = { 158862306a36Sopenharmony_ci .min = 0, 158962306a36Sopenharmony_ci .max = 4, 159062306a36Sopenharmony_ci .error = "Invalid number of core arguments" 159162306a36Sopenharmony_ci }; 159262306a36Sopenharmony_ci 159362306a36Sopenharmony_ci /* Initialize core arguments */ 159462306a36Sopenharmony_ci clone->hydration_batch_size = DEFAULT_HYDRATION_BATCH_SIZE; 159562306a36Sopenharmony_ci clone->hydration_threshold = DEFAULT_HYDRATION_THRESHOLD; 159662306a36Sopenharmony_ci 159762306a36Sopenharmony_ci /* No core arguments supplied */ 159862306a36Sopenharmony_ci if (!as->argc) 159962306a36Sopenharmony_ci return 0; 160062306a36Sopenharmony_ci 160162306a36Sopenharmony_ci r = dm_read_arg_group(&args, as, &argc, &ti->error); 160262306a36Sopenharmony_ci if (r) 160362306a36Sopenharmony_ci return r; 160462306a36Sopenharmony_ci 160562306a36Sopenharmony_ci if (argc & 1) { 160662306a36Sopenharmony_ci ti->error = "Number of core arguments must be even"; 160762306a36Sopenharmony_ci return -EINVAL; 160862306a36Sopenharmony_ci } 160962306a36Sopenharmony_ci 161062306a36Sopenharmony_ci while (argc) { 161162306a36Sopenharmony_ci arg_name = dm_shift_arg(as); 161262306a36Sopenharmony_ci argc -= 2; 161362306a36Sopenharmony_ci 161462306a36Sopenharmony_ci if (!strcasecmp(arg_name, "hydration_threshold")) { 161562306a36Sopenharmony_ci if 
(kstrtouint(dm_shift_arg(as), 10, &value)) { 161662306a36Sopenharmony_ci ti->error = "Invalid value for argument `hydration_threshold'"; 161762306a36Sopenharmony_ci return -EINVAL; 161862306a36Sopenharmony_ci } 161962306a36Sopenharmony_ci clone->hydration_threshold = value; 162062306a36Sopenharmony_ci } else if (!strcasecmp(arg_name, "hydration_batch_size")) { 162162306a36Sopenharmony_ci if (kstrtouint(dm_shift_arg(as), 10, &value)) { 162262306a36Sopenharmony_ci ti->error = "Invalid value for argument `hydration_batch_size'"; 162362306a36Sopenharmony_ci return -EINVAL; 162462306a36Sopenharmony_ci } 162562306a36Sopenharmony_ci clone->hydration_batch_size = value; 162662306a36Sopenharmony_ci } else { 162762306a36Sopenharmony_ci ti->error = "Invalid core argument"; 162862306a36Sopenharmony_ci return -EINVAL; 162962306a36Sopenharmony_ci } 163062306a36Sopenharmony_ci } 163162306a36Sopenharmony_ci 163262306a36Sopenharmony_ci return 0; 163362306a36Sopenharmony_ci} 163462306a36Sopenharmony_ci 163562306a36Sopenharmony_cistatic int parse_region_size(struct clone *clone, struct dm_arg_set *as, char **error) 163662306a36Sopenharmony_ci{ 163762306a36Sopenharmony_ci int r; 163862306a36Sopenharmony_ci unsigned int region_size; 163962306a36Sopenharmony_ci struct dm_arg arg; 164062306a36Sopenharmony_ci 164162306a36Sopenharmony_ci arg.min = MIN_REGION_SIZE; 164262306a36Sopenharmony_ci arg.max = MAX_REGION_SIZE; 164362306a36Sopenharmony_ci arg.error = "Invalid region size"; 164462306a36Sopenharmony_ci 164562306a36Sopenharmony_ci r = dm_read_arg(&arg, as, ®ion_size, error); 164662306a36Sopenharmony_ci if (r) 164762306a36Sopenharmony_ci return r; 164862306a36Sopenharmony_ci 164962306a36Sopenharmony_ci /* Check region size is a power of 2 */ 165062306a36Sopenharmony_ci if (!is_power_of_2(region_size)) { 165162306a36Sopenharmony_ci *error = "Region size is not a power of 2"; 165262306a36Sopenharmony_ci return -EINVAL; 165362306a36Sopenharmony_ci } 165462306a36Sopenharmony_ci 
165562306a36Sopenharmony_ci /* Validate the region size against the device logical block size */ 165662306a36Sopenharmony_ci if (region_size % (bdev_logical_block_size(clone->source_dev->bdev) >> 9) || 165762306a36Sopenharmony_ci region_size % (bdev_logical_block_size(clone->dest_dev->bdev) >> 9)) { 165862306a36Sopenharmony_ci *error = "Region size is not a multiple of device logical block size"; 165962306a36Sopenharmony_ci return -EINVAL; 166062306a36Sopenharmony_ci } 166162306a36Sopenharmony_ci 166262306a36Sopenharmony_ci clone->region_size = region_size; 166362306a36Sopenharmony_ci 166462306a36Sopenharmony_ci return 0; 166562306a36Sopenharmony_ci} 166662306a36Sopenharmony_ci 166762306a36Sopenharmony_cistatic int validate_nr_regions(unsigned long n, char **error) 166862306a36Sopenharmony_ci{ 166962306a36Sopenharmony_ci /* 167062306a36Sopenharmony_ci * dm_bitset restricts us to 2^32 regions. test_bit & co. restrict us 167162306a36Sopenharmony_ci * further to 2^31 regions. 167262306a36Sopenharmony_ci */ 167362306a36Sopenharmony_ci if (n > (1UL << 31)) { 167462306a36Sopenharmony_ci *error = "Too many regions. 
Consider increasing the region size"; 167562306a36Sopenharmony_ci return -EINVAL; 167662306a36Sopenharmony_ci } 167762306a36Sopenharmony_ci 167862306a36Sopenharmony_ci return 0; 167962306a36Sopenharmony_ci} 168062306a36Sopenharmony_ci 168162306a36Sopenharmony_cistatic int parse_metadata_dev(struct clone *clone, struct dm_arg_set *as, char **error) 168262306a36Sopenharmony_ci{ 168362306a36Sopenharmony_ci int r; 168462306a36Sopenharmony_ci sector_t metadata_dev_size; 168562306a36Sopenharmony_ci 168662306a36Sopenharmony_ci r = dm_get_device(clone->ti, dm_shift_arg(as), 168762306a36Sopenharmony_ci BLK_OPEN_READ | BLK_OPEN_WRITE, &clone->metadata_dev); 168862306a36Sopenharmony_ci if (r) { 168962306a36Sopenharmony_ci *error = "Error opening metadata device"; 169062306a36Sopenharmony_ci return r; 169162306a36Sopenharmony_ci } 169262306a36Sopenharmony_ci 169362306a36Sopenharmony_ci metadata_dev_size = get_dev_size(clone->metadata_dev); 169462306a36Sopenharmony_ci if (metadata_dev_size > DM_CLONE_METADATA_MAX_SECTORS_WARNING) 169562306a36Sopenharmony_ci DMWARN("Metadata device %pg is larger than %u sectors: excess space will not be used.", 169662306a36Sopenharmony_ci clone->metadata_dev->bdev, DM_CLONE_METADATA_MAX_SECTORS); 169762306a36Sopenharmony_ci 169862306a36Sopenharmony_ci return 0; 169962306a36Sopenharmony_ci} 170062306a36Sopenharmony_ci 170162306a36Sopenharmony_cistatic int parse_dest_dev(struct clone *clone, struct dm_arg_set *as, char **error) 170262306a36Sopenharmony_ci{ 170362306a36Sopenharmony_ci int r; 170462306a36Sopenharmony_ci sector_t dest_dev_size; 170562306a36Sopenharmony_ci 170662306a36Sopenharmony_ci r = dm_get_device(clone->ti, dm_shift_arg(as), 170762306a36Sopenharmony_ci BLK_OPEN_READ | BLK_OPEN_WRITE, &clone->dest_dev); 170862306a36Sopenharmony_ci if (r) { 170962306a36Sopenharmony_ci *error = "Error opening destination device"; 171062306a36Sopenharmony_ci return r; 171162306a36Sopenharmony_ci } 171262306a36Sopenharmony_ci 
171362306a36Sopenharmony_ci dest_dev_size = get_dev_size(clone->dest_dev); 171462306a36Sopenharmony_ci if (dest_dev_size < clone->ti->len) { 171562306a36Sopenharmony_ci dm_put_device(clone->ti, clone->dest_dev); 171662306a36Sopenharmony_ci *error = "Device size larger than destination device"; 171762306a36Sopenharmony_ci return -EINVAL; 171862306a36Sopenharmony_ci } 171962306a36Sopenharmony_ci 172062306a36Sopenharmony_ci return 0; 172162306a36Sopenharmony_ci} 172262306a36Sopenharmony_ci 172362306a36Sopenharmony_cistatic int parse_source_dev(struct clone *clone, struct dm_arg_set *as, char **error) 172462306a36Sopenharmony_ci{ 172562306a36Sopenharmony_ci int r; 172662306a36Sopenharmony_ci sector_t source_dev_size; 172762306a36Sopenharmony_ci 172862306a36Sopenharmony_ci r = dm_get_device(clone->ti, dm_shift_arg(as), BLK_OPEN_READ, 172962306a36Sopenharmony_ci &clone->source_dev); 173062306a36Sopenharmony_ci if (r) { 173162306a36Sopenharmony_ci *error = "Error opening source device"; 173262306a36Sopenharmony_ci return r; 173362306a36Sopenharmony_ci } 173462306a36Sopenharmony_ci 173562306a36Sopenharmony_ci source_dev_size = get_dev_size(clone->source_dev); 173662306a36Sopenharmony_ci if (source_dev_size < clone->ti->len) { 173762306a36Sopenharmony_ci dm_put_device(clone->ti, clone->source_dev); 173862306a36Sopenharmony_ci *error = "Device size larger than source device"; 173962306a36Sopenharmony_ci return -EINVAL; 174062306a36Sopenharmony_ci } 174162306a36Sopenharmony_ci 174262306a36Sopenharmony_ci return 0; 174362306a36Sopenharmony_ci} 174462306a36Sopenharmony_ci 174562306a36Sopenharmony_cistatic int copy_ctr_args(struct clone *clone, int argc, const char **argv, char **error) 174662306a36Sopenharmony_ci{ 174762306a36Sopenharmony_ci unsigned int i; 174862306a36Sopenharmony_ci const char **copy; 174962306a36Sopenharmony_ci 175062306a36Sopenharmony_ci copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL); 175162306a36Sopenharmony_ci if (!copy) 175262306a36Sopenharmony_ci goto 
error; 175362306a36Sopenharmony_ci 175462306a36Sopenharmony_ci for (i = 0; i < argc; i++) { 175562306a36Sopenharmony_ci copy[i] = kstrdup(argv[i], GFP_KERNEL); 175662306a36Sopenharmony_ci 175762306a36Sopenharmony_ci if (!copy[i]) { 175862306a36Sopenharmony_ci while (i--) 175962306a36Sopenharmony_ci kfree(copy[i]); 176062306a36Sopenharmony_ci kfree(copy); 176162306a36Sopenharmony_ci goto error; 176262306a36Sopenharmony_ci } 176362306a36Sopenharmony_ci } 176462306a36Sopenharmony_ci 176562306a36Sopenharmony_ci clone->nr_ctr_args = argc; 176662306a36Sopenharmony_ci clone->ctr_args = copy; 176762306a36Sopenharmony_ci return 0; 176862306a36Sopenharmony_ci 176962306a36Sopenharmony_cierror: 177062306a36Sopenharmony_ci *error = "Failed to allocate memory for table line"; 177162306a36Sopenharmony_ci return -ENOMEM; 177262306a36Sopenharmony_ci} 177362306a36Sopenharmony_ci 177462306a36Sopenharmony_cistatic int clone_ctr(struct dm_target *ti, unsigned int argc, char **argv) 177562306a36Sopenharmony_ci{ 177662306a36Sopenharmony_ci int r; 177762306a36Sopenharmony_ci sector_t nr_regions; 177862306a36Sopenharmony_ci struct clone *clone; 177962306a36Sopenharmony_ci struct dm_arg_set as; 178062306a36Sopenharmony_ci 178162306a36Sopenharmony_ci if (argc < 4) { 178262306a36Sopenharmony_ci ti->error = "Invalid number of arguments"; 178362306a36Sopenharmony_ci return -EINVAL; 178462306a36Sopenharmony_ci } 178562306a36Sopenharmony_ci 178662306a36Sopenharmony_ci as.argc = argc; 178762306a36Sopenharmony_ci as.argv = argv; 178862306a36Sopenharmony_ci 178962306a36Sopenharmony_ci clone = kzalloc(sizeof(*clone), GFP_KERNEL); 179062306a36Sopenharmony_ci if (!clone) { 179162306a36Sopenharmony_ci ti->error = "Failed to allocate clone structure"; 179262306a36Sopenharmony_ci return -ENOMEM; 179362306a36Sopenharmony_ci } 179462306a36Sopenharmony_ci 179562306a36Sopenharmony_ci clone->ti = ti; 179662306a36Sopenharmony_ci 179762306a36Sopenharmony_ci /* Initialize dm-clone flags */ 
179862306a36Sopenharmony_ci __set_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags); 179962306a36Sopenharmony_ci __set_bit(DM_CLONE_HYDRATION_SUSPENDED, &clone->flags); 180062306a36Sopenharmony_ci __set_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags); 180162306a36Sopenharmony_ci 180262306a36Sopenharmony_ci r = parse_metadata_dev(clone, &as, &ti->error); 180362306a36Sopenharmony_ci if (r) 180462306a36Sopenharmony_ci goto out_with_clone; 180562306a36Sopenharmony_ci 180662306a36Sopenharmony_ci r = parse_dest_dev(clone, &as, &ti->error); 180762306a36Sopenharmony_ci if (r) 180862306a36Sopenharmony_ci goto out_with_meta_dev; 180962306a36Sopenharmony_ci 181062306a36Sopenharmony_ci r = parse_source_dev(clone, &as, &ti->error); 181162306a36Sopenharmony_ci if (r) 181262306a36Sopenharmony_ci goto out_with_dest_dev; 181362306a36Sopenharmony_ci 181462306a36Sopenharmony_ci r = parse_region_size(clone, &as, &ti->error); 181562306a36Sopenharmony_ci if (r) 181662306a36Sopenharmony_ci goto out_with_source_dev; 181762306a36Sopenharmony_ci 181862306a36Sopenharmony_ci clone->region_shift = __ffs(clone->region_size); 181962306a36Sopenharmony_ci nr_regions = dm_sector_div_up(ti->len, clone->region_size); 182062306a36Sopenharmony_ci 182162306a36Sopenharmony_ci /* Check for overflow */ 182262306a36Sopenharmony_ci if (nr_regions != (unsigned long)nr_regions) { 182362306a36Sopenharmony_ci ti->error = "Too many regions. 
Consider increasing the region size"; 182462306a36Sopenharmony_ci r = -EOVERFLOW; 182562306a36Sopenharmony_ci goto out_with_source_dev; 182662306a36Sopenharmony_ci } 182762306a36Sopenharmony_ci 182862306a36Sopenharmony_ci clone->nr_regions = nr_regions; 182962306a36Sopenharmony_ci 183062306a36Sopenharmony_ci r = validate_nr_regions(clone->nr_regions, &ti->error); 183162306a36Sopenharmony_ci if (r) 183262306a36Sopenharmony_ci goto out_with_source_dev; 183362306a36Sopenharmony_ci 183462306a36Sopenharmony_ci r = dm_set_target_max_io_len(ti, clone->region_size); 183562306a36Sopenharmony_ci if (r) { 183662306a36Sopenharmony_ci ti->error = "Failed to set max io len"; 183762306a36Sopenharmony_ci goto out_with_source_dev; 183862306a36Sopenharmony_ci } 183962306a36Sopenharmony_ci 184062306a36Sopenharmony_ci r = parse_feature_args(&as, clone); 184162306a36Sopenharmony_ci if (r) 184262306a36Sopenharmony_ci goto out_with_source_dev; 184362306a36Sopenharmony_ci 184462306a36Sopenharmony_ci r = parse_core_args(&as, clone); 184562306a36Sopenharmony_ci if (r) 184662306a36Sopenharmony_ci goto out_with_source_dev; 184762306a36Sopenharmony_ci 184862306a36Sopenharmony_ci /* Load metadata */ 184962306a36Sopenharmony_ci clone->cmd = dm_clone_metadata_open(clone->metadata_dev->bdev, ti->len, 185062306a36Sopenharmony_ci clone->region_size); 185162306a36Sopenharmony_ci if (IS_ERR(clone->cmd)) { 185262306a36Sopenharmony_ci ti->error = "Failed to load metadata"; 185362306a36Sopenharmony_ci r = PTR_ERR(clone->cmd); 185462306a36Sopenharmony_ci goto out_with_source_dev; 185562306a36Sopenharmony_ci } 185662306a36Sopenharmony_ci 185762306a36Sopenharmony_ci __set_clone_mode(clone, CM_WRITE); 185862306a36Sopenharmony_ci 185962306a36Sopenharmony_ci if (get_clone_mode(clone) != CM_WRITE) { 186062306a36Sopenharmony_ci ti->error = "Unable to get write access to metadata, please check/repair metadata"; 186162306a36Sopenharmony_ci r = -EPERM; 186262306a36Sopenharmony_ci goto out_with_metadata; 
186362306a36Sopenharmony_ci } 186462306a36Sopenharmony_ci 186562306a36Sopenharmony_ci clone->last_commit_jiffies = jiffies; 186662306a36Sopenharmony_ci 186762306a36Sopenharmony_ci /* Allocate hydration hash table */ 186862306a36Sopenharmony_ci r = hash_table_init(clone); 186962306a36Sopenharmony_ci if (r) { 187062306a36Sopenharmony_ci ti->error = "Failed to allocate hydration hash table"; 187162306a36Sopenharmony_ci goto out_with_metadata; 187262306a36Sopenharmony_ci } 187362306a36Sopenharmony_ci 187462306a36Sopenharmony_ci atomic_set(&clone->ios_in_flight, 0); 187562306a36Sopenharmony_ci init_waitqueue_head(&clone->hydration_stopped); 187662306a36Sopenharmony_ci spin_lock_init(&clone->lock); 187762306a36Sopenharmony_ci bio_list_init(&clone->deferred_bios); 187862306a36Sopenharmony_ci bio_list_init(&clone->deferred_discard_bios); 187962306a36Sopenharmony_ci bio_list_init(&clone->deferred_flush_bios); 188062306a36Sopenharmony_ci bio_list_init(&clone->deferred_flush_completions); 188162306a36Sopenharmony_ci clone->hydration_offset = 0; 188262306a36Sopenharmony_ci atomic_set(&clone->hydrations_in_flight, 0); 188362306a36Sopenharmony_ci 188462306a36Sopenharmony_ci clone->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0); 188562306a36Sopenharmony_ci if (!clone->wq) { 188662306a36Sopenharmony_ci ti->error = "Failed to allocate workqueue"; 188762306a36Sopenharmony_ci r = -ENOMEM; 188862306a36Sopenharmony_ci goto out_with_ht; 188962306a36Sopenharmony_ci } 189062306a36Sopenharmony_ci 189162306a36Sopenharmony_ci INIT_WORK(&clone->worker, do_worker); 189262306a36Sopenharmony_ci INIT_DELAYED_WORK(&clone->waker, do_waker); 189362306a36Sopenharmony_ci 189462306a36Sopenharmony_ci clone->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); 189562306a36Sopenharmony_ci if (IS_ERR(clone->kcopyd_client)) { 189662306a36Sopenharmony_ci r = PTR_ERR(clone->kcopyd_client); 189762306a36Sopenharmony_ci goto out_with_wq; 189862306a36Sopenharmony_ci } 
189962306a36Sopenharmony_ci 190062306a36Sopenharmony_ci r = mempool_init_slab_pool(&clone->hydration_pool, MIN_HYDRATIONS, 190162306a36Sopenharmony_ci _hydration_cache); 190262306a36Sopenharmony_ci if (r) { 190362306a36Sopenharmony_ci ti->error = "Failed to create dm_clone_region_hydration memory pool"; 190462306a36Sopenharmony_ci goto out_with_kcopyd; 190562306a36Sopenharmony_ci } 190662306a36Sopenharmony_ci 190762306a36Sopenharmony_ci /* Save a copy of the table line */ 190862306a36Sopenharmony_ci r = copy_ctr_args(clone, argc - 3, (const char **)argv + 3, &ti->error); 190962306a36Sopenharmony_ci if (r) 191062306a36Sopenharmony_ci goto out_with_mempool; 191162306a36Sopenharmony_ci 191262306a36Sopenharmony_ci mutex_init(&clone->commit_lock); 191362306a36Sopenharmony_ci 191462306a36Sopenharmony_ci /* Enable flushes */ 191562306a36Sopenharmony_ci ti->num_flush_bios = 1; 191662306a36Sopenharmony_ci ti->flush_supported = true; 191762306a36Sopenharmony_ci 191862306a36Sopenharmony_ci /* Enable discards */ 191962306a36Sopenharmony_ci ti->discards_supported = true; 192062306a36Sopenharmony_ci ti->num_discard_bios = 1; 192162306a36Sopenharmony_ci 192262306a36Sopenharmony_ci ti->private = clone; 192362306a36Sopenharmony_ci 192462306a36Sopenharmony_ci return 0; 192562306a36Sopenharmony_ci 192662306a36Sopenharmony_ciout_with_mempool: 192762306a36Sopenharmony_ci mempool_exit(&clone->hydration_pool); 192862306a36Sopenharmony_ciout_with_kcopyd: 192962306a36Sopenharmony_ci dm_kcopyd_client_destroy(clone->kcopyd_client); 193062306a36Sopenharmony_ciout_with_wq: 193162306a36Sopenharmony_ci destroy_workqueue(clone->wq); 193262306a36Sopenharmony_ciout_with_ht: 193362306a36Sopenharmony_ci hash_table_exit(clone); 193462306a36Sopenharmony_ciout_with_metadata: 193562306a36Sopenharmony_ci dm_clone_metadata_close(clone->cmd); 193662306a36Sopenharmony_ciout_with_source_dev: 193762306a36Sopenharmony_ci dm_put_device(ti, clone->source_dev); 193862306a36Sopenharmony_ciout_with_dest_dev: 
193962306a36Sopenharmony_ci dm_put_device(ti, clone->dest_dev); 194062306a36Sopenharmony_ciout_with_meta_dev: 194162306a36Sopenharmony_ci dm_put_device(ti, clone->metadata_dev); 194262306a36Sopenharmony_ciout_with_clone: 194362306a36Sopenharmony_ci kfree(clone); 194462306a36Sopenharmony_ci 194562306a36Sopenharmony_ci return r; 194662306a36Sopenharmony_ci} 194762306a36Sopenharmony_ci 194862306a36Sopenharmony_cistatic void clone_dtr(struct dm_target *ti) 194962306a36Sopenharmony_ci{ 195062306a36Sopenharmony_ci unsigned int i; 195162306a36Sopenharmony_ci struct clone *clone = ti->private; 195262306a36Sopenharmony_ci 195362306a36Sopenharmony_ci mutex_destroy(&clone->commit_lock); 195462306a36Sopenharmony_ci 195562306a36Sopenharmony_ci for (i = 0; i < clone->nr_ctr_args; i++) 195662306a36Sopenharmony_ci kfree(clone->ctr_args[i]); 195762306a36Sopenharmony_ci kfree(clone->ctr_args); 195862306a36Sopenharmony_ci 195962306a36Sopenharmony_ci mempool_exit(&clone->hydration_pool); 196062306a36Sopenharmony_ci dm_kcopyd_client_destroy(clone->kcopyd_client); 196162306a36Sopenharmony_ci cancel_delayed_work_sync(&clone->waker); 196262306a36Sopenharmony_ci destroy_workqueue(clone->wq); 196362306a36Sopenharmony_ci hash_table_exit(clone); 196462306a36Sopenharmony_ci dm_clone_metadata_close(clone->cmd); 196562306a36Sopenharmony_ci dm_put_device(ti, clone->source_dev); 196662306a36Sopenharmony_ci dm_put_device(ti, clone->dest_dev); 196762306a36Sopenharmony_ci dm_put_device(ti, clone->metadata_dev); 196862306a36Sopenharmony_ci 196962306a36Sopenharmony_ci kfree(clone); 197062306a36Sopenharmony_ci} 197162306a36Sopenharmony_ci 197262306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/ 197362306a36Sopenharmony_ci 197462306a36Sopenharmony_cistatic void clone_postsuspend(struct dm_target *ti) 197562306a36Sopenharmony_ci{ 197662306a36Sopenharmony_ci struct clone *clone = ti->private; 197762306a36Sopenharmony_ci 197862306a36Sopenharmony_ci /* 
197962306a36Sopenharmony_ci * To successfully suspend the device: 198062306a36Sopenharmony_ci * 198162306a36Sopenharmony_ci * - We cancel the delayed work for periodic commits and wait for 198262306a36Sopenharmony_ci * it to finish. 198362306a36Sopenharmony_ci * 198462306a36Sopenharmony_ci * - We stop the background hydration, i.e. we prevent new region 198562306a36Sopenharmony_ci * hydrations from starting. 198662306a36Sopenharmony_ci * 198762306a36Sopenharmony_ci * - We wait for any in-flight hydrations to finish. 198862306a36Sopenharmony_ci * 198962306a36Sopenharmony_ci * - We flush the workqueue. 199062306a36Sopenharmony_ci * 199162306a36Sopenharmony_ci * - We commit the metadata. 199262306a36Sopenharmony_ci */ 199362306a36Sopenharmony_ci cancel_delayed_work_sync(&clone->waker); 199462306a36Sopenharmony_ci 199562306a36Sopenharmony_ci set_bit(DM_CLONE_HYDRATION_SUSPENDED, &clone->flags); 199662306a36Sopenharmony_ci 199762306a36Sopenharmony_ci /* 199862306a36Sopenharmony_ci * Make sure set_bit() is ordered before atomic_read(), otherwise we 199962306a36Sopenharmony_ci * might race with do_hydration() and miss some started region 200062306a36Sopenharmony_ci * hydrations. 200162306a36Sopenharmony_ci * 200262306a36Sopenharmony_ci * This is paired with smp_mb__after_atomic() in do_hydration(). 
200362306a36Sopenharmony_ci */ 200462306a36Sopenharmony_ci smp_mb__after_atomic(); 200562306a36Sopenharmony_ci 200662306a36Sopenharmony_ci wait_event(clone->hydration_stopped, !atomic_read(&clone->hydrations_in_flight)); 200762306a36Sopenharmony_ci flush_workqueue(clone->wq); 200862306a36Sopenharmony_ci 200962306a36Sopenharmony_ci (void) commit_metadata(clone, NULL); 201062306a36Sopenharmony_ci} 201162306a36Sopenharmony_ci 201262306a36Sopenharmony_cistatic void clone_resume(struct dm_target *ti) 201362306a36Sopenharmony_ci{ 201462306a36Sopenharmony_ci struct clone *clone = ti->private; 201562306a36Sopenharmony_ci 201662306a36Sopenharmony_ci clear_bit(DM_CLONE_HYDRATION_SUSPENDED, &clone->flags); 201762306a36Sopenharmony_ci do_waker(&clone->waker.work); 201862306a36Sopenharmony_ci} 201962306a36Sopenharmony_ci 202062306a36Sopenharmony_ci/* 202162306a36Sopenharmony_ci * If discard_passdown was enabled verify that the destination device supports 202262306a36Sopenharmony_ci * discards. Disable discard_passdown if not. 
202362306a36Sopenharmony_ci */ 202462306a36Sopenharmony_cistatic void disable_passdown_if_not_supported(struct clone *clone) 202562306a36Sopenharmony_ci{ 202662306a36Sopenharmony_ci struct block_device *dest_dev = clone->dest_dev->bdev; 202762306a36Sopenharmony_ci struct queue_limits *dest_limits = &bdev_get_queue(dest_dev)->limits; 202862306a36Sopenharmony_ci const char *reason = NULL; 202962306a36Sopenharmony_ci 203062306a36Sopenharmony_ci if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags)) 203162306a36Sopenharmony_ci return; 203262306a36Sopenharmony_ci 203362306a36Sopenharmony_ci if (!bdev_max_discard_sectors(dest_dev)) 203462306a36Sopenharmony_ci reason = "discard unsupported"; 203562306a36Sopenharmony_ci else if (dest_limits->max_discard_sectors < clone->region_size) 203662306a36Sopenharmony_ci reason = "max discard sectors smaller than a region"; 203762306a36Sopenharmony_ci 203862306a36Sopenharmony_ci if (reason) { 203962306a36Sopenharmony_ci DMWARN("Destination device (%pg) %s: Disabling discard passdown.", 204062306a36Sopenharmony_ci dest_dev, reason); 204162306a36Sopenharmony_ci clear_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags); 204262306a36Sopenharmony_ci } 204362306a36Sopenharmony_ci} 204462306a36Sopenharmony_ci 204562306a36Sopenharmony_cistatic void set_discard_limits(struct clone *clone, struct queue_limits *limits) 204662306a36Sopenharmony_ci{ 204762306a36Sopenharmony_ci struct block_device *dest_bdev = clone->dest_dev->bdev; 204862306a36Sopenharmony_ci struct queue_limits *dest_limits = &bdev_get_queue(dest_bdev)->limits; 204962306a36Sopenharmony_ci 205062306a36Sopenharmony_ci if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags)) { 205162306a36Sopenharmony_ci /* No passdown is done so we set our own virtual limits */ 205262306a36Sopenharmony_ci limits->discard_granularity = clone->region_size << SECTOR_SHIFT; 205362306a36Sopenharmony_ci limits->max_discard_sectors = round_down(UINT_MAX >> SECTOR_SHIFT, clone->region_size); 
205462306a36Sopenharmony_ci return; 205562306a36Sopenharmony_ci } 205662306a36Sopenharmony_ci 205762306a36Sopenharmony_ci /* 205862306a36Sopenharmony_ci * clone_iterate_devices() is stacking both the source and destination 205962306a36Sopenharmony_ci * device limits but discards aren't passed to the source device, so 206062306a36Sopenharmony_ci * inherit destination's limits. 206162306a36Sopenharmony_ci */ 206262306a36Sopenharmony_ci limits->max_discard_sectors = dest_limits->max_discard_sectors; 206362306a36Sopenharmony_ci limits->max_hw_discard_sectors = dest_limits->max_hw_discard_sectors; 206462306a36Sopenharmony_ci limits->discard_granularity = dest_limits->discard_granularity; 206562306a36Sopenharmony_ci limits->discard_alignment = dest_limits->discard_alignment; 206662306a36Sopenharmony_ci limits->discard_misaligned = dest_limits->discard_misaligned; 206762306a36Sopenharmony_ci limits->max_discard_segments = dest_limits->max_discard_segments; 206862306a36Sopenharmony_ci} 206962306a36Sopenharmony_ci 207062306a36Sopenharmony_cistatic void clone_io_hints(struct dm_target *ti, struct queue_limits *limits) 207162306a36Sopenharmony_ci{ 207262306a36Sopenharmony_ci struct clone *clone = ti->private; 207362306a36Sopenharmony_ci u64 io_opt_sectors = limits->io_opt >> SECTOR_SHIFT; 207462306a36Sopenharmony_ci 207562306a36Sopenharmony_ci /* 207662306a36Sopenharmony_ci * If the system-determined stacked limits are compatible with 207762306a36Sopenharmony_ci * dm-clone's region size (io_opt is a factor) do not override them. 
207862306a36Sopenharmony_ci */ 207962306a36Sopenharmony_ci if (io_opt_sectors < clone->region_size || 208062306a36Sopenharmony_ci do_div(io_opt_sectors, clone->region_size)) { 208162306a36Sopenharmony_ci blk_limits_io_min(limits, clone->region_size << SECTOR_SHIFT); 208262306a36Sopenharmony_ci blk_limits_io_opt(limits, clone->region_size << SECTOR_SHIFT); 208362306a36Sopenharmony_ci } 208462306a36Sopenharmony_ci 208562306a36Sopenharmony_ci disable_passdown_if_not_supported(clone); 208662306a36Sopenharmony_ci set_discard_limits(clone, limits); 208762306a36Sopenharmony_ci} 208862306a36Sopenharmony_ci 208962306a36Sopenharmony_cistatic int clone_iterate_devices(struct dm_target *ti, 209062306a36Sopenharmony_ci iterate_devices_callout_fn fn, void *data) 209162306a36Sopenharmony_ci{ 209262306a36Sopenharmony_ci int ret; 209362306a36Sopenharmony_ci struct clone *clone = ti->private; 209462306a36Sopenharmony_ci struct dm_dev *dest_dev = clone->dest_dev; 209562306a36Sopenharmony_ci struct dm_dev *source_dev = clone->source_dev; 209662306a36Sopenharmony_ci 209762306a36Sopenharmony_ci ret = fn(ti, source_dev, 0, ti->len, data); 209862306a36Sopenharmony_ci if (!ret) 209962306a36Sopenharmony_ci ret = fn(ti, dest_dev, 0, ti->len, data); 210062306a36Sopenharmony_ci return ret; 210162306a36Sopenharmony_ci} 210262306a36Sopenharmony_ci 210362306a36Sopenharmony_ci/* 210462306a36Sopenharmony_ci * dm-clone message functions. 210562306a36Sopenharmony_ci */ 210662306a36Sopenharmony_cistatic void set_hydration_threshold(struct clone *clone, unsigned int nr_regions) 210762306a36Sopenharmony_ci{ 210862306a36Sopenharmony_ci WRITE_ONCE(clone->hydration_threshold, nr_regions); 210962306a36Sopenharmony_ci 211062306a36Sopenharmony_ci /* 211162306a36Sopenharmony_ci * If user space sets hydration_threshold to zero then the hydration 211262306a36Sopenharmony_ci * will stop. 
If at a later time the hydration_threshold is increased 211362306a36Sopenharmony_ci * we must restart the hydration process by waking up the worker. 211462306a36Sopenharmony_ci */ 211562306a36Sopenharmony_ci wake_worker(clone); 211662306a36Sopenharmony_ci} 211762306a36Sopenharmony_ci 211862306a36Sopenharmony_cistatic void set_hydration_batch_size(struct clone *clone, unsigned int nr_regions) 211962306a36Sopenharmony_ci{ 212062306a36Sopenharmony_ci WRITE_ONCE(clone->hydration_batch_size, nr_regions); 212162306a36Sopenharmony_ci} 212262306a36Sopenharmony_ci 212362306a36Sopenharmony_cistatic void enable_hydration(struct clone *clone) 212462306a36Sopenharmony_ci{ 212562306a36Sopenharmony_ci if (!test_and_set_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags)) 212662306a36Sopenharmony_ci wake_worker(clone); 212762306a36Sopenharmony_ci} 212862306a36Sopenharmony_ci 212962306a36Sopenharmony_cistatic void disable_hydration(struct clone *clone) 213062306a36Sopenharmony_ci{ 213162306a36Sopenharmony_ci clear_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags); 213262306a36Sopenharmony_ci} 213362306a36Sopenharmony_ci 213462306a36Sopenharmony_cistatic int clone_message(struct dm_target *ti, unsigned int argc, char **argv, 213562306a36Sopenharmony_ci char *result, unsigned int maxlen) 213662306a36Sopenharmony_ci{ 213762306a36Sopenharmony_ci struct clone *clone = ti->private; 213862306a36Sopenharmony_ci unsigned int value; 213962306a36Sopenharmony_ci 214062306a36Sopenharmony_ci if (!argc) 214162306a36Sopenharmony_ci return -EINVAL; 214262306a36Sopenharmony_ci 214362306a36Sopenharmony_ci if (!strcasecmp(argv[0], "enable_hydration")) { 214462306a36Sopenharmony_ci enable_hydration(clone); 214562306a36Sopenharmony_ci return 0; 214662306a36Sopenharmony_ci } 214762306a36Sopenharmony_ci 214862306a36Sopenharmony_ci if (!strcasecmp(argv[0], "disable_hydration")) { 214962306a36Sopenharmony_ci disable_hydration(clone); 215062306a36Sopenharmony_ci return 0; 215162306a36Sopenharmony_ci } 
215262306a36Sopenharmony_ci 215362306a36Sopenharmony_ci if (argc != 2) 215462306a36Sopenharmony_ci return -EINVAL; 215562306a36Sopenharmony_ci 215662306a36Sopenharmony_ci if (!strcasecmp(argv[0], "hydration_threshold")) { 215762306a36Sopenharmony_ci if (kstrtouint(argv[1], 10, &value)) 215862306a36Sopenharmony_ci return -EINVAL; 215962306a36Sopenharmony_ci 216062306a36Sopenharmony_ci set_hydration_threshold(clone, value); 216162306a36Sopenharmony_ci 216262306a36Sopenharmony_ci return 0; 216362306a36Sopenharmony_ci } 216462306a36Sopenharmony_ci 216562306a36Sopenharmony_ci if (!strcasecmp(argv[0], "hydration_batch_size")) { 216662306a36Sopenharmony_ci if (kstrtouint(argv[1], 10, &value)) 216762306a36Sopenharmony_ci return -EINVAL; 216862306a36Sopenharmony_ci 216962306a36Sopenharmony_ci set_hydration_batch_size(clone, value); 217062306a36Sopenharmony_ci 217162306a36Sopenharmony_ci return 0; 217262306a36Sopenharmony_ci } 217362306a36Sopenharmony_ci 217462306a36Sopenharmony_ci DMERR("%s: Unsupported message `%s'", clone_device_name(clone), argv[0]); 217562306a36Sopenharmony_ci return -EINVAL; 217662306a36Sopenharmony_ci} 217762306a36Sopenharmony_ci 217862306a36Sopenharmony_cistatic struct target_type clone_target = { 217962306a36Sopenharmony_ci .name = "clone", 218062306a36Sopenharmony_ci .version = {1, 0, 0}, 218162306a36Sopenharmony_ci .module = THIS_MODULE, 218262306a36Sopenharmony_ci .ctr = clone_ctr, 218362306a36Sopenharmony_ci .dtr = clone_dtr, 218462306a36Sopenharmony_ci .map = clone_map, 218562306a36Sopenharmony_ci .end_io = clone_endio, 218662306a36Sopenharmony_ci .postsuspend = clone_postsuspend, 218762306a36Sopenharmony_ci .resume = clone_resume, 218862306a36Sopenharmony_ci .status = clone_status, 218962306a36Sopenharmony_ci .message = clone_message, 219062306a36Sopenharmony_ci .io_hints = clone_io_hints, 219162306a36Sopenharmony_ci .iterate_devices = clone_iterate_devices, 219262306a36Sopenharmony_ci}; 219362306a36Sopenharmony_ci 
219462306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/ 219562306a36Sopenharmony_ci 219662306a36Sopenharmony_ci/* Module functions */ 219762306a36Sopenharmony_cistatic int __init dm_clone_init(void) 219862306a36Sopenharmony_ci{ 219962306a36Sopenharmony_ci int r; 220062306a36Sopenharmony_ci 220162306a36Sopenharmony_ci _hydration_cache = KMEM_CACHE(dm_clone_region_hydration, 0); 220262306a36Sopenharmony_ci if (!_hydration_cache) 220362306a36Sopenharmony_ci return -ENOMEM; 220462306a36Sopenharmony_ci 220562306a36Sopenharmony_ci r = dm_register_target(&clone_target); 220662306a36Sopenharmony_ci if (r < 0) { 220762306a36Sopenharmony_ci kmem_cache_destroy(_hydration_cache); 220862306a36Sopenharmony_ci return r; 220962306a36Sopenharmony_ci } 221062306a36Sopenharmony_ci 221162306a36Sopenharmony_ci return 0; 221262306a36Sopenharmony_ci} 221362306a36Sopenharmony_ci 221462306a36Sopenharmony_cistatic void __exit dm_clone_exit(void) 221562306a36Sopenharmony_ci{ 221662306a36Sopenharmony_ci dm_unregister_target(&clone_target); 221762306a36Sopenharmony_ci 221862306a36Sopenharmony_ci kmem_cache_destroy(_hydration_cache); 221962306a36Sopenharmony_ci _hydration_cache = NULL; 222062306a36Sopenharmony_ci} 222162306a36Sopenharmony_ci 222262306a36Sopenharmony_ci/* Module hooks */ 222362306a36Sopenharmony_cimodule_init(dm_clone_init); 222462306a36Sopenharmony_cimodule_exit(dm_clone_exit); 222562306a36Sopenharmony_ci 222662306a36Sopenharmony_ciMODULE_DESCRIPTION(DM_NAME " clone target"); 222762306a36Sopenharmony_ciMODULE_AUTHOR("Nikos Tsironis <ntsironis@arrikto.com>"); 222862306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 2229