18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved.
48c2ecf20Sopenharmony_ci */
58c2ecf20Sopenharmony_ci
68c2ecf20Sopenharmony_ci#include <linux/mm.h>
78c2ecf20Sopenharmony_ci#include <linux/bio.h>
88c2ecf20Sopenharmony_ci#include <linux/err.h>
98c2ecf20Sopenharmony_ci#include <linux/hash.h>
108c2ecf20Sopenharmony_ci#include <linux/list.h>
118c2ecf20Sopenharmony_ci#include <linux/log2.h>
128c2ecf20Sopenharmony_ci#include <linux/init.h>
138c2ecf20Sopenharmony_ci#include <linux/slab.h>
148c2ecf20Sopenharmony_ci#include <linux/wait.h>
158c2ecf20Sopenharmony_ci#include <linux/dm-io.h>
168c2ecf20Sopenharmony_ci#include <linux/mutex.h>
178c2ecf20Sopenharmony_ci#include <linux/atomic.h>
188c2ecf20Sopenharmony_ci#include <linux/bitops.h>
198c2ecf20Sopenharmony_ci#include <linux/blkdev.h>
208c2ecf20Sopenharmony_ci#include <linux/kdev_t.h>
218c2ecf20Sopenharmony_ci#include <linux/kernel.h>
228c2ecf20Sopenharmony_ci#include <linux/module.h>
238c2ecf20Sopenharmony_ci#include <linux/jiffies.h>
248c2ecf20Sopenharmony_ci#include <linux/mempool.h>
258c2ecf20Sopenharmony_ci#include <linux/spinlock.h>
268c2ecf20Sopenharmony_ci#include <linux/blk_types.h>
278c2ecf20Sopenharmony_ci#include <linux/dm-kcopyd.h>
288c2ecf20Sopenharmony_ci#include <linux/workqueue.h>
298c2ecf20Sopenharmony_ci#include <linux/backing-dev.h>
308c2ecf20Sopenharmony_ci#include <linux/device-mapper.h>
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci#include "dm.h"
338c2ecf20Sopenharmony_ci#include "dm-clone-metadata.h"
348c2ecf20Sopenharmony_ci
358c2ecf20Sopenharmony_ci#define DM_MSG_PREFIX "clone"
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_ci/*
388c2ecf20Sopenharmony_ci * Minimum and maximum allowed region sizes
398c2ecf20Sopenharmony_ci */
408c2ecf20Sopenharmony_ci#define MIN_REGION_SIZE (1 << 3)  /* 4KB */
418c2ecf20Sopenharmony_ci#define MAX_REGION_SIZE (1 << 21) /* 1GB */
428c2ecf20Sopenharmony_ci
438c2ecf20Sopenharmony_ci#define MIN_HYDRATIONS 256 /* Size of hydration mempool */
448c2ecf20Sopenharmony_ci#define DEFAULT_HYDRATION_THRESHOLD 1 /* 1 region */
458c2ecf20Sopenharmony_ci#define DEFAULT_HYDRATION_BATCH_SIZE 1 /* Hydrate in batches of 1 region */
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ci#define COMMIT_PERIOD HZ /* 1 sec */
488c2ecf20Sopenharmony_ci
498c2ecf20Sopenharmony_ci/*
508c2ecf20Sopenharmony_ci * Hydration hash table size: 1 << HASH_TABLE_BITS
518c2ecf20Sopenharmony_ci */
528c2ecf20Sopenharmony_ci#define HASH_TABLE_BITS 15
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ciDECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(clone_hydration_throttle,
558c2ecf20Sopenharmony_ci	"A percentage of time allocated for hydrating regions");
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_ci/* Slab cache for struct dm_clone_region_hydration */
588c2ecf20Sopenharmony_cistatic struct kmem_cache *_hydration_cache;
598c2ecf20Sopenharmony_ci
/*
 * Operating modes of the dm-clone metadata.
 *
 * The numeric order is significant: code in this file tests
 * "mode >= CM_READ_ONLY" to mean "metadata can no longer be committed", and
 * __set_clone_mode() uses the value as an index into its description table.
 */
enum clone_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
	CM_FAIL,		/* all metadata I/O fails */
};
668c2ecf20Sopenharmony_ci
678c2ecf20Sopenharmony_cistruct hash_table_bucket;
688c2ecf20Sopenharmony_ci
698c2ecf20Sopenharmony_cistruct clone {
708c2ecf20Sopenharmony_ci	struct dm_target *ti;
718c2ecf20Sopenharmony_ci
728c2ecf20Sopenharmony_ci	struct dm_dev *metadata_dev;
738c2ecf20Sopenharmony_ci	struct dm_dev *dest_dev;
748c2ecf20Sopenharmony_ci	struct dm_dev *source_dev;
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci	unsigned long nr_regions;
778c2ecf20Sopenharmony_ci	sector_t region_size;
788c2ecf20Sopenharmony_ci	unsigned int region_shift;
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci	/*
818c2ecf20Sopenharmony_ci	 * A metadata commit and the actions taken in case it fails should run
828c2ecf20Sopenharmony_ci	 * as a single atomic step.
838c2ecf20Sopenharmony_ci	 */
848c2ecf20Sopenharmony_ci	struct mutex commit_lock;
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_ci	struct dm_clone_metadata *cmd;
878c2ecf20Sopenharmony_ci
888c2ecf20Sopenharmony_ci	/*
898c2ecf20Sopenharmony_ci	 * bio used to flush the destination device, before committing the
908c2ecf20Sopenharmony_ci	 * metadata.
918c2ecf20Sopenharmony_ci	 */
928c2ecf20Sopenharmony_ci	struct bio flush_bio;
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci	/* Region hydration hash table */
958c2ecf20Sopenharmony_ci	struct hash_table_bucket *ht;
968c2ecf20Sopenharmony_ci
978c2ecf20Sopenharmony_ci	atomic_t ios_in_flight;
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ci	wait_queue_head_t hydration_stopped;
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_ci	mempool_t hydration_pool;
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_ci	unsigned long last_commit_jiffies;
1048c2ecf20Sopenharmony_ci
1058c2ecf20Sopenharmony_ci	/*
1068c2ecf20Sopenharmony_ci	 * We defer incoming WRITE bios for regions that are not hydrated,
1078c2ecf20Sopenharmony_ci	 * until after these regions have been hydrated.
1088c2ecf20Sopenharmony_ci	 *
1098c2ecf20Sopenharmony_ci	 * Also, we defer REQ_FUA and REQ_PREFLUSH bios, until after the
1108c2ecf20Sopenharmony_ci	 * metadata have been committed.
1118c2ecf20Sopenharmony_ci	 */
1128c2ecf20Sopenharmony_ci	spinlock_t lock;
1138c2ecf20Sopenharmony_ci	struct bio_list deferred_bios;
1148c2ecf20Sopenharmony_ci	struct bio_list deferred_discard_bios;
1158c2ecf20Sopenharmony_ci	struct bio_list deferred_flush_bios;
1168c2ecf20Sopenharmony_ci	struct bio_list deferred_flush_completions;
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_ci	/* Maximum number of regions being copied during background hydration. */
1198c2ecf20Sopenharmony_ci	unsigned int hydration_threshold;
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_ci	/* Number of regions to batch together during background hydration. */
1228c2ecf20Sopenharmony_ci	unsigned int hydration_batch_size;
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci	/* Which region to hydrate next */
1258c2ecf20Sopenharmony_ci	unsigned long hydration_offset;
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci	atomic_t hydrations_in_flight;
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci	/*
1308c2ecf20Sopenharmony_ci	 * Save a copy of the table line rather than reconstructing it for the
1318c2ecf20Sopenharmony_ci	 * status.
1328c2ecf20Sopenharmony_ci	 */
1338c2ecf20Sopenharmony_ci	unsigned int nr_ctr_args;
1348c2ecf20Sopenharmony_ci	const char **ctr_args;
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci	struct workqueue_struct *wq;
1378c2ecf20Sopenharmony_ci	struct work_struct worker;
1388c2ecf20Sopenharmony_ci	struct delayed_work waker;
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_ci	struct dm_kcopyd_client *kcopyd_client;
1418c2ecf20Sopenharmony_ci
1428c2ecf20Sopenharmony_ci	enum clone_metadata_mode mode;
1438c2ecf20Sopenharmony_ci	unsigned long flags;
1448c2ecf20Sopenharmony_ci};
1458c2ecf20Sopenharmony_ci
/*
 * Bit numbers for clone->flags (used with the atomic bitops, e.g. test_bit()).
 */
#define DM_CLONE_DISCARD_PASSDOWN 0	/* forward discards to the destination device */
#define DM_CLONE_HYDRATION_ENABLED 1
#define DM_CLONE_HYDRATION_SUSPENDED 2
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci/*---------------------------------------------------------------------------*/
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci/*
1568c2ecf20Sopenharmony_ci * Metadata failure handling.
1578c2ecf20Sopenharmony_ci */
1588c2ecf20Sopenharmony_cistatic enum clone_metadata_mode get_clone_mode(struct clone *clone)
1598c2ecf20Sopenharmony_ci{
1608c2ecf20Sopenharmony_ci	return READ_ONCE(clone->mode);
1618c2ecf20Sopenharmony_ci}
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_cistatic const char *clone_device_name(struct clone *clone)
1648c2ecf20Sopenharmony_ci{
1658c2ecf20Sopenharmony_ci	return dm_table_device_name(clone->ti->table);
1668c2ecf20Sopenharmony_ci}
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_cistatic void __set_clone_mode(struct clone *clone, enum clone_metadata_mode new_mode)
1698c2ecf20Sopenharmony_ci{
1708c2ecf20Sopenharmony_ci	const char *descs[] = {
1718c2ecf20Sopenharmony_ci		"read-write",
1728c2ecf20Sopenharmony_ci		"read-only",
1738c2ecf20Sopenharmony_ci		"fail"
1748c2ecf20Sopenharmony_ci	};
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci	enum clone_metadata_mode old_mode = get_clone_mode(clone);
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_ci	/* Never move out of fail mode */
1798c2ecf20Sopenharmony_ci	if (old_mode == CM_FAIL)
1808c2ecf20Sopenharmony_ci		new_mode = CM_FAIL;
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_ci	switch (new_mode) {
1838c2ecf20Sopenharmony_ci	case CM_FAIL:
1848c2ecf20Sopenharmony_ci	case CM_READ_ONLY:
1858c2ecf20Sopenharmony_ci		dm_clone_metadata_set_read_only(clone->cmd);
1868c2ecf20Sopenharmony_ci		break;
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_ci	case CM_WRITE:
1898c2ecf20Sopenharmony_ci		dm_clone_metadata_set_read_write(clone->cmd);
1908c2ecf20Sopenharmony_ci		break;
1918c2ecf20Sopenharmony_ci	}
1928c2ecf20Sopenharmony_ci
1938c2ecf20Sopenharmony_ci	WRITE_ONCE(clone->mode, new_mode);
1948c2ecf20Sopenharmony_ci
1958c2ecf20Sopenharmony_ci	if (new_mode != old_mode) {
1968c2ecf20Sopenharmony_ci		dm_table_event(clone->ti->table);
1978c2ecf20Sopenharmony_ci		DMINFO("%s: Switching to %s mode", clone_device_name(clone),
1988c2ecf20Sopenharmony_ci		       descs[(int)new_mode]);
1998c2ecf20Sopenharmony_ci	}
2008c2ecf20Sopenharmony_ci}
2018c2ecf20Sopenharmony_ci
2028c2ecf20Sopenharmony_cistatic void __abort_transaction(struct clone *clone)
2038c2ecf20Sopenharmony_ci{
2048c2ecf20Sopenharmony_ci	const char *dev_name = clone_device_name(clone);
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci	if (get_clone_mode(clone) >= CM_READ_ONLY)
2078c2ecf20Sopenharmony_ci		return;
2088c2ecf20Sopenharmony_ci
2098c2ecf20Sopenharmony_ci	DMERR("%s: Aborting current metadata transaction", dev_name);
2108c2ecf20Sopenharmony_ci	if (dm_clone_metadata_abort(clone->cmd)) {
2118c2ecf20Sopenharmony_ci		DMERR("%s: Failed to abort metadata transaction", dev_name);
2128c2ecf20Sopenharmony_ci		__set_clone_mode(clone, CM_FAIL);
2138c2ecf20Sopenharmony_ci	}
2148c2ecf20Sopenharmony_ci}
2158c2ecf20Sopenharmony_ci
2168c2ecf20Sopenharmony_cistatic void __reload_in_core_bitset(struct clone *clone)
2178c2ecf20Sopenharmony_ci{
2188c2ecf20Sopenharmony_ci	const char *dev_name = clone_device_name(clone);
2198c2ecf20Sopenharmony_ci
2208c2ecf20Sopenharmony_ci	if (get_clone_mode(clone) == CM_FAIL)
2218c2ecf20Sopenharmony_ci		return;
2228c2ecf20Sopenharmony_ci
2238c2ecf20Sopenharmony_ci	/* Reload the on-disk bitset */
2248c2ecf20Sopenharmony_ci	DMINFO("%s: Reloading on-disk bitmap", dev_name);
2258c2ecf20Sopenharmony_ci	if (dm_clone_reload_in_core_bitset(clone->cmd)) {
2268c2ecf20Sopenharmony_ci		DMERR("%s: Failed to reload on-disk bitmap", dev_name);
2278c2ecf20Sopenharmony_ci		__set_clone_mode(clone, CM_FAIL);
2288c2ecf20Sopenharmony_ci	}
2298c2ecf20Sopenharmony_ci}
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_cistatic void __metadata_operation_failed(struct clone *clone, const char *op, int r)
2328c2ecf20Sopenharmony_ci{
2338c2ecf20Sopenharmony_ci	DMERR("%s: Metadata operation `%s' failed: error = %d",
2348c2ecf20Sopenharmony_ci	      clone_device_name(clone), op, r);
2358c2ecf20Sopenharmony_ci
2368c2ecf20Sopenharmony_ci	__abort_transaction(clone);
2378c2ecf20Sopenharmony_ci	__set_clone_mode(clone, CM_READ_ONLY);
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci	/*
2408c2ecf20Sopenharmony_ci	 * dm_clone_reload_in_core_bitset() may run concurrently with either
2418c2ecf20Sopenharmony_ci	 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), but
2428c2ecf20Sopenharmony_ci	 * it's safe as we have already set the metadata to read-only mode.
2438c2ecf20Sopenharmony_ci	 */
2448c2ecf20Sopenharmony_ci	__reload_in_core_bitset(clone);
2458c2ecf20Sopenharmony_ci}
2468c2ecf20Sopenharmony_ci
2478c2ecf20Sopenharmony_ci/*---------------------------------------------------------------------------*/
2488c2ecf20Sopenharmony_ci
2498c2ecf20Sopenharmony_ci/* Wake up anyone waiting for region hydrations to stop */
2508c2ecf20Sopenharmony_cistatic inline void wakeup_hydration_waiters(struct clone *clone)
2518c2ecf20Sopenharmony_ci{
2528c2ecf20Sopenharmony_ci	wake_up_all(&clone->hydration_stopped);
2538c2ecf20Sopenharmony_ci}
2548c2ecf20Sopenharmony_ci
2558c2ecf20Sopenharmony_cistatic inline void wake_worker(struct clone *clone)
2568c2ecf20Sopenharmony_ci{
2578c2ecf20Sopenharmony_ci	queue_work(clone->wq, &clone->worker);
2588c2ecf20Sopenharmony_ci}
2598c2ecf20Sopenharmony_ci
2608c2ecf20Sopenharmony_ci/*---------------------------------------------------------------------------*/
2618c2ecf20Sopenharmony_ci
2628c2ecf20Sopenharmony_ci/*
2638c2ecf20Sopenharmony_ci * bio helper functions.
2648c2ecf20Sopenharmony_ci */
2658c2ecf20Sopenharmony_cistatic inline void remap_to_source(struct clone *clone, struct bio *bio)
2668c2ecf20Sopenharmony_ci{
2678c2ecf20Sopenharmony_ci	bio_set_dev(bio, clone->source_dev->bdev);
2688c2ecf20Sopenharmony_ci}
2698c2ecf20Sopenharmony_ci
2708c2ecf20Sopenharmony_cistatic inline void remap_to_dest(struct clone *clone, struct bio *bio)
2718c2ecf20Sopenharmony_ci{
2728c2ecf20Sopenharmony_ci	bio_set_dev(bio, clone->dest_dev->bdev);
2738c2ecf20Sopenharmony_ci}
2748c2ecf20Sopenharmony_ci
2758c2ecf20Sopenharmony_cistatic bool bio_triggers_commit(struct clone *clone, struct bio *bio)
2768c2ecf20Sopenharmony_ci{
2778c2ecf20Sopenharmony_ci	return op_is_flush(bio->bi_opf) &&
2788c2ecf20Sopenharmony_ci		dm_clone_changed_this_transaction(clone->cmd);
2798c2ecf20Sopenharmony_ci}
2808c2ecf20Sopenharmony_ci
2818c2ecf20Sopenharmony_ci/* Get the address of the region in sectors */
2828c2ecf20Sopenharmony_cistatic inline sector_t region_to_sector(struct clone *clone, unsigned long region_nr)
2838c2ecf20Sopenharmony_ci{
2848c2ecf20Sopenharmony_ci	return ((sector_t)region_nr << clone->region_shift);
2858c2ecf20Sopenharmony_ci}
2868c2ecf20Sopenharmony_ci
2878c2ecf20Sopenharmony_ci/* Get the region number of the bio */
2888c2ecf20Sopenharmony_cistatic inline unsigned long bio_to_region(struct clone *clone, struct bio *bio)
2898c2ecf20Sopenharmony_ci{
2908c2ecf20Sopenharmony_ci	return (bio->bi_iter.bi_sector >> clone->region_shift);
2918c2ecf20Sopenharmony_ci}
2928c2ecf20Sopenharmony_ci
2938c2ecf20Sopenharmony_ci/* Get the region range covered by the bio */
2948c2ecf20Sopenharmony_cistatic void bio_region_range(struct clone *clone, struct bio *bio,
2958c2ecf20Sopenharmony_ci			     unsigned long *rs, unsigned long *nr_regions)
2968c2ecf20Sopenharmony_ci{
2978c2ecf20Sopenharmony_ci	unsigned long end;
2988c2ecf20Sopenharmony_ci
2998c2ecf20Sopenharmony_ci	*rs = dm_sector_div_up(bio->bi_iter.bi_sector, clone->region_size);
3008c2ecf20Sopenharmony_ci	end = bio_end_sector(bio) >> clone->region_shift;
3018c2ecf20Sopenharmony_ci
3028c2ecf20Sopenharmony_ci	if (*rs >= end)
3038c2ecf20Sopenharmony_ci		*nr_regions = 0;
3048c2ecf20Sopenharmony_ci	else
3058c2ecf20Sopenharmony_ci		*nr_regions = end - *rs;
3068c2ecf20Sopenharmony_ci}
3078c2ecf20Sopenharmony_ci
3088c2ecf20Sopenharmony_ci/* Check whether a bio overwrites a region */
3098c2ecf20Sopenharmony_cistatic inline bool is_overwrite_bio(struct clone *clone, struct bio *bio)
3108c2ecf20Sopenharmony_ci{
3118c2ecf20Sopenharmony_ci	return (bio_data_dir(bio) == WRITE && bio_sectors(bio) == clone->region_size);
3128c2ecf20Sopenharmony_ci}
3138c2ecf20Sopenharmony_ci
3148c2ecf20Sopenharmony_cistatic void fail_bios(struct bio_list *bios, blk_status_t status)
3158c2ecf20Sopenharmony_ci{
3168c2ecf20Sopenharmony_ci	struct bio *bio;
3178c2ecf20Sopenharmony_ci
3188c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(bios))) {
3198c2ecf20Sopenharmony_ci		bio->bi_status = status;
3208c2ecf20Sopenharmony_ci		bio_endio(bio);
3218c2ecf20Sopenharmony_ci	}
3228c2ecf20Sopenharmony_ci}
3238c2ecf20Sopenharmony_ci
3248c2ecf20Sopenharmony_cistatic void submit_bios(struct bio_list *bios)
3258c2ecf20Sopenharmony_ci{
3268c2ecf20Sopenharmony_ci	struct bio *bio;
3278c2ecf20Sopenharmony_ci	struct blk_plug plug;
3288c2ecf20Sopenharmony_ci
3298c2ecf20Sopenharmony_ci	blk_start_plug(&plug);
3308c2ecf20Sopenharmony_ci
3318c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(bios)))
3328c2ecf20Sopenharmony_ci		submit_bio_noacct(bio);
3338c2ecf20Sopenharmony_ci
3348c2ecf20Sopenharmony_ci	blk_finish_plug(&plug);
3358c2ecf20Sopenharmony_ci}
3368c2ecf20Sopenharmony_ci
3378c2ecf20Sopenharmony_ci/*
3388c2ecf20Sopenharmony_ci * Submit bio to the underlying device.
3398c2ecf20Sopenharmony_ci *
3408c2ecf20Sopenharmony_ci * If the bio triggers a commit, delay it, until after the metadata have been
3418c2ecf20Sopenharmony_ci * committed.
3428c2ecf20Sopenharmony_ci *
3438c2ecf20Sopenharmony_ci * NOTE: The bio remapping must be performed by the caller.
3448c2ecf20Sopenharmony_ci */
3458c2ecf20Sopenharmony_cistatic void issue_bio(struct clone *clone, struct bio *bio)
3468c2ecf20Sopenharmony_ci{
3478c2ecf20Sopenharmony_ci	if (!bio_triggers_commit(clone, bio)) {
3488c2ecf20Sopenharmony_ci		submit_bio_noacct(bio);
3498c2ecf20Sopenharmony_ci		return;
3508c2ecf20Sopenharmony_ci	}
3518c2ecf20Sopenharmony_ci
3528c2ecf20Sopenharmony_ci	/*
3538c2ecf20Sopenharmony_ci	 * If the metadata mode is RO or FAIL we won't be able to commit the
3548c2ecf20Sopenharmony_ci	 * metadata, so we complete the bio with an error.
3558c2ecf20Sopenharmony_ci	 */
3568c2ecf20Sopenharmony_ci	if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) {
3578c2ecf20Sopenharmony_ci		bio_io_error(bio);
3588c2ecf20Sopenharmony_ci		return;
3598c2ecf20Sopenharmony_ci	}
3608c2ecf20Sopenharmony_ci
3618c2ecf20Sopenharmony_ci	/*
3628c2ecf20Sopenharmony_ci	 * Batch together any bios that trigger commits and then issue a single
3638c2ecf20Sopenharmony_ci	 * commit for them in process_deferred_flush_bios().
3648c2ecf20Sopenharmony_ci	 */
3658c2ecf20Sopenharmony_ci	spin_lock_irq(&clone->lock);
3668c2ecf20Sopenharmony_ci	bio_list_add(&clone->deferred_flush_bios, bio);
3678c2ecf20Sopenharmony_ci	spin_unlock_irq(&clone->lock);
3688c2ecf20Sopenharmony_ci
3698c2ecf20Sopenharmony_ci	wake_worker(clone);
3708c2ecf20Sopenharmony_ci}
3718c2ecf20Sopenharmony_ci
/*
 * Point @bio at the destination device, then issue it.
 *
 * As with issue_bio(), a bio that triggers a commit is not submitted
 * immediately but delayed until the metadata have been committed.
 */
static void remap_and_issue(struct clone *clone, struct bio *bio)
{
	remap_to_dest(clone, bio);

	issue_bio(clone, bio);
}
3838c2ecf20Sopenharmony_ci
3848c2ecf20Sopenharmony_ci/*
3858c2ecf20Sopenharmony_ci * Issue bios that have been deferred until after their region has finished
3868c2ecf20Sopenharmony_ci * hydrating.
3878c2ecf20Sopenharmony_ci *
3888c2ecf20Sopenharmony_ci * We delegate the bio submission to the worker thread, so this is safe to call
3898c2ecf20Sopenharmony_ci * from interrupt context.
3908c2ecf20Sopenharmony_ci */
3918c2ecf20Sopenharmony_cistatic void issue_deferred_bios(struct clone *clone, struct bio_list *bios)
3928c2ecf20Sopenharmony_ci{
3938c2ecf20Sopenharmony_ci	struct bio *bio;
3948c2ecf20Sopenharmony_ci	unsigned long flags;
3958c2ecf20Sopenharmony_ci	struct bio_list flush_bios = BIO_EMPTY_LIST;
3968c2ecf20Sopenharmony_ci	struct bio_list normal_bios = BIO_EMPTY_LIST;
3978c2ecf20Sopenharmony_ci
3988c2ecf20Sopenharmony_ci	if (bio_list_empty(bios))
3998c2ecf20Sopenharmony_ci		return;
4008c2ecf20Sopenharmony_ci
4018c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(bios))) {
4028c2ecf20Sopenharmony_ci		if (bio_triggers_commit(clone, bio))
4038c2ecf20Sopenharmony_ci			bio_list_add(&flush_bios, bio);
4048c2ecf20Sopenharmony_ci		else
4058c2ecf20Sopenharmony_ci			bio_list_add(&normal_bios, bio);
4068c2ecf20Sopenharmony_ci	}
4078c2ecf20Sopenharmony_ci
4088c2ecf20Sopenharmony_ci	spin_lock_irqsave(&clone->lock, flags);
4098c2ecf20Sopenharmony_ci	bio_list_merge(&clone->deferred_bios, &normal_bios);
4108c2ecf20Sopenharmony_ci	bio_list_merge(&clone->deferred_flush_bios, &flush_bios);
4118c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&clone->lock, flags);
4128c2ecf20Sopenharmony_ci
4138c2ecf20Sopenharmony_ci	wake_worker(clone);
4148c2ecf20Sopenharmony_ci}
4158c2ecf20Sopenharmony_ci
4168c2ecf20Sopenharmony_cistatic void complete_overwrite_bio(struct clone *clone, struct bio *bio)
4178c2ecf20Sopenharmony_ci{
4188c2ecf20Sopenharmony_ci	unsigned long flags;
4198c2ecf20Sopenharmony_ci
4208c2ecf20Sopenharmony_ci	/*
4218c2ecf20Sopenharmony_ci	 * If the bio has the REQ_FUA flag set we must commit the metadata
4228c2ecf20Sopenharmony_ci	 * before signaling its completion.
4238c2ecf20Sopenharmony_ci	 *
4248c2ecf20Sopenharmony_ci	 * complete_overwrite_bio() is only called by hydration_complete(),
4258c2ecf20Sopenharmony_ci	 * after having successfully updated the metadata. This means we don't
4268c2ecf20Sopenharmony_ci	 * need to call dm_clone_changed_this_transaction() to check if the
4278c2ecf20Sopenharmony_ci	 * metadata has changed and thus we can avoid taking the metadata spin
4288c2ecf20Sopenharmony_ci	 * lock.
4298c2ecf20Sopenharmony_ci	 */
4308c2ecf20Sopenharmony_ci	if (!(bio->bi_opf & REQ_FUA)) {
4318c2ecf20Sopenharmony_ci		bio_endio(bio);
4328c2ecf20Sopenharmony_ci		return;
4338c2ecf20Sopenharmony_ci	}
4348c2ecf20Sopenharmony_ci
4358c2ecf20Sopenharmony_ci	/*
4368c2ecf20Sopenharmony_ci	 * If the metadata mode is RO or FAIL we won't be able to commit the
4378c2ecf20Sopenharmony_ci	 * metadata, so we complete the bio with an error.
4388c2ecf20Sopenharmony_ci	 */
4398c2ecf20Sopenharmony_ci	if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) {
4408c2ecf20Sopenharmony_ci		bio_io_error(bio);
4418c2ecf20Sopenharmony_ci		return;
4428c2ecf20Sopenharmony_ci	}
4438c2ecf20Sopenharmony_ci
4448c2ecf20Sopenharmony_ci	/*
4458c2ecf20Sopenharmony_ci	 * Batch together any bios that trigger commits and then issue a single
4468c2ecf20Sopenharmony_ci	 * commit for them in process_deferred_flush_bios().
4478c2ecf20Sopenharmony_ci	 */
4488c2ecf20Sopenharmony_ci	spin_lock_irqsave(&clone->lock, flags);
4498c2ecf20Sopenharmony_ci	bio_list_add(&clone->deferred_flush_completions, bio);
4508c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&clone->lock, flags);
4518c2ecf20Sopenharmony_ci
4528c2ecf20Sopenharmony_ci	wake_worker(clone);
4538c2ecf20Sopenharmony_ci}
4548c2ecf20Sopenharmony_ci
4558c2ecf20Sopenharmony_cistatic void trim_bio(struct bio *bio, sector_t sector, unsigned int len)
4568c2ecf20Sopenharmony_ci{
4578c2ecf20Sopenharmony_ci	bio->bi_iter.bi_sector = sector;
4588c2ecf20Sopenharmony_ci	bio->bi_iter.bi_size = to_bytes(len);
4598c2ecf20Sopenharmony_ci}
4608c2ecf20Sopenharmony_ci
4618c2ecf20Sopenharmony_cistatic void complete_discard_bio(struct clone *clone, struct bio *bio, bool success)
4628c2ecf20Sopenharmony_ci{
4638c2ecf20Sopenharmony_ci	unsigned long rs, nr_regions;
4648c2ecf20Sopenharmony_ci
4658c2ecf20Sopenharmony_ci	/*
4668c2ecf20Sopenharmony_ci	 * If the destination device supports discards, remap and trim the
4678c2ecf20Sopenharmony_ci	 * discard bio and pass it down. Otherwise complete the bio
4688c2ecf20Sopenharmony_ci	 * immediately.
4698c2ecf20Sopenharmony_ci	 */
4708c2ecf20Sopenharmony_ci	if (test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags) && success) {
4718c2ecf20Sopenharmony_ci		remap_to_dest(clone, bio);
4728c2ecf20Sopenharmony_ci		bio_region_range(clone, bio, &rs, &nr_regions);
4738c2ecf20Sopenharmony_ci		trim_bio(bio, region_to_sector(clone, rs),
4748c2ecf20Sopenharmony_ci			 nr_regions << clone->region_shift);
4758c2ecf20Sopenharmony_ci		submit_bio_noacct(bio);
4768c2ecf20Sopenharmony_ci	} else
4778c2ecf20Sopenharmony_ci		bio_endio(bio);
4788c2ecf20Sopenharmony_ci}
4798c2ecf20Sopenharmony_ci
4808c2ecf20Sopenharmony_cistatic void process_discard_bio(struct clone *clone, struct bio *bio)
4818c2ecf20Sopenharmony_ci{
4828c2ecf20Sopenharmony_ci	unsigned long rs, nr_regions;
4838c2ecf20Sopenharmony_ci
4848c2ecf20Sopenharmony_ci	bio_region_range(clone, bio, &rs, &nr_regions);
4858c2ecf20Sopenharmony_ci	if (!nr_regions) {
4868c2ecf20Sopenharmony_ci		bio_endio(bio);
4878c2ecf20Sopenharmony_ci		return;
4888c2ecf20Sopenharmony_ci	}
4898c2ecf20Sopenharmony_ci
4908c2ecf20Sopenharmony_ci	if (WARN_ON(rs >= clone->nr_regions || (rs + nr_regions) < rs ||
4918c2ecf20Sopenharmony_ci		    (rs + nr_regions) > clone->nr_regions)) {
4928c2ecf20Sopenharmony_ci		DMERR("%s: Invalid range (%lu + %lu, total regions %lu) for discard (%llu + %u)",
4938c2ecf20Sopenharmony_ci		      clone_device_name(clone), rs, nr_regions,
4948c2ecf20Sopenharmony_ci		      clone->nr_regions,
4958c2ecf20Sopenharmony_ci		      (unsigned long long)bio->bi_iter.bi_sector,
4968c2ecf20Sopenharmony_ci		      bio_sectors(bio));
4978c2ecf20Sopenharmony_ci		bio_endio(bio);
4988c2ecf20Sopenharmony_ci		return;
4998c2ecf20Sopenharmony_ci	}
5008c2ecf20Sopenharmony_ci
5018c2ecf20Sopenharmony_ci	/*
5028c2ecf20Sopenharmony_ci	 * The covered regions are already hydrated so we just need to pass
5038c2ecf20Sopenharmony_ci	 * down the discard.
5048c2ecf20Sopenharmony_ci	 */
5058c2ecf20Sopenharmony_ci	if (dm_clone_is_range_hydrated(clone->cmd, rs, nr_regions)) {
5068c2ecf20Sopenharmony_ci		complete_discard_bio(clone, bio, true);
5078c2ecf20Sopenharmony_ci		return;
5088c2ecf20Sopenharmony_ci	}
5098c2ecf20Sopenharmony_ci
5108c2ecf20Sopenharmony_ci	/*
5118c2ecf20Sopenharmony_ci	 * If the metadata mode is RO or FAIL we won't be able to update the
5128c2ecf20Sopenharmony_ci	 * metadata for the regions covered by the discard so we just ignore
5138c2ecf20Sopenharmony_ci	 * it.
5148c2ecf20Sopenharmony_ci	 */
5158c2ecf20Sopenharmony_ci	if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) {
5168c2ecf20Sopenharmony_ci		bio_endio(bio);
5178c2ecf20Sopenharmony_ci		return;
5188c2ecf20Sopenharmony_ci	}
5198c2ecf20Sopenharmony_ci
5208c2ecf20Sopenharmony_ci	/*
5218c2ecf20Sopenharmony_ci	 * Defer discard processing.
5228c2ecf20Sopenharmony_ci	 */
5238c2ecf20Sopenharmony_ci	spin_lock_irq(&clone->lock);
5248c2ecf20Sopenharmony_ci	bio_list_add(&clone->deferred_discard_bios, bio);
5258c2ecf20Sopenharmony_ci	spin_unlock_irq(&clone->lock);
5268c2ecf20Sopenharmony_ci
5278c2ecf20Sopenharmony_ci	wake_worker(clone);
5288c2ecf20Sopenharmony_ci}
5298c2ecf20Sopenharmony_ci
5308c2ecf20Sopenharmony_ci/*---------------------------------------------------------------------------*/
5318c2ecf20Sopenharmony_ci
5328c2ecf20Sopenharmony_ci/*
5338c2ecf20Sopenharmony_ci * dm-clone region hydrations.
5348c2ecf20Sopenharmony_ci */
5358c2ecf20Sopenharmony_cistruct dm_clone_region_hydration {
5368c2ecf20Sopenharmony_ci	struct clone *clone;
5378c2ecf20Sopenharmony_ci	unsigned long region_nr;
5388c2ecf20Sopenharmony_ci
5398c2ecf20Sopenharmony_ci	struct bio *overwrite_bio;
5408c2ecf20Sopenharmony_ci	bio_end_io_t *overwrite_bio_end_io;
5418c2ecf20Sopenharmony_ci
5428c2ecf20Sopenharmony_ci	struct bio_list deferred_bios;
5438c2ecf20Sopenharmony_ci
5448c2ecf20Sopenharmony_ci	blk_status_t status;
5458c2ecf20Sopenharmony_ci
5468c2ecf20Sopenharmony_ci	/* Used by hydration batching */
5478c2ecf20Sopenharmony_ci	struct list_head list;
5488c2ecf20Sopenharmony_ci
5498c2ecf20Sopenharmony_ci	/* Used by hydration hash table */
5508c2ecf20Sopenharmony_ci	struct hlist_node h;
5518c2ecf20Sopenharmony_ci};
5528c2ecf20Sopenharmony_ci
5538c2ecf20Sopenharmony_ci/*
5548c2ecf20Sopenharmony_ci * Hydration hash table implementation.
5558c2ecf20Sopenharmony_ci *
5568c2ecf20Sopenharmony_ci * Ideally we would like to use list_bl, which uses bit spin locks and employs
5578c2ecf20Sopenharmony_ci * the least significant bit of the list head to lock the corresponding bucket,
5588c2ecf20Sopenharmony_ci * reducing the memory overhead for the locks. But, currently, list_bl and bit
5598c2ecf20Sopenharmony_ci * spin locks don't support IRQ safe versions. Since we have to take the lock
5608c2ecf20Sopenharmony_ci * in both process and interrupt context, we must fall back to using regular
5618c2ecf20Sopenharmony_ci * spin locks; one per hash table bucket.
5628c2ecf20Sopenharmony_ci */
5638c2ecf20Sopenharmony_cistruct hash_table_bucket {
5648c2ecf20Sopenharmony_ci	struct hlist_head head;
5658c2ecf20Sopenharmony_ci
5668c2ecf20Sopenharmony_ci	/* Spinlock protecting the bucket */
5678c2ecf20Sopenharmony_ci	spinlock_t lock;
5688c2ecf20Sopenharmony_ci};
5698c2ecf20Sopenharmony_ci
/*
 * Thin wrappers around the spinlock primitives for a hash table bucket.
 * The irqsave/irqrestore pair preserves the caller's interrupt state in
 * @flags; the irq pair disables and re-enables interrupts unconditionally.
 */
#define bucket_lock_irqsave(bucket, flags) \
	spin_lock_irqsave(&(bucket)->lock, flags)

#define bucket_unlock_irqrestore(bucket, flags) \
	spin_unlock_irqrestore(&(bucket)->lock, flags)

#define bucket_lock_irq(bucket) \
	spin_lock_irq(&(bucket)->lock)

#define bucket_unlock_irq(bucket) \
	spin_unlock_irq(&(bucket)->lock)
5818c2ecf20Sopenharmony_ci
5828c2ecf20Sopenharmony_cistatic int hash_table_init(struct clone *clone)
5838c2ecf20Sopenharmony_ci{
5848c2ecf20Sopenharmony_ci	unsigned int i, sz;
5858c2ecf20Sopenharmony_ci	struct hash_table_bucket *bucket;
5868c2ecf20Sopenharmony_ci
5878c2ecf20Sopenharmony_ci	sz = 1 << HASH_TABLE_BITS;
5888c2ecf20Sopenharmony_ci
5898c2ecf20Sopenharmony_ci	clone->ht = kvmalloc(sz * sizeof(struct hash_table_bucket), GFP_KERNEL);
5908c2ecf20Sopenharmony_ci	if (!clone->ht)
5918c2ecf20Sopenharmony_ci		return -ENOMEM;
5928c2ecf20Sopenharmony_ci
5938c2ecf20Sopenharmony_ci	for (i = 0; i < sz; i++) {
5948c2ecf20Sopenharmony_ci		bucket = clone->ht + i;
5958c2ecf20Sopenharmony_ci
5968c2ecf20Sopenharmony_ci		INIT_HLIST_HEAD(&bucket->head);
5978c2ecf20Sopenharmony_ci		spin_lock_init(&bucket->lock);
5988c2ecf20Sopenharmony_ci	}
5998c2ecf20Sopenharmony_ci
6008c2ecf20Sopenharmony_ci	return 0;
6018c2ecf20Sopenharmony_ci}
6028c2ecf20Sopenharmony_ci
6038c2ecf20Sopenharmony_cistatic void hash_table_exit(struct clone *clone)
6048c2ecf20Sopenharmony_ci{
6058c2ecf20Sopenharmony_ci	kvfree(clone->ht);
6068c2ecf20Sopenharmony_ci}
6078c2ecf20Sopenharmony_ci
6088c2ecf20Sopenharmony_cistatic struct hash_table_bucket *get_hash_table_bucket(struct clone *clone,
6098c2ecf20Sopenharmony_ci						       unsigned long region_nr)
6108c2ecf20Sopenharmony_ci{
6118c2ecf20Sopenharmony_ci	return &clone->ht[hash_long(region_nr, HASH_TABLE_BITS)];
6128c2ecf20Sopenharmony_ci}
6138c2ecf20Sopenharmony_ci
6148c2ecf20Sopenharmony_ci/*
6158c2ecf20Sopenharmony_ci * Search hash table for a hydration with hd->region_nr == region_nr
6168c2ecf20Sopenharmony_ci *
6178c2ecf20Sopenharmony_ci * NOTE: Must be called with the bucket lock held
6188c2ecf20Sopenharmony_ci */
6198c2ecf20Sopenharmony_cistatic struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket,
6208c2ecf20Sopenharmony_ci						     unsigned long region_nr)
6218c2ecf20Sopenharmony_ci{
6228c2ecf20Sopenharmony_ci	struct dm_clone_region_hydration *hd;
6238c2ecf20Sopenharmony_ci
6248c2ecf20Sopenharmony_ci	hlist_for_each_entry(hd, &bucket->head, h) {
6258c2ecf20Sopenharmony_ci		if (hd->region_nr == region_nr)
6268c2ecf20Sopenharmony_ci			return hd;
6278c2ecf20Sopenharmony_ci	}
6288c2ecf20Sopenharmony_ci
6298c2ecf20Sopenharmony_ci	return NULL;
6308c2ecf20Sopenharmony_ci}
6318c2ecf20Sopenharmony_ci
6328c2ecf20Sopenharmony_ci/*
6338c2ecf20Sopenharmony_ci * Insert a hydration into the hash table.
6348c2ecf20Sopenharmony_ci *
6358c2ecf20Sopenharmony_ci * NOTE: Must be called with the bucket lock held.
6368c2ecf20Sopenharmony_ci */
6378c2ecf20Sopenharmony_cistatic inline void __insert_region_hydration(struct hash_table_bucket *bucket,
6388c2ecf20Sopenharmony_ci					     struct dm_clone_region_hydration *hd)
6398c2ecf20Sopenharmony_ci{
6408c2ecf20Sopenharmony_ci	hlist_add_head(&hd->h, &bucket->head);
6418c2ecf20Sopenharmony_ci}
6428c2ecf20Sopenharmony_ci
6438c2ecf20Sopenharmony_ci/*
6448c2ecf20Sopenharmony_ci * This function inserts a hydration into the hash table, unless someone else
6458c2ecf20Sopenharmony_ci * managed to insert a hydration for the same region first. In the latter case
6468c2ecf20Sopenharmony_ci * it returns the existing hydration descriptor for this region.
6478c2ecf20Sopenharmony_ci *
6488c2ecf20Sopenharmony_ci * NOTE: Must be called with the hydration hash table lock held.
6498c2ecf20Sopenharmony_ci */
6508c2ecf20Sopenharmony_cistatic struct dm_clone_region_hydration *
6518c2ecf20Sopenharmony_ci__find_or_insert_region_hydration(struct hash_table_bucket *bucket,
6528c2ecf20Sopenharmony_ci				  struct dm_clone_region_hydration *hd)
6538c2ecf20Sopenharmony_ci{
6548c2ecf20Sopenharmony_ci	struct dm_clone_region_hydration *hd2;
6558c2ecf20Sopenharmony_ci
6568c2ecf20Sopenharmony_ci	hd2 = __hash_find(bucket, hd->region_nr);
6578c2ecf20Sopenharmony_ci	if (hd2)
6588c2ecf20Sopenharmony_ci		return hd2;
6598c2ecf20Sopenharmony_ci
6608c2ecf20Sopenharmony_ci	__insert_region_hydration(bucket, hd);
6618c2ecf20Sopenharmony_ci
6628c2ecf20Sopenharmony_ci	return hd;
6638c2ecf20Sopenharmony_ci}
6648c2ecf20Sopenharmony_ci
6658c2ecf20Sopenharmony_ci/*---------------------------------------------------------------------------*/
6668c2ecf20Sopenharmony_ci
6678c2ecf20Sopenharmony_ci/* Allocate a hydration */
6688c2ecf20Sopenharmony_cistatic struct dm_clone_region_hydration *alloc_hydration(struct clone *clone)
6698c2ecf20Sopenharmony_ci{
6708c2ecf20Sopenharmony_ci	struct dm_clone_region_hydration *hd;
6718c2ecf20Sopenharmony_ci
6728c2ecf20Sopenharmony_ci	/*
6738c2ecf20Sopenharmony_ci	 * Allocate a hydration from the hydration mempool.
6748c2ecf20Sopenharmony_ci	 * This might block but it can't fail.
6758c2ecf20Sopenharmony_ci	 */
6768c2ecf20Sopenharmony_ci	hd = mempool_alloc(&clone->hydration_pool, GFP_NOIO);
6778c2ecf20Sopenharmony_ci	hd->clone = clone;
6788c2ecf20Sopenharmony_ci
6798c2ecf20Sopenharmony_ci	return hd;
6808c2ecf20Sopenharmony_ci}
6818c2ecf20Sopenharmony_ci
6828c2ecf20Sopenharmony_cistatic inline void free_hydration(struct dm_clone_region_hydration *hd)
6838c2ecf20Sopenharmony_ci{
6848c2ecf20Sopenharmony_ci	mempool_free(hd, &hd->clone->hydration_pool);
6858c2ecf20Sopenharmony_ci}
6868c2ecf20Sopenharmony_ci
6878c2ecf20Sopenharmony_ci/* Initialize a hydration */
6888c2ecf20Sopenharmony_cistatic void hydration_init(struct dm_clone_region_hydration *hd, unsigned long region_nr)
6898c2ecf20Sopenharmony_ci{
6908c2ecf20Sopenharmony_ci	hd->region_nr = region_nr;
6918c2ecf20Sopenharmony_ci	hd->overwrite_bio = NULL;
6928c2ecf20Sopenharmony_ci	bio_list_init(&hd->deferred_bios);
6938c2ecf20Sopenharmony_ci	hd->status = 0;
6948c2ecf20Sopenharmony_ci
6958c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&hd->list);
6968c2ecf20Sopenharmony_ci	INIT_HLIST_NODE(&hd->h);
6978c2ecf20Sopenharmony_ci}
6988c2ecf20Sopenharmony_ci
6998c2ecf20Sopenharmony_ci/*---------------------------------------------------------------------------*/
7008c2ecf20Sopenharmony_ci
7018c2ecf20Sopenharmony_ci/*
7028c2ecf20Sopenharmony_ci * Update dm-clone's metadata after a region has finished hydrating and remove
7038c2ecf20Sopenharmony_ci * hydration from the hash table.
7048c2ecf20Sopenharmony_ci */
7058c2ecf20Sopenharmony_cistatic int hydration_update_metadata(struct dm_clone_region_hydration *hd)
7068c2ecf20Sopenharmony_ci{
7078c2ecf20Sopenharmony_ci	int r = 0;
7088c2ecf20Sopenharmony_ci	unsigned long flags;
7098c2ecf20Sopenharmony_ci	struct hash_table_bucket *bucket;
7108c2ecf20Sopenharmony_ci	struct clone *clone = hd->clone;
7118c2ecf20Sopenharmony_ci
7128c2ecf20Sopenharmony_ci	if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY))
7138c2ecf20Sopenharmony_ci		r = -EPERM;
7148c2ecf20Sopenharmony_ci
7158c2ecf20Sopenharmony_ci	/* Update the metadata */
7168c2ecf20Sopenharmony_ci	if (likely(!r) && hd->status == BLK_STS_OK)
7178c2ecf20Sopenharmony_ci		r = dm_clone_set_region_hydrated(clone->cmd, hd->region_nr);
7188c2ecf20Sopenharmony_ci
7198c2ecf20Sopenharmony_ci	bucket = get_hash_table_bucket(clone, hd->region_nr);
7208c2ecf20Sopenharmony_ci
7218c2ecf20Sopenharmony_ci	/* Remove hydration from hash table */
7228c2ecf20Sopenharmony_ci	bucket_lock_irqsave(bucket, flags);
7238c2ecf20Sopenharmony_ci	hlist_del(&hd->h);
7248c2ecf20Sopenharmony_ci	bucket_unlock_irqrestore(bucket, flags);
7258c2ecf20Sopenharmony_ci
7268c2ecf20Sopenharmony_ci	return r;
7278c2ecf20Sopenharmony_ci}
7288c2ecf20Sopenharmony_ci
7298c2ecf20Sopenharmony_ci/*
7308c2ecf20Sopenharmony_ci * Complete a region's hydration:
7318c2ecf20Sopenharmony_ci *
7328c2ecf20Sopenharmony_ci *	1. Update dm-clone's metadata.
7338c2ecf20Sopenharmony_ci *	2. Remove hydration from hash table.
7348c2ecf20Sopenharmony_ci *	3. Complete overwrite bio.
7358c2ecf20Sopenharmony_ci *	4. Issue deferred bios.
7368c2ecf20Sopenharmony_ci *	5. If this was the last hydration, wake up anyone waiting for
7378c2ecf20Sopenharmony_ci *	   hydrations to finish.
7388c2ecf20Sopenharmony_ci */
7398c2ecf20Sopenharmony_cistatic void hydration_complete(struct dm_clone_region_hydration *hd)
7408c2ecf20Sopenharmony_ci{
7418c2ecf20Sopenharmony_ci	int r;
7428c2ecf20Sopenharmony_ci	blk_status_t status;
7438c2ecf20Sopenharmony_ci	struct clone *clone = hd->clone;
7448c2ecf20Sopenharmony_ci
7458c2ecf20Sopenharmony_ci	r = hydration_update_metadata(hd);
7468c2ecf20Sopenharmony_ci
7478c2ecf20Sopenharmony_ci	if (hd->status == BLK_STS_OK && likely(!r)) {
7488c2ecf20Sopenharmony_ci		if (hd->overwrite_bio)
7498c2ecf20Sopenharmony_ci			complete_overwrite_bio(clone, hd->overwrite_bio);
7508c2ecf20Sopenharmony_ci
7518c2ecf20Sopenharmony_ci		issue_deferred_bios(clone, &hd->deferred_bios);
7528c2ecf20Sopenharmony_ci	} else {
7538c2ecf20Sopenharmony_ci		status = r ? BLK_STS_IOERR : hd->status;
7548c2ecf20Sopenharmony_ci
7558c2ecf20Sopenharmony_ci		if (hd->overwrite_bio)
7568c2ecf20Sopenharmony_ci			bio_list_add(&hd->deferred_bios, hd->overwrite_bio);
7578c2ecf20Sopenharmony_ci
7588c2ecf20Sopenharmony_ci		fail_bios(&hd->deferred_bios, status);
7598c2ecf20Sopenharmony_ci	}
7608c2ecf20Sopenharmony_ci
7618c2ecf20Sopenharmony_ci	free_hydration(hd);
7628c2ecf20Sopenharmony_ci
7638c2ecf20Sopenharmony_ci	if (atomic_dec_and_test(&clone->hydrations_in_flight))
7648c2ecf20Sopenharmony_ci		wakeup_hydration_waiters(clone);
7658c2ecf20Sopenharmony_ci}
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_cistatic void hydration_kcopyd_callback(int read_err, unsigned long write_err, void *context)
7688c2ecf20Sopenharmony_ci{
7698c2ecf20Sopenharmony_ci	blk_status_t status;
7708c2ecf20Sopenharmony_ci
7718c2ecf20Sopenharmony_ci	struct dm_clone_region_hydration *tmp, *hd = context;
7728c2ecf20Sopenharmony_ci	struct clone *clone = hd->clone;
7738c2ecf20Sopenharmony_ci
7748c2ecf20Sopenharmony_ci	LIST_HEAD(batched_hydrations);
7758c2ecf20Sopenharmony_ci
7768c2ecf20Sopenharmony_ci	if (read_err || write_err) {
7778c2ecf20Sopenharmony_ci		DMERR_LIMIT("%s: hydration failed", clone_device_name(clone));
7788c2ecf20Sopenharmony_ci		status = BLK_STS_IOERR;
7798c2ecf20Sopenharmony_ci	} else {
7808c2ecf20Sopenharmony_ci		status = BLK_STS_OK;
7818c2ecf20Sopenharmony_ci	}
7828c2ecf20Sopenharmony_ci	list_splice_tail(&hd->list, &batched_hydrations);
7838c2ecf20Sopenharmony_ci
7848c2ecf20Sopenharmony_ci	hd->status = status;
7858c2ecf20Sopenharmony_ci	hydration_complete(hd);
7868c2ecf20Sopenharmony_ci
7878c2ecf20Sopenharmony_ci	/* Complete batched hydrations */
7888c2ecf20Sopenharmony_ci	list_for_each_entry_safe(hd, tmp, &batched_hydrations, list) {
7898c2ecf20Sopenharmony_ci		hd->status = status;
7908c2ecf20Sopenharmony_ci		hydration_complete(hd);
7918c2ecf20Sopenharmony_ci	}
7928c2ecf20Sopenharmony_ci
7938c2ecf20Sopenharmony_ci	/* Continue background hydration, if there is no I/O in-flight */
7948c2ecf20Sopenharmony_ci	if (test_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags) &&
7958c2ecf20Sopenharmony_ci	    !atomic_read(&clone->ios_in_flight))
7968c2ecf20Sopenharmony_ci		wake_worker(clone);
7978c2ecf20Sopenharmony_ci}
7988c2ecf20Sopenharmony_ci
7998c2ecf20Sopenharmony_cistatic void hydration_copy(struct dm_clone_region_hydration *hd, unsigned int nr_regions)
8008c2ecf20Sopenharmony_ci{
8018c2ecf20Sopenharmony_ci	unsigned long region_start, region_end;
8028c2ecf20Sopenharmony_ci	sector_t tail_size, region_size, total_size;
8038c2ecf20Sopenharmony_ci	struct dm_io_region from, to;
8048c2ecf20Sopenharmony_ci	struct clone *clone = hd->clone;
8058c2ecf20Sopenharmony_ci
8068c2ecf20Sopenharmony_ci	if (WARN_ON(!nr_regions))
8078c2ecf20Sopenharmony_ci		return;
8088c2ecf20Sopenharmony_ci
8098c2ecf20Sopenharmony_ci	region_size = clone->region_size;
8108c2ecf20Sopenharmony_ci	region_start = hd->region_nr;
8118c2ecf20Sopenharmony_ci	region_end = region_start + nr_regions - 1;
8128c2ecf20Sopenharmony_ci
8138c2ecf20Sopenharmony_ci	total_size = region_to_sector(clone, nr_regions - 1);
8148c2ecf20Sopenharmony_ci
8158c2ecf20Sopenharmony_ci	if (region_end == clone->nr_regions - 1) {
8168c2ecf20Sopenharmony_ci		/*
8178c2ecf20Sopenharmony_ci		 * The last region of the target might be smaller than
8188c2ecf20Sopenharmony_ci		 * region_size.
8198c2ecf20Sopenharmony_ci		 */
8208c2ecf20Sopenharmony_ci		tail_size = clone->ti->len & (region_size - 1);
8218c2ecf20Sopenharmony_ci		if (!tail_size)
8228c2ecf20Sopenharmony_ci			tail_size = region_size;
8238c2ecf20Sopenharmony_ci	} else {
8248c2ecf20Sopenharmony_ci		tail_size = region_size;
8258c2ecf20Sopenharmony_ci	}
8268c2ecf20Sopenharmony_ci
8278c2ecf20Sopenharmony_ci	total_size += tail_size;
8288c2ecf20Sopenharmony_ci
8298c2ecf20Sopenharmony_ci	from.bdev = clone->source_dev->bdev;
8308c2ecf20Sopenharmony_ci	from.sector = region_to_sector(clone, region_start);
8318c2ecf20Sopenharmony_ci	from.count = total_size;
8328c2ecf20Sopenharmony_ci
8338c2ecf20Sopenharmony_ci	to.bdev = clone->dest_dev->bdev;
8348c2ecf20Sopenharmony_ci	to.sector = from.sector;
8358c2ecf20Sopenharmony_ci	to.count = from.count;
8368c2ecf20Sopenharmony_ci
8378c2ecf20Sopenharmony_ci	/* Issue copy */
8388c2ecf20Sopenharmony_ci	atomic_add(nr_regions, &clone->hydrations_in_flight);
8398c2ecf20Sopenharmony_ci	dm_kcopyd_copy(clone->kcopyd_client, &from, 1, &to, 0,
8408c2ecf20Sopenharmony_ci		       hydration_kcopyd_callback, hd);
8418c2ecf20Sopenharmony_ci}
8428c2ecf20Sopenharmony_ci
8438c2ecf20Sopenharmony_cistatic void overwrite_endio(struct bio *bio)
8448c2ecf20Sopenharmony_ci{
8458c2ecf20Sopenharmony_ci	struct dm_clone_region_hydration *hd = bio->bi_private;
8468c2ecf20Sopenharmony_ci
8478c2ecf20Sopenharmony_ci	bio->bi_end_io = hd->overwrite_bio_end_io;
8488c2ecf20Sopenharmony_ci	hd->status = bio->bi_status;
8498c2ecf20Sopenharmony_ci
8508c2ecf20Sopenharmony_ci	hydration_complete(hd);
8518c2ecf20Sopenharmony_ci}
8528c2ecf20Sopenharmony_ci
8538c2ecf20Sopenharmony_cistatic void hydration_overwrite(struct dm_clone_region_hydration *hd, struct bio *bio)
8548c2ecf20Sopenharmony_ci{
8558c2ecf20Sopenharmony_ci	/*
8568c2ecf20Sopenharmony_ci	 * We don't need to save and restore bio->bi_private because device
8578c2ecf20Sopenharmony_ci	 * mapper core generates a new bio for us to use, with clean
8588c2ecf20Sopenharmony_ci	 * bi_private.
8598c2ecf20Sopenharmony_ci	 */
8608c2ecf20Sopenharmony_ci	hd->overwrite_bio = bio;
8618c2ecf20Sopenharmony_ci	hd->overwrite_bio_end_io = bio->bi_end_io;
8628c2ecf20Sopenharmony_ci
8638c2ecf20Sopenharmony_ci	bio->bi_end_io = overwrite_endio;
8648c2ecf20Sopenharmony_ci	bio->bi_private = hd;
8658c2ecf20Sopenharmony_ci
8668c2ecf20Sopenharmony_ci	atomic_inc(&hd->clone->hydrations_in_flight);
8678c2ecf20Sopenharmony_ci	submit_bio_noacct(bio);
8688c2ecf20Sopenharmony_ci}
8698c2ecf20Sopenharmony_ci
8708c2ecf20Sopenharmony_ci/*
8718c2ecf20Sopenharmony_ci * Hydrate bio's region.
8728c2ecf20Sopenharmony_ci *
8738c2ecf20Sopenharmony_ci * This function starts the hydration of the bio's region and puts the bio in
8748c2ecf20Sopenharmony_ci * the list of deferred bios for this region. In case, by the time this
8758c2ecf20Sopenharmony_ci * function is called, the region has finished hydrating it's submitted to the
8768c2ecf20Sopenharmony_ci * destination device.
8778c2ecf20Sopenharmony_ci *
8788c2ecf20Sopenharmony_ci * NOTE: The bio remapping must be performed by the caller.
8798c2ecf20Sopenharmony_ci */
8808c2ecf20Sopenharmony_cistatic void hydrate_bio_region(struct clone *clone, struct bio *bio)
8818c2ecf20Sopenharmony_ci{
8828c2ecf20Sopenharmony_ci	unsigned long region_nr;
8838c2ecf20Sopenharmony_ci	struct hash_table_bucket *bucket;
8848c2ecf20Sopenharmony_ci	struct dm_clone_region_hydration *hd, *hd2;
8858c2ecf20Sopenharmony_ci
8868c2ecf20Sopenharmony_ci	region_nr = bio_to_region(clone, bio);
8878c2ecf20Sopenharmony_ci	bucket = get_hash_table_bucket(clone, region_nr);
8888c2ecf20Sopenharmony_ci
8898c2ecf20Sopenharmony_ci	bucket_lock_irq(bucket);
8908c2ecf20Sopenharmony_ci
8918c2ecf20Sopenharmony_ci	hd = __hash_find(bucket, region_nr);
8928c2ecf20Sopenharmony_ci	if (hd) {
8938c2ecf20Sopenharmony_ci		/* Someone else is hydrating the region */
8948c2ecf20Sopenharmony_ci		bio_list_add(&hd->deferred_bios, bio);
8958c2ecf20Sopenharmony_ci		bucket_unlock_irq(bucket);
8968c2ecf20Sopenharmony_ci		return;
8978c2ecf20Sopenharmony_ci	}
8988c2ecf20Sopenharmony_ci
8998c2ecf20Sopenharmony_ci	if (dm_clone_is_region_hydrated(clone->cmd, region_nr)) {
9008c2ecf20Sopenharmony_ci		/* The region has been hydrated */
9018c2ecf20Sopenharmony_ci		bucket_unlock_irq(bucket);
9028c2ecf20Sopenharmony_ci		issue_bio(clone, bio);
9038c2ecf20Sopenharmony_ci		return;
9048c2ecf20Sopenharmony_ci	}
9058c2ecf20Sopenharmony_ci
9068c2ecf20Sopenharmony_ci	/*
9078c2ecf20Sopenharmony_ci	 * We must allocate a hydration descriptor and start the hydration of
9088c2ecf20Sopenharmony_ci	 * the corresponding region.
9098c2ecf20Sopenharmony_ci	 */
9108c2ecf20Sopenharmony_ci	bucket_unlock_irq(bucket);
9118c2ecf20Sopenharmony_ci
9128c2ecf20Sopenharmony_ci	hd = alloc_hydration(clone);
9138c2ecf20Sopenharmony_ci	hydration_init(hd, region_nr);
9148c2ecf20Sopenharmony_ci
9158c2ecf20Sopenharmony_ci	bucket_lock_irq(bucket);
9168c2ecf20Sopenharmony_ci
9178c2ecf20Sopenharmony_ci	/* Check if the region has been hydrated in the meantime. */
9188c2ecf20Sopenharmony_ci	if (dm_clone_is_region_hydrated(clone->cmd, region_nr)) {
9198c2ecf20Sopenharmony_ci		bucket_unlock_irq(bucket);
9208c2ecf20Sopenharmony_ci		free_hydration(hd);
9218c2ecf20Sopenharmony_ci		issue_bio(clone, bio);
9228c2ecf20Sopenharmony_ci		return;
9238c2ecf20Sopenharmony_ci	}
9248c2ecf20Sopenharmony_ci
9258c2ecf20Sopenharmony_ci	hd2 = __find_or_insert_region_hydration(bucket, hd);
9268c2ecf20Sopenharmony_ci	if (hd2 != hd) {
9278c2ecf20Sopenharmony_ci		/* Someone else started the region's hydration. */
9288c2ecf20Sopenharmony_ci		bio_list_add(&hd2->deferred_bios, bio);
9298c2ecf20Sopenharmony_ci		bucket_unlock_irq(bucket);
9308c2ecf20Sopenharmony_ci		free_hydration(hd);
9318c2ecf20Sopenharmony_ci		return;
9328c2ecf20Sopenharmony_ci	}
9338c2ecf20Sopenharmony_ci
9348c2ecf20Sopenharmony_ci	/*
9358c2ecf20Sopenharmony_ci	 * If the metadata mode is RO or FAIL then there is no point starting a
9368c2ecf20Sopenharmony_ci	 * hydration, since we will not be able to update the metadata when the
9378c2ecf20Sopenharmony_ci	 * hydration finishes.
9388c2ecf20Sopenharmony_ci	 */
9398c2ecf20Sopenharmony_ci	if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) {
9408c2ecf20Sopenharmony_ci		hlist_del(&hd->h);
9418c2ecf20Sopenharmony_ci		bucket_unlock_irq(bucket);
9428c2ecf20Sopenharmony_ci		free_hydration(hd);
9438c2ecf20Sopenharmony_ci		bio_io_error(bio);
9448c2ecf20Sopenharmony_ci		return;
9458c2ecf20Sopenharmony_ci	}
9468c2ecf20Sopenharmony_ci
9478c2ecf20Sopenharmony_ci	/*
9488c2ecf20Sopenharmony_ci	 * Start region hydration.
9498c2ecf20Sopenharmony_ci	 *
9508c2ecf20Sopenharmony_ci	 * If a bio overwrites a region, i.e., its size is equal to the
9518c2ecf20Sopenharmony_ci	 * region's size, then we don't need to copy the region from the source
9528c2ecf20Sopenharmony_ci	 * to the destination device.
9538c2ecf20Sopenharmony_ci	 */
9548c2ecf20Sopenharmony_ci	if (is_overwrite_bio(clone, bio)) {
9558c2ecf20Sopenharmony_ci		bucket_unlock_irq(bucket);
9568c2ecf20Sopenharmony_ci		hydration_overwrite(hd, bio);
9578c2ecf20Sopenharmony_ci	} else {
9588c2ecf20Sopenharmony_ci		bio_list_add(&hd->deferred_bios, bio);
9598c2ecf20Sopenharmony_ci		bucket_unlock_irq(bucket);
9608c2ecf20Sopenharmony_ci		hydration_copy(hd, 1);
9618c2ecf20Sopenharmony_ci	}
9628c2ecf20Sopenharmony_ci}
9638c2ecf20Sopenharmony_ci
9648c2ecf20Sopenharmony_ci/*---------------------------------------------------------------------------*/
9658c2ecf20Sopenharmony_ci
9668c2ecf20Sopenharmony_ci/*
9678c2ecf20Sopenharmony_ci * Background hydrations.
9688c2ecf20Sopenharmony_ci */
9698c2ecf20Sopenharmony_ci
/*
 * Batch region hydrations.
 *
 * The hydrations of adjacent regions are batched together to better utilize
 * device bandwidth. This makes small region sizes, e.g., 4KB, practical: they
 * are good for small, random write performance (because of the overwriting of
 * un-hydrated regions), while kcopyd still receives big copy requests and
 * achieves high hydration bandwidth.
 */
struct batch_info {
	/* First hydration of the batch being built; NULL if there is none. */
	struct dm_clone_region_hydration *head;
	/* Number of regions in the current batch, including the head. */
	unsigned int nr_batched_regions;
};
9838c2ecf20Sopenharmony_ci
9848c2ecf20Sopenharmony_cistatic void __batch_hydration(struct batch_info *batch,
9858c2ecf20Sopenharmony_ci			      struct dm_clone_region_hydration *hd)
9868c2ecf20Sopenharmony_ci{
9878c2ecf20Sopenharmony_ci	struct clone *clone = hd->clone;
9888c2ecf20Sopenharmony_ci	unsigned int max_batch_size = READ_ONCE(clone->hydration_batch_size);
9898c2ecf20Sopenharmony_ci
9908c2ecf20Sopenharmony_ci	if (batch->head) {
9918c2ecf20Sopenharmony_ci		/* Try to extend the current batch */
9928c2ecf20Sopenharmony_ci		if (batch->nr_batched_regions < max_batch_size &&
9938c2ecf20Sopenharmony_ci		    (batch->head->region_nr + batch->nr_batched_regions) == hd->region_nr) {
9948c2ecf20Sopenharmony_ci			list_add_tail(&hd->list, &batch->head->list);
9958c2ecf20Sopenharmony_ci			batch->nr_batched_regions++;
9968c2ecf20Sopenharmony_ci			hd = NULL;
9978c2ecf20Sopenharmony_ci		}
9988c2ecf20Sopenharmony_ci
9998c2ecf20Sopenharmony_ci		/* Check if we should issue the current batch */
10008c2ecf20Sopenharmony_ci		if (batch->nr_batched_regions >= max_batch_size || hd) {
10018c2ecf20Sopenharmony_ci			hydration_copy(batch->head, batch->nr_batched_regions);
10028c2ecf20Sopenharmony_ci			batch->head = NULL;
10038c2ecf20Sopenharmony_ci			batch->nr_batched_regions = 0;
10048c2ecf20Sopenharmony_ci		}
10058c2ecf20Sopenharmony_ci	}
10068c2ecf20Sopenharmony_ci
10078c2ecf20Sopenharmony_ci	if (!hd)
10088c2ecf20Sopenharmony_ci		return;
10098c2ecf20Sopenharmony_ci
10108c2ecf20Sopenharmony_ci	/* We treat max batch sizes of zero and one equivalently */
10118c2ecf20Sopenharmony_ci	if (max_batch_size <= 1) {
10128c2ecf20Sopenharmony_ci		hydration_copy(hd, 1);
10138c2ecf20Sopenharmony_ci		return;
10148c2ecf20Sopenharmony_ci	}
10158c2ecf20Sopenharmony_ci
10168c2ecf20Sopenharmony_ci	/* Start a new batch */
10178c2ecf20Sopenharmony_ci	BUG_ON(!list_empty(&hd->list));
10188c2ecf20Sopenharmony_ci	batch->head = hd;
10198c2ecf20Sopenharmony_ci	batch->nr_batched_regions = 1;
10208c2ecf20Sopenharmony_ci}
10218c2ecf20Sopenharmony_ci
10228c2ecf20Sopenharmony_cistatic unsigned long __start_next_hydration(struct clone *clone,
10238c2ecf20Sopenharmony_ci					    unsigned long offset,
10248c2ecf20Sopenharmony_ci					    struct batch_info *batch)
10258c2ecf20Sopenharmony_ci{
10268c2ecf20Sopenharmony_ci	struct hash_table_bucket *bucket;
10278c2ecf20Sopenharmony_ci	struct dm_clone_region_hydration *hd;
10288c2ecf20Sopenharmony_ci	unsigned long nr_regions = clone->nr_regions;
10298c2ecf20Sopenharmony_ci
10308c2ecf20Sopenharmony_ci	hd = alloc_hydration(clone);
10318c2ecf20Sopenharmony_ci
10328c2ecf20Sopenharmony_ci	/* Try to find a region to hydrate. */
10338c2ecf20Sopenharmony_ci	do {
10348c2ecf20Sopenharmony_ci		offset = dm_clone_find_next_unhydrated_region(clone->cmd, offset);
10358c2ecf20Sopenharmony_ci		if (offset == nr_regions)
10368c2ecf20Sopenharmony_ci			break;
10378c2ecf20Sopenharmony_ci
10388c2ecf20Sopenharmony_ci		bucket = get_hash_table_bucket(clone, offset);
10398c2ecf20Sopenharmony_ci		bucket_lock_irq(bucket);
10408c2ecf20Sopenharmony_ci
10418c2ecf20Sopenharmony_ci		if (!dm_clone_is_region_hydrated(clone->cmd, offset) &&
10428c2ecf20Sopenharmony_ci		    !__hash_find(bucket, offset)) {
10438c2ecf20Sopenharmony_ci			hydration_init(hd, offset);
10448c2ecf20Sopenharmony_ci			__insert_region_hydration(bucket, hd);
10458c2ecf20Sopenharmony_ci			bucket_unlock_irq(bucket);
10468c2ecf20Sopenharmony_ci
10478c2ecf20Sopenharmony_ci			/* Batch hydration */
10488c2ecf20Sopenharmony_ci			__batch_hydration(batch, hd);
10498c2ecf20Sopenharmony_ci
10508c2ecf20Sopenharmony_ci			return (offset + 1);
10518c2ecf20Sopenharmony_ci		}
10528c2ecf20Sopenharmony_ci
10538c2ecf20Sopenharmony_ci		bucket_unlock_irq(bucket);
10548c2ecf20Sopenharmony_ci
10558c2ecf20Sopenharmony_ci	} while (++offset < nr_regions);
10568c2ecf20Sopenharmony_ci
10578c2ecf20Sopenharmony_ci	if (hd)
10588c2ecf20Sopenharmony_ci		free_hydration(hd);
10598c2ecf20Sopenharmony_ci
10608c2ecf20Sopenharmony_ci	return offset;
10618c2ecf20Sopenharmony_ci}
10628c2ecf20Sopenharmony_ci
10638c2ecf20Sopenharmony_ci/*
10648c2ecf20Sopenharmony_ci * This function searches for regions that still reside in the source device
10658c2ecf20Sopenharmony_ci * and starts their hydration.
10668c2ecf20Sopenharmony_ci */
10678c2ecf20Sopenharmony_cistatic void do_hydration(struct clone *clone)
10688c2ecf20Sopenharmony_ci{
10698c2ecf20Sopenharmony_ci	unsigned int current_volume;
10708c2ecf20Sopenharmony_ci	unsigned long offset, nr_regions = clone->nr_regions;
10718c2ecf20Sopenharmony_ci
10728c2ecf20Sopenharmony_ci	struct batch_info batch = {
10738c2ecf20Sopenharmony_ci		.head = NULL,
10748c2ecf20Sopenharmony_ci		.nr_batched_regions = 0,
10758c2ecf20Sopenharmony_ci	};
10768c2ecf20Sopenharmony_ci
10778c2ecf20Sopenharmony_ci	if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY))
10788c2ecf20Sopenharmony_ci		return;
10798c2ecf20Sopenharmony_ci
10808c2ecf20Sopenharmony_ci	if (dm_clone_is_hydration_done(clone->cmd))
10818c2ecf20Sopenharmony_ci		return;
10828c2ecf20Sopenharmony_ci
10838c2ecf20Sopenharmony_ci	/*
10848c2ecf20Sopenharmony_ci	 * Avoid race with device suspension.
10858c2ecf20Sopenharmony_ci	 */
10868c2ecf20Sopenharmony_ci	atomic_inc(&clone->hydrations_in_flight);
10878c2ecf20Sopenharmony_ci
10888c2ecf20Sopenharmony_ci	/*
10898c2ecf20Sopenharmony_ci	 * Make sure atomic_inc() is ordered before test_bit(), otherwise we
10908c2ecf20Sopenharmony_ci	 * might race with clone_postsuspend() and start a region hydration
10918c2ecf20Sopenharmony_ci	 * after the target has been suspended.
10928c2ecf20Sopenharmony_ci	 *
10938c2ecf20Sopenharmony_ci	 * This is paired with the smp_mb__after_atomic() in
10948c2ecf20Sopenharmony_ci	 * clone_postsuspend().
10958c2ecf20Sopenharmony_ci	 */
10968c2ecf20Sopenharmony_ci	smp_mb__after_atomic();
10978c2ecf20Sopenharmony_ci
10988c2ecf20Sopenharmony_ci	offset = clone->hydration_offset;
10998c2ecf20Sopenharmony_ci	while (likely(!test_bit(DM_CLONE_HYDRATION_SUSPENDED, &clone->flags)) &&
11008c2ecf20Sopenharmony_ci	       !atomic_read(&clone->ios_in_flight) &&
11018c2ecf20Sopenharmony_ci	       test_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags) &&
11028c2ecf20Sopenharmony_ci	       offset < nr_regions) {
11038c2ecf20Sopenharmony_ci		current_volume = atomic_read(&clone->hydrations_in_flight);
11048c2ecf20Sopenharmony_ci		current_volume += batch.nr_batched_regions;
11058c2ecf20Sopenharmony_ci
11068c2ecf20Sopenharmony_ci		if (current_volume > READ_ONCE(clone->hydration_threshold))
11078c2ecf20Sopenharmony_ci			break;
11088c2ecf20Sopenharmony_ci
11098c2ecf20Sopenharmony_ci		offset = __start_next_hydration(clone, offset, &batch);
11108c2ecf20Sopenharmony_ci	}
11118c2ecf20Sopenharmony_ci
11128c2ecf20Sopenharmony_ci	if (batch.head)
11138c2ecf20Sopenharmony_ci		hydration_copy(batch.head, batch.nr_batched_regions);
11148c2ecf20Sopenharmony_ci
11158c2ecf20Sopenharmony_ci	if (offset >= nr_regions)
11168c2ecf20Sopenharmony_ci		offset = 0;
11178c2ecf20Sopenharmony_ci
11188c2ecf20Sopenharmony_ci	clone->hydration_offset = offset;
11198c2ecf20Sopenharmony_ci
11208c2ecf20Sopenharmony_ci	if (atomic_dec_and_test(&clone->hydrations_in_flight))
11218c2ecf20Sopenharmony_ci		wakeup_hydration_waiters(clone);
11228c2ecf20Sopenharmony_ci}
11238c2ecf20Sopenharmony_ci
11248c2ecf20Sopenharmony_ci/*---------------------------------------------------------------------------*/
11258c2ecf20Sopenharmony_ci
11268c2ecf20Sopenharmony_cistatic bool need_commit_due_to_time(struct clone *clone)
11278c2ecf20Sopenharmony_ci{
11288c2ecf20Sopenharmony_ci	return !time_in_range(jiffies, clone->last_commit_jiffies,
11298c2ecf20Sopenharmony_ci			      clone->last_commit_jiffies + COMMIT_PERIOD);
11308c2ecf20Sopenharmony_ci}
11318c2ecf20Sopenharmony_ci
11328c2ecf20Sopenharmony_ci/*
11338c2ecf20Sopenharmony_ci * A non-zero return indicates read-only or fail mode.
11348c2ecf20Sopenharmony_ci */
11358c2ecf20Sopenharmony_cistatic int commit_metadata(struct clone *clone, bool *dest_dev_flushed)
11368c2ecf20Sopenharmony_ci{
11378c2ecf20Sopenharmony_ci	int r = 0;
11388c2ecf20Sopenharmony_ci
11398c2ecf20Sopenharmony_ci	if (dest_dev_flushed)
11408c2ecf20Sopenharmony_ci		*dest_dev_flushed = false;
11418c2ecf20Sopenharmony_ci
11428c2ecf20Sopenharmony_ci	mutex_lock(&clone->commit_lock);
11438c2ecf20Sopenharmony_ci
11448c2ecf20Sopenharmony_ci	if (!dm_clone_changed_this_transaction(clone->cmd))
11458c2ecf20Sopenharmony_ci		goto out;
11468c2ecf20Sopenharmony_ci
11478c2ecf20Sopenharmony_ci	if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) {
11488c2ecf20Sopenharmony_ci		r = -EPERM;
11498c2ecf20Sopenharmony_ci		goto out;
11508c2ecf20Sopenharmony_ci	}
11518c2ecf20Sopenharmony_ci
11528c2ecf20Sopenharmony_ci	r = dm_clone_metadata_pre_commit(clone->cmd);
11538c2ecf20Sopenharmony_ci	if (unlikely(r)) {
11548c2ecf20Sopenharmony_ci		__metadata_operation_failed(clone, "dm_clone_metadata_pre_commit", r);
11558c2ecf20Sopenharmony_ci		goto out;
11568c2ecf20Sopenharmony_ci	}
11578c2ecf20Sopenharmony_ci
11588c2ecf20Sopenharmony_ci	bio_reset(&clone->flush_bio);
11598c2ecf20Sopenharmony_ci	bio_set_dev(&clone->flush_bio, clone->dest_dev->bdev);
11608c2ecf20Sopenharmony_ci	clone->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
11618c2ecf20Sopenharmony_ci
11628c2ecf20Sopenharmony_ci	r = submit_bio_wait(&clone->flush_bio);
11638c2ecf20Sopenharmony_ci	if (unlikely(r)) {
11648c2ecf20Sopenharmony_ci		__metadata_operation_failed(clone, "flush destination device", r);
11658c2ecf20Sopenharmony_ci		goto out;
11668c2ecf20Sopenharmony_ci	}
11678c2ecf20Sopenharmony_ci
11688c2ecf20Sopenharmony_ci	if (dest_dev_flushed)
11698c2ecf20Sopenharmony_ci		*dest_dev_flushed = true;
11708c2ecf20Sopenharmony_ci
11718c2ecf20Sopenharmony_ci	r = dm_clone_metadata_commit(clone->cmd);
11728c2ecf20Sopenharmony_ci	if (unlikely(r)) {
11738c2ecf20Sopenharmony_ci		__metadata_operation_failed(clone, "dm_clone_metadata_commit", r);
11748c2ecf20Sopenharmony_ci		goto out;
11758c2ecf20Sopenharmony_ci	}
11768c2ecf20Sopenharmony_ci
11778c2ecf20Sopenharmony_ci	if (dm_clone_is_hydration_done(clone->cmd))
11788c2ecf20Sopenharmony_ci		dm_table_event(clone->ti->table);
11798c2ecf20Sopenharmony_ciout:
11808c2ecf20Sopenharmony_ci	mutex_unlock(&clone->commit_lock);
11818c2ecf20Sopenharmony_ci
11828c2ecf20Sopenharmony_ci	return r;
11838c2ecf20Sopenharmony_ci}
11848c2ecf20Sopenharmony_ci
11858c2ecf20Sopenharmony_cistatic void process_deferred_discards(struct clone *clone)
11868c2ecf20Sopenharmony_ci{
11878c2ecf20Sopenharmony_ci	int r = -EPERM;
11888c2ecf20Sopenharmony_ci	struct bio *bio;
11898c2ecf20Sopenharmony_ci	struct blk_plug plug;
11908c2ecf20Sopenharmony_ci	unsigned long rs, nr_regions;
11918c2ecf20Sopenharmony_ci	struct bio_list discards = BIO_EMPTY_LIST;
11928c2ecf20Sopenharmony_ci
11938c2ecf20Sopenharmony_ci	spin_lock_irq(&clone->lock);
11948c2ecf20Sopenharmony_ci	bio_list_merge(&discards, &clone->deferred_discard_bios);
11958c2ecf20Sopenharmony_ci	bio_list_init(&clone->deferred_discard_bios);
11968c2ecf20Sopenharmony_ci	spin_unlock_irq(&clone->lock);
11978c2ecf20Sopenharmony_ci
11988c2ecf20Sopenharmony_ci	if (bio_list_empty(&discards))
11998c2ecf20Sopenharmony_ci		return;
12008c2ecf20Sopenharmony_ci
12018c2ecf20Sopenharmony_ci	if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY))
12028c2ecf20Sopenharmony_ci		goto out;
12038c2ecf20Sopenharmony_ci
12048c2ecf20Sopenharmony_ci	/* Update the metadata */
12058c2ecf20Sopenharmony_ci	bio_list_for_each(bio, &discards) {
12068c2ecf20Sopenharmony_ci		bio_region_range(clone, bio, &rs, &nr_regions);
12078c2ecf20Sopenharmony_ci		/*
12088c2ecf20Sopenharmony_ci		 * A discard request might cover regions that have been already
12098c2ecf20Sopenharmony_ci		 * hydrated. There is no need to update the metadata for these
12108c2ecf20Sopenharmony_ci		 * regions.
12118c2ecf20Sopenharmony_ci		 */
12128c2ecf20Sopenharmony_ci		r = dm_clone_cond_set_range(clone->cmd, rs, nr_regions);
12138c2ecf20Sopenharmony_ci		if (unlikely(r))
12148c2ecf20Sopenharmony_ci			break;
12158c2ecf20Sopenharmony_ci	}
12168c2ecf20Sopenharmony_ciout:
12178c2ecf20Sopenharmony_ci	blk_start_plug(&plug);
12188c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(&discards)))
12198c2ecf20Sopenharmony_ci		complete_discard_bio(clone, bio, r == 0);
12208c2ecf20Sopenharmony_ci	blk_finish_plug(&plug);
12218c2ecf20Sopenharmony_ci}
12228c2ecf20Sopenharmony_ci
12238c2ecf20Sopenharmony_cistatic void process_deferred_bios(struct clone *clone)
12248c2ecf20Sopenharmony_ci{
12258c2ecf20Sopenharmony_ci	struct bio_list bios = BIO_EMPTY_LIST;
12268c2ecf20Sopenharmony_ci
12278c2ecf20Sopenharmony_ci	spin_lock_irq(&clone->lock);
12288c2ecf20Sopenharmony_ci	bio_list_merge(&bios, &clone->deferred_bios);
12298c2ecf20Sopenharmony_ci	bio_list_init(&clone->deferred_bios);
12308c2ecf20Sopenharmony_ci	spin_unlock_irq(&clone->lock);
12318c2ecf20Sopenharmony_ci
12328c2ecf20Sopenharmony_ci	if (bio_list_empty(&bios))
12338c2ecf20Sopenharmony_ci		return;
12348c2ecf20Sopenharmony_ci
12358c2ecf20Sopenharmony_ci	submit_bios(&bios);
12368c2ecf20Sopenharmony_ci}
12378c2ecf20Sopenharmony_ci
12388c2ecf20Sopenharmony_cistatic void process_deferred_flush_bios(struct clone *clone)
12398c2ecf20Sopenharmony_ci{
12408c2ecf20Sopenharmony_ci	struct bio *bio;
12418c2ecf20Sopenharmony_ci	bool dest_dev_flushed;
12428c2ecf20Sopenharmony_ci	struct bio_list bios = BIO_EMPTY_LIST;
12438c2ecf20Sopenharmony_ci	struct bio_list bio_completions = BIO_EMPTY_LIST;
12448c2ecf20Sopenharmony_ci
12458c2ecf20Sopenharmony_ci	/*
12468c2ecf20Sopenharmony_ci	 * If there are any deferred flush bios, we must commit the metadata
12478c2ecf20Sopenharmony_ci	 * before issuing them or signaling their completion.
12488c2ecf20Sopenharmony_ci	 */
12498c2ecf20Sopenharmony_ci	spin_lock_irq(&clone->lock);
12508c2ecf20Sopenharmony_ci	bio_list_merge(&bios, &clone->deferred_flush_bios);
12518c2ecf20Sopenharmony_ci	bio_list_init(&clone->deferred_flush_bios);
12528c2ecf20Sopenharmony_ci
12538c2ecf20Sopenharmony_ci	bio_list_merge(&bio_completions, &clone->deferred_flush_completions);
12548c2ecf20Sopenharmony_ci	bio_list_init(&clone->deferred_flush_completions);
12558c2ecf20Sopenharmony_ci	spin_unlock_irq(&clone->lock);
12568c2ecf20Sopenharmony_ci
12578c2ecf20Sopenharmony_ci	if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
12588c2ecf20Sopenharmony_ci	    !(dm_clone_changed_this_transaction(clone->cmd) && need_commit_due_to_time(clone)))
12598c2ecf20Sopenharmony_ci		return;
12608c2ecf20Sopenharmony_ci
12618c2ecf20Sopenharmony_ci	if (commit_metadata(clone, &dest_dev_flushed)) {
12628c2ecf20Sopenharmony_ci		bio_list_merge(&bios, &bio_completions);
12638c2ecf20Sopenharmony_ci
12648c2ecf20Sopenharmony_ci		while ((bio = bio_list_pop(&bios)))
12658c2ecf20Sopenharmony_ci			bio_io_error(bio);
12668c2ecf20Sopenharmony_ci
12678c2ecf20Sopenharmony_ci		return;
12688c2ecf20Sopenharmony_ci	}
12698c2ecf20Sopenharmony_ci
12708c2ecf20Sopenharmony_ci	clone->last_commit_jiffies = jiffies;
12718c2ecf20Sopenharmony_ci
12728c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(&bio_completions)))
12738c2ecf20Sopenharmony_ci		bio_endio(bio);
12748c2ecf20Sopenharmony_ci
12758c2ecf20Sopenharmony_ci	while ((bio = bio_list_pop(&bios))) {
12768c2ecf20Sopenharmony_ci		if ((bio->bi_opf & REQ_PREFLUSH) && dest_dev_flushed) {
12778c2ecf20Sopenharmony_ci			/* We just flushed the destination device as part of
12788c2ecf20Sopenharmony_ci			 * the metadata commit, so there is no reason to send
12798c2ecf20Sopenharmony_ci			 * another flush.
12808c2ecf20Sopenharmony_ci			 */
12818c2ecf20Sopenharmony_ci			bio_endio(bio);
12828c2ecf20Sopenharmony_ci		} else {
12838c2ecf20Sopenharmony_ci			submit_bio_noacct(bio);
12848c2ecf20Sopenharmony_ci		}
12858c2ecf20Sopenharmony_ci	}
12868c2ecf20Sopenharmony_ci}
12878c2ecf20Sopenharmony_ci
12888c2ecf20Sopenharmony_cistatic void do_worker(struct work_struct *work)
12898c2ecf20Sopenharmony_ci{
12908c2ecf20Sopenharmony_ci	struct clone *clone = container_of(work, typeof(*clone), worker);
12918c2ecf20Sopenharmony_ci
12928c2ecf20Sopenharmony_ci	process_deferred_bios(clone);
12938c2ecf20Sopenharmony_ci	process_deferred_discards(clone);
12948c2ecf20Sopenharmony_ci
12958c2ecf20Sopenharmony_ci	/*
12968c2ecf20Sopenharmony_ci	 * process_deferred_flush_bios():
12978c2ecf20Sopenharmony_ci	 *
12988c2ecf20Sopenharmony_ci	 *   - Commit metadata
12998c2ecf20Sopenharmony_ci	 *
13008c2ecf20Sopenharmony_ci	 *   - Process deferred REQ_FUA completions
13018c2ecf20Sopenharmony_ci	 *
13028c2ecf20Sopenharmony_ci	 *   - Process deferred REQ_PREFLUSH bios
13038c2ecf20Sopenharmony_ci	 */
13048c2ecf20Sopenharmony_ci	process_deferred_flush_bios(clone);
13058c2ecf20Sopenharmony_ci
13068c2ecf20Sopenharmony_ci	/* Background hydration */
13078c2ecf20Sopenharmony_ci	do_hydration(clone);
13088c2ecf20Sopenharmony_ci}
13098c2ecf20Sopenharmony_ci
13108c2ecf20Sopenharmony_ci/*
13118c2ecf20Sopenharmony_ci * Commit periodically so that not too much unwritten data builds up.
13128c2ecf20Sopenharmony_ci *
13138c2ecf20Sopenharmony_ci * Also, restart background hydration, if it has been stopped by in-flight I/O.
13148c2ecf20Sopenharmony_ci */
13158c2ecf20Sopenharmony_cistatic void do_waker(struct work_struct *work)
13168c2ecf20Sopenharmony_ci{
13178c2ecf20Sopenharmony_ci	struct clone *clone = container_of(to_delayed_work(work), struct clone, waker);
13188c2ecf20Sopenharmony_ci
13198c2ecf20Sopenharmony_ci	wake_worker(clone);
13208c2ecf20Sopenharmony_ci	queue_delayed_work(clone->wq, &clone->waker, COMMIT_PERIOD);
13218c2ecf20Sopenharmony_ci}
13228c2ecf20Sopenharmony_ci
13238c2ecf20Sopenharmony_ci/*---------------------------------------------------------------------------*/
13248c2ecf20Sopenharmony_ci
13258c2ecf20Sopenharmony_ci/*
13268c2ecf20Sopenharmony_ci * Target methods
13278c2ecf20Sopenharmony_ci */
13288c2ecf20Sopenharmony_cistatic int clone_map(struct dm_target *ti, struct bio *bio)
13298c2ecf20Sopenharmony_ci{
13308c2ecf20Sopenharmony_ci	struct clone *clone = ti->private;
13318c2ecf20Sopenharmony_ci	unsigned long region_nr;
13328c2ecf20Sopenharmony_ci
13338c2ecf20Sopenharmony_ci	atomic_inc(&clone->ios_in_flight);
13348c2ecf20Sopenharmony_ci
13358c2ecf20Sopenharmony_ci	if (unlikely(get_clone_mode(clone) == CM_FAIL))
13368c2ecf20Sopenharmony_ci		return DM_MAPIO_KILL;
13378c2ecf20Sopenharmony_ci
13388c2ecf20Sopenharmony_ci	/*
13398c2ecf20Sopenharmony_ci	 * REQ_PREFLUSH bios carry no data:
13408c2ecf20Sopenharmony_ci	 *
13418c2ecf20Sopenharmony_ci	 * - Commit metadata, if changed
13428c2ecf20Sopenharmony_ci	 *
13438c2ecf20Sopenharmony_ci	 * - Pass down to destination device
13448c2ecf20Sopenharmony_ci	 */
13458c2ecf20Sopenharmony_ci	if (bio->bi_opf & REQ_PREFLUSH) {
13468c2ecf20Sopenharmony_ci		remap_and_issue(clone, bio);
13478c2ecf20Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
13488c2ecf20Sopenharmony_ci	}
13498c2ecf20Sopenharmony_ci
13508c2ecf20Sopenharmony_ci	bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
13518c2ecf20Sopenharmony_ci
13528c2ecf20Sopenharmony_ci	/*
13538c2ecf20Sopenharmony_ci	 * dm-clone interprets discards and performs a fast hydration of the
13548c2ecf20Sopenharmony_ci	 * discarded regions, i.e., we skip the copy from the source device and
13558c2ecf20Sopenharmony_ci	 * just mark the regions as hydrated.
13568c2ecf20Sopenharmony_ci	 */
13578c2ecf20Sopenharmony_ci	if (bio_op(bio) == REQ_OP_DISCARD) {
13588c2ecf20Sopenharmony_ci		process_discard_bio(clone, bio);
13598c2ecf20Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
13608c2ecf20Sopenharmony_ci	}
13618c2ecf20Sopenharmony_ci
13628c2ecf20Sopenharmony_ci	/*
13638c2ecf20Sopenharmony_ci	 * If the bio's region is hydrated, redirect it to the destination
13648c2ecf20Sopenharmony_ci	 * device.
13658c2ecf20Sopenharmony_ci	 *
13668c2ecf20Sopenharmony_ci	 * If the region is not hydrated and the bio is a READ, redirect it to
13678c2ecf20Sopenharmony_ci	 * the source device.
13688c2ecf20Sopenharmony_ci	 *
13698c2ecf20Sopenharmony_ci	 * Else, defer WRITE bio until after its region has been hydrated and
13708c2ecf20Sopenharmony_ci	 * start the region's hydration immediately.
13718c2ecf20Sopenharmony_ci	 */
13728c2ecf20Sopenharmony_ci	region_nr = bio_to_region(clone, bio);
13738c2ecf20Sopenharmony_ci	if (dm_clone_is_region_hydrated(clone->cmd, region_nr)) {
13748c2ecf20Sopenharmony_ci		remap_and_issue(clone, bio);
13758c2ecf20Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
13768c2ecf20Sopenharmony_ci	} else if (bio_data_dir(bio) == READ) {
13778c2ecf20Sopenharmony_ci		remap_to_source(clone, bio);
13788c2ecf20Sopenharmony_ci		return DM_MAPIO_REMAPPED;
13798c2ecf20Sopenharmony_ci	}
13808c2ecf20Sopenharmony_ci
13818c2ecf20Sopenharmony_ci	remap_to_dest(clone, bio);
13828c2ecf20Sopenharmony_ci	hydrate_bio_region(clone, bio);
13838c2ecf20Sopenharmony_ci
13848c2ecf20Sopenharmony_ci	return DM_MAPIO_SUBMITTED;
13858c2ecf20Sopenharmony_ci}
13868c2ecf20Sopenharmony_ci
13878c2ecf20Sopenharmony_cistatic int clone_endio(struct dm_target *ti, struct bio *bio, blk_status_t *error)
13888c2ecf20Sopenharmony_ci{
13898c2ecf20Sopenharmony_ci	struct clone *clone = ti->private;
13908c2ecf20Sopenharmony_ci
13918c2ecf20Sopenharmony_ci	atomic_dec(&clone->ios_in_flight);
13928c2ecf20Sopenharmony_ci
13938c2ecf20Sopenharmony_ci	return DM_ENDIO_DONE;
13948c2ecf20Sopenharmony_ci}
13958c2ecf20Sopenharmony_ci
13968c2ecf20Sopenharmony_cistatic void emit_flags(struct clone *clone, char *result, unsigned int maxlen,
13978c2ecf20Sopenharmony_ci		       ssize_t *sz_ptr)
13988c2ecf20Sopenharmony_ci{
13998c2ecf20Sopenharmony_ci	ssize_t sz = *sz_ptr;
14008c2ecf20Sopenharmony_ci	unsigned int count;
14018c2ecf20Sopenharmony_ci
14028c2ecf20Sopenharmony_ci	count = !test_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags);
14038c2ecf20Sopenharmony_ci	count += !test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags);
14048c2ecf20Sopenharmony_ci
14058c2ecf20Sopenharmony_ci	DMEMIT("%u ", count);
14068c2ecf20Sopenharmony_ci
14078c2ecf20Sopenharmony_ci	if (!test_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags))
14088c2ecf20Sopenharmony_ci		DMEMIT("no_hydration ");
14098c2ecf20Sopenharmony_ci
14108c2ecf20Sopenharmony_ci	if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags))
14118c2ecf20Sopenharmony_ci		DMEMIT("no_discard_passdown ");
14128c2ecf20Sopenharmony_ci
14138c2ecf20Sopenharmony_ci	*sz_ptr = sz;
14148c2ecf20Sopenharmony_ci}
14158c2ecf20Sopenharmony_ci
14168c2ecf20Sopenharmony_cistatic void emit_core_args(struct clone *clone, char *result,
14178c2ecf20Sopenharmony_ci			   unsigned int maxlen, ssize_t *sz_ptr)
14188c2ecf20Sopenharmony_ci{
14198c2ecf20Sopenharmony_ci	ssize_t sz = *sz_ptr;
14208c2ecf20Sopenharmony_ci	unsigned int count = 4;
14218c2ecf20Sopenharmony_ci
14228c2ecf20Sopenharmony_ci	DMEMIT("%u hydration_threshold %u hydration_batch_size %u ", count,
14238c2ecf20Sopenharmony_ci	       READ_ONCE(clone->hydration_threshold),
14248c2ecf20Sopenharmony_ci	       READ_ONCE(clone->hydration_batch_size));
14258c2ecf20Sopenharmony_ci
14268c2ecf20Sopenharmony_ci	*sz_ptr = sz;
14278c2ecf20Sopenharmony_ci}
14288c2ecf20Sopenharmony_ci
14298c2ecf20Sopenharmony_ci/*
14308c2ecf20Sopenharmony_ci * Status format:
14318c2ecf20Sopenharmony_ci *
14328c2ecf20Sopenharmony_ci * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
14338c2ecf20Sopenharmony_ci * <clone region size> <#hydrated regions>/<#total regions> <#hydrating regions>
14348c2ecf20Sopenharmony_ci * <#features> <features>* <#core args> <core args>* <clone metadata mode>
14358c2ecf20Sopenharmony_ci */
14368c2ecf20Sopenharmony_cistatic void clone_status(struct dm_target *ti, status_type_t type,
14378c2ecf20Sopenharmony_ci			 unsigned int status_flags, char *result,
14388c2ecf20Sopenharmony_ci			 unsigned int maxlen)
14398c2ecf20Sopenharmony_ci{
14408c2ecf20Sopenharmony_ci	int r;
14418c2ecf20Sopenharmony_ci	unsigned int i;
14428c2ecf20Sopenharmony_ci	ssize_t sz = 0;
14438c2ecf20Sopenharmony_ci	dm_block_t nr_free_metadata_blocks = 0;
14448c2ecf20Sopenharmony_ci	dm_block_t nr_metadata_blocks = 0;
14458c2ecf20Sopenharmony_ci	char buf[BDEVNAME_SIZE];
14468c2ecf20Sopenharmony_ci	struct clone *clone = ti->private;
14478c2ecf20Sopenharmony_ci
14488c2ecf20Sopenharmony_ci	switch (type) {
14498c2ecf20Sopenharmony_ci	case STATUSTYPE_INFO:
14508c2ecf20Sopenharmony_ci		if (get_clone_mode(clone) == CM_FAIL) {
14518c2ecf20Sopenharmony_ci			DMEMIT("Fail");
14528c2ecf20Sopenharmony_ci			break;
14538c2ecf20Sopenharmony_ci		}
14548c2ecf20Sopenharmony_ci
14558c2ecf20Sopenharmony_ci		/* Commit to ensure statistics aren't out-of-date */
14568c2ecf20Sopenharmony_ci		if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
14578c2ecf20Sopenharmony_ci			(void) commit_metadata(clone, NULL);
14588c2ecf20Sopenharmony_ci
14598c2ecf20Sopenharmony_ci		r = dm_clone_get_free_metadata_block_count(clone->cmd, &nr_free_metadata_blocks);
14608c2ecf20Sopenharmony_ci
14618c2ecf20Sopenharmony_ci		if (r) {
14628c2ecf20Sopenharmony_ci			DMERR("%s: dm_clone_get_free_metadata_block_count returned %d",
14638c2ecf20Sopenharmony_ci			      clone_device_name(clone), r);
14648c2ecf20Sopenharmony_ci			goto error;
14658c2ecf20Sopenharmony_ci		}
14668c2ecf20Sopenharmony_ci
14678c2ecf20Sopenharmony_ci		r = dm_clone_get_metadata_dev_size(clone->cmd, &nr_metadata_blocks);
14688c2ecf20Sopenharmony_ci
14698c2ecf20Sopenharmony_ci		if (r) {
14708c2ecf20Sopenharmony_ci			DMERR("%s: dm_clone_get_metadata_dev_size returned %d",
14718c2ecf20Sopenharmony_ci			      clone_device_name(clone), r);
14728c2ecf20Sopenharmony_ci			goto error;
14738c2ecf20Sopenharmony_ci		}
14748c2ecf20Sopenharmony_ci
14758c2ecf20Sopenharmony_ci		DMEMIT("%u %llu/%llu %llu %u/%lu %u ",
14768c2ecf20Sopenharmony_ci		       DM_CLONE_METADATA_BLOCK_SIZE,
14778c2ecf20Sopenharmony_ci		       (unsigned long long)(nr_metadata_blocks - nr_free_metadata_blocks),
14788c2ecf20Sopenharmony_ci		       (unsigned long long)nr_metadata_blocks,
14798c2ecf20Sopenharmony_ci		       (unsigned long long)clone->region_size,
14808c2ecf20Sopenharmony_ci		       dm_clone_nr_of_hydrated_regions(clone->cmd),
14818c2ecf20Sopenharmony_ci		       clone->nr_regions,
14828c2ecf20Sopenharmony_ci		       atomic_read(&clone->hydrations_in_flight));
14838c2ecf20Sopenharmony_ci
14848c2ecf20Sopenharmony_ci		emit_flags(clone, result, maxlen, &sz);
14858c2ecf20Sopenharmony_ci		emit_core_args(clone, result, maxlen, &sz);
14868c2ecf20Sopenharmony_ci
14878c2ecf20Sopenharmony_ci		switch (get_clone_mode(clone)) {
14888c2ecf20Sopenharmony_ci		case CM_WRITE:
14898c2ecf20Sopenharmony_ci			DMEMIT("rw");
14908c2ecf20Sopenharmony_ci			break;
14918c2ecf20Sopenharmony_ci		case CM_READ_ONLY:
14928c2ecf20Sopenharmony_ci			DMEMIT("ro");
14938c2ecf20Sopenharmony_ci			break;
14948c2ecf20Sopenharmony_ci		case CM_FAIL:
14958c2ecf20Sopenharmony_ci			DMEMIT("Fail");
14968c2ecf20Sopenharmony_ci		}
14978c2ecf20Sopenharmony_ci
14988c2ecf20Sopenharmony_ci		break;
14998c2ecf20Sopenharmony_ci
15008c2ecf20Sopenharmony_ci	case STATUSTYPE_TABLE:
15018c2ecf20Sopenharmony_ci		format_dev_t(buf, clone->metadata_dev->bdev->bd_dev);
15028c2ecf20Sopenharmony_ci		DMEMIT("%s ", buf);
15038c2ecf20Sopenharmony_ci
15048c2ecf20Sopenharmony_ci		format_dev_t(buf, clone->dest_dev->bdev->bd_dev);
15058c2ecf20Sopenharmony_ci		DMEMIT("%s ", buf);
15068c2ecf20Sopenharmony_ci
15078c2ecf20Sopenharmony_ci		format_dev_t(buf, clone->source_dev->bdev->bd_dev);
15088c2ecf20Sopenharmony_ci		DMEMIT("%s", buf);
15098c2ecf20Sopenharmony_ci
15108c2ecf20Sopenharmony_ci		for (i = 0; i < clone->nr_ctr_args; i++)
15118c2ecf20Sopenharmony_ci			DMEMIT(" %s", clone->ctr_args[i]);
15128c2ecf20Sopenharmony_ci	}
15138c2ecf20Sopenharmony_ci
15148c2ecf20Sopenharmony_ci	return;
15158c2ecf20Sopenharmony_ci
15168c2ecf20Sopenharmony_cierror:
15178c2ecf20Sopenharmony_ci	DMEMIT("Error");
15188c2ecf20Sopenharmony_ci}
15198c2ecf20Sopenharmony_ci
15208c2ecf20Sopenharmony_cistatic sector_t get_dev_size(struct dm_dev *dev)
15218c2ecf20Sopenharmony_ci{
15228c2ecf20Sopenharmony_ci	return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
15238c2ecf20Sopenharmony_ci}
15248c2ecf20Sopenharmony_ci
15258c2ecf20Sopenharmony_ci/*---------------------------------------------------------------------------*/
15268c2ecf20Sopenharmony_ci
15278c2ecf20Sopenharmony_ci/*
15288c2ecf20Sopenharmony_ci * Construct a clone device mapping:
15298c2ecf20Sopenharmony_ci *
15308c2ecf20Sopenharmony_ci * clone <metadata dev> <destination dev> <source dev> <region size>
15318c2ecf20Sopenharmony_ci *	[<#feature args> [<feature arg>]* [<#core args> [key value]*]]
15328c2ecf20Sopenharmony_ci *
15338c2ecf20Sopenharmony_ci * metadata dev: Fast device holding the persistent metadata
15348c2ecf20Sopenharmony_ci * destination dev: The destination device, which will become a clone of the
15358c2ecf20Sopenharmony_ci *                  source device
15368c2ecf20Sopenharmony_ci * source dev: The read-only source device that gets cloned
15378c2ecf20Sopenharmony_ci * region size: dm-clone unit size in sectors
15388c2ecf20Sopenharmony_ci *
15398c2ecf20Sopenharmony_ci * #feature args: Number of feature arguments passed
15408c2ecf20Sopenharmony_ci * feature args: E.g. no_hydration, no_discard_passdown
15418c2ecf20Sopenharmony_ci *
15428c2ecf20Sopenharmony_ci * #core arguments: An even number of core arguments
15438c2ecf20Sopenharmony_ci * core arguments: Key/value pairs for tuning the core
15448c2ecf20Sopenharmony_ci *		   E.g. 'hydration_threshold 256'
15458c2ecf20Sopenharmony_ci */
15468c2ecf20Sopenharmony_cistatic int parse_feature_args(struct dm_arg_set *as, struct clone *clone)
15478c2ecf20Sopenharmony_ci{
15488c2ecf20Sopenharmony_ci	int r;
15498c2ecf20Sopenharmony_ci	unsigned int argc;
15508c2ecf20Sopenharmony_ci	const char *arg_name;
15518c2ecf20Sopenharmony_ci	struct dm_target *ti = clone->ti;
15528c2ecf20Sopenharmony_ci
15538c2ecf20Sopenharmony_ci	const struct dm_arg args = {
15548c2ecf20Sopenharmony_ci		.min = 0,
15558c2ecf20Sopenharmony_ci		.max = 2,
15568c2ecf20Sopenharmony_ci		.error = "Invalid number of feature arguments"
15578c2ecf20Sopenharmony_ci	};
15588c2ecf20Sopenharmony_ci
15598c2ecf20Sopenharmony_ci	/* No feature arguments supplied */
15608c2ecf20Sopenharmony_ci	if (!as->argc)
15618c2ecf20Sopenharmony_ci		return 0;
15628c2ecf20Sopenharmony_ci
15638c2ecf20Sopenharmony_ci	r = dm_read_arg_group(&args, as, &argc, &ti->error);
15648c2ecf20Sopenharmony_ci	if (r)
15658c2ecf20Sopenharmony_ci		return r;
15668c2ecf20Sopenharmony_ci
15678c2ecf20Sopenharmony_ci	while (argc) {
15688c2ecf20Sopenharmony_ci		arg_name = dm_shift_arg(as);
15698c2ecf20Sopenharmony_ci		argc--;
15708c2ecf20Sopenharmony_ci
15718c2ecf20Sopenharmony_ci		if (!strcasecmp(arg_name, "no_hydration")) {
15728c2ecf20Sopenharmony_ci			__clear_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags);
15738c2ecf20Sopenharmony_ci		} else if (!strcasecmp(arg_name, "no_discard_passdown")) {
15748c2ecf20Sopenharmony_ci			__clear_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags);
15758c2ecf20Sopenharmony_ci		} else {
15768c2ecf20Sopenharmony_ci			ti->error = "Invalid feature argument";
15778c2ecf20Sopenharmony_ci			return -EINVAL;
15788c2ecf20Sopenharmony_ci		}
15798c2ecf20Sopenharmony_ci	}
15808c2ecf20Sopenharmony_ci
15818c2ecf20Sopenharmony_ci	return 0;
15828c2ecf20Sopenharmony_ci}
15838c2ecf20Sopenharmony_ci
15848c2ecf20Sopenharmony_cistatic int parse_core_args(struct dm_arg_set *as, struct clone *clone)
15858c2ecf20Sopenharmony_ci{
15868c2ecf20Sopenharmony_ci	int r;
15878c2ecf20Sopenharmony_ci	unsigned int argc;
15888c2ecf20Sopenharmony_ci	unsigned int value;
15898c2ecf20Sopenharmony_ci	const char *arg_name;
15908c2ecf20Sopenharmony_ci	struct dm_target *ti = clone->ti;
15918c2ecf20Sopenharmony_ci
15928c2ecf20Sopenharmony_ci	const struct dm_arg args = {
15938c2ecf20Sopenharmony_ci		.min = 0,
15948c2ecf20Sopenharmony_ci		.max = 4,
15958c2ecf20Sopenharmony_ci		.error = "Invalid number of core arguments"
15968c2ecf20Sopenharmony_ci	};
15978c2ecf20Sopenharmony_ci
15988c2ecf20Sopenharmony_ci	/* Initialize core arguments */
15998c2ecf20Sopenharmony_ci	clone->hydration_batch_size = DEFAULT_HYDRATION_BATCH_SIZE;
16008c2ecf20Sopenharmony_ci	clone->hydration_threshold = DEFAULT_HYDRATION_THRESHOLD;
16018c2ecf20Sopenharmony_ci
16028c2ecf20Sopenharmony_ci	/* No core arguments supplied */
16038c2ecf20Sopenharmony_ci	if (!as->argc)
16048c2ecf20Sopenharmony_ci		return 0;
16058c2ecf20Sopenharmony_ci
16068c2ecf20Sopenharmony_ci	r = dm_read_arg_group(&args, as, &argc, &ti->error);
16078c2ecf20Sopenharmony_ci	if (r)
16088c2ecf20Sopenharmony_ci		return r;
16098c2ecf20Sopenharmony_ci
16108c2ecf20Sopenharmony_ci	if (argc & 1) {
16118c2ecf20Sopenharmony_ci		ti->error = "Number of core arguments must be even";
16128c2ecf20Sopenharmony_ci		return -EINVAL;
16138c2ecf20Sopenharmony_ci	}
16148c2ecf20Sopenharmony_ci
16158c2ecf20Sopenharmony_ci	while (argc) {
16168c2ecf20Sopenharmony_ci		arg_name = dm_shift_arg(as);
16178c2ecf20Sopenharmony_ci		argc -= 2;
16188c2ecf20Sopenharmony_ci
16198c2ecf20Sopenharmony_ci		if (!strcasecmp(arg_name, "hydration_threshold")) {
16208c2ecf20Sopenharmony_ci			if (kstrtouint(dm_shift_arg(as), 10, &value)) {
16218c2ecf20Sopenharmony_ci				ti->error = "Invalid value for argument `hydration_threshold'";
16228c2ecf20Sopenharmony_ci				return -EINVAL;
16238c2ecf20Sopenharmony_ci			}
16248c2ecf20Sopenharmony_ci			clone->hydration_threshold = value;
16258c2ecf20Sopenharmony_ci		} else if (!strcasecmp(arg_name, "hydration_batch_size")) {
16268c2ecf20Sopenharmony_ci			if (kstrtouint(dm_shift_arg(as), 10, &value)) {
16278c2ecf20Sopenharmony_ci				ti->error = "Invalid value for argument `hydration_batch_size'";
16288c2ecf20Sopenharmony_ci				return -EINVAL;
16298c2ecf20Sopenharmony_ci			}
16308c2ecf20Sopenharmony_ci			clone->hydration_batch_size = value;
16318c2ecf20Sopenharmony_ci		} else {
16328c2ecf20Sopenharmony_ci			ti->error = "Invalid core argument";
16338c2ecf20Sopenharmony_ci			return -EINVAL;
16348c2ecf20Sopenharmony_ci		}
16358c2ecf20Sopenharmony_ci	}
16368c2ecf20Sopenharmony_ci
16378c2ecf20Sopenharmony_ci	return 0;
16388c2ecf20Sopenharmony_ci}
16398c2ecf20Sopenharmony_ci
16408c2ecf20Sopenharmony_cistatic int parse_region_size(struct clone *clone, struct dm_arg_set *as, char **error)
16418c2ecf20Sopenharmony_ci{
16428c2ecf20Sopenharmony_ci	int r;
16438c2ecf20Sopenharmony_ci	unsigned int region_size;
16448c2ecf20Sopenharmony_ci	struct dm_arg arg;
16458c2ecf20Sopenharmony_ci
16468c2ecf20Sopenharmony_ci	arg.min = MIN_REGION_SIZE;
16478c2ecf20Sopenharmony_ci	arg.max = MAX_REGION_SIZE;
16488c2ecf20Sopenharmony_ci	arg.error = "Invalid region size";
16498c2ecf20Sopenharmony_ci
16508c2ecf20Sopenharmony_ci	r = dm_read_arg(&arg, as, &region_size, error);
16518c2ecf20Sopenharmony_ci	if (r)
16528c2ecf20Sopenharmony_ci		return r;
16538c2ecf20Sopenharmony_ci
16548c2ecf20Sopenharmony_ci	/* Check region size is a power of 2 */
16558c2ecf20Sopenharmony_ci	if (!is_power_of_2(region_size)) {
16568c2ecf20Sopenharmony_ci		*error = "Region size is not a power of 2";
16578c2ecf20Sopenharmony_ci		return -EINVAL;
16588c2ecf20Sopenharmony_ci	}
16598c2ecf20Sopenharmony_ci
16608c2ecf20Sopenharmony_ci	/* Validate the region size against the device logical block size */
16618c2ecf20Sopenharmony_ci	if (region_size % (bdev_logical_block_size(clone->source_dev->bdev) >> 9) ||
16628c2ecf20Sopenharmony_ci	    region_size % (bdev_logical_block_size(clone->dest_dev->bdev) >> 9)) {
16638c2ecf20Sopenharmony_ci		*error = "Region size is not a multiple of device logical block size";
16648c2ecf20Sopenharmony_ci		return -EINVAL;
16658c2ecf20Sopenharmony_ci	}
16668c2ecf20Sopenharmony_ci
16678c2ecf20Sopenharmony_ci	clone->region_size = region_size;
16688c2ecf20Sopenharmony_ci
16698c2ecf20Sopenharmony_ci	return 0;
16708c2ecf20Sopenharmony_ci}
16718c2ecf20Sopenharmony_ci
/*
 * Check that @n regions can be tracked.
 *
 * dm_bitset restricts us to 2^32 regions and test_bit & co. restrict us
 * further to 2^31 regions, so anything above 2^31 is rejected.
 *
 * Returns 0 if @n is acceptable, otherwise -EINVAL with *@error set.
 */
static int validate_nr_regions(unsigned long n, char **error)
{
	const unsigned long max_regions = 1UL << 31;

	if (n <= max_regions)
		return 0;

	*error = "Too many regions. Consider increasing the region size";
	return -EINVAL;
}
16858c2ecf20Sopenharmony_ci
16868c2ecf20Sopenharmony_cistatic int parse_metadata_dev(struct clone *clone, struct dm_arg_set *as, char **error)
16878c2ecf20Sopenharmony_ci{
16888c2ecf20Sopenharmony_ci	int r;
16898c2ecf20Sopenharmony_ci	sector_t metadata_dev_size;
16908c2ecf20Sopenharmony_ci	char b[BDEVNAME_SIZE];
16918c2ecf20Sopenharmony_ci
16928c2ecf20Sopenharmony_ci	r = dm_get_device(clone->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
16938c2ecf20Sopenharmony_ci			  &clone->metadata_dev);
16948c2ecf20Sopenharmony_ci	if (r) {
16958c2ecf20Sopenharmony_ci		*error = "Error opening metadata device";
16968c2ecf20Sopenharmony_ci		return r;
16978c2ecf20Sopenharmony_ci	}
16988c2ecf20Sopenharmony_ci
16998c2ecf20Sopenharmony_ci	metadata_dev_size = get_dev_size(clone->metadata_dev);
17008c2ecf20Sopenharmony_ci	if (metadata_dev_size > DM_CLONE_METADATA_MAX_SECTORS_WARNING)
17018c2ecf20Sopenharmony_ci		DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
17028c2ecf20Sopenharmony_ci		       bdevname(clone->metadata_dev->bdev, b), DM_CLONE_METADATA_MAX_SECTORS);
17038c2ecf20Sopenharmony_ci
17048c2ecf20Sopenharmony_ci	return 0;
17058c2ecf20Sopenharmony_ci}
17068c2ecf20Sopenharmony_ci
17078c2ecf20Sopenharmony_cistatic int parse_dest_dev(struct clone *clone, struct dm_arg_set *as, char **error)
17088c2ecf20Sopenharmony_ci{
17098c2ecf20Sopenharmony_ci	int r;
17108c2ecf20Sopenharmony_ci	sector_t dest_dev_size;
17118c2ecf20Sopenharmony_ci
17128c2ecf20Sopenharmony_ci	r = dm_get_device(clone->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
17138c2ecf20Sopenharmony_ci			  &clone->dest_dev);
17148c2ecf20Sopenharmony_ci	if (r) {
17158c2ecf20Sopenharmony_ci		*error = "Error opening destination device";
17168c2ecf20Sopenharmony_ci		return r;
17178c2ecf20Sopenharmony_ci	}
17188c2ecf20Sopenharmony_ci
17198c2ecf20Sopenharmony_ci	dest_dev_size = get_dev_size(clone->dest_dev);
17208c2ecf20Sopenharmony_ci	if (dest_dev_size < clone->ti->len) {
17218c2ecf20Sopenharmony_ci		dm_put_device(clone->ti, clone->dest_dev);
17228c2ecf20Sopenharmony_ci		*error = "Device size larger than destination device";
17238c2ecf20Sopenharmony_ci		return -EINVAL;
17248c2ecf20Sopenharmony_ci	}
17258c2ecf20Sopenharmony_ci
17268c2ecf20Sopenharmony_ci	return 0;
17278c2ecf20Sopenharmony_ci}
17288c2ecf20Sopenharmony_ci
17298c2ecf20Sopenharmony_cistatic int parse_source_dev(struct clone *clone, struct dm_arg_set *as, char **error)
17308c2ecf20Sopenharmony_ci{
17318c2ecf20Sopenharmony_ci	int r;
17328c2ecf20Sopenharmony_ci	sector_t source_dev_size;
17338c2ecf20Sopenharmony_ci
17348c2ecf20Sopenharmony_ci	r = dm_get_device(clone->ti, dm_shift_arg(as), FMODE_READ,
17358c2ecf20Sopenharmony_ci			  &clone->source_dev);
17368c2ecf20Sopenharmony_ci	if (r) {
17378c2ecf20Sopenharmony_ci		*error = "Error opening source device";
17388c2ecf20Sopenharmony_ci		return r;
17398c2ecf20Sopenharmony_ci	}
17408c2ecf20Sopenharmony_ci
17418c2ecf20Sopenharmony_ci	source_dev_size = get_dev_size(clone->source_dev);
17428c2ecf20Sopenharmony_ci	if (source_dev_size < clone->ti->len) {
17438c2ecf20Sopenharmony_ci		dm_put_device(clone->ti, clone->source_dev);
17448c2ecf20Sopenharmony_ci		*error = "Device size larger than source device";
17458c2ecf20Sopenharmony_ci		return -EINVAL;
17468c2ecf20Sopenharmony_ci	}
17478c2ecf20Sopenharmony_ci
17488c2ecf20Sopenharmony_ci	return 0;
17498c2ecf20Sopenharmony_ci}
17508c2ecf20Sopenharmony_ci
17518c2ecf20Sopenharmony_cistatic int copy_ctr_args(struct clone *clone, int argc, const char **argv, char **error)
17528c2ecf20Sopenharmony_ci{
17538c2ecf20Sopenharmony_ci	unsigned int i;
17548c2ecf20Sopenharmony_ci	const char **copy;
17558c2ecf20Sopenharmony_ci
17568c2ecf20Sopenharmony_ci	copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
17578c2ecf20Sopenharmony_ci	if (!copy)
17588c2ecf20Sopenharmony_ci		goto error;
17598c2ecf20Sopenharmony_ci
17608c2ecf20Sopenharmony_ci	for (i = 0; i < argc; i++) {
17618c2ecf20Sopenharmony_ci		copy[i] = kstrdup(argv[i], GFP_KERNEL);
17628c2ecf20Sopenharmony_ci
17638c2ecf20Sopenharmony_ci		if (!copy[i]) {
17648c2ecf20Sopenharmony_ci			while (i--)
17658c2ecf20Sopenharmony_ci				kfree(copy[i]);
17668c2ecf20Sopenharmony_ci			kfree(copy);
17678c2ecf20Sopenharmony_ci			goto error;
17688c2ecf20Sopenharmony_ci		}
17698c2ecf20Sopenharmony_ci	}
17708c2ecf20Sopenharmony_ci
17718c2ecf20Sopenharmony_ci	clone->nr_ctr_args = argc;
17728c2ecf20Sopenharmony_ci	clone->ctr_args = copy;
17738c2ecf20Sopenharmony_ci	return 0;
17748c2ecf20Sopenharmony_ci
17758c2ecf20Sopenharmony_cierror:
17768c2ecf20Sopenharmony_ci	*error = "Failed to allocate memory for table line";
17778c2ecf20Sopenharmony_ci	return -ENOMEM;
17788c2ecf20Sopenharmony_ci}
17798c2ecf20Sopenharmony_ci
17808c2ecf20Sopenharmony_cistatic int clone_ctr(struct dm_target *ti, unsigned int argc, char **argv)
17818c2ecf20Sopenharmony_ci{
17828c2ecf20Sopenharmony_ci	int r;
17838c2ecf20Sopenharmony_ci	sector_t nr_regions;
17848c2ecf20Sopenharmony_ci	struct clone *clone;
17858c2ecf20Sopenharmony_ci	struct dm_arg_set as;
17868c2ecf20Sopenharmony_ci
17878c2ecf20Sopenharmony_ci	if (argc < 4) {
17888c2ecf20Sopenharmony_ci		ti->error = "Invalid number of arguments";
17898c2ecf20Sopenharmony_ci		return -EINVAL;
17908c2ecf20Sopenharmony_ci	}
17918c2ecf20Sopenharmony_ci
17928c2ecf20Sopenharmony_ci	as.argc = argc;
17938c2ecf20Sopenharmony_ci	as.argv = argv;
17948c2ecf20Sopenharmony_ci
17958c2ecf20Sopenharmony_ci	clone = kzalloc(sizeof(*clone), GFP_KERNEL);
17968c2ecf20Sopenharmony_ci	if (!clone) {
17978c2ecf20Sopenharmony_ci		ti->error = "Failed to allocate clone structure";
17988c2ecf20Sopenharmony_ci		return -ENOMEM;
17998c2ecf20Sopenharmony_ci	}
18008c2ecf20Sopenharmony_ci
18018c2ecf20Sopenharmony_ci	clone->ti = ti;
18028c2ecf20Sopenharmony_ci
18038c2ecf20Sopenharmony_ci	/* Initialize dm-clone flags */
18048c2ecf20Sopenharmony_ci	__set_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags);
18058c2ecf20Sopenharmony_ci	__set_bit(DM_CLONE_HYDRATION_SUSPENDED, &clone->flags);
18068c2ecf20Sopenharmony_ci	__set_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags);
18078c2ecf20Sopenharmony_ci
18088c2ecf20Sopenharmony_ci	r = parse_metadata_dev(clone, &as, &ti->error);
18098c2ecf20Sopenharmony_ci	if (r)
18108c2ecf20Sopenharmony_ci		goto out_with_clone;
18118c2ecf20Sopenharmony_ci
18128c2ecf20Sopenharmony_ci	r = parse_dest_dev(clone, &as, &ti->error);
18138c2ecf20Sopenharmony_ci	if (r)
18148c2ecf20Sopenharmony_ci		goto out_with_meta_dev;
18158c2ecf20Sopenharmony_ci
18168c2ecf20Sopenharmony_ci	r = parse_source_dev(clone, &as, &ti->error);
18178c2ecf20Sopenharmony_ci	if (r)
18188c2ecf20Sopenharmony_ci		goto out_with_dest_dev;
18198c2ecf20Sopenharmony_ci
18208c2ecf20Sopenharmony_ci	r = parse_region_size(clone, &as, &ti->error);
18218c2ecf20Sopenharmony_ci	if (r)
18228c2ecf20Sopenharmony_ci		goto out_with_source_dev;
18238c2ecf20Sopenharmony_ci
18248c2ecf20Sopenharmony_ci	clone->region_shift = __ffs(clone->region_size);
18258c2ecf20Sopenharmony_ci	nr_regions = dm_sector_div_up(ti->len, clone->region_size);
18268c2ecf20Sopenharmony_ci
18278c2ecf20Sopenharmony_ci	/* Check for overflow */
18288c2ecf20Sopenharmony_ci	if (nr_regions != (unsigned long)nr_regions) {
18298c2ecf20Sopenharmony_ci		ti->error = "Too many regions. Consider increasing the region size";
18308c2ecf20Sopenharmony_ci		r = -EOVERFLOW;
18318c2ecf20Sopenharmony_ci		goto out_with_source_dev;
18328c2ecf20Sopenharmony_ci	}
18338c2ecf20Sopenharmony_ci
18348c2ecf20Sopenharmony_ci	clone->nr_regions = nr_regions;
18358c2ecf20Sopenharmony_ci
18368c2ecf20Sopenharmony_ci	r = validate_nr_regions(clone->nr_regions, &ti->error);
18378c2ecf20Sopenharmony_ci	if (r)
18388c2ecf20Sopenharmony_ci		goto out_with_source_dev;
18398c2ecf20Sopenharmony_ci
18408c2ecf20Sopenharmony_ci	r = dm_set_target_max_io_len(ti, clone->region_size);
18418c2ecf20Sopenharmony_ci	if (r) {
18428c2ecf20Sopenharmony_ci		ti->error = "Failed to set max io len";
18438c2ecf20Sopenharmony_ci		goto out_with_source_dev;
18448c2ecf20Sopenharmony_ci	}
18458c2ecf20Sopenharmony_ci
18468c2ecf20Sopenharmony_ci	r = parse_feature_args(&as, clone);
18478c2ecf20Sopenharmony_ci	if (r)
18488c2ecf20Sopenharmony_ci		goto out_with_source_dev;
18498c2ecf20Sopenharmony_ci
18508c2ecf20Sopenharmony_ci	r = parse_core_args(&as, clone);
18518c2ecf20Sopenharmony_ci	if (r)
18528c2ecf20Sopenharmony_ci		goto out_with_source_dev;
18538c2ecf20Sopenharmony_ci
18548c2ecf20Sopenharmony_ci	/* Load metadata */
18558c2ecf20Sopenharmony_ci	clone->cmd = dm_clone_metadata_open(clone->metadata_dev->bdev, ti->len,
18568c2ecf20Sopenharmony_ci					    clone->region_size);
18578c2ecf20Sopenharmony_ci	if (IS_ERR(clone->cmd)) {
18588c2ecf20Sopenharmony_ci		ti->error = "Failed to load metadata";
18598c2ecf20Sopenharmony_ci		r = PTR_ERR(clone->cmd);
18608c2ecf20Sopenharmony_ci		goto out_with_source_dev;
18618c2ecf20Sopenharmony_ci	}
18628c2ecf20Sopenharmony_ci
18638c2ecf20Sopenharmony_ci	__set_clone_mode(clone, CM_WRITE);
18648c2ecf20Sopenharmony_ci
18658c2ecf20Sopenharmony_ci	if (get_clone_mode(clone) != CM_WRITE) {
18668c2ecf20Sopenharmony_ci		ti->error = "Unable to get write access to metadata, please check/repair metadata";
18678c2ecf20Sopenharmony_ci		r = -EPERM;
18688c2ecf20Sopenharmony_ci		goto out_with_metadata;
18698c2ecf20Sopenharmony_ci	}
18708c2ecf20Sopenharmony_ci
18718c2ecf20Sopenharmony_ci	clone->last_commit_jiffies = jiffies;
18728c2ecf20Sopenharmony_ci
18738c2ecf20Sopenharmony_ci	/* Allocate hydration hash table */
18748c2ecf20Sopenharmony_ci	r = hash_table_init(clone);
18758c2ecf20Sopenharmony_ci	if (r) {
18768c2ecf20Sopenharmony_ci		ti->error = "Failed to allocate hydration hash table";
18778c2ecf20Sopenharmony_ci		goto out_with_metadata;
18788c2ecf20Sopenharmony_ci	}
18798c2ecf20Sopenharmony_ci
18808c2ecf20Sopenharmony_ci	atomic_set(&clone->ios_in_flight, 0);
18818c2ecf20Sopenharmony_ci	init_waitqueue_head(&clone->hydration_stopped);
18828c2ecf20Sopenharmony_ci	spin_lock_init(&clone->lock);
18838c2ecf20Sopenharmony_ci	bio_list_init(&clone->deferred_bios);
18848c2ecf20Sopenharmony_ci	bio_list_init(&clone->deferred_discard_bios);
18858c2ecf20Sopenharmony_ci	bio_list_init(&clone->deferred_flush_bios);
18868c2ecf20Sopenharmony_ci	bio_list_init(&clone->deferred_flush_completions);
18878c2ecf20Sopenharmony_ci	clone->hydration_offset = 0;
18888c2ecf20Sopenharmony_ci	atomic_set(&clone->hydrations_in_flight, 0);
18898c2ecf20Sopenharmony_ci	bio_init(&clone->flush_bio, NULL, 0);
18908c2ecf20Sopenharmony_ci
18918c2ecf20Sopenharmony_ci	clone->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0);
18928c2ecf20Sopenharmony_ci	if (!clone->wq) {
18938c2ecf20Sopenharmony_ci		ti->error = "Failed to allocate workqueue";
18948c2ecf20Sopenharmony_ci		r = -ENOMEM;
18958c2ecf20Sopenharmony_ci		goto out_with_ht;
18968c2ecf20Sopenharmony_ci	}
18978c2ecf20Sopenharmony_ci
18988c2ecf20Sopenharmony_ci	INIT_WORK(&clone->worker, do_worker);
18998c2ecf20Sopenharmony_ci	INIT_DELAYED_WORK(&clone->waker, do_waker);
19008c2ecf20Sopenharmony_ci
19018c2ecf20Sopenharmony_ci	clone->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
19028c2ecf20Sopenharmony_ci	if (IS_ERR(clone->kcopyd_client)) {
19038c2ecf20Sopenharmony_ci		r = PTR_ERR(clone->kcopyd_client);
19048c2ecf20Sopenharmony_ci		goto out_with_wq;
19058c2ecf20Sopenharmony_ci	}
19068c2ecf20Sopenharmony_ci
19078c2ecf20Sopenharmony_ci	r = mempool_init_slab_pool(&clone->hydration_pool, MIN_HYDRATIONS,
19088c2ecf20Sopenharmony_ci				   _hydration_cache);
19098c2ecf20Sopenharmony_ci	if (r) {
19108c2ecf20Sopenharmony_ci		ti->error = "Failed to create dm_clone_region_hydration memory pool";
19118c2ecf20Sopenharmony_ci		goto out_with_kcopyd;
19128c2ecf20Sopenharmony_ci	}
19138c2ecf20Sopenharmony_ci
19148c2ecf20Sopenharmony_ci	/* Save a copy of the table line */
19158c2ecf20Sopenharmony_ci	r = copy_ctr_args(clone, argc - 3, (const char **)argv + 3, &ti->error);
19168c2ecf20Sopenharmony_ci	if (r)
19178c2ecf20Sopenharmony_ci		goto out_with_mempool;
19188c2ecf20Sopenharmony_ci
19198c2ecf20Sopenharmony_ci	mutex_init(&clone->commit_lock);
19208c2ecf20Sopenharmony_ci
19218c2ecf20Sopenharmony_ci	/* Enable flushes */
19228c2ecf20Sopenharmony_ci	ti->num_flush_bios = 1;
19238c2ecf20Sopenharmony_ci	ti->flush_supported = true;
19248c2ecf20Sopenharmony_ci
19258c2ecf20Sopenharmony_ci	/* Enable discards */
19268c2ecf20Sopenharmony_ci	ti->discards_supported = true;
19278c2ecf20Sopenharmony_ci	ti->num_discard_bios = 1;
19288c2ecf20Sopenharmony_ci
19298c2ecf20Sopenharmony_ci	ti->private = clone;
19308c2ecf20Sopenharmony_ci
19318c2ecf20Sopenharmony_ci	return 0;
19328c2ecf20Sopenharmony_ci
19338c2ecf20Sopenharmony_ciout_with_mempool:
19348c2ecf20Sopenharmony_ci	mempool_exit(&clone->hydration_pool);
19358c2ecf20Sopenharmony_ciout_with_kcopyd:
19368c2ecf20Sopenharmony_ci	dm_kcopyd_client_destroy(clone->kcopyd_client);
19378c2ecf20Sopenharmony_ciout_with_wq:
19388c2ecf20Sopenharmony_ci	destroy_workqueue(clone->wq);
19398c2ecf20Sopenharmony_ciout_with_ht:
19408c2ecf20Sopenharmony_ci	hash_table_exit(clone);
19418c2ecf20Sopenharmony_ciout_with_metadata:
19428c2ecf20Sopenharmony_ci	dm_clone_metadata_close(clone->cmd);
19438c2ecf20Sopenharmony_ciout_with_source_dev:
19448c2ecf20Sopenharmony_ci	dm_put_device(ti, clone->source_dev);
19458c2ecf20Sopenharmony_ciout_with_dest_dev:
19468c2ecf20Sopenharmony_ci	dm_put_device(ti, clone->dest_dev);
19478c2ecf20Sopenharmony_ciout_with_meta_dev:
19488c2ecf20Sopenharmony_ci	dm_put_device(ti, clone->metadata_dev);
19498c2ecf20Sopenharmony_ciout_with_clone:
19508c2ecf20Sopenharmony_ci	kfree(clone);
19518c2ecf20Sopenharmony_ci
19528c2ecf20Sopenharmony_ci	return r;
19538c2ecf20Sopenharmony_ci}
19548c2ecf20Sopenharmony_ci
19558c2ecf20Sopenharmony_cistatic void clone_dtr(struct dm_target *ti)
19568c2ecf20Sopenharmony_ci{
19578c2ecf20Sopenharmony_ci	unsigned int i;
19588c2ecf20Sopenharmony_ci	struct clone *clone = ti->private;
19598c2ecf20Sopenharmony_ci
19608c2ecf20Sopenharmony_ci	mutex_destroy(&clone->commit_lock);
19618c2ecf20Sopenharmony_ci	bio_uninit(&clone->flush_bio);
19628c2ecf20Sopenharmony_ci
19638c2ecf20Sopenharmony_ci	for (i = 0; i < clone->nr_ctr_args; i++)
19648c2ecf20Sopenharmony_ci		kfree(clone->ctr_args[i]);
19658c2ecf20Sopenharmony_ci	kfree(clone->ctr_args);
19668c2ecf20Sopenharmony_ci
19678c2ecf20Sopenharmony_ci	mempool_exit(&clone->hydration_pool);
19688c2ecf20Sopenharmony_ci	dm_kcopyd_client_destroy(clone->kcopyd_client);
19698c2ecf20Sopenharmony_ci	cancel_delayed_work_sync(&clone->waker);
19708c2ecf20Sopenharmony_ci	destroy_workqueue(clone->wq);
19718c2ecf20Sopenharmony_ci	hash_table_exit(clone);
19728c2ecf20Sopenharmony_ci	dm_clone_metadata_close(clone->cmd);
19738c2ecf20Sopenharmony_ci	dm_put_device(ti, clone->source_dev);
19748c2ecf20Sopenharmony_ci	dm_put_device(ti, clone->dest_dev);
19758c2ecf20Sopenharmony_ci	dm_put_device(ti, clone->metadata_dev);
19768c2ecf20Sopenharmony_ci
19778c2ecf20Sopenharmony_ci	kfree(clone);
19788c2ecf20Sopenharmony_ci}
19798c2ecf20Sopenharmony_ci
19808c2ecf20Sopenharmony_ci/*---------------------------------------------------------------------------*/
19818c2ecf20Sopenharmony_ci
19828c2ecf20Sopenharmony_cistatic void clone_postsuspend(struct dm_target *ti)
19838c2ecf20Sopenharmony_ci{
19848c2ecf20Sopenharmony_ci	struct clone *clone = ti->private;
19858c2ecf20Sopenharmony_ci
19868c2ecf20Sopenharmony_ci	/*
19878c2ecf20Sopenharmony_ci	 * To successfully suspend the device:
19888c2ecf20Sopenharmony_ci	 *
19898c2ecf20Sopenharmony_ci	 *	- We cancel the delayed work for periodic commits and wait for
19908c2ecf20Sopenharmony_ci	 *	  it to finish.
19918c2ecf20Sopenharmony_ci	 *
19928c2ecf20Sopenharmony_ci	 *	- We stop the background hydration, i.e. we prevent new region
19938c2ecf20Sopenharmony_ci	 *	  hydrations from starting.
19948c2ecf20Sopenharmony_ci	 *
19958c2ecf20Sopenharmony_ci	 *	- We wait for any in-flight hydrations to finish.
19968c2ecf20Sopenharmony_ci	 *
19978c2ecf20Sopenharmony_ci	 *	- We flush the workqueue.
19988c2ecf20Sopenharmony_ci	 *
19998c2ecf20Sopenharmony_ci	 *	- We commit the metadata.
20008c2ecf20Sopenharmony_ci	 */
20018c2ecf20Sopenharmony_ci	cancel_delayed_work_sync(&clone->waker);
20028c2ecf20Sopenharmony_ci
20038c2ecf20Sopenharmony_ci	set_bit(DM_CLONE_HYDRATION_SUSPENDED, &clone->flags);
20048c2ecf20Sopenharmony_ci
20058c2ecf20Sopenharmony_ci	/*
20068c2ecf20Sopenharmony_ci	 * Make sure set_bit() is ordered before atomic_read(), otherwise we
20078c2ecf20Sopenharmony_ci	 * might race with do_hydration() and miss some started region
20088c2ecf20Sopenharmony_ci	 * hydrations.
20098c2ecf20Sopenharmony_ci	 *
20108c2ecf20Sopenharmony_ci	 * This is paired with smp_mb__after_atomic() in do_hydration().
20118c2ecf20Sopenharmony_ci	 */
20128c2ecf20Sopenharmony_ci	smp_mb__after_atomic();
20138c2ecf20Sopenharmony_ci
20148c2ecf20Sopenharmony_ci	wait_event(clone->hydration_stopped, !atomic_read(&clone->hydrations_in_flight));
20158c2ecf20Sopenharmony_ci	flush_workqueue(clone->wq);
20168c2ecf20Sopenharmony_ci
20178c2ecf20Sopenharmony_ci	(void) commit_metadata(clone, NULL);
20188c2ecf20Sopenharmony_ci}
20198c2ecf20Sopenharmony_ci
20208c2ecf20Sopenharmony_cistatic void clone_resume(struct dm_target *ti)
20218c2ecf20Sopenharmony_ci{
20228c2ecf20Sopenharmony_ci	struct clone *clone = ti->private;
20238c2ecf20Sopenharmony_ci
20248c2ecf20Sopenharmony_ci	clear_bit(DM_CLONE_HYDRATION_SUSPENDED, &clone->flags);
20258c2ecf20Sopenharmony_ci	do_waker(&clone->waker.work);
20268c2ecf20Sopenharmony_ci}
20278c2ecf20Sopenharmony_ci
20288c2ecf20Sopenharmony_cistatic bool bdev_supports_discards(struct block_device *bdev)
20298c2ecf20Sopenharmony_ci{
20308c2ecf20Sopenharmony_ci	struct request_queue *q = bdev_get_queue(bdev);
20318c2ecf20Sopenharmony_ci
20328c2ecf20Sopenharmony_ci	return (q && blk_queue_discard(q));
20338c2ecf20Sopenharmony_ci}
20348c2ecf20Sopenharmony_ci
20358c2ecf20Sopenharmony_ci/*
20368c2ecf20Sopenharmony_ci * If discard_passdown was enabled verify that the destination device supports
20378c2ecf20Sopenharmony_ci * discards. Disable discard_passdown if not.
20388c2ecf20Sopenharmony_ci */
20398c2ecf20Sopenharmony_cistatic void disable_passdown_if_not_supported(struct clone *clone)
20408c2ecf20Sopenharmony_ci{
20418c2ecf20Sopenharmony_ci	struct block_device *dest_dev = clone->dest_dev->bdev;
20428c2ecf20Sopenharmony_ci	struct queue_limits *dest_limits = &bdev_get_queue(dest_dev)->limits;
20438c2ecf20Sopenharmony_ci	const char *reason = NULL;
20448c2ecf20Sopenharmony_ci	char buf[BDEVNAME_SIZE];
20458c2ecf20Sopenharmony_ci
20468c2ecf20Sopenharmony_ci	if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags))
20478c2ecf20Sopenharmony_ci		return;
20488c2ecf20Sopenharmony_ci
20498c2ecf20Sopenharmony_ci	if (!bdev_supports_discards(dest_dev))
20508c2ecf20Sopenharmony_ci		reason = "discard unsupported";
20518c2ecf20Sopenharmony_ci	else if (dest_limits->max_discard_sectors < clone->region_size)
20528c2ecf20Sopenharmony_ci		reason = "max discard sectors smaller than a region";
20538c2ecf20Sopenharmony_ci
20548c2ecf20Sopenharmony_ci	if (reason) {
20558c2ecf20Sopenharmony_ci		DMWARN("Destination device (%s) %s: Disabling discard passdown.",
20568c2ecf20Sopenharmony_ci		       bdevname(dest_dev, buf), reason);
20578c2ecf20Sopenharmony_ci		clear_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags);
20588c2ecf20Sopenharmony_ci	}
20598c2ecf20Sopenharmony_ci}
20608c2ecf20Sopenharmony_ci
20618c2ecf20Sopenharmony_cistatic void set_discard_limits(struct clone *clone, struct queue_limits *limits)
20628c2ecf20Sopenharmony_ci{
20638c2ecf20Sopenharmony_ci	struct block_device *dest_bdev = clone->dest_dev->bdev;
20648c2ecf20Sopenharmony_ci	struct queue_limits *dest_limits = &bdev_get_queue(dest_bdev)->limits;
20658c2ecf20Sopenharmony_ci
20668c2ecf20Sopenharmony_ci	if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags)) {
20678c2ecf20Sopenharmony_ci		/* No passdown is done so we set our own virtual limits */
20688c2ecf20Sopenharmony_ci		limits->discard_granularity = clone->region_size << SECTOR_SHIFT;
20698c2ecf20Sopenharmony_ci		limits->max_discard_sectors = round_down(UINT_MAX >> SECTOR_SHIFT, clone->region_size);
20708c2ecf20Sopenharmony_ci		return;
20718c2ecf20Sopenharmony_ci	}
20728c2ecf20Sopenharmony_ci
20738c2ecf20Sopenharmony_ci	/*
20748c2ecf20Sopenharmony_ci	 * clone_iterate_devices() is stacking both the source and destination
20758c2ecf20Sopenharmony_ci	 * device limits but discards aren't passed to the source device, so
20768c2ecf20Sopenharmony_ci	 * inherit destination's limits.
20778c2ecf20Sopenharmony_ci	 */
20788c2ecf20Sopenharmony_ci	limits->max_discard_sectors = dest_limits->max_discard_sectors;
20798c2ecf20Sopenharmony_ci	limits->max_hw_discard_sectors = dest_limits->max_hw_discard_sectors;
20808c2ecf20Sopenharmony_ci	limits->discard_granularity = dest_limits->discard_granularity;
20818c2ecf20Sopenharmony_ci	limits->discard_alignment = dest_limits->discard_alignment;
20828c2ecf20Sopenharmony_ci	limits->discard_misaligned = dest_limits->discard_misaligned;
20838c2ecf20Sopenharmony_ci	limits->max_discard_segments = dest_limits->max_discard_segments;
20848c2ecf20Sopenharmony_ci}
20858c2ecf20Sopenharmony_ci
20868c2ecf20Sopenharmony_cistatic void clone_io_hints(struct dm_target *ti, struct queue_limits *limits)
20878c2ecf20Sopenharmony_ci{
20888c2ecf20Sopenharmony_ci	struct clone *clone = ti->private;
20898c2ecf20Sopenharmony_ci	u64 io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
20908c2ecf20Sopenharmony_ci
20918c2ecf20Sopenharmony_ci	/*
20928c2ecf20Sopenharmony_ci	 * If the system-determined stacked limits are compatible with
20938c2ecf20Sopenharmony_ci	 * dm-clone's region size (io_opt is a factor) do not override them.
20948c2ecf20Sopenharmony_ci	 */
20958c2ecf20Sopenharmony_ci	if (io_opt_sectors < clone->region_size ||
20968c2ecf20Sopenharmony_ci	    do_div(io_opt_sectors, clone->region_size)) {
20978c2ecf20Sopenharmony_ci		blk_limits_io_min(limits, clone->region_size << SECTOR_SHIFT);
20988c2ecf20Sopenharmony_ci		blk_limits_io_opt(limits, clone->region_size << SECTOR_SHIFT);
20998c2ecf20Sopenharmony_ci	}
21008c2ecf20Sopenharmony_ci
21018c2ecf20Sopenharmony_ci	disable_passdown_if_not_supported(clone);
21028c2ecf20Sopenharmony_ci	set_discard_limits(clone, limits);
21038c2ecf20Sopenharmony_ci}
21048c2ecf20Sopenharmony_ci
21058c2ecf20Sopenharmony_cistatic int clone_iterate_devices(struct dm_target *ti,
21068c2ecf20Sopenharmony_ci				 iterate_devices_callout_fn fn, void *data)
21078c2ecf20Sopenharmony_ci{
21088c2ecf20Sopenharmony_ci	int ret;
21098c2ecf20Sopenharmony_ci	struct clone *clone = ti->private;
21108c2ecf20Sopenharmony_ci	struct dm_dev *dest_dev = clone->dest_dev;
21118c2ecf20Sopenharmony_ci	struct dm_dev *source_dev = clone->source_dev;
21128c2ecf20Sopenharmony_ci
21138c2ecf20Sopenharmony_ci	ret = fn(ti, source_dev, 0, ti->len, data);
21148c2ecf20Sopenharmony_ci	if (!ret)
21158c2ecf20Sopenharmony_ci		ret = fn(ti, dest_dev, 0, ti->len, data);
21168c2ecf20Sopenharmony_ci	return ret;
21178c2ecf20Sopenharmony_ci}
21188c2ecf20Sopenharmony_ci
21198c2ecf20Sopenharmony_ci/*
21208c2ecf20Sopenharmony_ci * dm-clone message functions.
21218c2ecf20Sopenharmony_ci */
21228c2ecf20Sopenharmony_cistatic void set_hydration_threshold(struct clone *clone, unsigned int nr_regions)
21238c2ecf20Sopenharmony_ci{
21248c2ecf20Sopenharmony_ci	WRITE_ONCE(clone->hydration_threshold, nr_regions);
21258c2ecf20Sopenharmony_ci
21268c2ecf20Sopenharmony_ci	/*
21278c2ecf20Sopenharmony_ci	 * If user space sets hydration_threshold to zero then the hydration
21288c2ecf20Sopenharmony_ci	 * will stop. If at a later time the hydration_threshold is increased
21298c2ecf20Sopenharmony_ci	 * we must restart the hydration process by waking up the worker.
21308c2ecf20Sopenharmony_ci	 */
21318c2ecf20Sopenharmony_ci	wake_worker(clone);
21328c2ecf20Sopenharmony_ci}
21338c2ecf20Sopenharmony_ci
21348c2ecf20Sopenharmony_cistatic void set_hydration_batch_size(struct clone *clone, unsigned int nr_regions)
21358c2ecf20Sopenharmony_ci{
21368c2ecf20Sopenharmony_ci	WRITE_ONCE(clone->hydration_batch_size, nr_regions);
21378c2ecf20Sopenharmony_ci}
21388c2ecf20Sopenharmony_ci
21398c2ecf20Sopenharmony_cistatic void enable_hydration(struct clone *clone)
21408c2ecf20Sopenharmony_ci{
21418c2ecf20Sopenharmony_ci	if (!test_and_set_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags))
21428c2ecf20Sopenharmony_ci		wake_worker(clone);
21438c2ecf20Sopenharmony_ci}
21448c2ecf20Sopenharmony_ci
21458c2ecf20Sopenharmony_cistatic void disable_hydration(struct clone *clone)
21468c2ecf20Sopenharmony_ci{
21478c2ecf20Sopenharmony_ci	clear_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags);
21488c2ecf20Sopenharmony_ci}
21498c2ecf20Sopenharmony_ci
21508c2ecf20Sopenharmony_cistatic int clone_message(struct dm_target *ti, unsigned int argc, char **argv,
21518c2ecf20Sopenharmony_ci			 char *result, unsigned int maxlen)
21528c2ecf20Sopenharmony_ci{
21538c2ecf20Sopenharmony_ci	struct clone *clone = ti->private;
21548c2ecf20Sopenharmony_ci	unsigned int value;
21558c2ecf20Sopenharmony_ci
21568c2ecf20Sopenharmony_ci	if (!argc)
21578c2ecf20Sopenharmony_ci		return -EINVAL;
21588c2ecf20Sopenharmony_ci
21598c2ecf20Sopenharmony_ci	if (!strcasecmp(argv[0], "enable_hydration")) {
21608c2ecf20Sopenharmony_ci		enable_hydration(clone);
21618c2ecf20Sopenharmony_ci		return 0;
21628c2ecf20Sopenharmony_ci	}
21638c2ecf20Sopenharmony_ci
21648c2ecf20Sopenharmony_ci	if (!strcasecmp(argv[0], "disable_hydration")) {
21658c2ecf20Sopenharmony_ci		disable_hydration(clone);
21668c2ecf20Sopenharmony_ci		return 0;
21678c2ecf20Sopenharmony_ci	}
21688c2ecf20Sopenharmony_ci
21698c2ecf20Sopenharmony_ci	if (argc != 2)
21708c2ecf20Sopenharmony_ci		return -EINVAL;
21718c2ecf20Sopenharmony_ci
21728c2ecf20Sopenharmony_ci	if (!strcasecmp(argv[0], "hydration_threshold")) {
21738c2ecf20Sopenharmony_ci		if (kstrtouint(argv[1], 10, &value))
21748c2ecf20Sopenharmony_ci			return -EINVAL;
21758c2ecf20Sopenharmony_ci
21768c2ecf20Sopenharmony_ci		set_hydration_threshold(clone, value);
21778c2ecf20Sopenharmony_ci
21788c2ecf20Sopenharmony_ci		return 0;
21798c2ecf20Sopenharmony_ci	}
21808c2ecf20Sopenharmony_ci
21818c2ecf20Sopenharmony_ci	if (!strcasecmp(argv[0], "hydration_batch_size")) {
21828c2ecf20Sopenharmony_ci		if (kstrtouint(argv[1], 10, &value))
21838c2ecf20Sopenharmony_ci			return -EINVAL;
21848c2ecf20Sopenharmony_ci
21858c2ecf20Sopenharmony_ci		set_hydration_batch_size(clone, value);
21868c2ecf20Sopenharmony_ci
21878c2ecf20Sopenharmony_ci		return 0;
21888c2ecf20Sopenharmony_ci	}
21898c2ecf20Sopenharmony_ci
21908c2ecf20Sopenharmony_ci	DMERR("%s: Unsupported message `%s'", clone_device_name(clone), argv[0]);
21918c2ecf20Sopenharmony_ci	return -EINVAL;
21928c2ecf20Sopenharmony_ci}
21938c2ecf20Sopenharmony_ci
21948c2ecf20Sopenharmony_cistatic struct target_type clone_target = {
21958c2ecf20Sopenharmony_ci	.name = "clone",
21968c2ecf20Sopenharmony_ci	.version = {1, 0, 0},
21978c2ecf20Sopenharmony_ci	.module = THIS_MODULE,
21988c2ecf20Sopenharmony_ci	.ctr = clone_ctr,
21998c2ecf20Sopenharmony_ci	.dtr =  clone_dtr,
22008c2ecf20Sopenharmony_ci	.map = clone_map,
22018c2ecf20Sopenharmony_ci	.end_io = clone_endio,
22028c2ecf20Sopenharmony_ci	.postsuspend = clone_postsuspend,
22038c2ecf20Sopenharmony_ci	.resume = clone_resume,
22048c2ecf20Sopenharmony_ci	.status = clone_status,
22058c2ecf20Sopenharmony_ci	.message = clone_message,
22068c2ecf20Sopenharmony_ci	.io_hints = clone_io_hints,
22078c2ecf20Sopenharmony_ci	.iterate_devices = clone_iterate_devices,
22088c2ecf20Sopenharmony_ci};
22098c2ecf20Sopenharmony_ci
22108c2ecf20Sopenharmony_ci/*---------------------------------------------------------------------------*/
22118c2ecf20Sopenharmony_ci
22128c2ecf20Sopenharmony_ci/* Module functions */
22138c2ecf20Sopenharmony_cistatic int __init dm_clone_init(void)
22148c2ecf20Sopenharmony_ci{
22158c2ecf20Sopenharmony_ci	int r;
22168c2ecf20Sopenharmony_ci
22178c2ecf20Sopenharmony_ci	_hydration_cache = KMEM_CACHE(dm_clone_region_hydration, 0);
22188c2ecf20Sopenharmony_ci	if (!_hydration_cache)
22198c2ecf20Sopenharmony_ci		return -ENOMEM;
22208c2ecf20Sopenharmony_ci
22218c2ecf20Sopenharmony_ci	r = dm_register_target(&clone_target);
22228c2ecf20Sopenharmony_ci	if (r < 0) {
22238c2ecf20Sopenharmony_ci		DMERR("Failed to register clone target");
22248c2ecf20Sopenharmony_ci		kmem_cache_destroy(_hydration_cache);
22258c2ecf20Sopenharmony_ci		return r;
22268c2ecf20Sopenharmony_ci	}
22278c2ecf20Sopenharmony_ci
22288c2ecf20Sopenharmony_ci	return 0;
22298c2ecf20Sopenharmony_ci}
22308c2ecf20Sopenharmony_ci
22318c2ecf20Sopenharmony_cistatic void __exit dm_clone_exit(void)
22328c2ecf20Sopenharmony_ci{
22338c2ecf20Sopenharmony_ci	dm_unregister_target(&clone_target);
22348c2ecf20Sopenharmony_ci
22358c2ecf20Sopenharmony_ci	kmem_cache_destroy(_hydration_cache);
22368c2ecf20Sopenharmony_ci	_hydration_cache = NULL;
22378c2ecf20Sopenharmony_ci}
22388c2ecf20Sopenharmony_ci
22398c2ecf20Sopenharmony_ci/* Module hooks */
22408c2ecf20Sopenharmony_cimodule_init(dm_clone_init);
22418c2ecf20Sopenharmony_cimodule_exit(dm_clone_exit);
22428c2ecf20Sopenharmony_ci
22438c2ecf20Sopenharmony_ciMODULE_DESCRIPTION(DM_NAME " clone target");
22448c2ecf20Sopenharmony_ciMODULE_AUTHOR("Nikos Tsironis <ntsironis@arrikto.com>");
22458c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
2246