// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved.
 */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/mm.h>
762306a36Sopenharmony_ci#include <linux/bio.h>
862306a36Sopenharmony_ci#include <linux/err.h>
962306a36Sopenharmony_ci#include <linux/hash.h>
1062306a36Sopenharmony_ci#include <linux/list.h>
1162306a36Sopenharmony_ci#include <linux/log2.h>
1262306a36Sopenharmony_ci#include <linux/init.h>
1362306a36Sopenharmony_ci#include <linux/slab.h>
1462306a36Sopenharmony_ci#include <linux/wait.h>
1562306a36Sopenharmony_ci#include <linux/dm-io.h>
1662306a36Sopenharmony_ci#include <linux/mutex.h>
1762306a36Sopenharmony_ci#include <linux/atomic.h>
1862306a36Sopenharmony_ci#include <linux/bitops.h>
1962306a36Sopenharmony_ci#include <linux/blkdev.h>
2062306a36Sopenharmony_ci#include <linux/kdev_t.h>
2162306a36Sopenharmony_ci#include <linux/kernel.h>
2262306a36Sopenharmony_ci#include <linux/module.h>
2362306a36Sopenharmony_ci#include <linux/jiffies.h>
2462306a36Sopenharmony_ci#include <linux/mempool.h>
2562306a36Sopenharmony_ci#include <linux/spinlock.h>
2662306a36Sopenharmony_ci#include <linux/blk_types.h>
2762306a36Sopenharmony_ci#include <linux/dm-kcopyd.h>
2862306a36Sopenharmony_ci#include <linux/workqueue.h>
2962306a36Sopenharmony_ci#include <linux/backing-dev.h>
3062306a36Sopenharmony_ci#include <linux/device-mapper.h>
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_ci#include "dm.h"
3362306a36Sopenharmony_ci#include "dm-clone-metadata.h"
3462306a36Sopenharmony_ci
/* Prefix used by the DMINFO()/DMERR() log messages of this target */
#define DM_MSG_PREFIX "clone"

/*
 * Minimum and maximum allowed region sizes, expressed in 512-byte sectors
 */
#define MIN_REGION_SIZE (1 << 3)  /* 8 sectors = 4KB */
#define MAX_REGION_SIZE (1 << 21) /* 2097152 sectors = 1GB */

#define MIN_HYDRATIONS 256 /* Size of hydration mempool */
#define DEFAULT_HYDRATION_THRESHOLD 1 /* 1 region */
#define DEFAULT_HYDRATION_BATCH_SIZE 1 /* Hydrate in batches of 1 region */

#define COMMIT_PERIOD HZ /* 1 sec, in jiffies */

/*
 * Hydration hash table size: 1 << HASH_TABLE_BITS buckets
 */
#define HASH_TABLE_BITS 15
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_ciDECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(clone_hydration_throttle,
5562306a36Sopenharmony_ci	"A percentage of time allocated for hydrating regions");
5662306a36Sopenharmony_ci
/*
 * Slab cache for struct dm_clone_region_hydration.
 * File-scope static, so it starts out as NULL.
 */
static struct kmem_cache *_hydration_cache;
5962306a36Sopenharmony_ci
/*
 * dm-clone metadata modes.
 *
 * The numeric order matters: callers test `mode >= CM_READ_ONLY` to mean
 * "metadata can no longer be committed", so CM_WRITE must stay the smallest
 * value and CM_FAIL the largest.
 */
enum clone_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
	CM_FAIL,		/* all metadata I/O fails */
};
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_cistruct hash_table_bucket;
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_cistruct clone {
7062306a36Sopenharmony_ci	struct dm_target *ti;
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci	struct dm_dev *metadata_dev;
7362306a36Sopenharmony_ci	struct dm_dev *dest_dev;
7462306a36Sopenharmony_ci	struct dm_dev *source_dev;
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci	unsigned long nr_regions;
7762306a36Sopenharmony_ci	sector_t region_size;
7862306a36Sopenharmony_ci	unsigned int region_shift;
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci	/*
8162306a36Sopenharmony_ci	 * A metadata commit and the actions taken in case it fails should run
8262306a36Sopenharmony_ci	 * as a single atomic step.
8362306a36Sopenharmony_ci	 */
8462306a36Sopenharmony_ci	struct mutex commit_lock;
8562306a36Sopenharmony_ci
8662306a36Sopenharmony_ci	struct dm_clone_metadata *cmd;
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	/* Region hydration hash table */
8962306a36Sopenharmony_ci	struct hash_table_bucket *ht;
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci	atomic_t ios_in_flight;
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci	wait_queue_head_t hydration_stopped;
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci	mempool_t hydration_pool;
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci	unsigned long last_commit_jiffies;
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	/*
10062306a36Sopenharmony_ci	 * We defer incoming WRITE bios for regions that are not hydrated,
10162306a36Sopenharmony_ci	 * until after these regions have been hydrated.
10262306a36Sopenharmony_ci	 *
10362306a36Sopenharmony_ci	 * Also, we defer REQ_FUA and REQ_PREFLUSH bios, until after the
10462306a36Sopenharmony_ci	 * metadata have been committed.
10562306a36Sopenharmony_ci	 */
10662306a36Sopenharmony_ci	spinlock_t lock;
10762306a36Sopenharmony_ci	struct bio_list deferred_bios;
10862306a36Sopenharmony_ci	struct bio_list deferred_discard_bios;
10962306a36Sopenharmony_ci	struct bio_list deferred_flush_bios;
11062306a36Sopenharmony_ci	struct bio_list deferred_flush_completions;
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci	/* Maximum number of regions being copied during background hydration. */
11362306a36Sopenharmony_ci	unsigned int hydration_threshold;
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	/* Number of regions to batch together during background hydration. */
11662306a36Sopenharmony_ci	unsigned int hydration_batch_size;
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci	/* Which region to hydrate next */
11962306a36Sopenharmony_ci	unsigned long hydration_offset;
12062306a36Sopenharmony_ci
12162306a36Sopenharmony_ci	atomic_t hydrations_in_flight;
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	/*
12462306a36Sopenharmony_ci	 * Save a copy of the table line rather than reconstructing it for the
12562306a36Sopenharmony_ci	 * status.
12662306a36Sopenharmony_ci	 */
12762306a36Sopenharmony_ci	unsigned int nr_ctr_args;
12862306a36Sopenharmony_ci	const char **ctr_args;
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	struct workqueue_struct *wq;
13162306a36Sopenharmony_ci	struct work_struct worker;
13262306a36Sopenharmony_ci	struct delayed_work waker;
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	struct dm_kcopyd_client *kcopyd_client;
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci	enum clone_metadata_mode mode;
13762306a36Sopenharmony_ci	unsigned long flags;
13862306a36Sopenharmony_ci};
13962306a36Sopenharmony_ci
/*
 * dm-clone flags.
 *
 * These are bit numbers (not masks) for the 'flags' word of struct clone,
 * intended for use with the test_bit() family of helpers.
 */
#define DM_CLONE_DISCARD_PASSDOWN 0
#define DM_CLONE_HYDRATION_ENABLED 1
#define DM_CLONE_HYDRATION_SUSPENDED 2
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_ci/*
15062306a36Sopenharmony_ci * Metadata failure handling.
15162306a36Sopenharmony_ci */
15262306a36Sopenharmony_cistatic enum clone_metadata_mode get_clone_mode(struct clone *clone)
15362306a36Sopenharmony_ci{
15462306a36Sopenharmony_ci	return READ_ONCE(clone->mode);
15562306a36Sopenharmony_ci}
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_cistatic const char *clone_device_name(struct clone *clone)
15862306a36Sopenharmony_ci{
15962306a36Sopenharmony_ci	return dm_table_device_name(clone->ti->table);
16062306a36Sopenharmony_ci}
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_cistatic void __set_clone_mode(struct clone *clone, enum clone_metadata_mode new_mode)
16362306a36Sopenharmony_ci{
16462306a36Sopenharmony_ci	static const char * const descs[] = {
16562306a36Sopenharmony_ci		"read-write",
16662306a36Sopenharmony_ci		"read-only",
16762306a36Sopenharmony_ci		"fail"
16862306a36Sopenharmony_ci	};
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci	enum clone_metadata_mode old_mode = get_clone_mode(clone);
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_ci	/* Never move out of fail mode */
17362306a36Sopenharmony_ci	if (old_mode == CM_FAIL)
17462306a36Sopenharmony_ci		new_mode = CM_FAIL;
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	switch (new_mode) {
17762306a36Sopenharmony_ci	case CM_FAIL:
17862306a36Sopenharmony_ci	case CM_READ_ONLY:
17962306a36Sopenharmony_ci		dm_clone_metadata_set_read_only(clone->cmd);
18062306a36Sopenharmony_ci		break;
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	case CM_WRITE:
18362306a36Sopenharmony_ci		dm_clone_metadata_set_read_write(clone->cmd);
18462306a36Sopenharmony_ci		break;
18562306a36Sopenharmony_ci	}
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ci	WRITE_ONCE(clone->mode, new_mode);
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci	if (new_mode != old_mode) {
19062306a36Sopenharmony_ci		dm_table_event(clone->ti->table);
19162306a36Sopenharmony_ci		DMINFO("%s: Switching to %s mode", clone_device_name(clone),
19262306a36Sopenharmony_ci		       descs[(int)new_mode]);
19362306a36Sopenharmony_ci	}
19462306a36Sopenharmony_ci}
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_cistatic void __abort_transaction(struct clone *clone)
19762306a36Sopenharmony_ci{
19862306a36Sopenharmony_ci	const char *dev_name = clone_device_name(clone);
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci	if (get_clone_mode(clone) >= CM_READ_ONLY)
20162306a36Sopenharmony_ci		return;
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	DMERR("%s: Aborting current metadata transaction", dev_name);
20462306a36Sopenharmony_ci	if (dm_clone_metadata_abort(clone->cmd)) {
20562306a36Sopenharmony_ci		DMERR("%s: Failed to abort metadata transaction", dev_name);
20662306a36Sopenharmony_ci		__set_clone_mode(clone, CM_FAIL);
20762306a36Sopenharmony_ci	}
20862306a36Sopenharmony_ci}
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_cistatic void __reload_in_core_bitset(struct clone *clone)
21162306a36Sopenharmony_ci{
21262306a36Sopenharmony_ci	const char *dev_name = clone_device_name(clone);
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci	if (get_clone_mode(clone) == CM_FAIL)
21562306a36Sopenharmony_ci		return;
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci	/* Reload the on-disk bitset */
21862306a36Sopenharmony_ci	DMINFO("%s: Reloading on-disk bitmap", dev_name);
21962306a36Sopenharmony_ci	if (dm_clone_reload_in_core_bitset(clone->cmd)) {
22062306a36Sopenharmony_ci		DMERR("%s: Failed to reload on-disk bitmap", dev_name);
22162306a36Sopenharmony_ci		__set_clone_mode(clone, CM_FAIL);
22262306a36Sopenharmony_ci	}
22362306a36Sopenharmony_ci}
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_cistatic void __metadata_operation_failed(struct clone *clone, const char *op, int r)
22662306a36Sopenharmony_ci{
22762306a36Sopenharmony_ci	DMERR("%s: Metadata operation `%s' failed: error = %d",
22862306a36Sopenharmony_ci	      clone_device_name(clone), op, r);
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_ci	__abort_transaction(clone);
23162306a36Sopenharmony_ci	__set_clone_mode(clone, CM_READ_ONLY);
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci	/*
23462306a36Sopenharmony_ci	 * dm_clone_reload_in_core_bitset() may run concurrently with either
23562306a36Sopenharmony_ci	 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), but
23662306a36Sopenharmony_ci	 * it's safe as we have already set the metadata to read-only mode.
23762306a36Sopenharmony_ci	 */
23862306a36Sopenharmony_ci	__reload_in_core_bitset(clone);
23962306a36Sopenharmony_ci}
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_ci/* Wake up anyone waiting for region hydrations to stop */
24462306a36Sopenharmony_cistatic inline void wakeup_hydration_waiters(struct clone *clone)
24562306a36Sopenharmony_ci{
24662306a36Sopenharmony_ci	wake_up_all(&clone->hydration_stopped);
24762306a36Sopenharmony_ci}
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_cistatic inline void wake_worker(struct clone *clone)
25062306a36Sopenharmony_ci{
25162306a36Sopenharmony_ci	queue_work(clone->wq, &clone->worker);
25262306a36Sopenharmony_ci}
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci/*
25762306a36Sopenharmony_ci * bio helper functions.
25862306a36Sopenharmony_ci */
25962306a36Sopenharmony_cistatic inline void remap_to_source(struct clone *clone, struct bio *bio)
26062306a36Sopenharmony_ci{
26162306a36Sopenharmony_ci	bio_set_dev(bio, clone->source_dev->bdev);
26262306a36Sopenharmony_ci}
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_cistatic inline void remap_to_dest(struct clone *clone, struct bio *bio)
26562306a36Sopenharmony_ci{
26662306a36Sopenharmony_ci	bio_set_dev(bio, clone->dest_dev->bdev);
26762306a36Sopenharmony_ci}
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_cistatic bool bio_triggers_commit(struct clone *clone, struct bio *bio)
27062306a36Sopenharmony_ci{
27162306a36Sopenharmony_ci	return op_is_flush(bio->bi_opf) &&
27262306a36Sopenharmony_ci		dm_clone_changed_this_transaction(clone->cmd);
27362306a36Sopenharmony_ci}
27462306a36Sopenharmony_ci
27562306a36Sopenharmony_ci/* Get the address of the region in sectors */
27662306a36Sopenharmony_cistatic inline sector_t region_to_sector(struct clone *clone, unsigned long region_nr)
27762306a36Sopenharmony_ci{
27862306a36Sopenharmony_ci	return ((sector_t)region_nr << clone->region_shift);
27962306a36Sopenharmony_ci}
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_ci/* Get the region number of the bio */
28262306a36Sopenharmony_cistatic inline unsigned long bio_to_region(struct clone *clone, struct bio *bio)
28362306a36Sopenharmony_ci{
28462306a36Sopenharmony_ci	return (bio->bi_iter.bi_sector >> clone->region_shift);
28562306a36Sopenharmony_ci}
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_ci/* Get the region range covered by the bio */
28862306a36Sopenharmony_cistatic void bio_region_range(struct clone *clone, struct bio *bio,
28962306a36Sopenharmony_ci			     unsigned long *rs, unsigned long *nr_regions)
29062306a36Sopenharmony_ci{
29162306a36Sopenharmony_ci	unsigned long end;
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_ci	*rs = dm_sector_div_up(bio->bi_iter.bi_sector, clone->region_size);
29462306a36Sopenharmony_ci	end = bio_end_sector(bio) >> clone->region_shift;
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci	if (*rs >= end)
29762306a36Sopenharmony_ci		*nr_regions = 0;
29862306a36Sopenharmony_ci	else
29962306a36Sopenharmony_ci		*nr_regions = end - *rs;
30062306a36Sopenharmony_ci}
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ci/* Check whether a bio overwrites a region */
30362306a36Sopenharmony_cistatic inline bool is_overwrite_bio(struct clone *clone, struct bio *bio)
30462306a36Sopenharmony_ci{
30562306a36Sopenharmony_ci	return (bio_data_dir(bio) == WRITE && bio_sectors(bio) == clone->region_size);
30662306a36Sopenharmony_ci}
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_cistatic void fail_bios(struct bio_list *bios, blk_status_t status)
30962306a36Sopenharmony_ci{
31062306a36Sopenharmony_ci	struct bio *bio;
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_ci	while ((bio = bio_list_pop(bios))) {
31362306a36Sopenharmony_ci		bio->bi_status = status;
31462306a36Sopenharmony_ci		bio_endio(bio);
31562306a36Sopenharmony_ci	}
31662306a36Sopenharmony_ci}
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_cistatic void submit_bios(struct bio_list *bios)
31962306a36Sopenharmony_ci{
32062306a36Sopenharmony_ci	struct bio *bio;
32162306a36Sopenharmony_ci	struct blk_plug plug;
32262306a36Sopenharmony_ci
32362306a36Sopenharmony_ci	blk_start_plug(&plug);
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci	while ((bio = bio_list_pop(bios)))
32662306a36Sopenharmony_ci		submit_bio_noacct(bio);
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_ci	blk_finish_plug(&plug);
32962306a36Sopenharmony_ci}
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_ci/*
33262306a36Sopenharmony_ci * Submit bio to the underlying device.
33362306a36Sopenharmony_ci *
33462306a36Sopenharmony_ci * If the bio triggers a commit, delay it, until after the metadata have been
33562306a36Sopenharmony_ci * committed.
33662306a36Sopenharmony_ci *
33762306a36Sopenharmony_ci * NOTE: The bio remapping must be performed by the caller.
33862306a36Sopenharmony_ci */
33962306a36Sopenharmony_cistatic void issue_bio(struct clone *clone, struct bio *bio)
34062306a36Sopenharmony_ci{
34162306a36Sopenharmony_ci	if (!bio_triggers_commit(clone, bio)) {
34262306a36Sopenharmony_ci		submit_bio_noacct(bio);
34362306a36Sopenharmony_ci		return;
34462306a36Sopenharmony_ci	}
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	/*
34762306a36Sopenharmony_ci	 * If the metadata mode is RO or FAIL we won't be able to commit the
34862306a36Sopenharmony_ci	 * metadata, so we complete the bio with an error.
34962306a36Sopenharmony_ci	 */
35062306a36Sopenharmony_ci	if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) {
35162306a36Sopenharmony_ci		bio_io_error(bio);
35262306a36Sopenharmony_ci		return;
35362306a36Sopenharmony_ci	}
35462306a36Sopenharmony_ci
35562306a36Sopenharmony_ci	/*
35662306a36Sopenharmony_ci	 * Batch together any bios that trigger commits and then issue a single
35762306a36Sopenharmony_ci	 * commit for them in process_deferred_flush_bios().
35862306a36Sopenharmony_ci	 */
35962306a36Sopenharmony_ci	spin_lock_irq(&clone->lock);
36062306a36Sopenharmony_ci	bio_list_add(&clone->deferred_flush_bios, bio);
36162306a36Sopenharmony_ci	spin_unlock_irq(&clone->lock);
36262306a36Sopenharmony_ci
36362306a36Sopenharmony_ci	wake_worker(clone);
36462306a36Sopenharmony_ci}
36562306a36Sopenharmony_ci
/*
 * Remap bio to the destination device and submit it.
 *
 * If the bio triggers a commit, delay it, until after the metadata have been
 * committed.
 */
static void remap_and_issue(struct clone *clone, struct bio *bio)
{
	remap_to_dest(clone, bio);
	issue_bio(clone, bio);
}
37762306a36Sopenharmony_ci
37862306a36Sopenharmony_ci/*
37962306a36Sopenharmony_ci * Issue bios that have been deferred until after their region has finished
38062306a36Sopenharmony_ci * hydrating.
38162306a36Sopenharmony_ci *
38262306a36Sopenharmony_ci * We delegate the bio submission to the worker thread, so this is safe to call
38362306a36Sopenharmony_ci * from interrupt context.
38462306a36Sopenharmony_ci */
38562306a36Sopenharmony_cistatic void issue_deferred_bios(struct clone *clone, struct bio_list *bios)
38662306a36Sopenharmony_ci{
38762306a36Sopenharmony_ci	struct bio *bio;
38862306a36Sopenharmony_ci	unsigned long flags;
38962306a36Sopenharmony_ci	struct bio_list flush_bios = BIO_EMPTY_LIST;
39062306a36Sopenharmony_ci	struct bio_list normal_bios = BIO_EMPTY_LIST;
39162306a36Sopenharmony_ci
39262306a36Sopenharmony_ci	if (bio_list_empty(bios))
39362306a36Sopenharmony_ci		return;
39462306a36Sopenharmony_ci
39562306a36Sopenharmony_ci	while ((bio = bio_list_pop(bios))) {
39662306a36Sopenharmony_ci		if (bio_triggers_commit(clone, bio))
39762306a36Sopenharmony_ci			bio_list_add(&flush_bios, bio);
39862306a36Sopenharmony_ci		else
39962306a36Sopenharmony_ci			bio_list_add(&normal_bios, bio);
40062306a36Sopenharmony_ci	}
40162306a36Sopenharmony_ci
40262306a36Sopenharmony_ci	spin_lock_irqsave(&clone->lock, flags);
40362306a36Sopenharmony_ci	bio_list_merge(&clone->deferred_bios, &normal_bios);
40462306a36Sopenharmony_ci	bio_list_merge(&clone->deferred_flush_bios, &flush_bios);
40562306a36Sopenharmony_ci	spin_unlock_irqrestore(&clone->lock, flags);
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_ci	wake_worker(clone);
40862306a36Sopenharmony_ci}
40962306a36Sopenharmony_ci
41062306a36Sopenharmony_cistatic void complete_overwrite_bio(struct clone *clone, struct bio *bio)
41162306a36Sopenharmony_ci{
41262306a36Sopenharmony_ci	unsigned long flags;
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_ci	/*
41562306a36Sopenharmony_ci	 * If the bio has the REQ_FUA flag set we must commit the metadata
41662306a36Sopenharmony_ci	 * before signaling its completion.
41762306a36Sopenharmony_ci	 *
41862306a36Sopenharmony_ci	 * complete_overwrite_bio() is only called by hydration_complete(),
41962306a36Sopenharmony_ci	 * after having successfully updated the metadata. This means we don't
42062306a36Sopenharmony_ci	 * need to call dm_clone_changed_this_transaction() to check if the
42162306a36Sopenharmony_ci	 * metadata has changed and thus we can avoid taking the metadata spin
42262306a36Sopenharmony_ci	 * lock.
42362306a36Sopenharmony_ci	 */
42462306a36Sopenharmony_ci	if (!(bio->bi_opf & REQ_FUA)) {
42562306a36Sopenharmony_ci		bio_endio(bio);
42662306a36Sopenharmony_ci		return;
42762306a36Sopenharmony_ci	}
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci	/*
43062306a36Sopenharmony_ci	 * If the metadata mode is RO or FAIL we won't be able to commit the
43162306a36Sopenharmony_ci	 * metadata, so we complete the bio with an error.
43262306a36Sopenharmony_ci	 */
43362306a36Sopenharmony_ci	if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) {
43462306a36Sopenharmony_ci		bio_io_error(bio);
43562306a36Sopenharmony_ci		return;
43662306a36Sopenharmony_ci	}
43762306a36Sopenharmony_ci
43862306a36Sopenharmony_ci	/*
43962306a36Sopenharmony_ci	 * Batch together any bios that trigger commits and then issue a single
44062306a36Sopenharmony_ci	 * commit for them in process_deferred_flush_bios().
44162306a36Sopenharmony_ci	 */
44262306a36Sopenharmony_ci	spin_lock_irqsave(&clone->lock, flags);
44362306a36Sopenharmony_ci	bio_list_add(&clone->deferred_flush_completions, bio);
44462306a36Sopenharmony_ci	spin_unlock_irqrestore(&clone->lock, flags);
44562306a36Sopenharmony_ci
44662306a36Sopenharmony_ci	wake_worker(clone);
44762306a36Sopenharmony_ci}
44862306a36Sopenharmony_ci
44962306a36Sopenharmony_cistatic void trim_bio(struct bio *bio, sector_t sector, unsigned int len)
45062306a36Sopenharmony_ci{
45162306a36Sopenharmony_ci	bio->bi_iter.bi_sector = sector;
45262306a36Sopenharmony_ci	bio->bi_iter.bi_size = to_bytes(len);
45362306a36Sopenharmony_ci}
45462306a36Sopenharmony_ci
45562306a36Sopenharmony_cistatic void complete_discard_bio(struct clone *clone, struct bio *bio, bool success)
45662306a36Sopenharmony_ci{
45762306a36Sopenharmony_ci	unsigned long rs, nr_regions;
45862306a36Sopenharmony_ci
45962306a36Sopenharmony_ci	/*
46062306a36Sopenharmony_ci	 * If the destination device supports discards, remap and trim the
46162306a36Sopenharmony_ci	 * discard bio and pass it down. Otherwise complete the bio
46262306a36Sopenharmony_ci	 * immediately.
46362306a36Sopenharmony_ci	 */
46462306a36Sopenharmony_ci	if (test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags) && success) {
46562306a36Sopenharmony_ci		remap_to_dest(clone, bio);
46662306a36Sopenharmony_ci		bio_region_range(clone, bio, &rs, &nr_regions);
46762306a36Sopenharmony_ci		trim_bio(bio, region_to_sector(clone, rs),
46862306a36Sopenharmony_ci			 nr_regions << clone->region_shift);
46962306a36Sopenharmony_ci		submit_bio_noacct(bio);
47062306a36Sopenharmony_ci	} else
47162306a36Sopenharmony_ci		bio_endio(bio);
47262306a36Sopenharmony_ci}
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_cistatic void process_discard_bio(struct clone *clone, struct bio *bio)
47562306a36Sopenharmony_ci{
47662306a36Sopenharmony_ci	unsigned long rs, nr_regions;
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_ci	bio_region_range(clone, bio, &rs, &nr_regions);
47962306a36Sopenharmony_ci	if (!nr_regions) {
48062306a36Sopenharmony_ci		bio_endio(bio);
48162306a36Sopenharmony_ci		return;
48262306a36Sopenharmony_ci	}
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_ci	if (WARN_ON(rs >= clone->nr_regions || (rs + nr_regions) < rs ||
48562306a36Sopenharmony_ci		    (rs + nr_regions) > clone->nr_regions)) {
48662306a36Sopenharmony_ci		DMERR("%s: Invalid range (%lu + %lu, total regions %lu) for discard (%llu + %u)",
48762306a36Sopenharmony_ci		      clone_device_name(clone), rs, nr_regions,
48862306a36Sopenharmony_ci		      clone->nr_regions,
48962306a36Sopenharmony_ci		      (unsigned long long)bio->bi_iter.bi_sector,
49062306a36Sopenharmony_ci		      bio_sectors(bio));
49162306a36Sopenharmony_ci		bio_endio(bio);
49262306a36Sopenharmony_ci		return;
49362306a36Sopenharmony_ci	}
49462306a36Sopenharmony_ci
49562306a36Sopenharmony_ci	/*
49662306a36Sopenharmony_ci	 * The covered regions are already hydrated so we just need to pass
49762306a36Sopenharmony_ci	 * down the discard.
49862306a36Sopenharmony_ci	 */
49962306a36Sopenharmony_ci	if (dm_clone_is_range_hydrated(clone->cmd, rs, nr_regions)) {
50062306a36Sopenharmony_ci		complete_discard_bio(clone, bio, true);
50162306a36Sopenharmony_ci		return;
50262306a36Sopenharmony_ci	}
50362306a36Sopenharmony_ci
50462306a36Sopenharmony_ci	/*
50562306a36Sopenharmony_ci	 * If the metadata mode is RO or FAIL we won't be able to update the
50662306a36Sopenharmony_ci	 * metadata for the regions covered by the discard so we just ignore
50762306a36Sopenharmony_ci	 * it.
50862306a36Sopenharmony_ci	 */
50962306a36Sopenharmony_ci	if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) {
51062306a36Sopenharmony_ci		bio_endio(bio);
51162306a36Sopenharmony_ci		return;
51262306a36Sopenharmony_ci	}
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_ci	/*
51562306a36Sopenharmony_ci	 * Defer discard processing.
51662306a36Sopenharmony_ci	 */
51762306a36Sopenharmony_ci	spin_lock_irq(&clone->lock);
51862306a36Sopenharmony_ci	bio_list_add(&clone->deferred_discard_bios, bio);
51962306a36Sopenharmony_ci	spin_unlock_irq(&clone->lock);
52062306a36Sopenharmony_ci
52162306a36Sopenharmony_ci	wake_worker(clone);
52262306a36Sopenharmony_ci}
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/
52562306a36Sopenharmony_ci
52662306a36Sopenharmony_ci/*
52762306a36Sopenharmony_ci * dm-clone region hydrations.
52862306a36Sopenharmony_ci */
52962306a36Sopenharmony_cistruct dm_clone_region_hydration {
53062306a36Sopenharmony_ci	struct clone *clone;
53162306a36Sopenharmony_ci	unsigned long region_nr;
53262306a36Sopenharmony_ci
53362306a36Sopenharmony_ci	struct bio *overwrite_bio;
53462306a36Sopenharmony_ci	bio_end_io_t *overwrite_bio_end_io;
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_ci	struct bio_list deferred_bios;
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci	blk_status_t status;
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci	/* Used by hydration batching */
54162306a36Sopenharmony_ci	struct list_head list;
54262306a36Sopenharmony_ci
54362306a36Sopenharmony_ci	/* Used by hydration hash table */
54462306a36Sopenharmony_ci	struct hlist_node h;
54562306a36Sopenharmony_ci};
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_ci/*
54862306a36Sopenharmony_ci * Hydration hash table implementation.
54962306a36Sopenharmony_ci *
55062306a36Sopenharmony_ci * Ideally we would like to use list_bl, which uses bit spin locks and employs
55162306a36Sopenharmony_ci * the least significant bit of the list head to lock the corresponding bucket,
55262306a36Sopenharmony_ci * reducing the memory overhead for the locks. But, currently, list_bl and bit
55362306a36Sopenharmony_ci * spin locks don't support IRQ safe versions. Since we have to take the lock
55462306a36Sopenharmony_ci * in both process and interrupt context, we must fall back to using regular
55562306a36Sopenharmony_ci * spin locks; one per hash table bucket.
55662306a36Sopenharmony_ci */
55762306a36Sopenharmony_cistruct hash_table_bucket {
55862306a36Sopenharmony_ci	struct hlist_head head;
55962306a36Sopenharmony_ci
56062306a36Sopenharmony_ci	/* Spinlock protecting the bucket */
56162306a36Sopenharmony_ci	spinlock_t lock;
56262306a36Sopenharmony_ci};
56362306a36Sopenharmony_ci
/*
 * Bucket locking helpers: thin wrappers around the IRQ-safe spinlock
 * primitives, applied to a bucket's lock.
 */
#define bucket_lock_irqsave(bucket, flags) \
	spin_lock_irqsave(&(bucket)->lock, flags)

#define bucket_unlock_irqrestore(bucket, flags) \
	spin_unlock_irqrestore(&(bucket)->lock, flags)

#define bucket_lock_irq(bucket) \
	spin_lock_irq(&(bucket)->lock)

#define bucket_unlock_irq(bucket) \
	spin_unlock_irq(&(bucket)->lock)
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_cistatic int hash_table_init(struct clone *clone)
57762306a36Sopenharmony_ci{
57862306a36Sopenharmony_ci	unsigned int i, sz;
57962306a36Sopenharmony_ci	struct hash_table_bucket *bucket;
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_ci	sz = 1 << HASH_TABLE_BITS;
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_ci	clone->ht = kvmalloc_array(sz, sizeof(struct hash_table_bucket), GFP_KERNEL);
58462306a36Sopenharmony_ci	if (!clone->ht)
58562306a36Sopenharmony_ci		return -ENOMEM;
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_ci	for (i = 0; i < sz; i++) {
58862306a36Sopenharmony_ci		bucket = clone->ht + i;
58962306a36Sopenharmony_ci
59062306a36Sopenharmony_ci		INIT_HLIST_HEAD(&bucket->head);
59162306a36Sopenharmony_ci		spin_lock_init(&bucket->lock);
59262306a36Sopenharmony_ci	}
59362306a36Sopenharmony_ci
59462306a36Sopenharmony_ci	return 0;
59562306a36Sopenharmony_ci}
59662306a36Sopenharmony_ci
59762306a36Sopenharmony_cistatic void hash_table_exit(struct clone *clone)
59862306a36Sopenharmony_ci{
59962306a36Sopenharmony_ci	kvfree(clone->ht);
60062306a36Sopenharmony_ci}
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_cistatic struct hash_table_bucket *get_hash_table_bucket(struct clone *clone,
60362306a36Sopenharmony_ci						       unsigned long region_nr)
60462306a36Sopenharmony_ci{
60562306a36Sopenharmony_ci	return &clone->ht[hash_long(region_nr, HASH_TABLE_BITS)];
60662306a36Sopenharmony_ci}
60762306a36Sopenharmony_ci
60862306a36Sopenharmony_ci/*
60962306a36Sopenharmony_ci * Search hash table for a hydration with hd->region_nr == region_nr
61062306a36Sopenharmony_ci *
61162306a36Sopenharmony_ci * NOTE: Must be called with the bucket lock held
61262306a36Sopenharmony_ci */
61362306a36Sopenharmony_cistatic struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket,
61462306a36Sopenharmony_ci						     unsigned long region_nr)
61562306a36Sopenharmony_ci{
61662306a36Sopenharmony_ci	struct dm_clone_region_hydration *hd;
61762306a36Sopenharmony_ci
61862306a36Sopenharmony_ci	hlist_for_each_entry(hd, &bucket->head, h) {
61962306a36Sopenharmony_ci		if (hd->region_nr == region_nr)
62062306a36Sopenharmony_ci			return hd;
62162306a36Sopenharmony_ci	}
62262306a36Sopenharmony_ci
62362306a36Sopenharmony_ci	return NULL;
62462306a36Sopenharmony_ci}
62562306a36Sopenharmony_ci
62662306a36Sopenharmony_ci/*
62762306a36Sopenharmony_ci * Insert a hydration into the hash table.
62862306a36Sopenharmony_ci *
62962306a36Sopenharmony_ci * NOTE: Must be called with the bucket lock held.
63062306a36Sopenharmony_ci */
63162306a36Sopenharmony_cistatic inline void __insert_region_hydration(struct hash_table_bucket *bucket,
63262306a36Sopenharmony_ci					     struct dm_clone_region_hydration *hd)
63362306a36Sopenharmony_ci{
63462306a36Sopenharmony_ci	hlist_add_head(&hd->h, &bucket->head);
63562306a36Sopenharmony_ci}
63662306a36Sopenharmony_ci
63762306a36Sopenharmony_ci/*
63862306a36Sopenharmony_ci * This function inserts a hydration into the hash table, unless someone else
63962306a36Sopenharmony_ci * managed to insert a hydration for the same region first. In the latter case
64062306a36Sopenharmony_ci * it returns the existing hydration descriptor for this region.
64162306a36Sopenharmony_ci *
64262306a36Sopenharmony_ci * NOTE: Must be called with the hydration hash table lock held.
64362306a36Sopenharmony_ci */
64462306a36Sopenharmony_cistatic struct dm_clone_region_hydration *
64562306a36Sopenharmony_ci__find_or_insert_region_hydration(struct hash_table_bucket *bucket,
64662306a36Sopenharmony_ci				  struct dm_clone_region_hydration *hd)
64762306a36Sopenharmony_ci{
64862306a36Sopenharmony_ci	struct dm_clone_region_hydration *hd2;
64962306a36Sopenharmony_ci
65062306a36Sopenharmony_ci	hd2 = __hash_find(bucket, hd->region_nr);
65162306a36Sopenharmony_ci	if (hd2)
65262306a36Sopenharmony_ci		return hd2;
65362306a36Sopenharmony_ci
65462306a36Sopenharmony_ci	__insert_region_hydration(bucket, hd);
65562306a36Sopenharmony_ci
65662306a36Sopenharmony_ci	return hd;
65762306a36Sopenharmony_ci}
65862306a36Sopenharmony_ci
65962306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/
66062306a36Sopenharmony_ci
66162306a36Sopenharmony_ci/* Allocate a hydration */
66262306a36Sopenharmony_cistatic struct dm_clone_region_hydration *alloc_hydration(struct clone *clone)
66362306a36Sopenharmony_ci{
66462306a36Sopenharmony_ci	struct dm_clone_region_hydration *hd;
66562306a36Sopenharmony_ci
66662306a36Sopenharmony_ci	/*
66762306a36Sopenharmony_ci	 * Allocate a hydration from the hydration mempool.
66862306a36Sopenharmony_ci	 * This might block but it can't fail.
66962306a36Sopenharmony_ci	 */
67062306a36Sopenharmony_ci	hd = mempool_alloc(&clone->hydration_pool, GFP_NOIO);
67162306a36Sopenharmony_ci	hd->clone = clone;
67262306a36Sopenharmony_ci
67362306a36Sopenharmony_ci	return hd;
67462306a36Sopenharmony_ci}
67562306a36Sopenharmony_ci
67662306a36Sopenharmony_cistatic inline void free_hydration(struct dm_clone_region_hydration *hd)
67762306a36Sopenharmony_ci{
67862306a36Sopenharmony_ci	mempool_free(hd, &hd->clone->hydration_pool);
67962306a36Sopenharmony_ci}
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci/* Initialize a hydration */
68262306a36Sopenharmony_cistatic void hydration_init(struct dm_clone_region_hydration *hd, unsigned long region_nr)
68362306a36Sopenharmony_ci{
68462306a36Sopenharmony_ci	hd->region_nr = region_nr;
68562306a36Sopenharmony_ci	hd->overwrite_bio = NULL;
68662306a36Sopenharmony_ci	bio_list_init(&hd->deferred_bios);
68762306a36Sopenharmony_ci	hd->status = 0;
68862306a36Sopenharmony_ci
68962306a36Sopenharmony_ci	INIT_LIST_HEAD(&hd->list);
69062306a36Sopenharmony_ci	INIT_HLIST_NODE(&hd->h);
69162306a36Sopenharmony_ci}
69262306a36Sopenharmony_ci
69362306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/
69462306a36Sopenharmony_ci
69562306a36Sopenharmony_ci/*
69662306a36Sopenharmony_ci * Update dm-clone's metadata after a region has finished hydrating and remove
69762306a36Sopenharmony_ci * hydration from the hash table.
69862306a36Sopenharmony_ci */
69962306a36Sopenharmony_cistatic int hydration_update_metadata(struct dm_clone_region_hydration *hd)
70062306a36Sopenharmony_ci{
70162306a36Sopenharmony_ci	int r = 0;
70262306a36Sopenharmony_ci	unsigned long flags;
70362306a36Sopenharmony_ci	struct hash_table_bucket *bucket;
70462306a36Sopenharmony_ci	struct clone *clone = hd->clone;
70562306a36Sopenharmony_ci
70662306a36Sopenharmony_ci	if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY))
70762306a36Sopenharmony_ci		r = -EPERM;
70862306a36Sopenharmony_ci
70962306a36Sopenharmony_ci	/* Update the metadata */
71062306a36Sopenharmony_ci	if (likely(!r) && hd->status == BLK_STS_OK)
71162306a36Sopenharmony_ci		r = dm_clone_set_region_hydrated(clone->cmd, hd->region_nr);
71262306a36Sopenharmony_ci
71362306a36Sopenharmony_ci	bucket = get_hash_table_bucket(clone, hd->region_nr);
71462306a36Sopenharmony_ci
71562306a36Sopenharmony_ci	/* Remove hydration from hash table */
71662306a36Sopenharmony_ci	bucket_lock_irqsave(bucket, flags);
71762306a36Sopenharmony_ci	hlist_del(&hd->h);
71862306a36Sopenharmony_ci	bucket_unlock_irqrestore(bucket, flags);
71962306a36Sopenharmony_ci
72062306a36Sopenharmony_ci	return r;
72162306a36Sopenharmony_ci}
72262306a36Sopenharmony_ci
72362306a36Sopenharmony_ci/*
72462306a36Sopenharmony_ci * Complete a region's hydration:
72562306a36Sopenharmony_ci *
72662306a36Sopenharmony_ci *	1. Update dm-clone's metadata.
72762306a36Sopenharmony_ci *	2. Remove hydration from hash table.
72862306a36Sopenharmony_ci *	3. Complete overwrite bio.
72962306a36Sopenharmony_ci *	4. Issue deferred bios.
73062306a36Sopenharmony_ci *	5. If this was the last hydration, wake up anyone waiting for
73162306a36Sopenharmony_ci *	   hydrations to finish.
73262306a36Sopenharmony_ci */
73362306a36Sopenharmony_cistatic void hydration_complete(struct dm_clone_region_hydration *hd)
73462306a36Sopenharmony_ci{
73562306a36Sopenharmony_ci	int r;
73662306a36Sopenharmony_ci	blk_status_t status;
73762306a36Sopenharmony_ci	struct clone *clone = hd->clone;
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_ci	r = hydration_update_metadata(hd);
74062306a36Sopenharmony_ci
74162306a36Sopenharmony_ci	if (hd->status == BLK_STS_OK && likely(!r)) {
74262306a36Sopenharmony_ci		if (hd->overwrite_bio)
74362306a36Sopenharmony_ci			complete_overwrite_bio(clone, hd->overwrite_bio);
74462306a36Sopenharmony_ci
74562306a36Sopenharmony_ci		issue_deferred_bios(clone, &hd->deferred_bios);
74662306a36Sopenharmony_ci	} else {
74762306a36Sopenharmony_ci		status = r ? BLK_STS_IOERR : hd->status;
74862306a36Sopenharmony_ci
74962306a36Sopenharmony_ci		if (hd->overwrite_bio)
75062306a36Sopenharmony_ci			bio_list_add(&hd->deferred_bios, hd->overwrite_bio);
75162306a36Sopenharmony_ci
75262306a36Sopenharmony_ci		fail_bios(&hd->deferred_bios, status);
75362306a36Sopenharmony_ci	}
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_ci	free_hydration(hd);
75662306a36Sopenharmony_ci
75762306a36Sopenharmony_ci	if (atomic_dec_and_test(&clone->hydrations_in_flight))
75862306a36Sopenharmony_ci		wakeup_hydration_waiters(clone);
75962306a36Sopenharmony_ci}
76062306a36Sopenharmony_ci
76162306a36Sopenharmony_cistatic void hydration_kcopyd_callback(int read_err, unsigned long write_err, void *context)
76262306a36Sopenharmony_ci{
76362306a36Sopenharmony_ci	blk_status_t status;
76462306a36Sopenharmony_ci
76562306a36Sopenharmony_ci	struct dm_clone_region_hydration *tmp, *hd = context;
76662306a36Sopenharmony_ci	struct clone *clone = hd->clone;
76762306a36Sopenharmony_ci
76862306a36Sopenharmony_ci	LIST_HEAD(batched_hydrations);
76962306a36Sopenharmony_ci
77062306a36Sopenharmony_ci	if (read_err || write_err) {
77162306a36Sopenharmony_ci		DMERR_LIMIT("%s: hydration failed", clone_device_name(clone));
77262306a36Sopenharmony_ci		status = BLK_STS_IOERR;
77362306a36Sopenharmony_ci	} else {
77462306a36Sopenharmony_ci		status = BLK_STS_OK;
77562306a36Sopenharmony_ci	}
77662306a36Sopenharmony_ci	list_splice_tail(&hd->list, &batched_hydrations);
77762306a36Sopenharmony_ci
77862306a36Sopenharmony_ci	hd->status = status;
77962306a36Sopenharmony_ci	hydration_complete(hd);
78062306a36Sopenharmony_ci
78162306a36Sopenharmony_ci	/* Complete batched hydrations */
78262306a36Sopenharmony_ci	list_for_each_entry_safe(hd, tmp, &batched_hydrations, list) {
78362306a36Sopenharmony_ci		hd->status = status;
78462306a36Sopenharmony_ci		hydration_complete(hd);
78562306a36Sopenharmony_ci	}
78662306a36Sopenharmony_ci
78762306a36Sopenharmony_ci	/* Continue background hydration, if there is no I/O in-flight */
78862306a36Sopenharmony_ci	if (test_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags) &&
78962306a36Sopenharmony_ci	    !atomic_read(&clone->ios_in_flight))
79062306a36Sopenharmony_ci		wake_worker(clone);
79162306a36Sopenharmony_ci}
79262306a36Sopenharmony_ci
79362306a36Sopenharmony_cistatic void hydration_copy(struct dm_clone_region_hydration *hd, unsigned int nr_regions)
79462306a36Sopenharmony_ci{
79562306a36Sopenharmony_ci	unsigned long region_start, region_end;
79662306a36Sopenharmony_ci	sector_t tail_size, region_size, total_size;
79762306a36Sopenharmony_ci	struct dm_io_region from, to;
79862306a36Sopenharmony_ci	struct clone *clone = hd->clone;
79962306a36Sopenharmony_ci
80062306a36Sopenharmony_ci	if (WARN_ON(!nr_regions))
80162306a36Sopenharmony_ci		return;
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ci	region_size = clone->region_size;
80462306a36Sopenharmony_ci	region_start = hd->region_nr;
80562306a36Sopenharmony_ci	region_end = region_start + nr_regions - 1;
80662306a36Sopenharmony_ci
80762306a36Sopenharmony_ci	total_size = region_to_sector(clone, nr_regions - 1);
80862306a36Sopenharmony_ci
80962306a36Sopenharmony_ci	if (region_end == clone->nr_regions - 1) {
81062306a36Sopenharmony_ci		/*
81162306a36Sopenharmony_ci		 * The last region of the target might be smaller than
81262306a36Sopenharmony_ci		 * region_size.
81362306a36Sopenharmony_ci		 */
81462306a36Sopenharmony_ci		tail_size = clone->ti->len & (region_size - 1);
81562306a36Sopenharmony_ci		if (!tail_size)
81662306a36Sopenharmony_ci			tail_size = region_size;
81762306a36Sopenharmony_ci	} else {
81862306a36Sopenharmony_ci		tail_size = region_size;
81962306a36Sopenharmony_ci	}
82062306a36Sopenharmony_ci
82162306a36Sopenharmony_ci	total_size += tail_size;
82262306a36Sopenharmony_ci
82362306a36Sopenharmony_ci	from.bdev = clone->source_dev->bdev;
82462306a36Sopenharmony_ci	from.sector = region_to_sector(clone, region_start);
82562306a36Sopenharmony_ci	from.count = total_size;
82662306a36Sopenharmony_ci
82762306a36Sopenharmony_ci	to.bdev = clone->dest_dev->bdev;
82862306a36Sopenharmony_ci	to.sector = from.sector;
82962306a36Sopenharmony_ci	to.count = from.count;
83062306a36Sopenharmony_ci
83162306a36Sopenharmony_ci	/* Issue copy */
83262306a36Sopenharmony_ci	atomic_add(nr_regions, &clone->hydrations_in_flight);
83362306a36Sopenharmony_ci	dm_kcopyd_copy(clone->kcopyd_client, &from, 1, &to, 0,
83462306a36Sopenharmony_ci		       hydration_kcopyd_callback, hd);
83562306a36Sopenharmony_ci}
83662306a36Sopenharmony_ci
83762306a36Sopenharmony_cistatic void overwrite_endio(struct bio *bio)
83862306a36Sopenharmony_ci{
83962306a36Sopenharmony_ci	struct dm_clone_region_hydration *hd = bio->bi_private;
84062306a36Sopenharmony_ci
84162306a36Sopenharmony_ci	bio->bi_end_io = hd->overwrite_bio_end_io;
84262306a36Sopenharmony_ci	hd->status = bio->bi_status;
84362306a36Sopenharmony_ci
84462306a36Sopenharmony_ci	hydration_complete(hd);
84562306a36Sopenharmony_ci}
84662306a36Sopenharmony_ci
84762306a36Sopenharmony_cistatic void hydration_overwrite(struct dm_clone_region_hydration *hd, struct bio *bio)
84862306a36Sopenharmony_ci{
84962306a36Sopenharmony_ci	/*
85062306a36Sopenharmony_ci	 * We don't need to save and restore bio->bi_private because device
85162306a36Sopenharmony_ci	 * mapper core generates a new bio for us to use, with clean
85262306a36Sopenharmony_ci	 * bi_private.
85362306a36Sopenharmony_ci	 */
85462306a36Sopenharmony_ci	hd->overwrite_bio = bio;
85562306a36Sopenharmony_ci	hd->overwrite_bio_end_io = bio->bi_end_io;
85662306a36Sopenharmony_ci
85762306a36Sopenharmony_ci	bio->bi_end_io = overwrite_endio;
85862306a36Sopenharmony_ci	bio->bi_private = hd;
85962306a36Sopenharmony_ci
86062306a36Sopenharmony_ci	atomic_inc(&hd->clone->hydrations_in_flight);
86162306a36Sopenharmony_ci	submit_bio_noacct(bio);
86262306a36Sopenharmony_ci}
86362306a36Sopenharmony_ci
86462306a36Sopenharmony_ci/*
86562306a36Sopenharmony_ci * Hydrate bio's region.
86662306a36Sopenharmony_ci *
86762306a36Sopenharmony_ci * This function starts the hydration of the bio's region and puts the bio in
86862306a36Sopenharmony_ci * the list of deferred bios for this region. In case, by the time this
86962306a36Sopenharmony_ci * function is called, the region has finished hydrating it's submitted to the
87062306a36Sopenharmony_ci * destination device.
87162306a36Sopenharmony_ci *
87262306a36Sopenharmony_ci * NOTE: The bio remapping must be performed by the caller.
87362306a36Sopenharmony_ci */
87462306a36Sopenharmony_cistatic void hydrate_bio_region(struct clone *clone, struct bio *bio)
87562306a36Sopenharmony_ci{
87662306a36Sopenharmony_ci	unsigned long region_nr;
87762306a36Sopenharmony_ci	struct hash_table_bucket *bucket;
87862306a36Sopenharmony_ci	struct dm_clone_region_hydration *hd, *hd2;
87962306a36Sopenharmony_ci
88062306a36Sopenharmony_ci	region_nr = bio_to_region(clone, bio);
88162306a36Sopenharmony_ci	bucket = get_hash_table_bucket(clone, region_nr);
88262306a36Sopenharmony_ci
88362306a36Sopenharmony_ci	bucket_lock_irq(bucket);
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci	hd = __hash_find(bucket, region_nr);
88662306a36Sopenharmony_ci	if (hd) {
88762306a36Sopenharmony_ci		/* Someone else is hydrating the region */
88862306a36Sopenharmony_ci		bio_list_add(&hd->deferred_bios, bio);
88962306a36Sopenharmony_ci		bucket_unlock_irq(bucket);
89062306a36Sopenharmony_ci		return;
89162306a36Sopenharmony_ci	}
89262306a36Sopenharmony_ci
89362306a36Sopenharmony_ci	if (dm_clone_is_region_hydrated(clone->cmd, region_nr)) {
89462306a36Sopenharmony_ci		/* The region has been hydrated */
89562306a36Sopenharmony_ci		bucket_unlock_irq(bucket);
89662306a36Sopenharmony_ci		issue_bio(clone, bio);
89762306a36Sopenharmony_ci		return;
89862306a36Sopenharmony_ci	}
89962306a36Sopenharmony_ci
90062306a36Sopenharmony_ci	/*
90162306a36Sopenharmony_ci	 * We must allocate a hydration descriptor and start the hydration of
90262306a36Sopenharmony_ci	 * the corresponding region.
90362306a36Sopenharmony_ci	 */
90462306a36Sopenharmony_ci	bucket_unlock_irq(bucket);
90562306a36Sopenharmony_ci
90662306a36Sopenharmony_ci	hd = alloc_hydration(clone);
90762306a36Sopenharmony_ci	hydration_init(hd, region_nr);
90862306a36Sopenharmony_ci
90962306a36Sopenharmony_ci	bucket_lock_irq(bucket);
91062306a36Sopenharmony_ci
91162306a36Sopenharmony_ci	/* Check if the region has been hydrated in the meantime. */
91262306a36Sopenharmony_ci	if (dm_clone_is_region_hydrated(clone->cmd, region_nr)) {
91362306a36Sopenharmony_ci		bucket_unlock_irq(bucket);
91462306a36Sopenharmony_ci		free_hydration(hd);
91562306a36Sopenharmony_ci		issue_bio(clone, bio);
91662306a36Sopenharmony_ci		return;
91762306a36Sopenharmony_ci	}
91862306a36Sopenharmony_ci
91962306a36Sopenharmony_ci	hd2 = __find_or_insert_region_hydration(bucket, hd);
92062306a36Sopenharmony_ci	if (hd2 != hd) {
92162306a36Sopenharmony_ci		/* Someone else started the region's hydration. */
92262306a36Sopenharmony_ci		bio_list_add(&hd2->deferred_bios, bio);
92362306a36Sopenharmony_ci		bucket_unlock_irq(bucket);
92462306a36Sopenharmony_ci		free_hydration(hd);
92562306a36Sopenharmony_ci		return;
92662306a36Sopenharmony_ci	}
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_ci	/*
92962306a36Sopenharmony_ci	 * If the metadata mode is RO or FAIL then there is no point starting a
93062306a36Sopenharmony_ci	 * hydration, since we will not be able to update the metadata when the
93162306a36Sopenharmony_ci	 * hydration finishes.
93262306a36Sopenharmony_ci	 */
93362306a36Sopenharmony_ci	if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) {
93462306a36Sopenharmony_ci		hlist_del(&hd->h);
93562306a36Sopenharmony_ci		bucket_unlock_irq(bucket);
93662306a36Sopenharmony_ci		free_hydration(hd);
93762306a36Sopenharmony_ci		bio_io_error(bio);
93862306a36Sopenharmony_ci		return;
93962306a36Sopenharmony_ci	}
94062306a36Sopenharmony_ci
94162306a36Sopenharmony_ci	/*
94262306a36Sopenharmony_ci	 * Start region hydration.
94362306a36Sopenharmony_ci	 *
94462306a36Sopenharmony_ci	 * If a bio overwrites a region, i.e., its size is equal to the
94562306a36Sopenharmony_ci	 * region's size, then we don't need to copy the region from the source
94662306a36Sopenharmony_ci	 * to the destination device.
94762306a36Sopenharmony_ci	 */
94862306a36Sopenharmony_ci	if (is_overwrite_bio(clone, bio)) {
94962306a36Sopenharmony_ci		bucket_unlock_irq(bucket);
95062306a36Sopenharmony_ci		hydration_overwrite(hd, bio);
95162306a36Sopenharmony_ci	} else {
95262306a36Sopenharmony_ci		bio_list_add(&hd->deferred_bios, bio);
95362306a36Sopenharmony_ci		bucket_unlock_irq(bucket);
95462306a36Sopenharmony_ci		hydration_copy(hd, 1);
95562306a36Sopenharmony_ci	}
95662306a36Sopenharmony_ci}
95762306a36Sopenharmony_ci
95862306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/
95962306a36Sopenharmony_ci
96062306a36Sopenharmony_ci/*
96162306a36Sopenharmony_ci * Background hydrations.
96262306a36Sopenharmony_ci */
96362306a36Sopenharmony_ci
/*
 * Batch region hydrations.
 *
 * The hydrations of adjacent regions are batched together and issued to kcopyd
 * as a single copy request, to better utilize device bandwidth. This makes
 * small region sizes practical, e.g., 4KB, which is good for small, random
 * write performance (because of the overwriting of un-hydrated regions), while
 * big copy requests still provide high hydration bandwidth.
 */
struct batch_info {
	/*
	 * Hydration of the first region of the batch, or NULL if no batch is
	 * open. Further hydrations are chained on head->list.
	 */
	struct dm_clone_region_hydration *head;
	/* Number of regions in the batch, including the head's region */
	unsigned int nr_batched_regions;
};
97762306a36Sopenharmony_ci
97862306a36Sopenharmony_cistatic void __batch_hydration(struct batch_info *batch,
97962306a36Sopenharmony_ci			      struct dm_clone_region_hydration *hd)
98062306a36Sopenharmony_ci{
98162306a36Sopenharmony_ci	struct clone *clone = hd->clone;
98262306a36Sopenharmony_ci	unsigned int max_batch_size = READ_ONCE(clone->hydration_batch_size);
98362306a36Sopenharmony_ci
98462306a36Sopenharmony_ci	if (batch->head) {
98562306a36Sopenharmony_ci		/* Try to extend the current batch */
98662306a36Sopenharmony_ci		if (batch->nr_batched_regions < max_batch_size &&
98762306a36Sopenharmony_ci		    (batch->head->region_nr + batch->nr_batched_regions) == hd->region_nr) {
98862306a36Sopenharmony_ci			list_add_tail(&hd->list, &batch->head->list);
98962306a36Sopenharmony_ci			batch->nr_batched_regions++;
99062306a36Sopenharmony_ci			hd = NULL;
99162306a36Sopenharmony_ci		}
99262306a36Sopenharmony_ci
99362306a36Sopenharmony_ci		/* Check if we should issue the current batch */
99462306a36Sopenharmony_ci		if (batch->nr_batched_regions >= max_batch_size || hd) {
99562306a36Sopenharmony_ci			hydration_copy(batch->head, batch->nr_batched_regions);
99662306a36Sopenharmony_ci			batch->head = NULL;
99762306a36Sopenharmony_ci			batch->nr_batched_regions = 0;
99862306a36Sopenharmony_ci		}
99962306a36Sopenharmony_ci	}
100062306a36Sopenharmony_ci
100162306a36Sopenharmony_ci	if (!hd)
100262306a36Sopenharmony_ci		return;
100362306a36Sopenharmony_ci
100462306a36Sopenharmony_ci	/* We treat max batch sizes of zero and one equivalently */
100562306a36Sopenharmony_ci	if (max_batch_size <= 1) {
100662306a36Sopenharmony_ci		hydration_copy(hd, 1);
100762306a36Sopenharmony_ci		return;
100862306a36Sopenharmony_ci	}
100962306a36Sopenharmony_ci
101062306a36Sopenharmony_ci	/* Start a new batch */
101162306a36Sopenharmony_ci	BUG_ON(!list_empty(&hd->list));
101262306a36Sopenharmony_ci	batch->head = hd;
101362306a36Sopenharmony_ci	batch->nr_batched_regions = 1;
101462306a36Sopenharmony_ci}
101562306a36Sopenharmony_ci
101662306a36Sopenharmony_cistatic unsigned long __start_next_hydration(struct clone *clone,
101762306a36Sopenharmony_ci					    unsigned long offset,
101862306a36Sopenharmony_ci					    struct batch_info *batch)
101962306a36Sopenharmony_ci{
102062306a36Sopenharmony_ci	struct hash_table_bucket *bucket;
102162306a36Sopenharmony_ci	struct dm_clone_region_hydration *hd;
102262306a36Sopenharmony_ci	unsigned long nr_regions = clone->nr_regions;
102362306a36Sopenharmony_ci
102462306a36Sopenharmony_ci	hd = alloc_hydration(clone);
102562306a36Sopenharmony_ci
102662306a36Sopenharmony_ci	/* Try to find a region to hydrate. */
102762306a36Sopenharmony_ci	do {
102862306a36Sopenharmony_ci		offset = dm_clone_find_next_unhydrated_region(clone->cmd, offset);
102962306a36Sopenharmony_ci		if (offset == nr_regions)
103062306a36Sopenharmony_ci			break;
103162306a36Sopenharmony_ci
103262306a36Sopenharmony_ci		bucket = get_hash_table_bucket(clone, offset);
103362306a36Sopenharmony_ci		bucket_lock_irq(bucket);
103462306a36Sopenharmony_ci
103562306a36Sopenharmony_ci		if (!dm_clone_is_region_hydrated(clone->cmd, offset) &&
103662306a36Sopenharmony_ci		    !__hash_find(bucket, offset)) {
103762306a36Sopenharmony_ci			hydration_init(hd, offset);
103862306a36Sopenharmony_ci			__insert_region_hydration(bucket, hd);
103962306a36Sopenharmony_ci			bucket_unlock_irq(bucket);
104062306a36Sopenharmony_ci
104162306a36Sopenharmony_ci			/* Batch hydration */
104262306a36Sopenharmony_ci			__batch_hydration(batch, hd);
104362306a36Sopenharmony_ci
104462306a36Sopenharmony_ci			return (offset + 1);
104562306a36Sopenharmony_ci		}
104662306a36Sopenharmony_ci
104762306a36Sopenharmony_ci		bucket_unlock_irq(bucket);
104862306a36Sopenharmony_ci
104962306a36Sopenharmony_ci	} while (++offset < nr_regions);
105062306a36Sopenharmony_ci
105162306a36Sopenharmony_ci	if (hd)
105262306a36Sopenharmony_ci		free_hydration(hd);
105362306a36Sopenharmony_ci
105462306a36Sopenharmony_ci	return offset;
105562306a36Sopenharmony_ci}
105662306a36Sopenharmony_ci
105762306a36Sopenharmony_ci/*
105862306a36Sopenharmony_ci * This function searches for regions that still reside in the source device
105962306a36Sopenharmony_ci * and starts their hydration.
106062306a36Sopenharmony_ci */
106162306a36Sopenharmony_cistatic void do_hydration(struct clone *clone)
106262306a36Sopenharmony_ci{
106362306a36Sopenharmony_ci	unsigned int current_volume;
106462306a36Sopenharmony_ci	unsigned long offset, nr_regions = clone->nr_regions;
106562306a36Sopenharmony_ci
106662306a36Sopenharmony_ci	struct batch_info batch = {
106762306a36Sopenharmony_ci		.head = NULL,
106862306a36Sopenharmony_ci		.nr_batched_regions = 0,
106962306a36Sopenharmony_ci	};
107062306a36Sopenharmony_ci
107162306a36Sopenharmony_ci	if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY))
107262306a36Sopenharmony_ci		return;
107362306a36Sopenharmony_ci
107462306a36Sopenharmony_ci	if (dm_clone_is_hydration_done(clone->cmd))
107562306a36Sopenharmony_ci		return;
107662306a36Sopenharmony_ci
107762306a36Sopenharmony_ci	/*
107862306a36Sopenharmony_ci	 * Avoid race with device suspension.
107962306a36Sopenharmony_ci	 */
108062306a36Sopenharmony_ci	atomic_inc(&clone->hydrations_in_flight);
108162306a36Sopenharmony_ci
108262306a36Sopenharmony_ci	/*
108362306a36Sopenharmony_ci	 * Make sure atomic_inc() is ordered before test_bit(), otherwise we
108462306a36Sopenharmony_ci	 * might race with clone_postsuspend() and start a region hydration
108562306a36Sopenharmony_ci	 * after the target has been suspended.
108662306a36Sopenharmony_ci	 *
108762306a36Sopenharmony_ci	 * This is paired with the smp_mb__after_atomic() in
108862306a36Sopenharmony_ci	 * clone_postsuspend().
108962306a36Sopenharmony_ci	 */
109062306a36Sopenharmony_ci	smp_mb__after_atomic();
109162306a36Sopenharmony_ci
109262306a36Sopenharmony_ci	offset = clone->hydration_offset;
109362306a36Sopenharmony_ci	while (likely(!test_bit(DM_CLONE_HYDRATION_SUSPENDED, &clone->flags)) &&
109462306a36Sopenharmony_ci	       !atomic_read(&clone->ios_in_flight) &&
109562306a36Sopenharmony_ci	       test_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags) &&
109662306a36Sopenharmony_ci	       offset < nr_regions) {
109762306a36Sopenharmony_ci		current_volume = atomic_read(&clone->hydrations_in_flight);
109862306a36Sopenharmony_ci		current_volume += batch.nr_batched_regions;
109962306a36Sopenharmony_ci
110062306a36Sopenharmony_ci		if (current_volume > READ_ONCE(clone->hydration_threshold))
110162306a36Sopenharmony_ci			break;
110262306a36Sopenharmony_ci
110362306a36Sopenharmony_ci		offset = __start_next_hydration(clone, offset, &batch);
110462306a36Sopenharmony_ci	}
110562306a36Sopenharmony_ci
110662306a36Sopenharmony_ci	if (batch.head)
110762306a36Sopenharmony_ci		hydration_copy(batch.head, batch.nr_batched_regions);
110862306a36Sopenharmony_ci
110962306a36Sopenharmony_ci	if (offset >= nr_regions)
111062306a36Sopenharmony_ci		offset = 0;
111162306a36Sopenharmony_ci
111262306a36Sopenharmony_ci	clone->hydration_offset = offset;
111362306a36Sopenharmony_ci
111462306a36Sopenharmony_ci	if (atomic_dec_and_test(&clone->hydrations_in_flight))
111562306a36Sopenharmony_ci		wakeup_hydration_waiters(clone);
111662306a36Sopenharmony_ci}
111762306a36Sopenharmony_ci
111862306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/
111962306a36Sopenharmony_ci
112062306a36Sopenharmony_cistatic bool need_commit_due_to_time(struct clone *clone)
112162306a36Sopenharmony_ci{
112262306a36Sopenharmony_ci	return !time_in_range(jiffies, clone->last_commit_jiffies,
112362306a36Sopenharmony_ci			      clone->last_commit_jiffies + COMMIT_PERIOD);
112462306a36Sopenharmony_ci}
112562306a36Sopenharmony_ci
112662306a36Sopenharmony_ci/*
112762306a36Sopenharmony_ci * A non-zero return indicates read-only or fail mode.
112862306a36Sopenharmony_ci */
112962306a36Sopenharmony_cistatic int commit_metadata(struct clone *clone, bool *dest_dev_flushed)
113062306a36Sopenharmony_ci{
113162306a36Sopenharmony_ci	int r = 0;
113262306a36Sopenharmony_ci
113362306a36Sopenharmony_ci	if (dest_dev_flushed)
113462306a36Sopenharmony_ci		*dest_dev_flushed = false;
113562306a36Sopenharmony_ci
113662306a36Sopenharmony_ci	mutex_lock(&clone->commit_lock);
113762306a36Sopenharmony_ci
113862306a36Sopenharmony_ci	if (!dm_clone_changed_this_transaction(clone->cmd))
113962306a36Sopenharmony_ci		goto out;
114062306a36Sopenharmony_ci
114162306a36Sopenharmony_ci	if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) {
114262306a36Sopenharmony_ci		r = -EPERM;
114362306a36Sopenharmony_ci		goto out;
114462306a36Sopenharmony_ci	}
114562306a36Sopenharmony_ci
114662306a36Sopenharmony_ci	r = dm_clone_metadata_pre_commit(clone->cmd);
114762306a36Sopenharmony_ci	if (unlikely(r)) {
114862306a36Sopenharmony_ci		__metadata_operation_failed(clone, "dm_clone_metadata_pre_commit", r);
114962306a36Sopenharmony_ci		goto out;
115062306a36Sopenharmony_ci	}
115162306a36Sopenharmony_ci
115262306a36Sopenharmony_ci	r = blkdev_issue_flush(clone->dest_dev->bdev);
115362306a36Sopenharmony_ci	if (unlikely(r)) {
115462306a36Sopenharmony_ci		__metadata_operation_failed(clone, "flush destination device", r);
115562306a36Sopenharmony_ci		goto out;
115662306a36Sopenharmony_ci	}
115762306a36Sopenharmony_ci
115862306a36Sopenharmony_ci	if (dest_dev_flushed)
115962306a36Sopenharmony_ci		*dest_dev_flushed = true;
116062306a36Sopenharmony_ci
116162306a36Sopenharmony_ci	r = dm_clone_metadata_commit(clone->cmd);
116262306a36Sopenharmony_ci	if (unlikely(r)) {
116362306a36Sopenharmony_ci		__metadata_operation_failed(clone, "dm_clone_metadata_commit", r);
116462306a36Sopenharmony_ci		goto out;
116562306a36Sopenharmony_ci	}
116662306a36Sopenharmony_ci
116762306a36Sopenharmony_ci	if (dm_clone_is_hydration_done(clone->cmd))
116862306a36Sopenharmony_ci		dm_table_event(clone->ti->table);
116962306a36Sopenharmony_ciout:
117062306a36Sopenharmony_ci	mutex_unlock(&clone->commit_lock);
117162306a36Sopenharmony_ci
117262306a36Sopenharmony_ci	return r;
117362306a36Sopenharmony_ci}
117462306a36Sopenharmony_ci
117562306a36Sopenharmony_cistatic void process_deferred_discards(struct clone *clone)
117662306a36Sopenharmony_ci{
117762306a36Sopenharmony_ci	int r = -EPERM;
117862306a36Sopenharmony_ci	struct bio *bio;
117962306a36Sopenharmony_ci	struct blk_plug plug;
118062306a36Sopenharmony_ci	unsigned long rs, nr_regions;
118162306a36Sopenharmony_ci	struct bio_list discards = BIO_EMPTY_LIST;
118262306a36Sopenharmony_ci
118362306a36Sopenharmony_ci	spin_lock_irq(&clone->lock);
118462306a36Sopenharmony_ci	bio_list_merge(&discards, &clone->deferred_discard_bios);
118562306a36Sopenharmony_ci	bio_list_init(&clone->deferred_discard_bios);
118662306a36Sopenharmony_ci	spin_unlock_irq(&clone->lock);
118762306a36Sopenharmony_ci
118862306a36Sopenharmony_ci	if (bio_list_empty(&discards))
118962306a36Sopenharmony_ci		return;
119062306a36Sopenharmony_ci
119162306a36Sopenharmony_ci	if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY))
119262306a36Sopenharmony_ci		goto out;
119362306a36Sopenharmony_ci
119462306a36Sopenharmony_ci	/* Update the metadata */
119562306a36Sopenharmony_ci	bio_list_for_each(bio, &discards) {
119662306a36Sopenharmony_ci		bio_region_range(clone, bio, &rs, &nr_regions);
119762306a36Sopenharmony_ci		/*
119862306a36Sopenharmony_ci		 * A discard request might cover regions that have been already
119962306a36Sopenharmony_ci		 * hydrated. There is no need to update the metadata for these
120062306a36Sopenharmony_ci		 * regions.
120162306a36Sopenharmony_ci		 */
120262306a36Sopenharmony_ci		r = dm_clone_cond_set_range(clone->cmd, rs, nr_regions);
120362306a36Sopenharmony_ci		if (unlikely(r))
120462306a36Sopenharmony_ci			break;
120562306a36Sopenharmony_ci	}
120662306a36Sopenharmony_ciout:
120762306a36Sopenharmony_ci	blk_start_plug(&plug);
120862306a36Sopenharmony_ci	while ((bio = bio_list_pop(&discards)))
120962306a36Sopenharmony_ci		complete_discard_bio(clone, bio, r == 0);
121062306a36Sopenharmony_ci	blk_finish_plug(&plug);
121162306a36Sopenharmony_ci}
121262306a36Sopenharmony_ci
121362306a36Sopenharmony_cistatic void process_deferred_bios(struct clone *clone)
121462306a36Sopenharmony_ci{
121562306a36Sopenharmony_ci	struct bio_list bios = BIO_EMPTY_LIST;
121662306a36Sopenharmony_ci
121762306a36Sopenharmony_ci	spin_lock_irq(&clone->lock);
121862306a36Sopenharmony_ci	bio_list_merge(&bios, &clone->deferred_bios);
121962306a36Sopenharmony_ci	bio_list_init(&clone->deferred_bios);
122062306a36Sopenharmony_ci	spin_unlock_irq(&clone->lock);
122162306a36Sopenharmony_ci
122262306a36Sopenharmony_ci	if (bio_list_empty(&bios))
122362306a36Sopenharmony_ci		return;
122462306a36Sopenharmony_ci
122562306a36Sopenharmony_ci	submit_bios(&bios);
122662306a36Sopenharmony_ci}
122762306a36Sopenharmony_ci
122862306a36Sopenharmony_cistatic void process_deferred_flush_bios(struct clone *clone)
122962306a36Sopenharmony_ci{
123062306a36Sopenharmony_ci	struct bio *bio;
123162306a36Sopenharmony_ci	bool dest_dev_flushed;
123262306a36Sopenharmony_ci	struct bio_list bios = BIO_EMPTY_LIST;
123362306a36Sopenharmony_ci	struct bio_list bio_completions = BIO_EMPTY_LIST;
123462306a36Sopenharmony_ci
123562306a36Sopenharmony_ci	/*
123662306a36Sopenharmony_ci	 * If there are any deferred flush bios, we must commit the metadata
123762306a36Sopenharmony_ci	 * before issuing them or signaling their completion.
123862306a36Sopenharmony_ci	 */
123962306a36Sopenharmony_ci	spin_lock_irq(&clone->lock);
124062306a36Sopenharmony_ci	bio_list_merge(&bios, &clone->deferred_flush_bios);
124162306a36Sopenharmony_ci	bio_list_init(&clone->deferred_flush_bios);
124262306a36Sopenharmony_ci
124362306a36Sopenharmony_ci	bio_list_merge(&bio_completions, &clone->deferred_flush_completions);
124462306a36Sopenharmony_ci	bio_list_init(&clone->deferred_flush_completions);
124562306a36Sopenharmony_ci	spin_unlock_irq(&clone->lock);
124662306a36Sopenharmony_ci
124762306a36Sopenharmony_ci	if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
124862306a36Sopenharmony_ci	    !(dm_clone_changed_this_transaction(clone->cmd) && need_commit_due_to_time(clone)))
124962306a36Sopenharmony_ci		return;
125062306a36Sopenharmony_ci
125162306a36Sopenharmony_ci	if (commit_metadata(clone, &dest_dev_flushed)) {
125262306a36Sopenharmony_ci		bio_list_merge(&bios, &bio_completions);
125362306a36Sopenharmony_ci
125462306a36Sopenharmony_ci		while ((bio = bio_list_pop(&bios)))
125562306a36Sopenharmony_ci			bio_io_error(bio);
125662306a36Sopenharmony_ci
125762306a36Sopenharmony_ci		return;
125862306a36Sopenharmony_ci	}
125962306a36Sopenharmony_ci
126062306a36Sopenharmony_ci	clone->last_commit_jiffies = jiffies;
126162306a36Sopenharmony_ci
126262306a36Sopenharmony_ci	while ((bio = bio_list_pop(&bio_completions)))
126362306a36Sopenharmony_ci		bio_endio(bio);
126462306a36Sopenharmony_ci
126562306a36Sopenharmony_ci	while ((bio = bio_list_pop(&bios))) {
126662306a36Sopenharmony_ci		if ((bio->bi_opf & REQ_PREFLUSH) && dest_dev_flushed) {
126762306a36Sopenharmony_ci			/* We just flushed the destination device as part of
126862306a36Sopenharmony_ci			 * the metadata commit, so there is no reason to send
126962306a36Sopenharmony_ci			 * another flush.
127062306a36Sopenharmony_ci			 */
127162306a36Sopenharmony_ci			bio_endio(bio);
127262306a36Sopenharmony_ci		} else {
127362306a36Sopenharmony_ci			submit_bio_noacct(bio);
127462306a36Sopenharmony_ci		}
127562306a36Sopenharmony_ci	}
127662306a36Sopenharmony_ci}
127762306a36Sopenharmony_ci
127862306a36Sopenharmony_cistatic void do_worker(struct work_struct *work)
127962306a36Sopenharmony_ci{
128062306a36Sopenharmony_ci	struct clone *clone = container_of(work, typeof(*clone), worker);
128162306a36Sopenharmony_ci
128262306a36Sopenharmony_ci	process_deferred_bios(clone);
128362306a36Sopenharmony_ci	process_deferred_discards(clone);
128462306a36Sopenharmony_ci
128562306a36Sopenharmony_ci	/*
128662306a36Sopenharmony_ci	 * process_deferred_flush_bios():
128762306a36Sopenharmony_ci	 *
128862306a36Sopenharmony_ci	 *   - Commit metadata
128962306a36Sopenharmony_ci	 *
129062306a36Sopenharmony_ci	 *   - Process deferred REQ_FUA completions
129162306a36Sopenharmony_ci	 *
129262306a36Sopenharmony_ci	 *   - Process deferred REQ_PREFLUSH bios
129362306a36Sopenharmony_ci	 */
129462306a36Sopenharmony_ci	process_deferred_flush_bios(clone);
129562306a36Sopenharmony_ci
129662306a36Sopenharmony_ci	/* Background hydration */
129762306a36Sopenharmony_ci	do_hydration(clone);
129862306a36Sopenharmony_ci}
129962306a36Sopenharmony_ci
130062306a36Sopenharmony_ci/*
130162306a36Sopenharmony_ci * Commit periodically so that not too much unwritten data builds up.
130262306a36Sopenharmony_ci *
130362306a36Sopenharmony_ci * Also, restart background hydration, if it has been stopped by in-flight I/O.
130462306a36Sopenharmony_ci */
130562306a36Sopenharmony_cistatic void do_waker(struct work_struct *work)
130662306a36Sopenharmony_ci{
130762306a36Sopenharmony_ci	struct clone *clone = container_of(to_delayed_work(work), struct clone, waker);
130862306a36Sopenharmony_ci
130962306a36Sopenharmony_ci	wake_worker(clone);
131062306a36Sopenharmony_ci	queue_delayed_work(clone->wq, &clone->waker, COMMIT_PERIOD);
131162306a36Sopenharmony_ci}
131262306a36Sopenharmony_ci
131362306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/
131462306a36Sopenharmony_ci
131562306a36Sopenharmony_ci/*
131662306a36Sopenharmony_ci * Target methods
131762306a36Sopenharmony_ci */
131862306a36Sopenharmony_cistatic int clone_map(struct dm_target *ti, struct bio *bio)
131962306a36Sopenharmony_ci{
132062306a36Sopenharmony_ci	struct clone *clone = ti->private;
132162306a36Sopenharmony_ci	unsigned long region_nr;
132262306a36Sopenharmony_ci
132362306a36Sopenharmony_ci	atomic_inc(&clone->ios_in_flight);
132462306a36Sopenharmony_ci
132562306a36Sopenharmony_ci	if (unlikely(get_clone_mode(clone) == CM_FAIL))
132662306a36Sopenharmony_ci		return DM_MAPIO_KILL;
132762306a36Sopenharmony_ci
132862306a36Sopenharmony_ci	/*
132962306a36Sopenharmony_ci	 * REQ_PREFLUSH bios carry no data:
133062306a36Sopenharmony_ci	 *
133162306a36Sopenharmony_ci	 * - Commit metadata, if changed
133262306a36Sopenharmony_ci	 *
133362306a36Sopenharmony_ci	 * - Pass down to destination device
133462306a36Sopenharmony_ci	 */
133562306a36Sopenharmony_ci	if (bio->bi_opf & REQ_PREFLUSH) {
133662306a36Sopenharmony_ci		remap_and_issue(clone, bio);
133762306a36Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
133862306a36Sopenharmony_ci	}
133962306a36Sopenharmony_ci
134062306a36Sopenharmony_ci	bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
134162306a36Sopenharmony_ci
134262306a36Sopenharmony_ci	/*
134362306a36Sopenharmony_ci	 * dm-clone interprets discards and performs a fast hydration of the
134462306a36Sopenharmony_ci	 * discarded regions, i.e., we skip the copy from the source device and
134562306a36Sopenharmony_ci	 * just mark the regions as hydrated.
134662306a36Sopenharmony_ci	 */
134762306a36Sopenharmony_ci	if (bio_op(bio) == REQ_OP_DISCARD) {
134862306a36Sopenharmony_ci		process_discard_bio(clone, bio);
134962306a36Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
135062306a36Sopenharmony_ci	}
135162306a36Sopenharmony_ci
135262306a36Sopenharmony_ci	/*
135362306a36Sopenharmony_ci	 * If the bio's region is hydrated, redirect it to the destination
135462306a36Sopenharmony_ci	 * device.
135562306a36Sopenharmony_ci	 *
135662306a36Sopenharmony_ci	 * If the region is not hydrated and the bio is a READ, redirect it to
135762306a36Sopenharmony_ci	 * the source device.
135862306a36Sopenharmony_ci	 *
135962306a36Sopenharmony_ci	 * Else, defer WRITE bio until after its region has been hydrated and
136062306a36Sopenharmony_ci	 * start the region's hydration immediately.
136162306a36Sopenharmony_ci	 */
136262306a36Sopenharmony_ci	region_nr = bio_to_region(clone, bio);
136362306a36Sopenharmony_ci	if (dm_clone_is_region_hydrated(clone->cmd, region_nr)) {
136462306a36Sopenharmony_ci		remap_and_issue(clone, bio);
136562306a36Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
136662306a36Sopenharmony_ci	} else if (bio_data_dir(bio) == READ) {
136762306a36Sopenharmony_ci		remap_to_source(clone, bio);
136862306a36Sopenharmony_ci		return DM_MAPIO_REMAPPED;
136962306a36Sopenharmony_ci	}
137062306a36Sopenharmony_ci
137162306a36Sopenharmony_ci	remap_to_dest(clone, bio);
137262306a36Sopenharmony_ci	hydrate_bio_region(clone, bio);
137362306a36Sopenharmony_ci
137462306a36Sopenharmony_ci	return DM_MAPIO_SUBMITTED;
137562306a36Sopenharmony_ci}
137662306a36Sopenharmony_ci
137762306a36Sopenharmony_cistatic int clone_endio(struct dm_target *ti, struct bio *bio, blk_status_t *error)
137862306a36Sopenharmony_ci{
137962306a36Sopenharmony_ci	struct clone *clone = ti->private;
138062306a36Sopenharmony_ci
138162306a36Sopenharmony_ci	atomic_dec(&clone->ios_in_flight);
138262306a36Sopenharmony_ci
138362306a36Sopenharmony_ci	return DM_ENDIO_DONE;
138462306a36Sopenharmony_ci}
138562306a36Sopenharmony_ci
138662306a36Sopenharmony_cistatic void emit_flags(struct clone *clone, char *result, unsigned int maxlen,
138762306a36Sopenharmony_ci		       ssize_t *sz_ptr)
138862306a36Sopenharmony_ci{
138962306a36Sopenharmony_ci	ssize_t sz = *sz_ptr;
139062306a36Sopenharmony_ci	unsigned int count;
139162306a36Sopenharmony_ci
139262306a36Sopenharmony_ci	count = !test_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags);
139362306a36Sopenharmony_ci	count += !test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags);
139462306a36Sopenharmony_ci
139562306a36Sopenharmony_ci	DMEMIT("%u ", count);
139662306a36Sopenharmony_ci
139762306a36Sopenharmony_ci	if (!test_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags))
139862306a36Sopenharmony_ci		DMEMIT("no_hydration ");
139962306a36Sopenharmony_ci
140062306a36Sopenharmony_ci	if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags))
140162306a36Sopenharmony_ci		DMEMIT("no_discard_passdown ");
140262306a36Sopenharmony_ci
140362306a36Sopenharmony_ci	*sz_ptr = sz;
140462306a36Sopenharmony_ci}
140562306a36Sopenharmony_ci
140662306a36Sopenharmony_cistatic void emit_core_args(struct clone *clone, char *result,
140762306a36Sopenharmony_ci			   unsigned int maxlen, ssize_t *sz_ptr)
140862306a36Sopenharmony_ci{
140962306a36Sopenharmony_ci	ssize_t sz = *sz_ptr;
141062306a36Sopenharmony_ci	unsigned int count = 4;
141162306a36Sopenharmony_ci
141262306a36Sopenharmony_ci	DMEMIT("%u hydration_threshold %u hydration_batch_size %u ", count,
141362306a36Sopenharmony_ci	       READ_ONCE(clone->hydration_threshold),
141462306a36Sopenharmony_ci	       READ_ONCE(clone->hydration_batch_size));
141562306a36Sopenharmony_ci
141662306a36Sopenharmony_ci	*sz_ptr = sz;
141762306a36Sopenharmony_ci}
141862306a36Sopenharmony_ci
141962306a36Sopenharmony_ci/*
142062306a36Sopenharmony_ci * Status format:
142162306a36Sopenharmony_ci *
142262306a36Sopenharmony_ci * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
142362306a36Sopenharmony_ci * <clone region size> <#hydrated regions>/<#total regions> <#hydrating regions>
142462306a36Sopenharmony_ci * <#features> <features>* <#core args> <core args>* <clone metadata mode>
142562306a36Sopenharmony_ci */
142662306a36Sopenharmony_cistatic void clone_status(struct dm_target *ti, status_type_t type,
142762306a36Sopenharmony_ci			 unsigned int status_flags, char *result,
142862306a36Sopenharmony_ci			 unsigned int maxlen)
142962306a36Sopenharmony_ci{
143062306a36Sopenharmony_ci	int r;
143162306a36Sopenharmony_ci	unsigned int i;
143262306a36Sopenharmony_ci	ssize_t sz = 0;
143362306a36Sopenharmony_ci	dm_block_t nr_free_metadata_blocks = 0;
143462306a36Sopenharmony_ci	dm_block_t nr_metadata_blocks = 0;
143562306a36Sopenharmony_ci	char buf[BDEVNAME_SIZE];
143662306a36Sopenharmony_ci	struct clone *clone = ti->private;
143762306a36Sopenharmony_ci
143862306a36Sopenharmony_ci	switch (type) {
143962306a36Sopenharmony_ci	case STATUSTYPE_INFO:
144062306a36Sopenharmony_ci		if (get_clone_mode(clone) == CM_FAIL) {
144162306a36Sopenharmony_ci			DMEMIT("Fail");
144262306a36Sopenharmony_ci			break;
144362306a36Sopenharmony_ci		}
144462306a36Sopenharmony_ci
144562306a36Sopenharmony_ci		/* Commit to ensure statistics aren't out-of-date */
144662306a36Sopenharmony_ci		if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
144762306a36Sopenharmony_ci			(void) commit_metadata(clone, NULL);
144862306a36Sopenharmony_ci
144962306a36Sopenharmony_ci		r = dm_clone_get_free_metadata_block_count(clone->cmd, &nr_free_metadata_blocks);
145062306a36Sopenharmony_ci
145162306a36Sopenharmony_ci		if (r) {
145262306a36Sopenharmony_ci			DMERR("%s: dm_clone_get_free_metadata_block_count returned %d",
145362306a36Sopenharmony_ci			      clone_device_name(clone), r);
145462306a36Sopenharmony_ci			goto error;
145562306a36Sopenharmony_ci		}
145662306a36Sopenharmony_ci
145762306a36Sopenharmony_ci		r = dm_clone_get_metadata_dev_size(clone->cmd, &nr_metadata_blocks);
145862306a36Sopenharmony_ci
145962306a36Sopenharmony_ci		if (r) {
146062306a36Sopenharmony_ci			DMERR("%s: dm_clone_get_metadata_dev_size returned %d",
146162306a36Sopenharmony_ci			      clone_device_name(clone), r);
146262306a36Sopenharmony_ci			goto error;
146362306a36Sopenharmony_ci		}
146462306a36Sopenharmony_ci
146562306a36Sopenharmony_ci		DMEMIT("%u %llu/%llu %llu %u/%lu %u ",
146662306a36Sopenharmony_ci		       DM_CLONE_METADATA_BLOCK_SIZE,
146762306a36Sopenharmony_ci		       (unsigned long long)(nr_metadata_blocks - nr_free_metadata_blocks),
146862306a36Sopenharmony_ci		       (unsigned long long)nr_metadata_blocks,
146962306a36Sopenharmony_ci		       (unsigned long long)clone->region_size,
147062306a36Sopenharmony_ci		       dm_clone_nr_of_hydrated_regions(clone->cmd),
147162306a36Sopenharmony_ci		       clone->nr_regions,
147262306a36Sopenharmony_ci		       atomic_read(&clone->hydrations_in_flight));
147362306a36Sopenharmony_ci
147462306a36Sopenharmony_ci		emit_flags(clone, result, maxlen, &sz);
147562306a36Sopenharmony_ci		emit_core_args(clone, result, maxlen, &sz);
147662306a36Sopenharmony_ci
147762306a36Sopenharmony_ci		switch (get_clone_mode(clone)) {
147862306a36Sopenharmony_ci		case CM_WRITE:
147962306a36Sopenharmony_ci			DMEMIT("rw");
148062306a36Sopenharmony_ci			break;
148162306a36Sopenharmony_ci		case CM_READ_ONLY:
148262306a36Sopenharmony_ci			DMEMIT("ro");
148362306a36Sopenharmony_ci			break;
148462306a36Sopenharmony_ci		case CM_FAIL:
148562306a36Sopenharmony_ci			DMEMIT("Fail");
148662306a36Sopenharmony_ci		}
148762306a36Sopenharmony_ci
148862306a36Sopenharmony_ci		break;
148962306a36Sopenharmony_ci
149062306a36Sopenharmony_ci	case STATUSTYPE_TABLE:
149162306a36Sopenharmony_ci		format_dev_t(buf, clone->metadata_dev->bdev->bd_dev);
149262306a36Sopenharmony_ci		DMEMIT("%s ", buf);
149362306a36Sopenharmony_ci
149462306a36Sopenharmony_ci		format_dev_t(buf, clone->dest_dev->bdev->bd_dev);
149562306a36Sopenharmony_ci		DMEMIT("%s ", buf);
149662306a36Sopenharmony_ci
149762306a36Sopenharmony_ci		format_dev_t(buf, clone->source_dev->bdev->bd_dev);
149862306a36Sopenharmony_ci		DMEMIT("%s", buf);
149962306a36Sopenharmony_ci
150062306a36Sopenharmony_ci		for (i = 0; i < clone->nr_ctr_args; i++)
150162306a36Sopenharmony_ci			DMEMIT(" %s", clone->ctr_args[i]);
150262306a36Sopenharmony_ci		break;
150362306a36Sopenharmony_ci
150462306a36Sopenharmony_ci	case STATUSTYPE_IMA:
150562306a36Sopenharmony_ci		*result = '\0';
150662306a36Sopenharmony_ci		break;
150762306a36Sopenharmony_ci	}
150862306a36Sopenharmony_ci
150962306a36Sopenharmony_ci	return;
151062306a36Sopenharmony_ci
151162306a36Sopenharmony_cierror:
151262306a36Sopenharmony_ci	DMEMIT("Error");
151362306a36Sopenharmony_ci}
151462306a36Sopenharmony_ci
151562306a36Sopenharmony_cistatic sector_t get_dev_size(struct dm_dev *dev)
151662306a36Sopenharmony_ci{
151762306a36Sopenharmony_ci	return bdev_nr_sectors(dev->bdev);
151862306a36Sopenharmony_ci}
151962306a36Sopenharmony_ci
152062306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/
152162306a36Sopenharmony_ci
152262306a36Sopenharmony_ci/*
152362306a36Sopenharmony_ci * Construct a clone device mapping:
152462306a36Sopenharmony_ci *
152562306a36Sopenharmony_ci * clone <metadata dev> <destination dev> <source dev> <region size>
152662306a36Sopenharmony_ci *	[<#feature args> [<feature arg>]* [<#core args> [key value]*]]
152762306a36Sopenharmony_ci *
152862306a36Sopenharmony_ci * metadata dev: Fast device holding the persistent metadata
152962306a36Sopenharmony_ci * destination dev: The destination device, which will become a clone of the
153062306a36Sopenharmony_ci *                  source device
153162306a36Sopenharmony_ci * source dev: The read-only source device that gets cloned
153262306a36Sopenharmony_ci * region size: dm-clone unit size in sectors
153362306a36Sopenharmony_ci *
153462306a36Sopenharmony_ci * #feature args: Number of feature arguments passed
153562306a36Sopenharmony_ci * feature args: E.g. no_hydration, no_discard_passdown
153662306a36Sopenharmony_ci *
153762306a36Sopenharmony_ci * #core arguments: An even number of core arguments
153862306a36Sopenharmony_ci * core arguments: Key/value pairs for tuning the core
153962306a36Sopenharmony_ci *		   E.g. 'hydration_threshold 256'
154062306a36Sopenharmony_ci */
154162306a36Sopenharmony_cistatic int parse_feature_args(struct dm_arg_set *as, struct clone *clone)
154262306a36Sopenharmony_ci{
154362306a36Sopenharmony_ci	int r;
154462306a36Sopenharmony_ci	unsigned int argc;
154562306a36Sopenharmony_ci	const char *arg_name;
154662306a36Sopenharmony_ci	struct dm_target *ti = clone->ti;
154762306a36Sopenharmony_ci
154862306a36Sopenharmony_ci	const struct dm_arg args = {
154962306a36Sopenharmony_ci		.min = 0,
155062306a36Sopenharmony_ci		.max = 2,
155162306a36Sopenharmony_ci		.error = "Invalid number of feature arguments"
155262306a36Sopenharmony_ci	};
155362306a36Sopenharmony_ci
155462306a36Sopenharmony_ci	/* No feature arguments supplied */
155562306a36Sopenharmony_ci	if (!as->argc)
155662306a36Sopenharmony_ci		return 0;
155762306a36Sopenharmony_ci
155862306a36Sopenharmony_ci	r = dm_read_arg_group(&args, as, &argc, &ti->error);
155962306a36Sopenharmony_ci	if (r)
156062306a36Sopenharmony_ci		return r;
156162306a36Sopenharmony_ci
156262306a36Sopenharmony_ci	while (argc) {
156362306a36Sopenharmony_ci		arg_name = dm_shift_arg(as);
156462306a36Sopenharmony_ci		argc--;
156562306a36Sopenharmony_ci
156662306a36Sopenharmony_ci		if (!strcasecmp(arg_name, "no_hydration")) {
156762306a36Sopenharmony_ci			__clear_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags);
156862306a36Sopenharmony_ci		} else if (!strcasecmp(arg_name, "no_discard_passdown")) {
156962306a36Sopenharmony_ci			__clear_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags);
157062306a36Sopenharmony_ci		} else {
157162306a36Sopenharmony_ci			ti->error = "Invalid feature argument";
157262306a36Sopenharmony_ci			return -EINVAL;
157362306a36Sopenharmony_ci		}
157462306a36Sopenharmony_ci	}
157562306a36Sopenharmony_ci
157662306a36Sopenharmony_ci	return 0;
157762306a36Sopenharmony_ci}
157862306a36Sopenharmony_ci
157962306a36Sopenharmony_cistatic int parse_core_args(struct dm_arg_set *as, struct clone *clone)
158062306a36Sopenharmony_ci{
158162306a36Sopenharmony_ci	int r;
158262306a36Sopenharmony_ci	unsigned int argc;
158362306a36Sopenharmony_ci	unsigned int value;
158462306a36Sopenharmony_ci	const char *arg_name;
158562306a36Sopenharmony_ci	struct dm_target *ti = clone->ti;
158662306a36Sopenharmony_ci
158762306a36Sopenharmony_ci	const struct dm_arg args = {
158862306a36Sopenharmony_ci		.min = 0,
158962306a36Sopenharmony_ci		.max = 4,
159062306a36Sopenharmony_ci		.error = "Invalid number of core arguments"
159162306a36Sopenharmony_ci	};
159262306a36Sopenharmony_ci
159362306a36Sopenharmony_ci	/* Initialize core arguments */
159462306a36Sopenharmony_ci	clone->hydration_batch_size = DEFAULT_HYDRATION_BATCH_SIZE;
159562306a36Sopenharmony_ci	clone->hydration_threshold = DEFAULT_HYDRATION_THRESHOLD;
159662306a36Sopenharmony_ci
159762306a36Sopenharmony_ci	/* No core arguments supplied */
159862306a36Sopenharmony_ci	if (!as->argc)
159962306a36Sopenharmony_ci		return 0;
160062306a36Sopenharmony_ci
160162306a36Sopenharmony_ci	r = dm_read_arg_group(&args, as, &argc, &ti->error);
160262306a36Sopenharmony_ci	if (r)
160362306a36Sopenharmony_ci		return r;
160462306a36Sopenharmony_ci
160562306a36Sopenharmony_ci	if (argc & 1) {
160662306a36Sopenharmony_ci		ti->error = "Number of core arguments must be even";
160762306a36Sopenharmony_ci		return -EINVAL;
160862306a36Sopenharmony_ci	}
160962306a36Sopenharmony_ci
161062306a36Sopenharmony_ci	while (argc) {
161162306a36Sopenharmony_ci		arg_name = dm_shift_arg(as);
161262306a36Sopenharmony_ci		argc -= 2;
161362306a36Sopenharmony_ci
161462306a36Sopenharmony_ci		if (!strcasecmp(arg_name, "hydration_threshold")) {
161562306a36Sopenharmony_ci			if (kstrtouint(dm_shift_arg(as), 10, &value)) {
161662306a36Sopenharmony_ci				ti->error = "Invalid value for argument `hydration_threshold'";
161762306a36Sopenharmony_ci				return -EINVAL;
161862306a36Sopenharmony_ci			}
161962306a36Sopenharmony_ci			clone->hydration_threshold = value;
162062306a36Sopenharmony_ci		} else if (!strcasecmp(arg_name, "hydration_batch_size")) {
162162306a36Sopenharmony_ci			if (kstrtouint(dm_shift_arg(as), 10, &value)) {
162262306a36Sopenharmony_ci				ti->error = "Invalid value for argument `hydration_batch_size'";
162362306a36Sopenharmony_ci				return -EINVAL;
162462306a36Sopenharmony_ci			}
162562306a36Sopenharmony_ci			clone->hydration_batch_size = value;
162662306a36Sopenharmony_ci		} else {
162762306a36Sopenharmony_ci			ti->error = "Invalid core argument";
162862306a36Sopenharmony_ci			return -EINVAL;
162962306a36Sopenharmony_ci		}
163062306a36Sopenharmony_ci	}
163162306a36Sopenharmony_ci
163262306a36Sopenharmony_ci	return 0;
163362306a36Sopenharmony_ci}
163462306a36Sopenharmony_ci
163562306a36Sopenharmony_cistatic int parse_region_size(struct clone *clone, struct dm_arg_set *as, char **error)
163662306a36Sopenharmony_ci{
163762306a36Sopenharmony_ci	int r;
163862306a36Sopenharmony_ci	unsigned int region_size;
163962306a36Sopenharmony_ci	struct dm_arg arg;
164062306a36Sopenharmony_ci
164162306a36Sopenharmony_ci	arg.min = MIN_REGION_SIZE;
164262306a36Sopenharmony_ci	arg.max = MAX_REGION_SIZE;
164362306a36Sopenharmony_ci	arg.error = "Invalid region size";
164462306a36Sopenharmony_ci
164562306a36Sopenharmony_ci	r = dm_read_arg(&arg, as, &region_size, error);
164662306a36Sopenharmony_ci	if (r)
164762306a36Sopenharmony_ci		return r;
164862306a36Sopenharmony_ci
164962306a36Sopenharmony_ci	/* Check region size is a power of 2 */
165062306a36Sopenharmony_ci	if (!is_power_of_2(region_size)) {
165162306a36Sopenharmony_ci		*error = "Region size is not a power of 2";
165262306a36Sopenharmony_ci		return -EINVAL;
165362306a36Sopenharmony_ci	}
165462306a36Sopenharmony_ci
165562306a36Sopenharmony_ci	/* Validate the region size against the device logical block size */
165662306a36Sopenharmony_ci	if (region_size % (bdev_logical_block_size(clone->source_dev->bdev) >> 9) ||
165762306a36Sopenharmony_ci	    region_size % (bdev_logical_block_size(clone->dest_dev->bdev) >> 9)) {
165862306a36Sopenharmony_ci		*error = "Region size is not a multiple of device logical block size";
165962306a36Sopenharmony_ci		return -EINVAL;
166062306a36Sopenharmony_ci	}
166162306a36Sopenharmony_ci
166262306a36Sopenharmony_ci	clone->region_size = region_size;
166362306a36Sopenharmony_ci
166462306a36Sopenharmony_ci	return 0;
166562306a36Sopenharmony_ci}
166662306a36Sopenharmony_ci
/*
 * Check that @n regions can be handled.
 *
 * dm_bitset caps the count at 2^32 regions, and test_bit & co. lower that
 * cap to 2^31.
 *
 * Returns 0 when @n is acceptable; otherwise sets *error and returns -EINVAL.
 */
static int validate_nr_regions(unsigned long n, char **error)
{
	const unsigned long max_regions = 1UL << 31;

	if (n <= max_regions)
		return 0;

	*error = "Too many regions. Consider increasing the region size";
	return -EINVAL;
}
168062306a36Sopenharmony_ci
168162306a36Sopenharmony_cistatic int parse_metadata_dev(struct clone *clone, struct dm_arg_set *as, char **error)
168262306a36Sopenharmony_ci{
168362306a36Sopenharmony_ci	int r;
168462306a36Sopenharmony_ci	sector_t metadata_dev_size;
168562306a36Sopenharmony_ci
168662306a36Sopenharmony_ci	r = dm_get_device(clone->ti, dm_shift_arg(as),
168762306a36Sopenharmony_ci			  BLK_OPEN_READ | BLK_OPEN_WRITE, &clone->metadata_dev);
168862306a36Sopenharmony_ci	if (r) {
168962306a36Sopenharmony_ci		*error = "Error opening metadata device";
169062306a36Sopenharmony_ci		return r;
169162306a36Sopenharmony_ci	}
169262306a36Sopenharmony_ci
169362306a36Sopenharmony_ci	metadata_dev_size = get_dev_size(clone->metadata_dev);
169462306a36Sopenharmony_ci	if (metadata_dev_size > DM_CLONE_METADATA_MAX_SECTORS_WARNING)
169562306a36Sopenharmony_ci		DMWARN("Metadata device %pg is larger than %u sectors: excess space will not be used.",
169662306a36Sopenharmony_ci		       clone->metadata_dev->bdev, DM_CLONE_METADATA_MAX_SECTORS);
169762306a36Sopenharmony_ci
169862306a36Sopenharmony_ci	return 0;
169962306a36Sopenharmony_ci}
170062306a36Sopenharmony_ci
170162306a36Sopenharmony_cistatic int parse_dest_dev(struct clone *clone, struct dm_arg_set *as, char **error)
170262306a36Sopenharmony_ci{
170362306a36Sopenharmony_ci	int r;
170462306a36Sopenharmony_ci	sector_t dest_dev_size;
170562306a36Sopenharmony_ci
170662306a36Sopenharmony_ci	r = dm_get_device(clone->ti, dm_shift_arg(as),
170762306a36Sopenharmony_ci			  BLK_OPEN_READ | BLK_OPEN_WRITE, &clone->dest_dev);
170862306a36Sopenharmony_ci	if (r) {
170962306a36Sopenharmony_ci		*error = "Error opening destination device";
171062306a36Sopenharmony_ci		return r;
171162306a36Sopenharmony_ci	}
171262306a36Sopenharmony_ci
171362306a36Sopenharmony_ci	dest_dev_size = get_dev_size(clone->dest_dev);
171462306a36Sopenharmony_ci	if (dest_dev_size < clone->ti->len) {
171562306a36Sopenharmony_ci		dm_put_device(clone->ti, clone->dest_dev);
171662306a36Sopenharmony_ci		*error = "Device size larger than destination device";
171762306a36Sopenharmony_ci		return -EINVAL;
171862306a36Sopenharmony_ci	}
171962306a36Sopenharmony_ci
172062306a36Sopenharmony_ci	return 0;
172162306a36Sopenharmony_ci}
172262306a36Sopenharmony_ci
172362306a36Sopenharmony_cistatic int parse_source_dev(struct clone *clone, struct dm_arg_set *as, char **error)
172462306a36Sopenharmony_ci{
172562306a36Sopenharmony_ci	int r;
172662306a36Sopenharmony_ci	sector_t source_dev_size;
172762306a36Sopenharmony_ci
172862306a36Sopenharmony_ci	r = dm_get_device(clone->ti, dm_shift_arg(as), BLK_OPEN_READ,
172962306a36Sopenharmony_ci			  &clone->source_dev);
173062306a36Sopenharmony_ci	if (r) {
173162306a36Sopenharmony_ci		*error = "Error opening source device";
173262306a36Sopenharmony_ci		return r;
173362306a36Sopenharmony_ci	}
173462306a36Sopenharmony_ci
173562306a36Sopenharmony_ci	source_dev_size = get_dev_size(clone->source_dev);
173662306a36Sopenharmony_ci	if (source_dev_size < clone->ti->len) {
173762306a36Sopenharmony_ci		dm_put_device(clone->ti, clone->source_dev);
173862306a36Sopenharmony_ci		*error = "Device size larger than source device";
173962306a36Sopenharmony_ci		return -EINVAL;
174062306a36Sopenharmony_ci	}
174162306a36Sopenharmony_ci
174262306a36Sopenharmony_ci	return 0;
174362306a36Sopenharmony_ci}
174462306a36Sopenharmony_ci
174562306a36Sopenharmony_cistatic int copy_ctr_args(struct clone *clone, int argc, const char **argv, char **error)
174662306a36Sopenharmony_ci{
174762306a36Sopenharmony_ci	unsigned int i;
174862306a36Sopenharmony_ci	const char **copy;
174962306a36Sopenharmony_ci
175062306a36Sopenharmony_ci	copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
175162306a36Sopenharmony_ci	if (!copy)
175262306a36Sopenharmony_ci		goto error;
175362306a36Sopenharmony_ci
175462306a36Sopenharmony_ci	for (i = 0; i < argc; i++) {
175562306a36Sopenharmony_ci		copy[i] = kstrdup(argv[i], GFP_KERNEL);
175662306a36Sopenharmony_ci
175762306a36Sopenharmony_ci		if (!copy[i]) {
175862306a36Sopenharmony_ci			while (i--)
175962306a36Sopenharmony_ci				kfree(copy[i]);
176062306a36Sopenharmony_ci			kfree(copy);
176162306a36Sopenharmony_ci			goto error;
176262306a36Sopenharmony_ci		}
176362306a36Sopenharmony_ci	}
176462306a36Sopenharmony_ci
176562306a36Sopenharmony_ci	clone->nr_ctr_args = argc;
176662306a36Sopenharmony_ci	clone->ctr_args = copy;
176762306a36Sopenharmony_ci	return 0;
176862306a36Sopenharmony_ci
176962306a36Sopenharmony_cierror:
177062306a36Sopenharmony_ci	*error = "Failed to allocate memory for table line";
177162306a36Sopenharmony_ci	return -ENOMEM;
177262306a36Sopenharmony_ci}
177362306a36Sopenharmony_ci
177462306a36Sopenharmony_cistatic int clone_ctr(struct dm_target *ti, unsigned int argc, char **argv)
177562306a36Sopenharmony_ci{
177662306a36Sopenharmony_ci	int r;
177762306a36Sopenharmony_ci	sector_t nr_regions;
177862306a36Sopenharmony_ci	struct clone *clone;
177962306a36Sopenharmony_ci	struct dm_arg_set as;
178062306a36Sopenharmony_ci
178162306a36Sopenharmony_ci	if (argc < 4) {
178262306a36Sopenharmony_ci		ti->error = "Invalid number of arguments";
178362306a36Sopenharmony_ci		return -EINVAL;
178462306a36Sopenharmony_ci	}
178562306a36Sopenharmony_ci
178662306a36Sopenharmony_ci	as.argc = argc;
178762306a36Sopenharmony_ci	as.argv = argv;
178862306a36Sopenharmony_ci
178962306a36Sopenharmony_ci	clone = kzalloc(sizeof(*clone), GFP_KERNEL);
179062306a36Sopenharmony_ci	if (!clone) {
179162306a36Sopenharmony_ci		ti->error = "Failed to allocate clone structure";
179262306a36Sopenharmony_ci		return -ENOMEM;
179362306a36Sopenharmony_ci	}
179462306a36Sopenharmony_ci
179562306a36Sopenharmony_ci	clone->ti = ti;
179662306a36Sopenharmony_ci
179762306a36Sopenharmony_ci	/* Initialize dm-clone flags */
179862306a36Sopenharmony_ci	__set_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags);
179962306a36Sopenharmony_ci	__set_bit(DM_CLONE_HYDRATION_SUSPENDED, &clone->flags);
180062306a36Sopenharmony_ci	__set_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags);
180162306a36Sopenharmony_ci
180262306a36Sopenharmony_ci	r = parse_metadata_dev(clone, &as, &ti->error);
180362306a36Sopenharmony_ci	if (r)
180462306a36Sopenharmony_ci		goto out_with_clone;
180562306a36Sopenharmony_ci
180662306a36Sopenharmony_ci	r = parse_dest_dev(clone, &as, &ti->error);
180762306a36Sopenharmony_ci	if (r)
180862306a36Sopenharmony_ci		goto out_with_meta_dev;
180962306a36Sopenharmony_ci
181062306a36Sopenharmony_ci	r = parse_source_dev(clone, &as, &ti->error);
181162306a36Sopenharmony_ci	if (r)
181262306a36Sopenharmony_ci		goto out_with_dest_dev;
181362306a36Sopenharmony_ci
181462306a36Sopenharmony_ci	r = parse_region_size(clone, &as, &ti->error);
181562306a36Sopenharmony_ci	if (r)
181662306a36Sopenharmony_ci		goto out_with_source_dev;
181762306a36Sopenharmony_ci
181862306a36Sopenharmony_ci	clone->region_shift = __ffs(clone->region_size);
181962306a36Sopenharmony_ci	nr_regions = dm_sector_div_up(ti->len, clone->region_size);
182062306a36Sopenharmony_ci
182162306a36Sopenharmony_ci	/* Check for overflow */
182262306a36Sopenharmony_ci	if (nr_regions != (unsigned long)nr_regions) {
182362306a36Sopenharmony_ci		ti->error = "Too many regions. Consider increasing the region size";
182462306a36Sopenharmony_ci		r = -EOVERFLOW;
182562306a36Sopenharmony_ci		goto out_with_source_dev;
182662306a36Sopenharmony_ci	}
182762306a36Sopenharmony_ci
182862306a36Sopenharmony_ci	clone->nr_regions = nr_regions;
182962306a36Sopenharmony_ci
183062306a36Sopenharmony_ci	r = validate_nr_regions(clone->nr_regions, &ti->error);
183162306a36Sopenharmony_ci	if (r)
183262306a36Sopenharmony_ci		goto out_with_source_dev;
183362306a36Sopenharmony_ci
183462306a36Sopenharmony_ci	r = dm_set_target_max_io_len(ti, clone->region_size);
183562306a36Sopenharmony_ci	if (r) {
183662306a36Sopenharmony_ci		ti->error = "Failed to set max io len";
183762306a36Sopenharmony_ci		goto out_with_source_dev;
183862306a36Sopenharmony_ci	}
183962306a36Sopenharmony_ci
184062306a36Sopenharmony_ci	r = parse_feature_args(&as, clone);
184162306a36Sopenharmony_ci	if (r)
184262306a36Sopenharmony_ci		goto out_with_source_dev;
184362306a36Sopenharmony_ci
184462306a36Sopenharmony_ci	r = parse_core_args(&as, clone);
184562306a36Sopenharmony_ci	if (r)
184662306a36Sopenharmony_ci		goto out_with_source_dev;
184762306a36Sopenharmony_ci
184862306a36Sopenharmony_ci	/* Load metadata */
184962306a36Sopenharmony_ci	clone->cmd = dm_clone_metadata_open(clone->metadata_dev->bdev, ti->len,
185062306a36Sopenharmony_ci					    clone->region_size);
185162306a36Sopenharmony_ci	if (IS_ERR(clone->cmd)) {
185262306a36Sopenharmony_ci		ti->error = "Failed to load metadata";
185362306a36Sopenharmony_ci		r = PTR_ERR(clone->cmd);
185462306a36Sopenharmony_ci		goto out_with_source_dev;
185562306a36Sopenharmony_ci	}
185662306a36Sopenharmony_ci
185762306a36Sopenharmony_ci	__set_clone_mode(clone, CM_WRITE);
185862306a36Sopenharmony_ci
185962306a36Sopenharmony_ci	if (get_clone_mode(clone) != CM_WRITE) {
186062306a36Sopenharmony_ci		ti->error = "Unable to get write access to metadata, please check/repair metadata";
186162306a36Sopenharmony_ci		r = -EPERM;
186262306a36Sopenharmony_ci		goto out_with_metadata;
186362306a36Sopenharmony_ci	}
186462306a36Sopenharmony_ci
186562306a36Sopenharmony_ci	clone->last_commit_jiffies = jiffies;
186662306a36Sopenharmony_ci
186762306a36Sopenharmony_ci	/* Allocate hydration hash table */
186862306a36Sopenharmony_ci	r = hash_table_init(clone);
186962306a36Sopenharmony_ci	if (r) {
187062306a36Sopenharmony_ci		ti->error = "Failed to allocate hydration hash table";
187162306a36Sopenharmony_ci		goto out_with_metadata;
187262306a36Sopenharmony_ci	}
187362306a36Sopenharmony_ci
187462306a36Sopenharmony_ci	atomic_set(&clone->ios_in_flight, 0);
187562306a36Sopenharmony_ci	init_waitqueue_head(&clone->hydration_stopped);
187662306a36Sopenharmony_ci	spin_lock_init(&clone->lock);
187762306a36Sopenharmony_ci	bio_list_init(&clone->deferred_bios);
187862306a36Sopenharmony_ci	bio_list_init(&clone->deferred_discard_bios);
187962306a36Sopenharmony_ci	bio_list_init(&clone->deferred_flush_bios);
188062306a36Sopenharmony_ci	bio_list_init(&clone->deferred_flush_completions);
188162306a36Sopenharmony_ci	clone->hydration_offset = 0;
188262306a36Sopenharmony_ci	atomic_set(&clone->hydrations_in_flight, 0);
188362306a36Sopenharmony_ci
188462306a36Sopenharmony_ci	clone->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0);
188562306a36Sopenharmony_ci	if (!clone->wq) {
188662306a36Sopenharmony_ci		ti->error = "Failed to allocate workqueue";
188762306a36Sopenharmony_ci		r = -ENOMEM;
188862306a36Sopenharmony_ci		goto out_with_ht;
188962306a36Sopenharmony_ci	}
189062306a36Sopenharmony_ci
189162306a36Sopenharmony_ci	INIT_WORK(&clone->worker, do_worker);
189262306a36Sopenharmony_ci	INIT_DELAYED_WORK(&clone->waker, do_waker);
189362306a36Sopenharmony_ci
189462306a36Sopenharmony_ci	clone->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
189562306a36Sopenharmony_ci	if (IS_ERR(clone->kcopyd_client)) {
189662306a36Sopenharmony_ci		r = PTR_ERR(clone->kcopyd_client);
189762306a36Sopenharmony_ci		goto out_with_wq;
189862306a36Sopenharmony_ci	}
189962306a36Sopenharmony_ci
190062306a36Sopenharmony_ci	r = mempool_init_slab_pool(&clone->hydration_pool, MIN_HYDRATIONS,
190162306a36Sopenharmony_ci				   _hydration_cache);
190262306a36Sopenharmony_ci	if (r) {
190362306a36Sopenharmony_ci		ti->error = "Failed to create dm_clone_region_hydration memory pool";
190462306a36Sopenharmony_ci		goto out_with_kcopyd;
190562306a36Sopenharmony_ci	}
190662306a36Sopenharmony_ci
190762306a36Sopenharmony_ci	/* Save a copy of the table line */
190862306a36Sopenharmony_ci	r = copy_ctr_args(clone, argc - 3, (const char **)argv + 3, &ti->error);
190962306a36Sopenharmony_ci	if (r)
191062306a36Sopenharmony_ci		goto out_with_mempool;
191162306a36Sopenharmony_ci
191262306a36Sopenharmony_ci	mutex_init(&clone->commit_lock);
191362306a36Sopenharmony_ci
191462306a36Sopenharmony_ci	/* Enable flushes */
191562306a36Sopenharmony_ci	ti->num_flush_bios = 1;
191662306a36Sopenharmony_ci	ti->flush_supported = true;
191762306a36Sopenharmony_ci
191862306a36Sopenharmony_ci	/* Enable discards */
191962306a36Sopenharmony_ci	ti->discards_supported = true;
192062306a36Sopenharmony_ci	ti->num_discard_bios = 1;
192162306a36Sopenharmony_ci
192262306a36Sopenharmony_ci	ti->private = clone;
192362306a36Sopenharmony_ci
192462306a36Sopenharmony_ci	return 0;
192562306a36Sopenharmony_ci
192662306a36Sopenharmony_ciout_with_mempool:
192762306a36Sopenharmony_ci	mempool_exit(&clone->hydration_pool);
192862306a36Sopenharmony_ciout_with_kcopyd:
192962306a36Sopenharmony_ci	dm_kcopyd_client_destroy(clone->kcopyd_client);
193062306a36Sopenharmony_ciout_with_wq:
193162306a36Sopenharmony_ci	destroy_workqueue(clone->wq);
193262306a36Sopenharmony_ciout_with_ht:
193362306a36Sopenharmony_ci	hash_table_exit(clone);
193462306a36Sopenharmony_ciout_with_metadata:
193562306a36Sopenharmony_ci	dm_clone_metadata_close(clone->cmd);
193662306a36Sopenharmony_ciout_with_source_dev:
193762306a36Sopenharmony_ci	dm_put_device(ti, clone->source_dev);
193862306a36Sopenharmony_ciout_with_dest_dev:
193962306a36Sopenharmony_ci	dm_put_device(ti, clone->dest_dev);
194062306a36Sopenharmony_ciout_with_meta_dev:
194162306a36Sopenharmony_ci	dm_put_device(ti, clone->metadata_dev);
194262306a36Sopenharmony_ciout_with_clone:
194362306a36Sopenharmony_ci	kfree(clone);
194462306a36Sopenharmony_ci
194562306a36Sopenharmony_ci	return r;
194662306a36Sopenharmony_ci}
194762306a36Sopenharmony_ci
194862306a36Sopenharmony_cistatic void clone_dtr(struct dm_target *ti)
194962306a36Sopenharmony_ci{
195062306a36Sopenharmony_ci	unsigned int i;
195162306a36Sopenharmony_ci	struct clone *clone = ti->private;
195262306a36Sopenharmony_ci
195362306a36Sopenharmony_ci	mutex_destroy(&clone->commit_lock);
195462306a36Sopenharmony_ci
195562306a36Sopenharmony_ci	for (i = 0; i < clone->nr_ctr_args; i++)
195662306a36Sopenharmony_ci		kfree(clone->ctr_args[i]);
195762306a36Sopenharmony_ci	kfree(clone->ctr_args);
195862306a36Sopenharmony_ci
195962306a36Sopenharmony_ci	mempool_exit(&clone->hydration_pool);
196062306a36Sopenharmony_ci	dm_kcopyd_client_destroy(clone->kcopyd_client);
196162306a36Sopenharmony_ci	cancel_delayed_work_sync(&clone->waker);
196262306a36Sopenharmony_ci	destroy_workqueue(clone->wq);
196362306a36Sopenharmony_ci	hash_table_exit(clone);
196462306a36Sopenharmony_ci	dm_clone_metadata_close(clone->cmd);
196562306a36Sopenharmony_ci	dm_put_device(ti, clone->source_dev);
196662306a36Sopenharmony_ci	dm_put_device(ti, clone->dest_dev);
196762306a36Sopenharmony_ci	dm_put_device(ti, clone->metadata_dev);
196862306a36Sopenharmony_ci
196962306a36Sopenharmony_ci	kfree(clone);
197062306a36Sopenharmony_ci}
197162306a36Sopenharmony_ci
197262306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/
197362306a36Sopenharmony_ci
/*
 * Post-suspend hook: quiesce all background activity of the clone target and
 * commit its metadata.
 *
 * The order of the statements below is significant; see the inline comments.
 */
static void clone_postsuspend(struct dm_target *ti)
{
	struct clone *clone = ti->private;

	/*
	 * To successfully suspend the device:
	 *
	 *	- We cancel the delayed work for periodic commits and wait for
	 *	  it to finish.
	 *
	 *	- We stop the background hydration, i.e. we prevent new region
	 *	  hydrations from starting.
	 *
	 *	- We wait for any in-flight hydrations to finish.
	 *
	 *	- We flush the workqueue.
	 *
	 *	- We commit the metadata.
	 */
	cancel_delayed_work_sync(&clone->waker);

	set_bit(DM_CLONE_HYDRATION_SUSPENDED, &clone->flags);

	/*
	 * Make sure set_bit() is ordered before atomic_read(), otherwise we
	 * might race with do_hydration() and miss some started region
	 * hydrations.
	 *
	 * This is paired with smp_mb__after_atomic() in do_hydration().
	 */
	smp_mb__after_atomic();

	/* Sleep until the in-flight hydration counter has dropped to zero. */
	wait_event(clone->hydration_stopped, !atomic_read(&clone->hydrations_in_flight));
	flush_workqueue(clone->wq);

	/*
	 * The result of the commit is deliberately discarded: this hook
	 * returns void and has no way to report a failure to its caller.
	 */
	(void) commit_metadata(clone, NULL);
}
201162306a36Sopenharmony_ci
/*
 * Resume hook: undo the suspension done in clone_postsuspend().
 *
 * Clears DM_CLONE_HYDRATION_SUSPENDED first, then invokes the waker's work
 * function directly (rather than queueing it).
 */
static void clone_resume(struct dm_target *ti)
{
	struct clone *clone = ti->private;

	clear_bit(DM_CLONE_HYDRATION_SUSPENDED, &clone->flags);
	/* do_waker() is defined elsewhere; presumably it also re-arms the periodic waker - confirm. */
	do_waker(&clone->waker.work);
}
201962306a36Sopenharmony_ci
202062306a36Sopenharmony_ci/*
202162306a36Sopenharmony_ci * If discard_passdown was enabled verify that the destination device supports
202262306a36Sopenharmony_ci * discards. Disable discard_passdown if not.
202362306a36Sopenharmony_ci */
202462306a36Sopenharmony_cistatic void disable_passdown_if_not_supported(struct clone *clone)
202562306a36Sopenharmony_ci{
202662306a36Sopenharmony_ci	struct block_device *dest_dev = clone->dest_dev->bdev;
202762306a36Sopenharmony_ci	struct queue_limits *dest_limits = &bdev_get_queue(dest_dev)->limits;
202862306a36Sopenharmony_ci	const char *reason = NULL;
202962306a36Sopenharmony_ci
203062306a36Sopenharmony_ci	if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags))
203162306a36Sopenharmony_ci		return;
203262306a36Sopenharmony_ci
203362306a36Sopenharmony_ci	if (!bdev_max_discard_sectors(dest_dev))
203462306a36Sopenharmony_ci		reason = "discard unsupported";
203562306a36Sopenharmony_ci	else if (dest_limits->max_discard_sectors < clone->region_size)
203662306a36Sopenharmony_ci		reason = "max discard sectors smaller than a region";
203762306a36Sopenharmony_ci
203862306a36Sopenharmony_ci	if (reason) {
203962306a36Sopenharmony_ci		DMWARN("Destination device (%pg) %s: Disabling discard passdown.",
204062306a36Sopenharmony_ci		       dest_dev, reason);
204162306a36Sopenharmony_ci		clear_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags);
204262306a36Sopenharmony_ci	}
204362306a36Sopenharmony_ci}
204462306a36Sopenharmony_ci
204562306a36Sopenharmony_cistatic void set_discard_limits(struct clone *clone, struct queue_limits *limits)
204662306a36Sopenharmony_ci{
204762306a36Sopenharmony_ci	struct block_device *dest_bdev = clone->dest_dev->bdev;
204862306a36Sopenharmony_ci	struct queue_limits *dest_limits = &bdev_get_queue(dest_bdev)->limits;
204962306a36Sopenharmony_ci
205062306a36Sopenharmony_ci	if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags)) {
205162306a36Sopenharmony_ci		/* No passdown is done so we set our own virtual limits */
205262306a36Sopenharmony_ci		limits->discard_granularity = clone->region_size << SECTOR_SHIFT;
205362306a36Sopenharmony_ci		limits->max_discard_sectors = round_down(UINT_MAX >> SECTOR_SHIFT, clone->region_size);
205462306a36Sopenharmony_ci		return;
205562306a36Sopenharmony_ci	}
205662306a36Sopenharmony_ci
205762306a36Sopenharmony_ci	/*
205862306a36Sopenharmony_ci	 * clone_iterate_devices() is stacking both the source and destination
205962306a36Sopenharmony_ci	 * device limits but discards aren't passed to the source device, so
206062306a36Sopenharmony_ci	 * inherit destination's limits.
206162306a36Sopenharmony_ci	 */
206262306a36Sopenharmony_ci	limits->max_discard_sectors = dest_limits->max_discard_sectors;
206362306a36Sopenharmony_ci	limits->max_hw_discard_sectors = dest_limits->max_hw_discard_sectors;
206462306a36Sopenharmony_ci	limits->discard_granularity = dest_limits->discard_granularity;
206562306a36Sopenharmony_ci	limits->discard_alignment = dest_limits->discard_alignment;
206662306a36Sopenharmony_ci	limits->discard_misaligned = dest_limits->discard_misaligned;
206762306a36Sopenharmony_ci	limits->max_discard_segments = dest_limits->max_discard_segments;
206862306a36Sopenharmony_ci}
206962306a36Sopenharmony_ci
207062306a36Sopenharmony_cistatic void clone_io_hints(struct dm_target *ti, struct queue_limits *limits)
207162306a36Sopenharmony_ci{
207262306a36Sopenharmony_ci	struct clone *clone = ti->private;
207362306a36Sopenharmony_ci	u64 io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
207462306a36Sopenharmony_ci
207562306a36Sopenharmony_ci	/*
207662306a36Sopenharmony_ci	 * If the system-determined stacked limits are compatible with
207762306a36Sopenharmony_ci	 * dm-clone's region size (io_opt is a factor) do not override them.
207862306a36Sopenharmony_ci	 */
207962306a36Sopenharmony_ci	if (io_opt_sectors < clone->region_size ||
208062306a36Sopenharmony_ci	    do_div(io_opt_sectors, clone->region_size)) {
208162306a36Sopenharmony_ci		blk_limits_io_min(limits, clone->region_size << SECTOR_SHIFT);
208262306a36Sopenharmony_ci		blk_limits_io_opt(limits, clone->region_size << SECTOR_SHIFT);
208362306a36Sopenharmony_ci	}
208462306a36Sopenharmony_ci
208562306a36Sopenharmony_ci	disable_passdown_if_not_supported(clone);
208662306a36Sopenharmony_ci	set_discard_limits(clone, limits);
208762306a36Sopenharmony_ci}
208862306a36Sopenharmony_ci
208962306a36Sopenharmony_cistatic int clone_iterate_devices(struct dm_target *ti,
209062306a36Sopenharmony_ci				 iterate_devices_callout_fn fn, void *data)
209162306a36Sopenharmony_ci{
209262306a36Sopenharmony_ci	int ret;
209362306a36Sopenharmony_ci	struct clone *clone = ti->private;
209462306a36Sopenharmony_ci	struct dm_dev *dest_dev = clone->dest_dev;
209562306a36Sopenharmony_ci	struct dm_dev *source_dev = clone->source_dev;
209662306a36Sopenharmony_ci
209762306a36Sopenharmony_ci	ret = fn(ti, source_dev, 0, ti->len, data);
209862306a36Sopenharmony_ci	if (!ret)
209962306a36Sopenharmony_ci		ret = fn(ti, dest_dev, 0, ti->len, data);
210062306a36Sopenharmony_ci	return ret;
210162306a36Sopenharmony_ci}
210262306a36Sopenharmony_ci
210362306a36Sopenharmony_ci/*
210462306a36Sopenharmony_ci * dm-clone message functions.
210562306a36Sopenharmony_ci */
/*
 * Store @nr_regions as the new hydration threshold and wake up the worker.
 * The store happens before the wake-up.
 */
static void set_hydration_threshold(struct clone *clone, unsigned int nr_regions)
{
	WRITE_ONCE(clone->hydration_threshold, nr_regions);

	/*
	 * If user space sets hydration_threshold to zero then the hydration
	 * will stop. If at a later time the hydration_threshold is increased
	 * we must restart the hydration process by waking up the worker.
	 */
	wake_worker(clone);
}
211762306a36Sopenharmony_ci
/*
 * Store @nr_regions as the new hydration batch size.
 *
 * Unlike set_hydration_threshold(), the worker is not woken up here.
 * NOTE(review): WRITE_ONCE() suggests lockless readers elsewhere in the
 * file - confirm against the code that reads hydration_batch_size.
 */
static void set_hydration_batch_size(struct clone *clone, unsigned int nr_regions)
{
	WRITE_ONCE(clone->hydration_batch_size, nr_regions);
}
212262306a36Sopenharmony_ci
212362306a36Sopenharmony_cistatic void enable_hydration(struct clone *clone)
212462306a36Sopenharmony_ci{
212562306a36Sopenharmony_ci	if (!test_and_set_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags))
212662306a36Sopenharmony_ci		wake_worker(clone);
212762306a36Sopenharmony_ci}
212862306a36Sopenharmony_ci
/*
 * Clear DM_CLONE_HYDRATION_ENABLED. The worker is not woken up; the cleared
 * flag is the only effect of this function.
 */
static void disable_hydration(struct clone *clone)
{
	clear_bit(DM_CLONE_HYDRATION_ENABLED, &clone->flags);
}
213362306a36Sopenharmony_ci
213462306a36Sopenharmony_cistatic int clone_message(struct dm_target *ti, unsigned int argc, char **argv,
213562306a36Sopenharmony_ci			 char *result, unsigned int maxlen)
213662306a36Sopenharmony_ci{
213762306a36Sopenharmony_ci	struct clone *clone = ti->private;
213862306a36Sopenharmony_ci	unsigned int value;
213962306a36Sopenharmony_ci
214062306a36Sopenharmony_ci	if (!argc)
214162306a36Sopenharmony_ci		return -EINVAL;
214262306a36Sopenharmony_ci
214362306a36Sopenharmony_ci	if (!strcasecmp(argv[0], "enable_hydration")) {
214462306a36Sopenharmony_ci		enable_hydration(clone);
214562306a36Sopenharmony_ci		return 0;
214662306a36Sopenharmony_ci	}
214762306a36Sopenharmony_ci
214862306a36Sopenharmony_ci	if (!strcasecmp(argv[0], "disable_hydration")) {
214962306a36Sopenharmony_ci		disable_hydration(clone);
215062306a36Sopenharmony_ci		return 0;
215162306a36Sopenharmony_ci	}
215262306a36Sopenharmony_ci
215362306a36Sopenharmony_ci	if (argc != 2)
215462306a36Sopenharmony_ci		return -EINVAL;
215562306a36Sopenharmony_ci
215662306a36Sopenharmony_ci	if (!strcasecmp(argv[0], "hydration_threshold")) {
215762306a36Sopenharmony_ci		if (kstrtouint(argv[1], 10, &value))
215862306a36Sopenharmony_ci			return -EINVAL;
215962306a36Sopenharmony_ci
216062306a36Sopenharmony_ci		set_hydration_threshold(clone, value);
216162306a36Sopenharmony_ci
216262306a36Sopenharmony_ci		return 0;
216362306a36Sopenharmony_ci	}
216462306a36Sopenharmony_ci
216562306a36Sopenharmony_ci	if (!strcasecmp(argv[0], "hydration_batch_size")) {
216662306a36Sopenharmony_ci		if (kstrtouint(argv[1], 10, &value))
216762306a36Sopenharmony_ci			return -EINVAL;
216862306a36Sopenharmony_ci
216962306a36Sopenharmony_ci		set_hydration_batch_size(clone, value);
217062306a36Sopenharmony_ci
217162306a36Sopenharmony_ci		return 0;
217262306a36Sopenharmony_ci	}
217362306a36Sopenharmony_ci
217462306a36Sopenharmony_ci	DMERR("%s: Unsupported message `%s'", clone_device_name(clone), argv[0]);
217562306a36Sopenharmony_ci	return -EINVAL;
217662306a36Sopenharmony_ci}
217762306a36Sopenharmony_ci
217862306a36Sopenharmony_cistatic struct target_type clone_target = {
217962306a36Sopenharmony_ci	.name = "clone",
218062306a36Sopenharmony_ci	.version = {1, 0, 0},
218162306a36Sopenharmony_ci	.module = THIS_MODULE,
218262306a36Sopenharmony_ci	.ctr = clone_ctr,
218362306a36Sopenharmony_ci	.dtr =  clone_dtr,
218462306a36Sopenharmony_ci	.map = clone_map,
218562306a36Sopenharmony_ci	.end_io = clone_endio,
218662306a36Sopenharmony_ci	.postsuspend = clone_postsuspend,
218762306a36Sopenharmony_ci	.resume = clone_resume,
218862306a36Sopenharmony_ci	.status = clone_status,
218962306a36Sopenharmony_ci	.message = clone_message,
219062306a36Sopenharmony_ci	.io_hints = clone_io_hints,
219162306a36Sopenharmony_ci	.iterate_devices = clone_iterate_devices,
219262306a36Sopenharmony_ci};
219362306a36Sopenharmony_ci
219462306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/
219562306a36Sopenharmony_ci
219662306a36Sopenharmony_ci/* Module functions */
219762306a36Sopenharmony_cistatic int __init dm_clone_init(void)
219862306a36Sopenharmony_ci{
219962306a36Sopenharmony_ci	int r;
220062306a36Sopenharmony_ci
220162306a36Sopenharmony_ci	_hydration_cache = KMEM_CACHE(dm_clone_region_hydration, 0);
220262306a36Sopenharmony_ci	if (!_hydration_cache)
220362306a36Sopenharmony_ci		return -ENOMEM;
220462306a36Sopenharmony_ci
220562306a36Sopenharmony_ci	r = dm_register_target(&clone_target);
220662306a36Sopenharmony_ci	if (r < 0) {
220762306a36Sopenharmony_ci		kmem_cache_destroy(_hydration_cache);
220862306a36Sopenharmony_ci		return r;
220962306a36Sopenharmony_ci	}
221062306a36Sopenharmony_ci
221162306a36Sopenharmony_ci	return 0;
221262306a36Sopenharmony_ci}
221362306a36Sopenharmony_ci
/*
 * Module exit: unregister the clone target, then destroy the hydration slab
 * cache created in dm_clone_init() and reset its pointer.
 */
static void __exit dm_clone_exit(void)
{
	dm_unregister_target(&clone_target);

	kmem_cache_destroy(_hydration_cache);
	_hydration_cache = NULL;
}
222162306a36Sopenharmony_ci
/* Module hooks: entry/exit points followed by the module metadata */
module_init(dm_clone_init);
module_exit(dm_clone_exit);

MODULE_DESCRIPTION(DM_NAME " clone target");
MODULE_AUTHOR("Nikos Tsironis <ntsironis@arrikto.com>");
MODULE_LICENSE("GPL");
2229