xref: /kernel/linux/linux-6.6/drivers/md/dm-snap.c (revision 62306a36)
162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * This file is released under the GPL.
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <linux/blkdev.h>
962306a36Sopenharmony_ci#include <linux/device-mapper.h>
1062306a36Sopenharmony_ci#include <linux/delay.h>
1162306a36Sopenharmony_ci#include <linux/fs.h>
1262306a36Sopenharmony_ci#include <linux/init.h>
1362306a36Sopenharmony_ci#include <linux/kdev_t.h>
1462306a36Sopenharmony_ci#include <linux/list.h>
1562306a36Sopenharmony_ci#include <linux/list_bl.h>
1662306a36Sopenharmony_ci#include <linux/mempool.h>
1762306a36Sopenharmony_ci#include <linux/module.h>
1862306a36Sopenharmony_ci#include <linux/slab.h>
1962306a36Sopenharmony_ci#include <linux/vmalloc.h>
2062306a36Sopenharmony_ci#include <linux/log2.h>
2162306a36Sopenharmony_ci#include <linux/dm-kcopyd.h>
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_ci#include "dm.h"
2462306a36Sopenharmony_ci
2562306a36Sopenharmony_ci#include "dm-exception-store.h"
2662306a36Sopenharmony_ci
2762306a36Sopenharmony_ci#define DM_MSG_PREFIX "snapshots"
2862306a36Sopenharmony_ci
/*
 * Name string of the snapshot-merge target type.  The macro below
 * identifies such a target by the address of this array.
 */
static const char dm_snapshot_merge_target_name[] = "snapshot-merge";

/*
 * True when the name pointer of ti's target type is this file's
 * dm_snapshot_merge_target_name array.  This is a pointer comparison,
 * not a string comparison.
 */
#define dm_target_is_snapshot_merge(ti) \
	((ti)->type->name == dm_snapshot_merge_target_name)
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_ci/*
3562306a36Sopenharmony_ci * The size of the mempool used to track chunks in use.
3662306a36Sopenharmony_ci */
#define MIN_IOS 256

/*
 * Number of buckets in dm_snapshot.tracked_chunk_hash, and the function
 * mapping a chunk number onto a bucket index.  The index is taken from the
 * low bits of the chunk number by masking with (size - 1), so the size has
 * to stay a power of two.
 */
#define DM_TRACKED_CHUNK_HASH_SIZE	16
#define DM_TRACKED_CHUNK_HASH(x)	((unsigned long)(x) & \
					 (DM_TRACKED_CHUNK_HASH_SIZE - 1))
4262306a36Sopenharmony_ci
/*
 * A hash table of exceptions.  Each bucket is an hlist_bl_head, which is
 * locked individually with hlist_bl_lock()/hlist_bl_unlock().
 */
struct dm_exception_table {
	/* Table size minus one; set by dm_exception_table_init() */
	uint32_t hash_mask;
	/* Number of low chunk-number bits ignored when hashing */
	unsigned int hash_shift;
	/* Bucket array holding hash_mask + 1 entries */
	struct hlist_bl_head *table;
};
4862306a36Sopenharmony_ci
/*
 * State of one snapshot.  The same structure is used for snapshot-merge
 * targets; dm_target_is_snapshot_merge(s->ti) tells the two apart.
 */
struct dm_snapshot {
	/* Taken for reading around the access to "active" when scanning an origin's snapshots */
	struct rw_semaphore lock;

	/* The origin device and the COW device of this snapshot */
	struct dm_dev *origin;
	struct dm_dev *cow;

	/* The device-mapper target this snapshot belongs to */
	struct dm_target *ti;

	/*
	 * List of snapshots per Origin.
	 * Linked into struct origin.snapshots, which __insert_snapshot()
	 * keeps sorted by chunk size, largest first.
	 */
	struct list_head list;

	/*
	 * You can't use a snapshot if this is 0 (e.g. if full).
	 * A snapshot-merge target never clears this.
	 */
	int valid;

	/*
	 * The snapshot overflowed because of a write to the snapshot device.
	 * We don't have to invalidate the snapshot in this case, but we need
	 * to prevent further writes.
	 */
	int snapshot_overflowed;

	/* Origin writes don't trigger exceptions until this is set */
	int active;

	atomic_t pending_exceptions_count;

	spinlock_t pe_allocation_lock;

	/* Protected by "pe_allocation_lock" */
	sector_t exception_start_sequence;

	/* Protected by kcopyd single-threaded callback */
	sector_t exception_complete_sequence;

	/*
	 * A list of pending exceptions that completed out of order.
	 * Protected by kcopyd single-threaded callback.
	 */
	struct rb_root out_of_order_tree;

	mempool_t pending_pool;

	/*
	 * Hash tables of pending and of completed exceptions.  The buckets
	 * for a given chunk are locked via struct dm_exception_table_lock.
	 */
	struct dm_exception_table pending;
	struct dm_exception_table complete;

	/*
	 * pe_lock protects all pending_exception operations and access
	 * as well as the snapshot_bios list.
	 */
	spinlock_t pe_lock;

	/*
	 * Chunks with outstanding reads.
	 * Buckets are indexed by DM_TRACKED_CHUNK_HASH(chunk) and protected
	 * by tracked_chunk_lock.
	 */
	spinlock_t tracked_chunk_lock;
	struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];

	/* The on disk metadata handler */
	struct dm_exception_store *store;

	unsigned int in_progress;
	struct wait_queue_head in_progress_wait;

	struct dm_kcopyd_client *kcopyd_client;

	/* Wait for events based on state_bits */
	unsigned long state_bits;

	/* Range of chunks currently being merged. */
	chunk_t first_merging_chunk;
	int num_merging_chunks;

	/*
	 * The merge operation failed if this flag is set.
	 * Failure modes are handled as follows:
	 * - I/O error reading the header
	 *	=> don't load the target; abort.
	 * - Header does not have "valid" flag set
	 *	=> use the origin; forget about the snapshot.
	 * - I/O error when reading exceptions
	 *	=> don't load the target; abort.
	 *         (We can't use the intermediate origin state.)
	 * - I/O error while merging
	 *	=> stop merging; set merge_failed; process I/O normally.
	 */
	bool merge_failed:1;

	bool discard_zeroes_cow:1;
	bool discard_passdown_origin:1;

	/*
	 * Incoming bios that overlap with chunks being merged must wait
	 * for them to be committed.
	 */
	struct bio_list bios_queued_during_merge;
};
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci/*
14862306a36Sopenharmony_ci * state_bits:
14962306a36Sopenharmony_ci *   RUNNING_MERGE  - Merge operation is in progress.
15062306a36Sopenharmony_ci *   SHUTDOWN_MERGE - Set to signal that merge needs to be stopped;
15162306a36Sopenharmony_ci *                    cleared afterwards.
15262306a36Sopenharmony_ci */
/*
 * Values used with dm_snapshot.state_bits.
 * NOTE(review): these look like bit numbers rather than masks -- confirm
 * against the code that sets and tests them.
 */
#define RUNNING_MERGE          0
#define SHUTDOWN_MERGE         1
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci/*
15762306a36Sopenharmony_ci * Maximum number of chunks being copied on write.
15862306a36Sopenharmony_ci *
15962306a36Sopenharmony_ci * The value was decided experimentally as a trade-off between memory
16062306a36Sopenharmony_ci * consumption, stalling the kernel's workqueues and maintaining a high enough
16162306a36Sopenharmony_ci * throughput.
16262306a36Sopenharmony_ci */
#define DEFAULT_COW_THRESHOLD 2048

/*
 * Tunable limit, exposed as module parameter "snapshot_cow_threshold"
 * (unsigned int, permissions 0644) and initialised to
 * DEFAULT_COW_THRESHOLD.
 */
static unsigned int cow_threshold = DEFAULT_COW_THRESHOLD;
module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644);
MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");

/* Declares the kcopyd throttle "snapshot_copy_throttle" together with its module parameter. */
DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
		"A percentage of time allocated for copy on write");
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_cistruct dm_dev *dm_snap_origin(struct dm_snapshot *s)
17362306a36Sopenharmony_ci{
17462306a36Sopenharmony_ci	return s->origin;
17562306a36Sopenharmony_ci}
17662306a36Sopenharmony_ciEXPORT_SYMBOL(dm_snap_origin);
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_cistruct dm_dev *dm_snap_cow(struct dm_snapshot *s)
17962306a36Sopenharmony_ci{
18062306a36Sopenharmony_ci	return s->cow;
18162306a36Sopenharmony_ci}
18262306a36Sopenharmony_ciEXPORT_SYMBOL(dm_snap_cow);
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_cistatic sector_t chunk_to_sector(struct dm_exception_store *store,
18562306a36Sopenharmony_ci				chunk_t chunk)
18662306a36Sopenharmony_ci{
18762306a36Sopenharmony_ci	return chunk << store->chunk_shift;
18862306a36Sopenharmony_ci}
18962306a36Sopenharmony_ci
/*
 * Report whether two block_device pointers refer to the same device.
 * Returns 1 when they do and 0 otherwise.
 */
static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
{
	/*
	 * A particular block device only ever has one instance, so pointer
	 * identity is a safe equality test.
	 */
	if (lhs != rhs)
		return 0;

	return 1;
}
19862306a36Sopenharmony_ci
/*
 * Bookkeeping for one exception that is still in flight.  It embeds the
 * generic struct dm_exception and adds the bios waiting on it, its
 * ordering information and a back pointer to the owning snapshot.
 */
struct dm_snap_pending_exception {
	/* The generic exception this pending exception wraps */
	struct dm_exception e;

	/*
	 * Origin buffers waiting for this to complete are held
	 * in a bio list
	 */
	struct bio_list origin_bios;
	struct bio_list snapshot_bios;

	/* Pointer back to snapshot context */
	struct dm_snapshot *snap;

	/*
	 * 1 indicates the exception has already been sent to
	 * kcopyd.
	 */
	int started;

	/* There was copying error. */
	int copy_error;

	/* A sequence number, it is used for in-order completion. */
	sector_t exception_sequence;

	/* Node for dm_snapshot.out_of_order_tree -- presumably; confirm at the insertion site */
	struct rb_node out_of_order_node;

	/*
	 * For writing a complete chunk, bypassing the copy.
	 */
	struct bio *full_bio;
	bio_end_io_t *full_bio_end_io;
};
23262306a36Sopenharmony_ci
/*
 * Slab caches.  Judging by the names, exception_cache backs completed
 * exceptions and pending_cache backs struct dm_snap_pending_exception --
 * TODO confirm against the kmem_cache_create() calls.
 *
 * (The hash table mapping origin volumes to lists of snapshots, and the
 * lock protecting it, are _origins and _origins_lock further below.)
 */
static struct kmem_cache *exception_cache;
static struct kmem_cache *pending_cache;
23962306a36Sopenharmony_ci
/*
 * Per-bio record (obtained through dm_per_bio_data()) noting which chunk a
 * bio is accessing while it is tracked.
 */
struct dm_snap_tracked_chunk {
	/* Link in dm_snapshot.tracked_chunk_hash; unhashed while the bio is not tracked */
	struct hlist_node node;
	/* Chunk number recorded by track_chunk() */
	chunk_t chunk;
};
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_cistatic void init_tracked_chunk(struct bio *bio)
24662306a36Sopenharmony_ci{
24762306a36Sopenharmony_ci	struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci	INIT_HLIST_NODE(&c->node);
25062306a36Sopenharmony_ci}
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_cistatic bool is_bio_tracked(struct bio *bio)
25362306a36Sopenharmony_ci{
25462306a36Sopenharmony_ci	struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci	return !hlist_unhashed(&c->node);
25762306a36Sopenharmony_ci}
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_cistatic void track_chunk(struct dm_snapshot *s, struct bio *bio, chunk_t chunk)
26062306a36Sopenharmony_ci{
26162306a36Sopenharmony_ci	struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_ci	c->chunk = chunk;
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci	spin_lock_irq(&s->tracked_chunk_lock);
26662306a36Sopenharmony_ci	hlist_add_head(&c->node,
26762306a36Sopenharmony_ci		       &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]);
26862306a36Sopenharmony_ci	spin_unlock_irq(&s->tracked_chunk_lock);
26962306a36Sopenharmony_ci}
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_cistatic void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio)
27262306a36Sopenharmony_ci{
27362306a36Sopenharmony_ci	struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
27462306a36Sopenharmony_ci	unsigned long flags;
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_ci	spin_lock_irqsave(&s->tracked_chunk_lock, flags);
27762306a36Sopenharmony_ci	hlist_del(&c->node);
27862306a36Sopenharmony_ci	spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
27962306a36Sopenharmony_ci}
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_cistatic int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk)
28262306a36Sopenharmony_ci{
28362306a36Sopenharmony_ci	struct dm_snap_tracked_chunk *c;
28462306a36Sopenharmony_ci	int found = 0;
28562306a36Sopenharmony_ci
28662306a36Sopenharmony_ci	spin_lock_irq(&s->tracked_chunk_lock);
28762306a36Sopenharmony_ci
28862306a36Sopenharmony_ci	hlist_for_each_entry(c,
28962306a36Sopenharmony_ci	    &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) {
29062306a36Sopenharmony_ci		if (c->chunk == chunk) {
29162306a36Sopenharmony_ci			found = 1;
29262306a36Sopenharmony_ci			break;
29362306a36Sopenharmony_ci		}
29462306a36Sopenharmony_ci	}
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci	spin_unlock_irq(&s->tracked_chunk_lock);
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci	return found;
29962306a36Sopenharmony_ci}
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci/*
30262306a36Sopenharmony_ci * This conflicting I/O is extremely improbable in the caller,
30362306a36Sopenharmony_ci * so fsleep(1000) is sufficient and there is no need for a wait queue.
30462306a36Sopenharmony_ci */
30562306a36Sopenharmony_cistatic void __check_for_conflicting_io(struct dm_snapshot *s, chunk_t chunk)
30662306a36Sopenharmony_ci{
30762306a36Sopenharmony_ci	while (__chunk_is_tracked(s, chunk))
30862306a36Sopenharmony_ci		fsleep(1000);
30962306a36Sopenharmony_ci}
31062306a36Sopenharmony_ci
/*
 * One of these per registered origin, held in the _origins hash
 * (bucket chosen by origin_hash() of the block device).
 */
struct origin {
	/* The origin device */
	struct block_device *bdev;

	/* Link in the _origins bucket */
	struct list_head hash_list;

	/* List of snapshots for this origin, kept sorted by __insert_snapshot() */
	struct list_head snapshots;
};
32362306a36Sopenharmony_ci
/*
 * This structure is allocated for each origin target.
 * Instances are kept in the _dm_origins hash, keyed by dev->bdev.
 */
struct dm_origin {
	/* The device this origin target sits on */
	struct dm_dev *dev;
	struct dm_target *ti;
	unsigned int split_boundary;
	/* Link in the _dm_origins bucket */
	struct list_head hash_list;
};
33362306a36Sopenharmony_ci
/*
 * Size of the hash table for origin volumes. If we make this
 * the size of the minors list then it should be nearly perfect.
 *
 * ORIGIN_MASK is derived from ORIGIN_HASH_SIZE so the two cannot drift
 * apart; the size must therefore remain a power of two.
 */
#define ORIGIN_HASH_SIZE 256
#define ORIGIN_MASK      (ORIGIN_HASH_SIZE - 1)
/*
 * Origin hash tables, each an array of ORIGIN_HASH_SIZE list heads
 * allocated by init_origin_hash():
 *   _origins    - buckets of struct origin
 *   _dm_origins - buckets of struct dm_origin
 * _origins_lock is held for writing while snapshots are registered,
 * re-registered or unregistered.
 */
static struct list_head *_origins;
static struct list_head *_dm_origins;
static struct rw_semaphore _origins_lock;

/*
 * Wait queue, spinlock and counter related to completed pending exceptions.
 * NOTE(review): their users are outside this part of the file -- confirm
 * the exact protocol there.
 */
static DECLARE_WAIT_QUEUE_HEAD(_pending_exceptions_done);
static DEFINE_SPINLOCK(_pending_exceptions_done_spinlock);
static uint64_t _pending_exceptions_done_count;
34762306a36Sopenharmony_ci
34862306a36Sopenharmony_cistatic int init_origin_hash(void)
34962306a36Sopenharmony_ci{
35062306a36Sopenharmony_ci	int i;
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_ci	_origins = kmalloc_array(ORIGIN_HASH_SIZE, sizeof(struct list_head),
35362306a36Sopenharmony_ci				 GFP_KERNEL);
35462306a36Sopenharmony_ci	if (!_origins) {
35562306a36Sopenharmony_ci		DMERR("unable to allocate memory for _origins");
35662306a36Sopenharmony_ci		return -ENOMEM;
35762306a36Sopenharmony_ci	}
35862306a36Sopenharmony_ci	for (i = 0; i < ORIGIN_HASH_SIZE; i++)
35962306a36Sopenharmony_ci		INIT_LIST_HEAD(_origins + i);
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_ci	_dm_origins = kmalloc_array(ORIGIN_HASH_SIZE,
36262306a36Sopenharmony_ci				    sizeof(struct list_head),
36362306a36Sopenharmony_ci				    GFP_KERNEL);
36462306a36Sopenharmony_ci	if (!_dm_origins) {
36562306a36Sopenharmony_ci		DMERR("unable to allocate memory for _dm_origins");
36662306a36Sopenharmony_ci		kfree(_origins);
36762306a36Sopenharmony_ci		return -ENOMEM;
36862306a36Sopenharmony_ci	}
36962306a36Sopenharmony_ci	for (i = 0; i < ORIGIN_HASH_SIZE; i++)
37062306a36Sopenharmony_ci		INIT_LIST_HEAD(_dm_origins + i);
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_ci	init_rwsem(&_origins_lock);
37362306a36Sopenharmony_ci
37462306a36Sopenharmony_ci	return 0;
37562306a36Sopenharmony_ci}
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_cistatic void exit_origin_hash(void)
37862306a36Sopenharmony_ci{
37962306a36Sopenharmony_ci	kfree(_origins);
38062306a36Sopenharmony_ci	kfree(_dm_origins);
38162306a36Sopenharmony_ci}
38262306a36Sopenharmony_ci
38362306a36Sopenharmony_cistatic unsigned int origin_hash(struct block_device *bdev)
38462306a36Sopenharmony_ci{
38562306a36Sopenharmony_ci	return bdev->bd_dev & ORIGIN_MASK;
38662306a36Sopenharmony_ci}
38762306a36Sopenharmony_ci
38862306a36Sopenharmony_cistatic struct origin *__lookup_origin(struct block_device *origin)
38962306a36Sopenharmony_ci{
39062306a36Sopenharmony_ci	struct list_head *ol;
39162306a36Sopenharmony_ci	struct origin *o;
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_ci	ol = &_origins[origin_hash(origin)];
39462306a36Sopenharmony_ci	list_for_each_entry(o, ol, hash_list)
39562306a36Sopenharmony_ci		if (bdev_equal(o->bdev, origin))
39662306a36Sopenharmony_ci			return o;
39762306a36Sopenharmony_ci
39862306a36Sopenharmony_ci	return NULL;
39962306a36Sopenharmony_ci}
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_cistatic void __insert_origin(struct origin *o)
40262306a36Sopenharmony_ci{
40362306a36Sopenharmony_ci	struct list_head *sl = &_origins[origin_hash(o->bdev)];
40462306a36Sopenharmony_ci
40562306a36Sopenharmony_ci	list_add_tail(&o->hash_list, sl);
40662306a36Sopenharmony_ci}
40762306a36Sopenharmony_ci
40862306a36Sopenharmony_cistatic struct dm_origin *__lookup_dm_origin(struct block_device *origin)
40962306a36Sopenharmony_ci{
41062306a36Sopenharmony_ci	struct list_head *ol;
41162306a36Sopenharmony_ci	struct dm_origin *o;
41262306a36Sopenharmony_ci
41362306a36Sopenharmony_ci	ol = &_dm_origins[origin_hash(origin)];
41462306a36Sopenharmony_ci	list_for_each_entry(o, ol, hash_list)
41562306a36Sopenharmony_ci		if (bdev_equal(o->dev->bdev, origin))
41662306a36Sopenharmony_ci			return o;
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_ci	return NULL;
41962306a36Sopenharmony_ci}
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_cistatic void __insert_dm_origin(struct dm_origin *o)
42262306a36Sopenharmony_ci{
42362306a36Sopenharmony_ci	struct list_head *sl = &_dm_origins[origin_hash(o->dev->bdev)];
42462306a36Sopenharmony_ci
42562306a36Sopenharmony_ci	list_add_tail(&o->hash_list, sl);
42662306a36Sopenharmony_ci}
42762306a36Sopenharmony_ci
42862306a36Sopenharmony_cistatic void __remove_dm_origin(struct dm_origin *o)
42962306a36Sopenharmony_ci{
43062306a36Sopenharmony_ci	list_del(&o->hash_list);
43162306a36Sopenharmony_ci}
43262306a36Sopenharmony_ci
43362306a36Sopenharmony_ci/*
43462306a36Sopenharmony_ci * _origins_lock must be held when calling this function.
43562306a36Sopenharmony_ci * Returns number of snapshots registered using the supplied cow device, plus:
43662306a36Sopenharmony_ci * snap_src - a snapshot suitable for use as a source of exception handover
43762306a36Sopenharmony_ci * snap_dest - a snapshot capable of receiving exception handover.
43862306a36Sopenharmony_ci * snap_merge - an existing snapshot-merge target linked to the same origin.
43962306a36Sopenharmony_ci *   There can be at most one snapshot-merge target. The parameter is optional.
44062306a36Sopenharmony_ci *
44162306a36Sopenharmony_ci * Possible return values and states of snap_src and snap_dest.
44262306a36Sopenharmony_ci *   0: NULL, NULL  - first new snapshot
44362306a36Sopenharmony_ci *   1: snap_src, NULL - normal snapshot
44462306a36Sopenharmony_ci *   2: snap_src, snap_dest  - waiting for handover
44562306a36Sopenharmony_ci *   2: snap_src, NULL - handed over, waiting for old to be deleted
44662306a36Sopenharmony_ci *   1: NULL, snap_dest - source got destroyed without handover
44762306a36Sopenharmony_ci */
44862306a36Sopenharmony_cistatic int __find_snapshots_sharing_cow(struct dm_snapshot *snap,
44962306a36Sopenharmony_ci					struct dm_snapshot **snap_src,
45062306a36Sopenharmony_ci					struct dm_snapshot **snap_dest,
45162306a36Sopenharmony_ci					struct dm_snapshot **snap_merge)
45262306a36Sopenharmony_ci{
45362306a36Sopenharmony_ci	struct dm_snapshot *s;
45462306a36Sopenharmony_ci	struct origin *o;
45562306a36Sopenharmony_ci	int count = 0;
45662306a36Sopenharmony_ci	int active;
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_ci	o = __lookup_origin(snap->origin->bdev);
45962306a36Sopenharmony_ci	if (!o)
46062306a36Sopenharmony_ci		goto out;
46162306a36Sopenharmony_ci
46262306a36Sopenharmony_ci	list_for_each_entry(s, &o->snapshots, list) {
46362306a36Sopenharmony_ci		if (dm_target_is_snapshot_merge(s->ti) && snap_merge)
46462306a36Sopenharmony_ci			*snap_merge = s;
46562306a36Sopenharmony_ci		if (!bdev_equal(s->cow->bdev, snap->cow->bdev))
46662306a36Sopenharmony_ci			continue;
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci		down_read(&s->lock);
46962306a36Sopenharmony_ci		active = s->active;
47062306a36Sopenharmony_ci		up_read(&s->lock);
47162306a36Sopenharmony_ci
47262306a36Sopenharmony_ci		if (active) {
47362306a36Sopenharmony_ci			if (snap_src)
47462306a36Sopenharmony_ci				*snap_src = s;
47562306a36Sopenharmony_ci		} else if (snap_dest)
47662306a36Sopenharmony_ci			*snap_dest = s;
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_ci		count++;
47962306a36Sopenharmony_ci	}
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ciout:
48262306a36Sopenharmony_ci	return count;
48362306a36Sopenharmony_ci}
48462306a36Sopenharmony_ci
48562306a36Sopenharmony_ci/*
48662306a36Sopenharmony_ci * On success, returns 1 if this snapshot is a handover destination,
48762306a36Sopenharmony_ci * otherwise returns 0.
48862306a36Sopenharmony_ci */
48962306a36Sopenharmony_cistatic int __validate_exception_handover(struct dm_snapshot *snap)
49062306a36Sopenharmony_ci{
49162306a36Sopenharmony_ci	struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
49262306a36Sopenharmony_ci	struct dm_snapshot *snap_merge = NULL;
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_ci	/* Does snapshot need exceptions handed over to it? */
49562306a36Sopenharmony_ci	if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest,
49662306a36Sopenharmony_ci					  &snap_merge) == 2) ||
49762306a36Sopenharmony_ci	    snap_dest) {
49862306a36Sopenharmony_ci		snap->ti->error = "Snapshot cow pairing for exception table handover failed";
49962306a36Sopenharmony_ci		return -EINVAL;
50062306a36Sopenharmony_ci	}
50162306a36Sopenharmony_ci
50262306a36Sopenharmony_ci	/*
50362306a36Sopenharmony_ci	 * If no snap_src was found, snap cannot become a handover
50462306a36Sopenharmony_ci	 * destination.
50562306a36Sopenharmony_ci	 */
50662306a36Sopenharmony_ci	if (!snap_src)
50762306a36Sopenharmony_ci		return 0;
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_ci	/*
51062306a36Sopenharmony_ci	 * Non-snapshot-merge handover?
51162306a36Sopenharmony_ci	 */
51262306a36Sopenharmony_ci	if (!dm_target_is_snapshot_merge(snap->ti))
51362306a36Sopenharmony_ci		return 1;
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci	/*
51662306a36Sopenharmony_ci	 * Do not allow more than one merging snapshot.
51762306a36Sopenharmony_ci	 */
51862306a36Sopenharmony_ci	if (snap_merge) {
51962306a36Sopenharmony_ci		snap->ti->error = "A snapshot is already merging.";
52062306a36Sopenharmony_ci		return -EINVAL;
52162306a36Sopenharmony_ci	}
52262306a36Sopenharmony_ci
52362306a36Sopenharmony_ci	if (!snap_src->store->type->prepare_merge ||
52462306a36Sopenharmony_ci	    !snap_src->store->type->commit_merge) {
52562306a36Sopenharmony_ci		snap->ti->error = "Snapshot exception store does not support snapshot-merge.";
52662306a36Sopenharmony_ci		return -EINVAL;
52762306a36Sopenharmony_ci	}
52862306a36Sopenharmony_ci
52962306a36Sopenharmony_ci	return 1;
53062306a36Sopenharmony_ci}
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_cistatic void __insert_snapshot(struct origin *o, struct dm_snapshot *s)
53362306a36Sopenharmony_ci{
53462306a36Sopenharmony_ci	struct dm_snapshot *l;
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_ci	/* Sort the list according to chunk size, largest-first smallest-last */
53762306a36Sopenharmony_ci	list_for_each_entry(l, &o->snapshots, list)
53862306a36Sopenharmony_ci		if (l->store->chunk_size < s->store->chunk_size)
53962306a36Sopenharmony_ci			break;
54062306a36Sopenharmony_ci	list_add_tail(&s->list, &l->list);
54162306a36Sopenharmony_ci}
54262306a36Sopenharmony_ci
54362306a36Sopenharmony_ci/*
54462306a36Sopenharmony_ci * Make a note of the snapshot and its origin so we can look it
54562306a36Sopenharmony_ci * up when the origin has a write on it.
54662306a36Sopenharmony_ci *
54762306a36Sopenharmony_ci * Also validate snapshot exception store handovers.
54862306a36Sopenharmony_ci * On success, returns 1 if this registration is a handover destination,
54962306a36Sopenharmony_ci * otherwise returns 0.
55062306a36Sopenharmony_ci */
55162306a36Sopenharmony_cistatic int register_snapshot(struct dm_snapshot *snap)
55262306a36Sopenharmony_ci{
55362306a36Sopenharmony_ci	struct origin *o, *new_o = NULL;
55462306a36Sopenharmony_ci	struct block_device *bdev = snap->origin->bdev;
55562306a36Sopenharmony_ci	int r = 0;
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci	new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
55862306a36Sopenharmony_ci	if (!new_o)
55962306a36Sopenharmony_ci		return -ENOMEM;
56062306a36Sopenharmony_ci
56162306a36Sopenharmony_ci	down_write(&_origins_lock);
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ci	r = __validate_exception_handover(snap);
56462306a36Sopenharmony_ci	if (r < 0) {
56562306a36Sopenharmony_ci		kfree(new_o);
56662306a36Sopenharmony_ci		goto out;
56762306a36Sopenharmony_ci	}
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_ci	o = __lookup_origin(bdev);
57062306a36Sopenharmony_ci	if (o)
57162306a36Sopenharmony_ci		kfree(new_o);
57262306a36Sopenharmony_ci	else {
57362306a36Sopenharmony_ci		/* New origin */
57462306a36Sopenharmony_ci		o = new_o;
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci		/* Initialise the struct */
57762306a36Sopenharmony_ci		INIT_LIST_HEAD(&o->snapshots);
57862306a36Sopenharmony_ci		o->bdev = bdev;
57962306a36Sopenharmony_ci
58062306a36Sopenharmony_ci		__insert_origin(o);
58162306a36Sopenharmony_ci	}
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_ci	__insert_snapshot(o, snap);
58462306a36Sopenharmony_ci
58562306a36Sopenharmony_ciout:
58662306a36Sopenharmony_ci	up_write(&_origins_lock);
58762306a36Sopenharmony_ci
58862306a36Sopenharmony_ci	return r;
58962306a36Sopenharmony_ci}
59062306a36Sopenharmony_ci
59162306a36Sopenharmony_ci/*
59262306a36Sopenharmony_ci * Move snapshot to correct place in list according to chunk size.
59362306a36Sopenharmony_ci */
59462306a36Sopenharmony_cistatic void reregister_snapshot(struct dm_snapshot *s)
59562306a36Sopenharmony_ci{
59662306a36Sopenharmony_ci	struct block_device *bdev = s->origin->bdev;
59762306a36Sopenharmony_ci
59862306a36Sopenharmony_ci	down_write(&_origins_lock);
59962306a36Sopenharmony_ci
60062306a36Sopenharmony_ci	list_del(&s->list);
60162306a36Sopenharmony_ci	__insert_snapshot(__lookup_origin(bdev), s);
60262306a36Sopenharmony_ci
60362306a36Sopenharmony_ci	up_write(&_origins_lock);
60462306a36Sopenharmony_ci}
60562306a36Sopenharmony_ci
60662306a36Sopenharmony_cistatic void unregister_snapshot(struct dm_snapshot *s)
60762306a36Sopenharmony_ci{
60862306a36Sopenharmony_ci	struct origin *o;
60962306a36Sopenharmony_ci
61062306a36Sopenharmony_ci	down_write(&_origins_lock);
61162306a36Sopenharmony_ci	o = __lookup_origin(s->origin->bdev);
61262306a36Sopenharmony_ci
61362306a36Sopenharmony_ci	list_del(&s->list);
61462306a36Sopenharmony_ci	if (o && list_empty(&o->snapshots)) {
61562306a36Sopenharmony_ci		list_del(&o->hash_list);
61662306a36Sopenharmony_ci		kfree(o);
61762306a36Sopenharmony_ci	}
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_ci	up_write(&_origins_lock);
62062306a36Sopenharmony_ci}
62162306a36Sopenharmony_ci
/*
 * Implementation of the exception hash tables.
 * The lowest hash_shift bits of the chunk number are ignored, allowing
 * some consecutive chunks to be grouped together.
 *
 * Forward declaration: the locking helpers below need the bucket index of
 * a chunk.  Being static, the definition lives elsewhere in this file.
 */
static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk);
62862306a36Sopenharmony_ci
/*
 * Lock to protect access to the completed and pending exception hash tables.
 *
 * Holds the two buckets (one per table) that a given chunk hashes to; it is
 * filled in by dm_exception_table_lock_init().
 */
struct dm_exception_table_lock {
	/* Bucket of the chunk in dm_snapshot.complete */
	struct hlist_bl_head *complete_slot;
	/* Bucket of the chunk in dm_snapshot.pending */
	struct hlist_bl_head *pending_slot;
};
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_cistatic void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk,
63662306a36Sopenharmony_ci					 struct dm_exception_table_lock *lock)
63762306a36Sopenharmony_ci{
63862306a36Sopenharmony_ci	struct dm_exception_table *complete = &s->complete;
63962306a36Sopenharmony_ci	struct dm_exception_table *pending = &s->pending;
64062306a36Sopenharmony_ci
64162306a36Sopenharmony_ci	lock->complete_slot = &complete->table[exception_hash(complete, chunk)];
64262306a36Sopenharmony_ci	lock->pending_slot = &pending->table[exception_hash(pending, chunk)];
64362306a36Sopenharmony_ci}
64462306a36Sopenharmony_ci
64562306a36Sopenharmony_cistatic void dm_exception_table_lock(struct dm_exception_table_lock *lock)
64662306a36Sopenharmony_ci{
64762306a36Sopenharmony_ci	hlist_bl_lock(lock->complete_slot);
64862306a36Sopenharmony_ci	hlist_bl_lock(lock->pending_slot);
64962306a36Sopenharmony_ci}
65062306a36Sopenharmony_ci
65162306a36Sopenharmony_cistatic void dm_exception_table_unlock(struct dm_exception_table_lock *lock)
65262306a36Sopenharmony_ci{
65362306a36Sopenharmony_ci	hlist_bl_unlock(lock->pending_slot);
65462306a36Sopenharmony_ci	hlist_bl_unlock(lock->complete_slot);
65562306a36Sopenharmony_ci}
65662306a36Sopenharmony_ci
65762306a36Sopenharmony_cistatic int dm_exception_table_init(struct dm_exception_table *et,
65862306a36Sopenharmony_ci				   uint32_t size, unsigned int hash_shift)
65962306a36Sopenharmony_ci{
66062306a36Sopenharmony_ci	unsigned int i;
66162306a36Sopenharmony_ci
66262306a36Sopenharmony_ci	et->hash_shift = hash_shift;
66362306a36Sopenharmony_ci	et->hash_mask = size - 1;
66462306a36Sopenharmony_ci	et->table = kvmalloc_array(size, sizeof(struct hlist_bl_head),
66562306a36Sopenharmony_ci				   GFP_KERNEL);
66662306a36Sopenharmony_ci	if (!et->table)
66762306a36Sopenharmony_ci		return -ENOMEM;
66862306a36Sopenharmony_ci
66962306a36Sopenharmony_ci	for (i = 0; i < size; i++)
67062306a36Sopenharmony_ci		INIT_HLIST_BL_HEAD(et->table + i);
67162306a36Sopenharmony_ci
67262306a36Sopenharmony_ci	return 0;
67362306a36Sopenharmony_ci}
67462306a36Sopenharmony_ci
67562306a36Sopenharmony_cistatic void dm_exception_table_exit(struct dm_exception_table *et,
67662306a36Sopenharmony_ci				    struct kmem_cache *mem)
67762306a36Sopenharmony_ci{
67862306a36Sopenharmony_ci	struct hlist_bl_head *slot;
67962306a36Sopenharmony_ci	struct dm_exception *ex;
68062306a36Sopenharmony_ci	struct hlist_bl_node *pos, *n;
68162306a36Sopenharmony_ci	int i, size;
68262306a36Sopenharmony_ci
68362306a36Sopenharmony_ci	size = et->hash_mask + 1;
68462306a36Sopenharmony_ci	for (i = 0; i < size; i++) {
68562306a36Sopenharmony_ci		slot = et->table + i;
68662306a36Sopenharmony_ci
68762306a36Sopenharmony_ci		hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list)
68862306a36Sopenharmony_ci			kmem_cache_free(mem, ex);
68962306a36Sopenharmony_ci	}
69062306a36Sopenharmony_ci
69162306a36Sopenharmony_ci	kvfree(et->table);
69262306a36Sopenharmony_ci}
69362306a36Sopenharmony_ci
69462306a36Sopenharmony_cistatic uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk)
69562306a36Sopenharmony_ci{
69662306a36Sopenharmony_ci	return (chunk >> et->hash_shift) & et->hash_mask;
69762306a36Sopenharmony_ci}
69862306a36Sopenharmony_ci
69962306a36Sopenharmony_cistatic void dm_remove_exception(struct dm_exception *e)
70062306a36Sopenharmony_ci{
70162306a36Sopenharmony_ci	hlist_bl_del(&e->hash_list);
70262306a36Sopenharmony_ci}
70362306a36Sopenharmony_ci
70462306a36Sopenharmony_ci/*
70562306a36Sopenharmony_ci * Return the exception data for a sector, or NULL if not
70662306a36Sopenharmony_ci * remapped.
70762306a36Sopenharmony_ci */
70862306a36Sopenharmony_cistatic struct dm_exception *dm_lookup_exception(struct dm_exception_table *et,
70962306a36Sopenharmony_ci						chunk_t chunk)
71062306a36Sopenharmony_ci{
71162306a36Sopenharmony_ci	struct hlist_bl_head *slot;
71262306a36Sopenharmony_ci	struct hlist_bl_node *pos;
71362306a36Sopenharmony_ci	struct dm_exception *e;
71462306a36Sopenharmony_ci
71562306a36Sopenharmony_ci	slot = &et->table[exception_hash(et, chunk)];
71662306a36Sopenharmony_ci	hlist_bl_for_each_entry(e, pos, slot, hash_list)
71762306a36Sopenharmony_ci		if (chunk >= e->old_chunk &&
71862306a36Sopenharmony_ci		    chunk <= e->old_chunk + dm_consecutive_chunk_count(e))
71962306a36Sopenharmony_ci			return e;
72062306a36Sopenharmony_ci
72162306a36Sopenharmony_ci	return NULL;
72262306a36Sopenharmony_ci}
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_cistatic struct dm_exception *alloc_completed_exception(gfp_t gfp)
72562306a36Sopenharmony_ci{
72662306a36Sopenharmony_ci	struct dm_exception *e;
72762306a36Sopenharmony_ci
72862306a36Sopenharmony_ci	e = kmem_cache_alloc(exception_cache, gfp);
72962306a36Sopenharmony_ci	if (!e && gfp == GFP_NOIO)
73062306a36Sopenharmony_ci		e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci	return e;
73362306a36Sopenharmony_ci}
73462306a36Sopenharmony_ci
73562306a36Sopenharmony_cistatic void free_completed_exception(struct dm_exception *e)
73662306a36Sopenharmony_ci{
73762306a36Sopenharmony_ci	kmem_cache_free(exception_cache, e);
73862306a36Sopenharmony_ci}
73962306a36Sopenharmony_ci
74062306a36Sopenharmony_cistatic struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s)
74162306a36Sopenharmony_ci{
74262306a36Sopenharmony_ci	struct dm_snap_pending_exception *pe = mempool_alloc(&s->pending_pool,
74362306a36Sopenharmony_ci							     GFP_NOIO);
74462306a36Sopenharmony_ci
74562306a36Sopenharmony_ci	atomic_inc(&s->pending_exceptions_count);
74662306a36Sopenharmony_ci	pe->snap = s;
74762306a36Sopenharmony_ci
74862306a36Sopenharmony_ci	return pe;
74962306a36Sopenharmony_ci}
75062306a36Sopenharmony_ci
75162306a36Sopenharmony_cistatic void free_pending_exception(struct dm_snap_pending_exception *pe)
75262306a36Sopenharmony_ci{
75362306a36Sopenharmony_ci	struct dm_snapshot *s = pe->snap;
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_ci	mempool_free(pe, &s->pending_pool);
75662306a36Sopenharmony_ci	smp_mb__before_atomic();
75762306a36Sopenharmony_ci	atomic_dec(&s->pending_exceptions_count);
75862306a36Sopenharmony_ci}
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_cistatic void dm_insert_exception(struct dm_exception_table *eh,
76162306a36Sopenharmony_ci				struct dm_exception *new_e)
76262306a36Sopenharmony_ci{
76362306a36Sopenharmony_ci	struct hlist_bl_head *l;
76462306a36Sopenharmony_ci	struct hlist_bl_node *pos;
76562306a36Sopenharmony_ci	struct dm_exception *e = NULL;
76662306a36Sopenharmony_ci
76762306a36Sopenharmony_ci	l = &eh->table[exception_hash(eh, new_e->old_chunk)];
76862306a36Sopenharmony_ci
76962306a36Sopenharmony_ci	/* Add immediately if this table doesn't support consecutive chunks */
77062306a36Sopenharmony_ci	if (!eh->hash_shift)
77162306a36Sopenharmony_ci		goto out;
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_ci	/* List is ordered by old_chunk */
77462306a36Sopenharmony_ci	hlist_bl_for_each_entry(e, pos, l, hash_list) {
77562306a36Sopenharmony_ci		/* Insert after an existing chunk? */
77662306a36Sopenharmony_ci		if (new_e->old_chunk == (e->old_chunk +
77762306a36Sopenharmony_ci					 dm_consecutive_chunk_count(e) + 1) &&
77862306a36Sopenharmony_ci		    new_e->new_chunk == (dm_chunk_number(e->new_chunk) +
77962306a36Sopenharmony_ci					 dm_consecutive_chunk_count(e) + 1)) {
78062306a36Sopenharmony_ci			dm_consecutive_chunk_count_inc(e);
78162306a36Sopenharmony_ci			free_completed_exception(new_e);
78262306a36Sopenharmony_ci			return;
78362306a36Sopenharmony_ci		}
78462306a36Sopenharmony_ci
78562306a36Sopenharmony_ci		/* Insert before an existing chunk? */
78662306a36Sopenharmony_ci		if (new_e->old_chunk == (e->old_chunk - 1) &&
78762306a36Sopenharmony_ci		    new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) {
78862306a36Sopenharmony_ci			dm_consecutive_chunk_count_inc(e);
78962306a36Sopenharmony_ci			e->old_chunk--;
79062306a36Sopenharmony_ci			e->new_chunk--;
79162306a36Sopenharmony_ci			free_completed_exception(new_e);
79262306a36Sopenharmony_ci			return;
79362306a36Sopenharmony_ci		}
79462306a36Sopenharmony_ci
79562306a36Sopenharmony_ci		if (new_e->old_chunk < e->old_chunk)
79662306a36Sopenharmony_ci			break;
79762306a36Sopenharmony_ci	}
79862306a36Sopenharmony_ci
79962306a36Sopenharmony_ciout:
80062306a36Sopenharmony_ci	if (!e) {
80162306a36Sopenharmony_ci		/*
80262306a36Sopenharmony_ci		 * Either the table doesn't support consecutive chunks or slot
80362306a36Sopenharmony_ci		 * l is empty.
80462306a36Sopenharmony_ci		 */
80562306a36Sopenharmony_ci		hlist_bl_add_head(&new_e->hash_list, l);
80662306a36Sopenharmony_ci	} else if (new_e->old_chunk < e->old_chunk) {
80762306a36Sopenharmony_ci		/* Add before an existing exception */
80862306a36Sopenharmony_ci		hlist_bl_add_before(&new_e->hash_list, &e->hash_list);
80962306a36Sopenharmony_ci	} else {
81062306a36Sopenharmony_ci		/* Add to l's tail: e is the last exception in this slot */
81162306a36Sopenharmony_ci		hlist_bl_add_behind(&new_e->hash_list, &e->hash_list);
81262306a36Sopenharmony_ci	}
81362306a36Sopenharmony_ci}
81462306a36Sopenharmony_ci
81562306a36Sopenharmony_ci/*
81662306a36Sopenharmony_ci * Callback used by the exception stores to load exceptions when
81762306a36Sopenharmony_ci * initialising.
81862306a36Sopenharmony_ci */
81962306a36Sopenharmony_cistatic int dm_add_exception(void *context, chunk_t old, chunk_t new)
82062306a36Sopenharmony_ci{
82162306a36Sopenharmony_ci	struct dm_exception_table_lock lock;
82262306a36Sopenharmony_ci	struct dm_snapshot *s = context;
82362306a36Sopenharmony_ci	struct dm_exception *e;
82462306a36Sopenharmony_ci
82562306a36Sopenharmony_ci	e = alloc_completed_exception(GFP_KERNEL);
82662306a36Sopenharmony_ci	if (!e)
82762306a36Sopenharmony_ci		return -ENOMEM;
82862306a36Sopenharmony_ci
82962306a36Sopenharmony_ci	e->old_chunk = old;
83062306a36Sopenharmony_ci
83162306a36Sopenharmony_ci	/* Consecutive_count is implicitly initialised to zero */
83262306a36Sopenharmony_ci	e->new_chunk = new;
83362306a36Sopenharmony_ci
83462306a36Sopenharmony_ci	/*
83562306a36Sopenharmony_ci	 * Although there is no need to lock access to the exception tables
83662306a36Sopenharmony_ci	 * here, if we don't then hlist_bl_add_head(), called by
83762306a36Sopenharmony_ci	 * dm_insert_exception(), will complain about accessing the
83862306a36Sopenharmony_ci	 * corresponding list without locking it first.
83962306a36Sopenharmony_ci	 */
84062306a36Sopenharmony_ci	dm_exception_table_lock_init(s, old, &lock);
84162306a36Sopenharmony_ci
84262306a36Sopenharmony_ci	dm_exception_table_lock(&lock);
84362306a36Sopenharmony_ci	dm_insert_exception(&s->complete, e);
84462306a36Sopenharmony_ci	dm_exception_table_unlock(&lock);
84562306a36Sopenharmony_ci
84662306a36Sopenharmony_ci	return 0;
84762306a36Sopenharmony_ci}
84862306a36Sopenharmony_ci
84962306a36Sopenharmony_ci/*
85062306a36Sopenharmony_ci * Return a minimum chunk size of all snapshots that have the specified origin.
85162306a36Sopenharmony_ci * Return zero if the origin has no snapshots.
85262306a36Sopenharmony_ci */
85362306a36Sopenharmony_cistatic uint32_t __minimum_chunk_size(struct origin *o)
85462306a36Sopenharmony_ci{
85562306a36Sopenharmony_ci	struct dm_snapshot *snap;
85662306a36Sopenharmony_ci	unsigned int chunk_size = rounddown_pow_of_two(UINT_MAX);
85762306a36Sopenharmony_ci
85862306a36Sopenharmony_ci	if (o)
85962306a36Sopenharmony_ci		list_for_each_entry(snap, &o->snapshots, list)
86062306a36Sopenharmony_ci			chunk_size = min_not_zero(chunk_size,
86162306a36Sopenharmony_ci						  snap->store->chunk_size);
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ci	return (uint32_t) chunk_size;
86462306a36Sopenharmony_ci}
86562306a36Sopenharmony_ci
86662306a36Sopenharmony_ci/*
86762306a36Sopenharmony_ci * Hard coded magic.
86862306a36Sopenharmony_ci */
86962306a36Sopenharmony_cistatic int calc_max_buckets(void)
87062306a36Sopenharmony_ci{
87162306a36Sopenharmony_ci	/* use a fixed size of 2MB */
87262306a36Sopenharmony_ci	unsigned long mem = 2 * 1024 * 1024;
87362306a36Sopenharmony_ci
87462306a36Sopenharmony_ci	mem /= sizeof(struct hlist_bl_head);
87562306a36Sopenharmony_ci
87662306a36Sopenharmony_ci	return mem;
87762306a36Sopenharmony_ci}
87862306a36Sopenharmony_ci
87962306a36Sopenharmony_ci/*
88062306a36Sopenharmony_ci * Allocate room for a suitable hash table.
88162306a36Sopenharmony_ci */
88262306a36Sopenharmony_cistatic int init_hash_tables(struct dm_snapshot *s)
88362306a36Sopenharmony_ci{
88462306a36Sopenharmony_ci	sector_t hash_size, cow_dev_size, max_buckets;
88562306a36Sopenharmony_ci
88662306a36Sopenharmony_ci	/*
88762306a36Sopenharmony_ci	 * Calculate based on the size of the original volume or
88862306a36Sopenharmony_ci	 * the COW volume...
88962306a36Sopenharmony_ci	 */
89062306a36Sopenharmony_ci	cow_dev_size = get_dev_size(s->cow->bdev);
89162306a36Sopenharmony_ci	max_buckets = calc_max_buckets();
89262306a36Sopenharmony_ci
89362306a36Sopenharmony_ci	hash_size = cow_dev_size >> s->store->chunk_shift;
89462306a36Sopenharmony_ci	hash_size = min(hash_size, max_buckets);
89562306a36Sopenharmony_ci
89662306a36Sopenharmony_ci	if (hash_size < 64)
89762306a36Sopenharmony_ci		hash_size = 64;
89862306a36Sopenharmony_ci	hash_size = rounddown_pow_of_two(hash_size);
89962306a36Sopenharmony_ci	if (dm_exception_table_init(&s->complete, hash_size,
90062306a36Sopenharmony_ci				    DM_CHUNK_CONSECUTIVE_BITS))
90162306a36Sopenharmony_ci		return -ENOMEM;
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_ci	/*
90462306a36Sopenharmony_ci	 * Allocate hash table for in-flight exceptions
90562306a36Sopenharmony_ci	 * Make this smaller than the real hash table
90662306a36Sopenharmony_ci	 */
90762306a36Sopenharmony_ci	hash_size >>= 3;
90862306a36Sopenharmony_ci	if (hash_size < 64)
90962306a36Sopenharmony_ci		hash_size = 64;
91062306a36Sopenharmony_ci
91162306a36Sopenharmony_ci	if (dm_exception_table_init(&s->pending, hash_size, 0)) {
91262306a36Sopenharmony_ci		dm_exception_table_exit(&s->complete, exception_cache);
91362306a36Sopenharmony_ci		return -ENOMEM;
91462306a36Sopenharmony_ci	}
91562306a36Sopenharmony_ci
91662306a36Sopenharmony_ci	return 0;
91762306a36Sopenharmony_ci}
91862306a36Sopenharmony_ci
91962306a36Sopenharmony_cistatic void merge_shutdown(struct dm_snapshot *s)
92062306a36Sopenharmony_ci{
92162306a36Sopenharmony_ci	clear_bit_unlock(RUNNING_MERGE, &s->state_bits);
92262306a36Sopenharmony_ci	smp_mb__after_atomic();
92362306a36Sopenharmony_ci	wake_up_bit(&s->state_bits, RUNNING_MERGE);
92462306a36Sopenharmony_ci}
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_cistatic struct bio *__release_queued_bios_after_merge(struct dm_snapshot *s)
92762306a36Sopenharmony_ci{
92862306a36Sopenharmony_ci	s->first_merging_chunk = 0;
92962306a36Sopenharmony_ci	s->num_merging_chunks = 0;
93062306a36Sopenharmony_ci
93162306a36Sopenharmony_ci	return bio_list_get(&s->bios_queued_during_merge);
93262306a36Sopenharmony_ci}
93362306a36Sopenharmony_ci
93462306a36Sopenharmony_ci/*
93562306a36Sopenharmony_ci * Remove one chunk from the index of completed exceptions.
93662306a36Sopenharmony_ci */
93762306a36Sopenharmony_cistatic int __remove_single_exception_chunk(struct dm_snapshot *s,
93862306a36Sopenharmony_ci					   chunk_t old_chunk)
93962306a36Sopenharmony_ci{
94062306a36Sopenharmony_ci	struct dm_exception *e;
94162306a36Sopenharmony_ci
94262306a36Sopenharmony_ci	e = dm_lookup_exception(&s->complete, old_chunk);
94362306a36Sopenharmony_ci	if (!e) {
94462306a36Sopenharmony_ci		DMERR("Corruption detected: exception for block %llu is on disk but not in memory",
94562306a36Sopenharmony_ci		      (unsigned long long)old_chunk);
94662306a36Sopenharmony_ci		return -EINVAL;
94762306a36Sopenharmony_ci	}
94862306a36Sopenharmony_ci
94962306a36Sopenharmony_ci	/*
95062306a36Sopenharmony_ci	 * If this is the only chunk using this exception, remove exception.
95162306a36Sopenharmony_ci	 */
95262306a36Sopenharmony_ci	if (!dm_consecutive_chunk_count(e)) {
95362306a36Sopenharmony_ci		dm_remove_exception(e);
95462306a36Sopenharmony_ci		free_completed_exception(e);
95562306a36Sopenharmony_ci		return 0;
95662306a36Sopenharmony_ci	}
95762306a36Sopenharmony_ci
95862306a36Sopenharmony_ci	/*
95962306a36Sopenharmony_ci	 * The chunk may be either at the beginning or the end of a
96062306a36Sopenharmony_ci	 * group of consecutive chunks - never in the middle.  We are
96162306a36Sopenharmony_ci	 * removing chunks in the opposite order to that in which they
96262306a36Sopenharmony_ci	 * were added, so this should always be true.
96362306a36Sopenharmony_ci	 * Decrement the consecutive chunk counter and adjust the
96462306a36Sopenharmony_ci	 * starting point if necessary.
96562306a36Sopenharmony_ci	 */
96662306a36Sopenharmony_ci	if (old_chunk == e->old_chunk) {
96762306a36Sopenharmony_ci		e->old_chunk++;
96862306a36Sopenharmony_ci		e->new_chunk++;
96962306a36Sopenharmony_ci	} else if (old_chunk != e->old_chunk +
97062306a36Sopenharmony_ci		   dm_consecutive_chunk_count(e)) {
97162306a36Sopenharmony_ci		DMERR("Attempt to merge block %llu from the middle of a chunk range [%llu - %llu]",
97262306a36Sopenharmony_ci		      (unsigned long long)old_chunk,
97362306a36Sopenharmony_ci		      (unsigned long long)e->old_chunk,
97462306a36Sopenharmony_ci		      (unsigned long long)
97562306a36Sopenharmony_ci		      e->old_chunk + dm_consecutive_chunk_count(e));
97662306a36Sopenharmony_ci		return -EINVAL;
97762306a36Sopenharmony_ci	}
97862306a36Sopenharmony_ci
97962306a36Sopenharmony_ci	dm_consecutive_chunk_count_dec(e);
98062306a36Sopenharmony_ci
98162306a36Sopenharmony_ci	return 0;
98262306a36Sopenharmony_ci}
98362306a36Sopenharmony_ci
98462306a36Sopenharmony_cistatic void flush_bios(struct bio *bio);
98562306a36Sopenharmony_ci
98662306a36Sopenharmony_cistatic int remove_single_exception_chunk(struct dm_snapshot *s)
98762306a36Sopenharmony_ci{
98862306a36Sopenharmony_ci	struct bio *b = NULL;
98962306a36Sopenharmony_ci	int r;
99062306a36Sopenharmony_ci	chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1;
99162306a36Sopenharmony_ci
99262306a36Sopenharmony_ci	down_write(&s->lock);
99362306a36Sopenharmony_ci
99462306a36Sopenharmony_ci	/*
99562306a36Sopenharmony_ci	 * Process chunks (and associated exceptions) in reverse order
99662306a36Sopenharmony_ci	 * so that dm_consecutive_chunk_count_dec() accounting works.
99762306a36Sopenharmony_ci	 */
99862306a36Sopenharmony_ci	do {
99962306a36Sopenharmony_ci		r = __remove_single_exception_chunk(s, old_chunk);
100062306a36Sopenharmony_ci		if (r)
100162306a36Sopenharmony_ci			goto out;
100262306a36Sopenharmony_ci	} while (old_chunk-- > s->first_merging_chunk);
100362306a36Sopenharmony_ci
100462306a36Sopenharmony_ci	b = __release_queued_bios_after_merge(s);
100562306a36Sopenharmony_ci
100662306a36Sopenharmony_ciout:
100762306a36Sopenharmony_ci	up_write(&s->lock);
100862306a36Sopenharmony_ci	if (b)
100962306a36Sopenharmony_ci		flush_bios(b);
101062306a36Sopenharmony_ci
101162306a36Sopenharmony_ci	return r;
101262306a36Sopenharmony_ci}
101362306a36Sopenharmony_ci
101462306a36Sopenharmony_cistatic int origin_write_extent(struct dm_snapshot *merging_snap,
101562306a36Sopenharmony_ci			       sector_t sector, unsigned int chunk_size);
101662306a36Sopenharmony_ci
101762306a36Sopenharmony_cistatic void merge_callback(int read_err, unsigned long write_err,
101862306a36Sopenharmony_ci			   void *context);
101962306a36Sopenharmony_ci
102062306a36Sopenharmony_cistatic uint64_t read_pending_exceptions_done_count(void)
102162306a36Sopenharmony_ci{
102262306a36Sopenharmony_ci	uint64_t pending_exceptions_done;
102362306a36Sopenharmony_ci
102462306a36Sopenharmony_ci	spin_lock(&_pending_exceptions_done_spinlock);
102562306a36Sopenharmony_ci	pending_exceptions_done = _pending_exceptions_done_count;
102662306a36Sopenharmony_ci	spin_unlock(&_pending_exceptions_done_spinlock);
102762306a36Sopenharmony_ci
102862306a36Sopenharmony_ci	return pending_exceptions_done;
102962306a36Sopenharmony_ci}
103062306a36Sopenharmony_ci
103162306a36Sopenharmony_cistatic void increment_pending_exceptions_done_count(void)
103262306a36Sopenharmony_ci{
103362306a36Sopenharmony_ci	spin_lock(&_pending_exceptions_done_spinlock);
103462306a36Sopenharmony_ci	_pending_exceptions_done_count++;
103562306a36Sopenharmony_ci	spin_unlock(&_pending_exceptions_done_spinlock);
103662306a36Sopenharmony_ci
103762306a36Sopenharmony_ci	wake_up_all(&_pending_exceptions_done);
103862306a36Sopenharmony_ci}
103962306a36Sopenharmony_ci
104062306a36Sopenharmony_cistatic void snapshot_merge_next_chunks(struct dm_snapshot *s)
104162306a36Sopenharmony_ci{
104262306a36Sopenharmony_ci	int i, linear_chunks;
104362306a36Sopenharmony_ci	chunk_t old_chunk, new_chunk;
104462306a36Sopenharmony_ci	struct dm_io_region src, dest;
104562306a36Sopenharmony_ci	sector_t io_size;
104662306a36Sopenharmony_ci	uint64_t previous_count;
104762306a36Sopenharmony_ci
104862306a36Sopenharmony_ci	BUG_ON(!test_bit(RUNNING_MERGE, &s->state_bits));
104962306a36Sopenharmony_ci	if (unlikely(test_bit(SHUTDOWN_MERGE, &s->state_bits)))
105062306a36Sopenharmony_ci		goto shut;
105162306a36Sopenharmony_ci
105262306a36Sopenharmony_ci	/*
105362306a36Sopenharmony_ci	 * valid flag never changes during merge, so no lock required.
105462306a36Sopenharmony_ci	 */
105562306a36Sopenharmony_ci	if (!s->valid) {
105662306a36Sopenharmony_ci		DMERR("Snapshot is invalid: can't merge");
105762306a36Sopenharmony_ci		goto shut;
105862306a36Sopenharmony_ci	}
105962306a36Sopenharmony_ci
106062306a36Sopenharmony_ci	linear_chunks = s->store->type->prepare_merge(s->store, &old_chunk,
106162306a36Sopenharmony_ci						      &new_chunk);
106262306a36Sopenharmony_ci	if (linear_chunks <= 0) {
106362306a36Sopenharmony_ci		if (linear_chunks < 0) {
106462306a36Sopenharmony_ci			DMERR("Read error in exception store: shutting down merge");
106562306a36Sopenharmony_ci			down_write(&s->lock);
106662306a36Sopenharmony_ci			s->merge_failed = true;
106762306a36Sopenharmony_ci			up_write(&s->lock);
106862306a36Sopenharmony_ci		}
106962306a36Sopenharmony_ci		goto shut;
107062306a36Sopenharmony_ci	}
107162306a36Sopenharmony_ci
107262306a36Sopenharmony_ci	/* Adjust old_chunk and new_chunk to reflect start of linear region */
107362306a36Sopenharmony_ci	old_chunk = old_chunk + 1 - linear_chunks;
107462306a36Sopenharmony_ci	new_chunk = new_chunk + 1 - linear_chunks;
107562306a36Sopenharmony_ci
107662306a36Sopenharmony_ci	/*
107762306a36Sopenharmony_ci	 * Use one (potentially large) I/O to copy all 'linear_chunks'
107862306a36Sopenharmony_ci	 * from the exception store to the origin
107962306a36Sopenharmony_ci	 */
108062306a36Sopenharmony_ci	io_size = linear_chunks * s->store->chunk_size;
108162306a36Sopenharmony_ci
108262306a36Sopenharmony_ci	dest.bdev = s->origin->bdev;
108362306a36Sopenharmony_ci	dest.sector = chunk_to_sector(s->store, old_chunk);
108462306a36Sopenharmony_ci	dest.count = min(io_size, get_dev_size(dest.bdev) - dest.sector);
108562306a36Sopenharmony_ci
108662306a36Sopenharmony_ci	src.bdev = s->cow->bdev;
108762306a36Sopenharmony_ci	src.sector = chunk_to_sector(s->store, new_chunk);
108862306a36Sopenharmony_ci	src.count = dest.count;
108962306a36Sopenharmony_ci
109062306a36Sopenharmony_ci	/*
109162306a36Sopenharmony_ci	 * Reallocate any exceptions needed in other snapshots then
109262306a36Sopenharmony_ci	 * wait for the pending exceptions to complete.
109362306a36Sopenharmony_ci	 * Each time any pending exception (globally on the system)
109462306a36Sopenharmony_ci	 * completes we are woken and repeat the process to find out
109562306a36Sopenharmony_ci	 * if we can proceed.  While this may not seem a particularly
109662306a36Sopenharmony_ci	 * efficient algorithm, it is not expected to have any
109762306a36Sopenharmony_ci	 * significant impact on performance.
109862306a36Sopenharmony_ci	 */
109962306a36Sopenharmony_ci	previous_count = read_pending_exceptions_done_count();
110062306a36Sopenharmony_ci	while (origin_write_extent(s, dest.sector, io_size)) {
110162306a36Sopenharmony_ci		wait_event(_pending_exceptions_done,
110262306a36Sopenharmony_ci			   (read_pending_exceptions_done_count() !=
110362306a36Sopenharmony_ci			    previous_count));
110462306a36Sopenharmony_ci		/* Retry after the wait, until all exceptions are done. */
110562306a36Sopenharmony_ci		previous_count = read_pending_exceptions_done_count();
110662306a36Sopenharmony_ci	}
110762306a36Sopenharmony_ci
110862306a36Sopenharmony_ci	down_write(&s->lock);
110962306a36Sopenharmony_ci	s->first_merging_chunk = old_chunk;
111062306a36Sopenharmony_ci	s->num_merging_chunks = linear_chunks;
111162306a36Sopenharmony_ci	up_write(&s->lock);
111262306a36Sopenharmony_ci
111362306a36Sopenharmony_ci	/* Wait until writes to all 'linear_chunks' drain */
111462306a36Sopenharmony_ci	for (i = 0; i < linear_chunks; i++)
111562306a36Sopenharmony_ci		__check_for_conflicting_io(s, old_chunk + i);
111662306a36Sopenharmony_ci
111762306a36Sopenharmony_ci	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
111862306a36Sopenharmony_ci	return;
111962306a36Sopenharmony_ci
112062306a36Sopenharmony_cishut:
112162306a36Sopenharmony_ci	merge_shutdown(s);
112262306a36Sopenharmony_ci}
112362306a36Sopenharmony_ci
112462306a36Sopenharmony_cistatic void error_bios(struct bio *bio);
112562306a36Sopenharmony_ci
112662306a36Sopenharmony_cistatic void merge_callback(int read_err, unsigned long write_err, void *context)
112762306a36Sopenharmony_ci{
112862306a36Sopenharmony_ci	struct dm_snapshot *s = context;
112962306a36Sopenharmony_ci	struct bio *b = NULL;
113062306a36Sopenharmony_ci
113162306a36Sopenharmony_ci	if (read_err || write_err) {
113262306a36Sopenharmony_ci		if (read_err)
113362306a36Sopenharmony_ci			DMERR("Read error: shutting down merge.");
113462306a36Sopenharmony_ci		else
113562306a36Sopenharmony_ci			DMERR("Write error: shutting down merge.");
113662306a36Sopenharmony_ci		goto shut;
113762306a36Sopenharmony_ci	}
113862306a36Sopenharmony_ci
113962306a36Sopenharmony_ci	if (blkdev_issue_flush(s->origin->bdev) < 0) {
114062306a36Sopenharmony_ci		DMERR("Flush after merge failed: shutting down merge");
114162306a36Sopenharmony_ci		goto shut;
114262306a36Sopenharmony_ci	}
114362306a36Sopenharmony_ci
114462306a36Sopenharmony_ci	if (s->store->type->commit_merge(s->store,
114562306a36Sopenharmony_ci					 s->num_merging_chunks) < 0) {
114662306a36Sopenharmony_ci		DMERR("Write error in exception store: shutting down merge");
114762306a36Sopenharmony_ci		goto shut;
114862306a36Sopenharmony_ci	}
114962306a36Sopenharmony_ci
115062306a36Sopenharmony_ci	if (remove_single_exception_chunk(s) < 0)
115162306a36Sopenharmony_ci		goto shut;
115262306a36Sopenharmony_ci
115362306a36Sopenharmony_ci	snapshot_merge_next_chunks(s);
115462306a36Sopenharmony_ci
115562306a36Sopenharmony_ci	return;
115662306a36Sopenharmony_ci
115762306a36Sopenharmony_cishut:
115862306a36Sopenharmony_ci	down_write(&s->lock);
115962306a36Sopenharmony_ci	s->merge_failed = true;
116062306a36Sopenharmony_ci	b = __release_queued_bios_after_merge(s);
116162306a36Sopenharmony_ci	up_write(&s->lock);
116262306a36Sopenharmony_ci	error_bios(b);
116362306a36Sopenharmony_ci
116462306a36Sopenharmony_ci	merge_shutdown(s);
116562306a36Sopenharmony_ci}
116662306a36Sopenharmony_ci
116762306a36Sopenharmony_cistatic void start_merge(struct dm_snapshot *s)
116862306a36Sopenharmony_ci{
116962306a36Sopenharmony_ci	if (!test_and_set_bit(RUNNING_MERGE, &s->state_bits))
117062306a36Sopenharmony_ci		snapshot_merge_next_chunks(s);
117162306a36Sopenharmony_ci}
117262306a36Sopenharmony_ci
117362306a36Sopenharmony_ci/*
117462306a36Sopenharmony_ci * Stop the merging process and wait until it finishes.
117562306a36Sopenharmony_ci */
117662306a36Sopenharmony_cistatic void stop_merge(struct dm_snapshot *s)
117762306a36Sopenharmony_ci{
117862306a36Sopenharmony_ci	set_bit(SHUTDOWN_MERGE, &s->state_bits);
117962306a36Sopenharmony_ci	wait_on_bit(&s->state_bits, RUNNING_MERGE, TASK_UNINTERRUPTIBLE);
118062306a36Sopenharmony_ci	clear_bit(SHUTDOWN_MERGE, &s->state_bits);
118162306a36Sopenharmony_ci}
118262306a36Sopenharmony_ci
118362306a36Sopenharmony_cistatic int parse_snapshot_features(struct dm_arg_set *as, struct dm_snapshot *s,
118462306a36Sopenharmony_ci				   struct dm_target *ti)
118562306a36Sopenharmony_ci{
118662306a36Sopenharmony_ci	int r;
118762306a36Sopenharmony_ci	unsigned int argc;
118862306a36Sopenharmony_ci	const char *arg_name;
118962306a36Sopenharmony_ci
119062306a36Sopenharmony_ci	static const struct dm_arg _args[] = {
119162306a36Sopenharmony_ci		{0, 2, "Invalid number of feature arguments"},
119262306a36Sopenharmony_ci	};
119362306a36Sopenharmony_ci
119462306a36Sopenharmony_ci	/*
119562306a36Sopenharmony_ci	 * No feature arguments supplied.
119662306a36Sopenharmony_ci	 */
119762306a36Sopenharmony_ci	if (!as->argc)
119862306a36Sopenharmony_ci		return 0;
119962306a36Sopenharmony_ci
120062306a36Sopenharmony_ci	r = dm_read_arg_group(_args, as, &argc, &ti->error);
120162306a36Sopenharmony_ci	if (r)
120262306a36Sopenharmony_ci		return -EINVAL;
120362306a36Sopenharmony_ci
120462306a36Sopenharmony_ci	while (argc && !r) {
120562306a36Sopenharmony_ci		arg_name = dm_shift_arg(as);
120662306a36Sopenharmony_ci		argc--;
120762306a36Sopenharmony_ci
120862306a36Sopenharmony_ci		if (!strcasecmp(arg_name, "discard_zeroes_cow"))
120962306a36Sopenharmony_ci			s->discard_zeroes_cow = true;
121062306a36Sopenharmony_ci
121162306a36Sopenharmony_ci		else if (!strcasecmp(arg_name, "discard_passdown_origin"))
121262306a36Sopenharmony_ci			s->discard_passdown_origin = true;
121362306a36Sopenharmony_ci
121462306a36Sopenharmony_ci		else {
121562306a36Sopenharmony_ci			ti->error = "Unrecognised feature requested";
121662306a36Sopenharmony_ci			r = -EINVAL;
121762306a36Sopenharmony_ci			break;
121862306a36Sopenharmony_ci		}
121962306a36Sopenharmony_ci	}
122062306a36Sopenharmony_ci
122162306a36Sopenharmony_ci	if (!s->discard_zeroes_cow && s->discard_passdown_origin) {
122262306a36Sopenharmony_ci		/*
122362306a36Sopenharmony_ci		 * TODO: really these are disjoint.. but ti->num_discard_bios
122462306a36Sopenharmony_ci		 * and dm_bio_get_target_bio_nr() require rigid constraints.
122562306a36Sopenharmony_ci		 */
122662306a36Sopenharmony_ci		ti->error = "discard_passdown_origin feature depends on discard_zeroes_cow";
122762306a36Sopenharmony_ci		r = -EINVAL;
122862306a36Sopenharmony_ci	}
122962306a36Sopenharmony_ci
123062306a36Sopenharmony_ci	return r;
123162306a36Sopenharmony_ci}
123262306a36Sopenharmony_ci
123362306a36Sopenharmony_ci/*
123462306a36Sopenharmony_ci * Construct a snapshot mapping:
123562306a36Sopenharmony_ci * <origin_dev> <COW-dev> <p|po|n> <chunk-size> [<# feature args> [<arg>]*]
 *
 * The same constructor also serves the snapshot-merge target; in that case
 * the origin is opened with BLK_OPEN_WRITE and two flush bios are requested.
 *
 * @ti:   target being constructed; on success ti->private points at the
 *        newly allocated struct dm_snapshot.
 * @argc: number of entries in @argv; fewer than 4 is rejected.
 * @argv: table arguments in the order shown above.
 *
 * Returns 0 on success or a negative errno.  The failure paths visible
 * here set ti->error, except where a callee is relied on to do so.
123662306a36Sopenharmony_ci */
123762306a36Sopenharmony_cistatic int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
123862306a36Sopenharmony_ci{
123962306a36Sopenharmony_ci	struct dm_snapshot *s;
124062306a36Sopenharmony_ci	struct dm_arg_set as;
124162306a36Sopenharmony_ci	int i;
124262306a36Sopenharmony_ci	int r = -EINVAL;
124362306a36Sopenharmony_ci	char *origin_path, *cow_path;
124462306a36Sopenharmony_ci	unsigned int args_used, num_flush_bios = 1;
124562306a36Sopenharmony_ci	blk_mode_t origin_mode = BLK_OPEN_READ;
124662306a36Sopenharmony_ci
124762306a36Sopenharmony_ci	if (argc < 4) {
124862306a36Sopenharmony_ci		ti->error = "requires 4 or more arguments";
124962306a36Sopenharmony_ci		r = -EINVAL;
125062306a36Sopenharmony_ci		goto bad;
125162306a36Sopenharmony_ci	}
125262306a36Sopenharmony_ci
	/* A merge target opens the origin for writing and uses two flush bios. */
125362306a36Sopenharmony_ci	if (dm_target_is_snapshot_merge(ti)) {
125462306a36Sopenharmony_ci		num_flush_bios = 2;
125562306a36Sopenharmony_ci		origin_mode = BLK_OPEN_WRITE;
125662306a36Sopenharmony_ci	}
125762306a36Sopenharmony_ci
125862306a36Sopenharmony_ci	s = kzalloc(sizeof(*s), GFP_KERNEL);
125962306a36Sopenharmony_ci	if (!s) {
126062306a36Sopenharmony_ci		ti->error = "Cannot allocate private snapshot structure";
126162306a36Sopenharmony_ci		r = -ENOMEM;
126262306a36Sopenharmony_ci		goto bad;
126362306a36Sopenharmony_ci	}
126462306a36Sopenharmony_ci
	/*
	 * Feature arguments, if present, follow the four mandatory ones, so
	 * skip those in the arg set before parsing features.
	 */
126562306a36Sopenharmony_ci	as.argc = argc;
126662306a36Sopenharmony_ci	as.argv = argv;
126762306a36Sopenharmony_ci	dm_consume_args(&as, 4);
126862306a36Sopenharmony_ci	r = parse_snapshot_features(&as, s, ti);
126962306a36Sopenharmony_ci	if (r)
127062306a36Sopenharmony_ci		goto bad_features;
127162306a36Sopenharmony_ci
127262306a36Sopenharmony_ci	origin_path = argv[0];
127362306a36Sopenharmony_ci	argv++;
127462306a36Sopenharmony_ci	argc--;
127562306a36Sopenharmony_ci
127662306a36Sopenharmony_ci	r = dm_get_device(ti, origin_path, origin_mode, &s->origin);
127762306a36Sopenharmony_ci	if (r) {
127862306a36Sopenharmony_ci		ti->error = "Cannot get origin device";
127962306a36Sopenharmony_ci		goto bad_origin;
128062306a36Sopenharmony_ci	}
128162306a36Sopenharmony_ci
128262306a36Sopenharmony_ci	cow_path = argv[0];
128362306a36Sopenharmony_ci	argv++;
128462306a36Sopenharmony_ci	argc--;
128562306a36Sopenharmony_ci
	/* The COW device is opened with the table's own mode. */
128662306a36Sopenharmony_ci	r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow);
128762306a36Sopenharmony_ci	if (r) {
128862306a36Sopenharmony_ci		ti->error = "Cannot get COW device";
128962306a36Sopenharmony_ci		goto bad_cow;
129062306a36Sopenharmony_ci	}
129162306a36Sopenharmony_ci	if (s->cow->bdev && s->cow->bdev == s->origin->bdev) {
129262306a36Sopenharmony_ci		ti->error = "COW device cannot be the same as origin device";
129362306a36Sopenharmony_ci		r = -EINVAL;
129462306a36Sopenharmony_ci		goto bad_store;
129562306a36Sopenharmony_ci	}
129662306a36Sopenharmony_ci
	/*
	 * The remaining arguments describe the exception store.  Whatever
	 * error the store reports, the constructor fails with -EINVAL.
	 */
129762306a36Sopenharmony_ci	r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store);
129862306a36Sopenharmony_ci	if (r) {
129962306a36Sopenharmony_ci		ti->error = "Couldn't create exception store";
130062306a36Sopenharmony_ci		r = -EINVAL;
130162306a36Sopenharmony_ci		goto bad_store;
130262306a36Sopenharmony_ci	}
130362306a36Sopenharmony_ci
130462306a36Sopenharmony_ci	argv += args_used;
130562306a36Sopenharmony_ci	argc -= args_used;
130662306a36Sopenharmony_ci
	/* Initialise the runtime state of the snapshot. */
130762306a36Sopenharmony_ci	s->ti = ti;
130862306a36Sopenharmony_ci	s->valid = 1;
130962306a36Sopenharmony_ci	s->snapshot_overflowed = 0;
131062306a36Sopenharmony_ci	s->active = 0;
131162306a36Sopenharmony_ci	atomic_set(&s->pending_exceptions_count, 0);
131262306a36Sopenharmony_ci	spin_lock_init(&s->pe_allocation_lock);
131362306a36Sopenharmony_ci	s->exception_start_sequence = 0;
131462306a36Sopenharmony_ci	s->exception_complete_sequence = 0;
131562306a36Sopenharmony_ci	s->out_of_order_tree = RB_ROOT;
131662306a36Sopenharmony_ci	init_rwsem(&s->lock);
131762306a36Sopenharmony_ci	INIT_LIST_HEAD(&s->list);
131862306a36Sopenharmony_ci	spin_lock_init(&s->pe_lock);
131962306a36Sopenharmony_ci	s->state_bits = 0;
132062306a36Sopenharmony_ci	s->merge_failed = false;
132162306a36Sopenharmony_ci	s->first_merging_chunk = 0;
132262306a36Sopenharmony_ci	s->num_merging_chunks = 0;
132362306a36Sopenharmony_ci	bio_list_init(&s->bios_queued_during_merge);
132462306a36Sopenharmony_ci
132562306a36Sopenharmony_ci	/* Allocate hash table for COW data */
132662306a36Sopenharmony_ci	if (init_hash_tables(s)) {
132762306a36Sopenharmony_ci		ti->error = "Unable to allocate hash table space";
132862306a36Sopenharmony_ci		r = -ENOMEM;
132962306a36Sopenharmony_ci		goto bad_hash_tables;
133062306a36Sopenharmony_ci	}
133162306a36Sopenharmony_ci
133262306a36Sopenharmony_ci	init_waitqueue_head(&s->in_progress_wait);
133362306a36Sopenharmony_ci
133462306a36Sopenharmony_ci	s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
133562306a36Sopenharmony_ci	if (IS_ERR(s->kcopyd_client)) {
133662306a36Sopenharmony_ci		r = PTR_ERR(s->kcopyd_client);
133762306a36Sopenharmony_ci		ti->error = "Could not create kcopyd client";
133862306a36Sopenharmony_ci		goto bad_kcopyd;
133962306a36Sopenharmony_ci	}
134062306a36Sopenharmony_ci
134162306a36Sopenharmony_ci	r = mempool_init_slab_pool(&s->pending_pool, MIN_IOS, pending_cache);
134262306a36Sopenharmony_ci	if (r) {
134362306a36Sopenharmony_ci		ti->error = "Could not allocate mempool for pending exceptions";
134462306a36Sopenharmony_ci		goto bad_pending_pool;
134562306a36Sopenharmony_ci	}
134662306a36Sopenharmony_ci
134762306a36Sopenharmony_ci	for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
134862306a36Sopenharmony_ci		INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]);
134962306a36Sopenharmony_ci
135062306a36Sopenharmony_ci	spin_lock_init(&s->tracked_chunk_lock);
135162306a36Sopenharmony_ci
	/*
	 * Publish the snapshot on the target.  Discard bios are only
	 * requested when discard_zeroes_cow is set; two of them when
	 * discard_passdown_origin is set as well.
	 */
135262306a36Sopenharmony_ci	ti->private = s;
135362306a36Sopenharmony_ci	ti->num_flush_bios = num_flush_bios;
135462306a36Sopenharmony_ci	if (s->discard_zeroes_cow)
135562306a36Sopenharmony_ci		ti->num_discard_bios = (s->discard_passdown_origin ? 2 : 1);
135662306a36Sopenharmony_ci	ti->per_io_data_size = sizeof(struct dm_snap_tracked_chunk);
135762306a36Sopenharmony_ci
135862306a36Sopenharmony_ci	/* Add snapshot to the list of snapshots for this origin */
135962306a36Sopenharmony_ci	/* Exceptions aren't triggered till snapshot_resume() is called */
136062306a36Sopenharmony_ci	r = register_snapshot(s);
136162306a36Sopenharmony_ci	if (r == -ENOMEM) {
136262306a36Sopenharmony_ci		ti->error = "Snapshot origin struct allocation failed";
136362306a36Sopenharmony_ci		goto bad_load_and_register;
136462306a36Sopenharmony_ci	} else if (r < 0) {
136562306a36Sopenharmony_ci		/* invalid handover, register_snapshot has set ti->error */
136662306a36Sopenharmony_ci		goto bad_load_and_register;
136762306a36Sopenharmony_ci	}
136862306a36Sopenharmony_ci
136962306a36Sopenharmony_ci	/*
137062306a36Sopenharmony_ci	 * Metadata must only be loaded into one table at once, so skip this
137162306a36Sopenharmony_ci	 * if metadata will be handed over during resume.
137262306a36Sopenharmony_ci	 * Chunk size will be set during the handover - set it to zero to
137362306a36Sopenharmony_ci	 * ensure it's ignored.
137462306a36Sopenharmony_ci	 */
137562306a36Sopenharmony_ci	if (r > 0) {
137662306a36Sopenharmony_ci		s->store->chunk_size = 0;
137762306a36Sopenharmony_ci		return 0;
137862306a36Sopenharmony_ci	}
137962306a36Sopenharmony_ci
	/*
	 * Load existing exceptions through dm_add_exception().  A positive
	 * return is not fatal: the snapshot is kept but flagged invalid.
	 */
138062306a36Sopenharmony_ci	r = s->store->type->read_metadata(s->store, dm_add_exception,
138162306a36Sopenharmony_ci					  (void *)s);
138262306a36Sopenharmony_ci	if (r < 0) {
138362306a36Sopenharmony_ci		ti->error = "Failed to read snapshot metadata";
138462306a36Sopenharmony_ci		goto bad_read_metadata;
138562306a36Sopenharmony_ci	} else if (r > 0) {
138662306a36Sopenharmony_ci		s->valid = 0;
138762306a36Sopenharmony_ci		DMWARN("Snapshot is marked invalid.");
138862306a36Sopenharmony_ci	}
138962306a36Sopenharmony_ci
139062306a36Sopenharmony_ci	if (!s->store->chunk_size) {
139162306a36Sopenharmony_ci		ti->error = "Chunk size not set";
139262306a36Sopenharmony_ci		r = -EINVAL;
139362306a36Sopenharmony_ci		goto bad_read_metadata;
139462306a36Sopenharmony_ci	}
139562306a36Sopenharmony_ci
	/* Pass the chunk size as the target's maximum I/O length. */
139662306a36Sopenharmony_ci	r = dm_set_target_max_io_len(ti, s->store->chunk_size);
139762306a36Sopenharmony_ci	if (r)
139862306a36Sopenharmony_ci		goto bad_read_metadata;
139962306a36Sopenharmony_ci
140062306a36Sopenharmony_ci	return 0;
140162306a36Sopenharmony_ci
	/*
	 * Error unwinding: each label releases what was set up before the
	 * failing step, in reverse order of acquisition.
	 */
140262306a36Sopenharmony_cibad_read_metadata:
140362306a36Sopenharmony_ci	unregister_snapshot(s);
140462306a36Sopenharmony_cibad_load_and_register:
140562306a36Sopenharmony_ci	mempool_exit(&s->pending_pool);
140662306a36Sopenharmony_cibad_pending_pool:
140762306a36Sopenharmony_ci	dm_kcopyd_client_destroy(s->kcopyd_client);
140862306a36Sopenharmony_cibad_kcopyd:
140962306a36Sopenharmony_ci	dm_exception_table_exit(&s->pending, pending_cache);
141062306a36Sopenharmony_ci	dm_exception_table_exit(&s->complete, exception_cache);
141162306a36Sopenharmony_cibad_hash_tables:
141262306a36Sopenharmony_ci	dm_exception_store_destroy(s->store);
141362306a36Sopenharmony_cibad_store:
141462306a36Sopenharmony_ci	dm_put_device(ti, s->cow);
141562306a36Sopenharmony_cibad_cow:
141662306a36Sopenharmony_ci	dm_put_device(ti, s->origin);
141762306a36Sopenharmony_cibad_origin:
141862306a36Sopenharmony_cibad_features:
141962306a36Sopenharmony_ci	kfree(s);
142062306a36Sopenharmony_cibad:
142162306a36Sopenharmony_ci	return r;
142262306a36Sopenharmony_ci}
142362306a36Sopenharmony_ci
142462306a36Sopenharmony_cistatic void __free_exceptions(struct dm_snapshot *s)
142562306a36Sopenharmony_ci{
142662306a36Sopenharmony_ci	dm_kcopyd_client_destroy(s->kcopyd_client);
142762306a36Sopenharmony_ci	s->kcopyd_client = NULL;
142862306a36Sopenharmony_ci
142962306a36Sopenharmony_ci	dm_exception_table_exit(&s->pending, pending_cache);
143062306a36Sopenharmony_ci	dm_exception_table_exit(&s->complete, exception_cache);
143162306a36Sopenharmony_ci}
143262306a36Sopenharmony_ci
143362306a36Sopenharmony_cistatic void __handover_exceptions(struct dm_snapshot *snap_src,
143462306a36Sopenharmony_ci				  struct dm_snapshot *snap_dest)
143562306a36Sopenharmony_ci{
143662306a36Sopenharmony_ci	union {
143762306a36Sopenharmony_ci		struct dm_exception_table table_swap;
143862306a36Sopenharmony_ci		struct dm_exception_store *store_swap;
143962306a36Sopenharmony_ci	} u;
144062306a36Sopenharmony_ci
144162306a36Sopenharmony_ci	/*
144262306a36Sopenharmony_ci	 * Swap all snapshot context information between the two instances.
144362306a36Sopenharmony_ci	 */
144462306a36Sopenharmony_ci	u.table_swap = snap_dest->complete;
144562306a36Sopenharmony_ci	snap_dest->complete = snap_src->complete;
144662306a36Sopenharmony_ci	snap_src->complete = u.table_swap;
144762306a36Sopenharmony_ci
144862306a36Sopenharmony_ci	u.store_swap = snap_dest->store;
144962306a36Sopenharmony_ci	snap_dest->store = snap_src->store;
145062306a36Sopenharmony_ci	snap_dest->store->userspace_supports_overflow = u.store_swap->userspace_supports_overflow;
145162306a36Sopenharmony_ci	snap_src->store = u.store_swap;
145262306a36Sopenharmony_ci
145362306a36Sopenharmony_ci	snap_dest->store->snap = snap_dest;
145462306a36Sopenharmony_ci	snap_src->store->snap = snap_src;
145562306a36Sopenharmony_ci
145662306a36Sopenharmony_ci	snap_dest->ti->max_io_len = snap_dest->store->chunk_size;
145762306a36Sopenharmony_ci	snap_dest->valid = snap_src->valid;
145862306a36Sopenharmony_ci	snap_dest->snapshot_overflowed = snap_src->snapshot_overflowed;
145962306a36Sopenharmony_ci
146062306a36Sopenharmony_ci	/*
146162306a36Sopenharmony_ci	 * Set source invalid to ensure it receives no further I/O.
146262306a36Sopenharmony_ci	 */
146362306a36Sopenharmony_ci	snap_src->valid = 0;
146462306a36Sopenharmony_ci}
146562306a36Sopenharmony_ci
146662306a36Sopenharmony_cistatic void snapshot_dtr(struct dm_target *ti)
146762306a36Sopenharmony_ci{
146862306a36Sopenharmony_ci#ifdef CONFIG_DM_DEBUG
146962306a36Sopenharmony_ci	int i;
147062306a36Sopenharmony_ci#endif
147162306a36Sopenharmony_ci	struct dm_snapshot *s = ti->private;
147262306a36Sopenharmony_ci	struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
147362306a36Sopenharmony_ci
147462306a36Sopenharmony_ci	down_read(&_origins_lock);
147562306a36Sopenharmony_ci	/* Check whether exception handover must be cancelled */
147662306a36Sopenharmony_ci	(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
147762306a36Sopenharmony_ci	if (snap_src && snap_dest && (s == snap_src)) {
147862306a36Sopenharmony_ci		down_write(&snap_dest->lock);
147962306a36Sopenharmony_ci		snap_dest->valid = 0;
148062306a36Sopenharmony_ci		up_write(&snap_dest->lock);
148162306a36Sopenharmony_ci		DMERR("Cancelling snapshot handover.");
148262306a36Sopenharmony_ci	}
148362306a36Sopenharmony_ci	up_read(&_origins_lock);
148462306a36Sopenharmony_ci
148562306a36Sopenharmony_ci	if (dm_target_is_snapshot_merge(ti))
148662306a36Sopenharmony_ci		stop_merge(s);
148762306a36Sopenharmony_ci
148862306a36Sopenharmony_ci	/* Prevent further origin writes from using this snapshot. */
148962306a36Sopenharmony_ci	/* After this returns there can be no new kcopyd jobs. */
149062306a36Sopenharmony_ci	unregister_snapshot(s);
149162306a36Sopenharmony_ci
149262306a36Sopenharmony_ci	while (atomic_read(&s->pending_exceptions_count))
149362306a36Sopenharmony_ci		fsleep(1000);
149462306a36Sopenharmony_ci	/*
149562306a36Sopenharmony_ci	 * Ensure instructions in mempool_exit aren't reordered
149662306a36Sopenharmony_ci	 * before atomic_read.
149762306a36Sopenharmony_ci	 */
149862306a36Sopenharmony_ci	smp_mb();
149962306a36Sopenharmony_ci
150062306a36Sopenharmony_ci#ifdef CONFIG_DM_DEBUG
150162306a36Sopenharmony_ci	for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
150262306a36Sopenharmony_ci		BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i]));
150362306a36Sopenharmony_ci#endif
150462306a36Sopenharmony_ci
150562306a36Sopenharmony_ci	__free_exceptions(s);
150662306a36Sopenharmony_ci
150762306a36Sopenharmony_ci	mempool_exit(&s->pending_pool);
150862306a36Sopenharmony_ci
150962306a36Sopenharmony_ci	dm_exception_store_destroy(s->store);
151062306a36Sopenharmony_ci
151162306a36Sopenharmony_ci	dm_put_device(ti, s->cow);
151262306a36Sopenharmony_ci
151362306a36Sopenharmony_ci	dm_put_device(ti, s->origin);
151462306a36Sopenharmony_ci
151562306a36Sopenharmony_ci	WARN_ON(s->in_progress);
151662306a36Sopenharmony_ci
151762306a36Sopenharmony_ci	kfree(s);
151862306a36Sopenharmony_ci}
151962306a36Sopenharmony_ci
152062306a36Sopenharmony_cistatic void account_start_copy(struct dm_snapshot *s)
152162306a36Sopenharmony_ci{
152262306a36Sopenharmony_ci	spin_lock(&s->in_progress_wait.lock);
152362306a36Sopenharmony_ci	s->in_progress++;
152462306a36Sopenharmony_ci	spin_unlock(&s->in_progress_wait.lock);
152562306a36Sopenharmony_ci}
152662306a36Sopenharmony_ci
152762306a36Sopenharmony_cistatic void account_end_copy(struct dm_snapshot *s)
152862306a36Sopenharmony_ci{
152962306a36Sopenharmony_ci	spin_lock(&s->in_progress_wait.lock);
153062306a36Sopenharmony_ci	BUG_ON(!s->in_progress);
153162306a36Sopenharmony_ci	s->in_progress--;
153262306a36Sopenharmony_ci	if (likely(s->in_progress <= cow_threshold) &&
153362306a36Sopenharmony_ci	    unlikely(waitqueue_active(&s->in_progress_wait)))
153462306a36Sopenharmony_ci		wake_up_locked(&s->in_progress_wait);
153562306a36Sopenharmony_ci	spin_unlock(&s->in_progress_wait.lock);
153662306a36Sopenharmony_ci}
153762306a36Sopenharmony_ci
153862306a36Sopenharmony_cistatic bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins)
153962306a36Sopenharmony_ci{
154062306a36Sopenharmony_ci	if (unlikely(s->in_progress > cow_threshold)) {
154162306a36Sopenharmony_ci		spin_lock(&s->in_progress_wait.lock);
154262306a36Sopenharmony_ci		if (likely(s->in_progress > cow_threshold)) {
154362306a36Sopenharmony_ci			/*
154462306a36Sopenharmony_ci			 * NOTE: this throttle doesn't account for whether
154562306a36Sopenharmony_ci			 * the caller is servicing an IO that will trigger a COW
154662306a36Sopenharmony_ci			 * so excess throttling may result for chunks not required
154762306a36Sopenharmony_ci			 * to be COW'd.  But if cow_threshold was reached, extra
154862306a36Sopenharmony_ci			 * throttling is unlikely to negatively impact performance.
154962306a36Sopenharmony_ci			 */
155062306a36Sopenharmony_ci			DECLARE_WAITQUEUE(wait, current);
155162306a36Sopenharmony_ci
155262306a36Sopenharmony_ci			__add_wait_queue(&s->in_progress_wait, &wait);
155362306a36Sopenharmony_ci			__set_current_state(TASK_UNINTERRUPTIBLE);
155462306a36Sopenharmony_ci			spin_unlock(&s->in_progress_wait.lock);
155562306a36Sopenharmony_ci			if (unlock_origins)
155662306a36Sopenharmony_ci				up_read(&_origins_lock);
155762306a36Sopenharmony_ci			io_schedule();
155862306a36Sopenharmony_ci			remove_wait_queue(&s->in_progress_wait, &wait);
155962306a36Sopenharmony_ci			return false;
156062306a36Sopenharmony_ci		}
156162306a36Sopenharmony_ci		spin_unlock(&s->in_progress_wait.lock);
156262306a36Sopenharmony_ci	}
156362306a36Sopenharmony_ci	return true;
156462306a36Sopenharmony_ci}
156562306a36Sopenharmony_ci
156662306a36Sopenharmony_ci/*
156762306a36Sopenharmony_ci * Flush a list of buffers.
156862306a36Sopenharmony_ci */
156962306a36Sopenharmony_cistatic void flush_bios(struct bio *bio)
157062306a36Sopenharmony_ci{
157162306a36Sopenharmony_ci	struct bio *n;
157262306a36Sopenharmony_ci
157362306a36Sopenharmony_ci	while (bio) {
157462306a36Sopenharmony_ci		n = bio->bi_next;
157562306a36Sopenharmony_ci		bio->bi_next = NULL;
157662306a36Sopenharmony_ci		submit_bio_noacct(bio);
157762306a36Sopenharmony_ci		bio = n;
157862306a36Sopenharmony_ci	}
157962306a36Sopenharmony_ci}
158062306a36Sopenharmony_ci
158162306a36Sopenharmony_cistatic int do_origin(struct dm_dev *origin, struct bio *bio, bool limit);
158262306a36Sopenharmony_ci
158362306a36Sopenharmony_ci/*
158462306a36Sopenharmony_ci * Flush a list of buffers.
158562306a36Sopenharmony_ci */
158662306a36Sopenharmony_cistatic void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
158762306a36Sopenharmony_ci{
158862306a36Sopenharmony_ci	struct bio *n;
158962306a36Sopenharmony_ci	int r;
159062306a36Sopenharmony_ci
159162306a36Sopenharmony_ci	while (bio) {
159262306a36Sopenharmony_ci		n = bio->bi_next;
159362306a36Sopenharmony_ci		bio->bi_next = NULL;
159462306a36Sopenharmony_ci		r = do_origin(s->origin, bio, false);
159562306a36Sopenharmony_ci		if (r == DM_MAPIO_REMAPPED)
159662306a36Sopenharmony_ci			submit_bio_noacct(bio);
159762306a36Sopenharmony_ci		bio = n;
159862306a36Sopenharmony_ci	}
159962306a36Sopenharmony_ci}
160062306a36Sopenharmony_ci
160162306a36Sopenharmony_ci/*
160262306a36Sopenharmony_ci * Error a list of buffers.
160362306a36Sopenharmony_ci */
160462306a36Sopenharmony_cistatic void error_bios(struct bio *bio)
160562306a36Sopenharmony_ci{
160662306a36Sopenharmony_ci	struct bio *n;
160762306a36Sopenharmony_ci
160862306a36Sopenharmony_ci	while (bio) {
160962306a36Sopenharmony_ci		n = bio->bi_next;
161062306a36Sopenharmony_ci		bio->bi_next = NULL;
161162306a36Sopenharmony_ci		bio_io_error(bio);
161262306a36Sopenharmony_ci		bio = n;
161362306a36Sopenharmony_ci	}
161462306a36Sopenharmony_ci}
161562306a36Sopenharmony_ci
161662306a36Sopenharmony_cistatic void __invalidate_snapshot(struct dm_snapshot *s, int err)
161762306a36Sopenharmony_ci{
161862306a36Sopenharmony_ci	if (!s->valid)
161962306a36Sopenharmony_ci		return;
162062306a36Sopenharmony_ci
162162306a36Sopenharmony_ci	if (err == -EIO)
162262306a36Sopenharmony_ci		DMERR("Invalidating snapshot: Error reading/writing.");
162362306a36Sopenharmony_ci	else if (err == -ENOMEM)
162462306a36Sopenharmony_ci		DMERR("Invalidating snapshot: Unable to allocate exception.");
162562306a36Sopenharmony_ci
162662306a36Sopenharmony_ci	if (s->store->type->drop_snapshot)
162762306a36Sopenharmony_ci		s->store->type->drop_snapshot(s->store);
162862306a36Sopenharmony_ci
162962306a36Sopenharmony_ci	s->valid = 0;
163062306a36Sopenharmony_ci
163162306a36Sopenharmony_ci	dm_table_event(s->ti->table);
163262306a36Sopenharmony_ci}
163362306a36Sopenharmony_ci
163462306a36Sopenharmony_cistatic void invalidate_snapshot(struct dm_snapshot *s, int err)
163562306a36Sopenharmony_ci{
163662306a36Sopenharmony_ci	down_write(&s->lock);
163762306a36Sopenharmony_ci	__invalidate_snapshot(s, err);
163862306a36Sopenharmony_ci	up_write(&s->lock);
163962306a36Sopenharmony_ci}
164062306a36Sopenharmony_ci
/*
 * Finish a pending exception.  complete_exception() passes this function
 * to the store's commit_exception() as the completion callback.
 *
 * @context: the struct dm_snap_pending_exception being finished.
 * @success: zero if the copy or commit failed, non-zero otherwise.
 *
 * On success a completed exception is inserted into s->complete.  On any
 * failure the snapshot is invalidated (or found already invalid) and the
 * queued snapshot bios are errored instead of submitted.  In every case
 * the pending exception is removed from its table, queued origin bios
 * are retried and the pending exception is freed.
 */
164162306a36Sopenharmony_cistatic void pending_complete(void *context, int success)
164262306a36Sopenharmony_ci{
164362306a36Sopenharmony_ci	struct dm_snap_pending_exception *pe = context;
164462306a36Sopenharmony_ci	struct dm_exception *e;
164562306a36Sopenharmony_ci	struct dm_snapshot *s = pe->snap;
164662306a36Sopenharmony_ci	struct bio *origin_bios = NULL;
164762306a36Sopenharmony_ci	struct bio *snapshot_bios = NULL;
164862306a36Sopenharmony_ci	struct bio *full_bio = NULL;
164962306a36Sopenharmony_ci	struct dm_exception_table_lock lock;
165062306a36Sopenharmony_ci	int error = 0;
165162306a36Sopenharmony_ci
165262306a36Sopenharmony_ci	dm_exception_table_lock_init(s, pe->e.old_chunk, &lock);
165362306a36Sopenharmony_ci
165462306a36Sopenharmony_ci	if (!success) {
165562306a36Sopenharmony_ci		/* Read/write error - snapshot is unusable */
165662306a36Sopenharmony_ci		invalidate_snapshot(s, -EIO);
165762306a36Sopenharmony_ci		error = 1;
165862306a36Sopenharmony_ci
		/* The out: path expects the table lock to be held. */
165962306a36Sopenharmony_ci		dm_exception_table_lock(&lock);
166062306a36Sopenharmony_ci		goto out;
166162306a36Sopenharmony_ci	}
166262306a36Sopenharmony_ci
166362306a36Sopenharmony_ci	e = alloc_completed_exception(GFP_NOIO);
166462306a36Sopenharmony_ci	if (!e) {
166562306a36Sopenharmony_ci		invalidate_snapshot(s, -ENOMEM);
166662306a36Sopenharmony_ci		error = 1;
166762306a36Sopenharmony_ci
		/* The out: path expects the table lock to be held. */
166862306a36Sopenharmony_ci		dm_exception_table_lock(&lock);
166962306a36Sopenharmony_ci		goto out;
167062306a36Sopenharmony_ci	}
	/* The completed entry starts as a copy of the pending one. */
167162306a36Sopenharmony_ci	*e = pe->e;
167262306a36Sopenharmony_ci
	/*
	 * s->valid is checked with s->lock held for reading and the table
	 * lock held; an invalid snapshot gets no new completed exception.
	 */
167362306a36Sopenharmony_ci	down_read(&s->lock);
167462306a36Sopenharmony_ci	dm_exception_table_lock(&lock);
167562306a36Sopenharmony_ci	if (!s->valid) {
167662306a36Sopenharmony_ci		up_read(&s->lock);
167762306a36Sopenharmony_ci		free_completed_exception(e);
167862306a36Sopenharmony_ci		error = 1;
167962306a36Sopenharmony_ci
168062306a36Sopenharmony_ci		goto out;
168162306a36Sopenharmony_ci	}
168262306a36Sopenharmony_ci
168362306a36Sopenharmony_ci	/*
168462306a36Sopenharmony_ci	 * Add a proper exception. After inserting the completed exception all
168562306a36Sopenharmony_ci	 * subsequent snapshot reads to this chunk will be redirected to the
168662306a36Sopenharmony_ci	 * COW device.  This ensures that we do not starve. Moreover, as long
168762306a36Sopenharmony_ci	 * as the pending exception exists, neither origin writes nor snapshot
168862306a36Sopenharmony_ci	 * merging can overwrite the chunk in origin.
168962306a36Sopenharmony_ci	 */
169062306a36Sopenharmony_ci	dm_insert_exception(&s->complete, e);
169162306a36Sopenharmony_ci	up_read(&s->lock);
169262306a36Sopenharmony_ci
169362306a36Sopenharmony_ci	/* Wait for conflicting reads to drain */
	/* The table lock is dropped for the wait and re-taken for out:. */
169462306a36Sopenharmony_ci	if (__chunk_is_tracked(s, pe->e.old_chunk)) {
169562306a36Sopenharmony_ci		dm_exception_table_unlock(&lock);
169662306a36Sopenharmony_ci		__check_for_conflicting_io(s, pe->e.old_chunk);
169762306a36Sopenharmony_ci		dm_exception_table_lock(&lock);
169862306a36Sopenharmony_ci	}
169962306a36Sopenharmony_ci
170062306a36Sopenharmony_ciout:
170162306a36Sopenharmony_ci	/* Remove the in-flight exception from the list */
170262306a36Sopenharmony_ci	dm_remove_exception(&pe->e);
170362306a36Sopenharmony_ci
170462306a36Sopenharmony_ci	dm_exception_table_unlock(&lock);
170562306a36Sopenharmony_ci
	/*
	 * Take the queued bios off the pending exception and give the full
	 * bio (if any) back the end_io handler saved by start_full_bio().
	 */
170662306a36Sopenharmony_ci	snapshot_bios = bio_list_get(&pe->snapshot_bios);
170762306a36Sopenharmony_ci	origin_bios = bio_list_get(&pe->origin_bios);
170862306a36Sopenharmony_ci	full_bio = pe->full_bio;
170962306a36Sopenharmony_ci	if (full_bio)
171062306a36Sopenharmony_ci		full_bio->bi_end_io = pe->full_bio_end_io;
171162306a36Sopenharmony_ci	increment_pending_exceptions_done_count();
171262306a36Sopenharmony_ci
171362306a36Sopenharmony_ci	/* Submit any pending write bios */
171462306a36Sopenharmony_ci	if (error) {
171562306a36Sopenharmony_ci		if (full_bio)
171662306a36Sopenharmony_ci			bio_io_error(full_bio);
171762306a36Sopenharmony_ci		error_bios(snapshot_bios);
171862306a36Sopenharmony_ci	} else {
171962306a36Sopenharmony_ci		if (full_bio)
172062306a36Sopenharmony_ci			bio_endio(full_bio);
172162306a36Sopenharmony_ci		flush_bios(snapshot_bios);
172262306a36Sopenharmony_ci	}
172362306a36Sopenharmony_ci
	/* Origin bios are retried whether or not the exception succeeded. */
172462306a36Sopenharmony_ci	retry_origin_bios(s, origin_bios);
172562306a36Sopenharmony_ci
172662306a36Sopenharmony_ci	free_pending_exception(pe);
172762306a36Sopenharmony_ci}
172862306a36Sopenharmony_ci
172962306a36Sopenharmony_cistatic void complete_exception(struct dm_snap_pending_exception *pe)
173062306a36Sopenharmony_ci{
173162306a36Sopenharmony_ci	struct dm_snapshot *s = pe->snap;
173262306a36Sopenharmony_ci
173362306a36Sopenharmony_ci	/* Update the metadata if we are persistent */
173462306a36Sopenharmony_ci	s->store->type->commit_exception(s->store, &pe->e, !pe->copy_error,
173562306a36Sopenharmony_ci					 pending_complete, pe);
173662306a36Sopenharmony_ci}
173762306a36Sopenharmony_ci
173862306a36Sopenharmony_ci/*
173962306a36Sopenharmony_ci * Called when the copy I/O has finished.  kcopyd actually runs
174062306a36Sopenharmony_ci * this code so don't block.
 *
 * @read_err:  non-zero if reading the source failed.
 * @write_err: non-zero if writing the destination failed.
 * @context:   the struct dm_snap_pending_exception that was copied.
 *
 * Exceptions are completed strictly in exception_sequence order.  One
 * that finishes ahead of its predecessors is parked in
 * s->out_of_order_tree until they have finished.
174162306a36Sopenharmony_ci */
174262306a36Sopenharmony_cistatic void copy_callback(int read_err, unsigned long write_err, void *context)
174362306a36Sopenharmony_ci{
174462306a36Sopenharmony_ci	struct dm_snap_pending_exception *pe = context;
174562306a36Sopenharmony_ci	struct dm_snapshot *s = pe->snap;
174662306a36Sopenharmony_ci
	/* complete_exception() later reports !copy_error to the store. */
174762306a36Sopenharmony_ci	pe->copy_error = read_err || write_err;
174862306a36Sopenharmony_ci
174962306a36Sopenharmony_ci	if (pe->exception_sequence == s->exception_complete_sequence) {
175062306a36Sopenharmony_ci		struct rb_node *next;
175162306a36Sopenharmony_ci
		/*
		 * This is the next exception expected: complete it, then
		 * drain parked successors for as long as their sequence
		 * numbers continue without a gap.
		 */
175262306a36Sopenharmony_ci		s->exception_complete_sequence++;
175362306a36Sopenharmony_ci		complete_exception(pe);
175462306a36Sopenharmony_ci
175562306a36Sopenharmony_ci		next = rb_first(&s->out_of_order_tree);
175662306a36Sopenharmony_ci		while (next) {
175762306a36Sopenharmony_ci			pe = rb_entry(next, struct dm_snap_pending_exception,
175862306a36Sopenharmony_ci					out_of_order_node);
175962306a36Sopenharmony_ci			if (pe->exception_sequence != s->exception_complete_sequence)
176062306a36Sopenharmony_ci				break;
			/* Step to the successor before this node is erased. */
176162306a36Sopenharmony_ci			next = rb_next(next);
176262306a36Sopenharmony_ci			s->exception_complete_sequence++;
176362306a36Sopenharmony_ci			rb_erase(&pe->out_of_order_node, &s->out_of_order_tree);
176462306a36Sopenharmony_ci			complete_exception(pe);
176562306a36Sopenharmony_ci			cond_resched();
176662306a36Sopenharmony_ci		}
176762306a36Sopenharmony_ci	} else {
176862306a36Sopenharmony_ci		struct rb_node *parent = NULL;
176962306a36Sopenharmony_ci		struct rb_node **p = &s->out_of_order_tree.rb_node;
177062306a36Sopenharmony_ci		struct dm_snap_pending_exception *pe2;
177162306a36Sopenharmony_ci
		/*
		 * Finished out of order: insert into the tree, ordered by
		 * exception_sequence.  A duplicate sequence number is a BUG.
		 */
177262306a36Sopenharmony_ci		while (*p) {
177362306a36Sopenharmony_ci			pe2 = rb_entry(*p, struct dm_snap_pending_exception, out_of_order_node);
177462306a36Sopenharmony_ci			parent = *p;
177562306a36Sopenharmony_ci
177662306a36Sopenharmony_ci			BUG_ON(pe->exception_sequence == pe2->exception_sequence);
177762306a36Sopenharmony_ci			if (pe->exception_sequence < pe2->exception_sequence)
177862306a36Sopenharmony_ci				p = &((*p)->rb_left);
177962306a36Sopenharmony_ci			else
178062306a36Sopenharmony_ci				p = &((*p)->rb_right);
178162306a36Sopenharmony_ci		}
178262306a36Sopenharmony_ci
178362306a36Sopenharmony_ci		rb_link_node(&pe->out_of_order_node, parent, p);
178462306a36Sopenharmony_ci		rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
178562306a36Sopenharmony_ci	}
	/* Balances the account_start_copy() in start_copy()/start_full_bio(). */
178662306a36Sopenharmony_ci	account_end_copy(s);
178762306a36Sopenharmony_ci}
178862306a36Sopenharmony_ci
178962306a36Sopenharmony_ci/*
179062306a36Sopenharmony_ci * Dispatches the copy operation to kcopyd.
179162306a36Sopenharmony_ci */
179262306a36Sopenharmony_cistatic void start_copy(struct dm_snap_pending_exception *pe)
179362306a36Sopenharmony_ci{
179462306a36Sopenharmony_ci	struct dm_snapshot *s = pe->snap;
179562306a36Sopenharmony_ci	struct dm_io_region src, dest;
179662306a36Sopenharmony_ci	struct block_device *bdev = s->origin->bdev;
179762306a36Sopenharmony_ci	sector_t dev_size;
179862306a36Sopenharmony_ci
179962306a36Sopenharmony_ci	dev_size = get_dev_size(bdev);
180062306a36Sopenharmony_ci
180162306a36Sopenharmony_ci	src.bdev = bdev;
180262306a36Sopenharmony_ci	src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
180362306a36Sopenharmony_ci	src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector);
180462306a36Sopenharmony_ci
180562306a36Sopenharmony_ci	dest.bdev = s->cow->bdev;
180662306a36Sopenharmony_ci	dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
180762306a36Sopenharmony_ci	dest.count = src.count;
180862306a36Sopenharmony_ci
180962306a36Sopenharmony_ci	/* Hand over to kcopyd */
181062306a36Sopenharmony_ci	account_start_copy(s);
181162306a36Sopenharmony_ci	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
181262306a36Sopenharmony_ci}
181362306a36Sopenharmony_ci
181462306a36Sopenharmony_cistatic void full_bio_end_io(struct bio *bio)
181562306a36Sopenharmony_ci{
181662306a36Sopenharmony_ci	void *callback_data = bio->bi_private;
181762306a36Sopenharmony_ci
181862306a36Sopenharmony_ci	dm_kcopyd_do_callback(callback_data, 0, bio->bi_status ? 1 : 0);
181962306a36Sopenharmony_ci}
182062306a36Sopenharmony_ci
182162306a36Sopenharmony_cistatic void start_full_bio(struct dm_snap_pending_exception *pe,
182262306a36Sopenharmony_ci			   struct bio *bio)
182362306a36Sopenharmony_ci{
182462306a36Sopenharmony_ci	struct dm_snapshot *s = pe->snap;
182562306a36Sopenharmony_ci	void *callback_data;
182662306a36Sopenharmony_ci
182762306a36Sopenharmony_ci	pe->full_bio = bio;
182862306a36Sopenharmony_ci	pe->full_bio_end_io = bio->bi_end_io;
182962306a36Sopenharmony_ci
183062306a36Sopenharmony_ci	account_start_copy(s);
183162306a36Sopenharmony_ci	callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
183262306a36Sopenharmony_ci						   copy_callback, pe);
183362306a36Sopenharmony_ci
183462306a36Sopenharmony_ci	bio->bi_end_io = full_bio_end_io;
183562306a36Sopenharmony_ci	bio->bi_private = callback_data;
183662306a36Sopenharmony_ci
183762306a36Sopenharmony_ci	submit_bio_noacct(bio);
183862306a36Sopenharmony_ci}
183962306a36Sopenharmony_ci
184062306a36Sopenharmony_cistatic struct dm_snap_pending_exception *
184162306a36Sopenharmony_ci__lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
184262306a36Sopenharmony_ci{
184362306a36Sopenharmony_ci	struct dm_exception *e = dm_lookup_exception(&s->pending, chunk);
184462306a36Sopenharmony_ci
184562306a36Sopenharmony_ci	if (!e)
184662306a36Sopenharmony_ci		return NULL;
184762306a36Sopenharmony_ci
184862306a36Sopenharmony_ci	return container_of(e, struct dm_snap_pending_exception, e);
184962306a36Sopenharmony_ci}
185062306a36Sopenharmony_ci
185162306a36Sopenharmony_ci/*
185262306a36Sopenharmony_ci * Inserts a pending exception into the pending table.
185362306a36Sopenharmony_ci *
185462306a36Sopenharmony_ci * NOTE: a write lock must be held on the chunk's pending exception table slot
185562306a36Sopenharmony_ci * before calling this.
185662306a36Sopenharmony_ci */
185762306a36Sopenharmony_cistatic struct dm_snap_pending_exception *
185862306a36Sopenharmony_ci__insert_pending_exception(struct dm_snapshot *s,
185962306a36Sopenharmony_ci			   struct dm_snap_pending_exception *pe, chunk_t chunk)
186062306a36Sopenharmony_ci{
186162306a36Sopenharmony_ci	pe->e.old_chunk = chunk;
186262306a36Sopenharmony_ci	bio_list_init(&pe->origin_bios);
186362306a36Sopenharmony_ci	bio_list_init(&pe->snapshot_bios);
186462306a36Sopenharmony_ci	pe->started = 0;
186562306a36Sopenharmony_ci	pe->full_bio = NULL;
186662306a36Sopenharmony_ci
186762306a36Sopenharmony_ci	spin_lock(&s->pe_allocation_lock);
186862306a36Sopenharmony_ci	if (s->store->type->prepare_exception(s->store, &pe->e)) {
186962306a36Sopenharmony_ci		spin_unlock(&s->pe_allocation_lock);
187062306a36Sopenharmony_ci		free_pending_exception(pe);
187162306a36Sopenharmony_ci		return NULL;
187262306a36Sopenharmony_ci	}
187362306a36Sopenharmony_ci
187462306a36Sopenharmony_ci	pe->exception_sequence = s->exception_start_sequence++;
187562306a36Sopenharmony_ci	spin_unlock(&s->pe_allocation_lock);
187662306a36Sopenharmony_ci
187762306a36Sopenharmony_ci	dm_insert_exception(&s->pending, &pe->e);
187862306a36Sopenharmony_ci
187962306a36Sopenharmony_ci	return pe;
188062306a36Sopenharmony_ci}
188162306a36Sopenharmony_ci
188262306a36Sopenharmony_ci/*
188362306a36Sopenharmony_ci * Looks to see if this snapshot already has a pending exception
188462306a36Sopenharmony_ci * for this chunk, otherwise it allocates a new one and inserts
188562306a36Sopenharmony_ci * it into the pending table.
188662306a36Sopenharmony_ci *
188762306a36Sopenharmony_ci * NOTE: a write lock must be held on the chunk's pending exception table slot
188862306a36Sopenharmony_ci * before calling this.
188962306a36Sopenharmony_ci */
189062306a36Sopenharmony_cistatic struct dm_snap_pending_exception *
189162306a36Sopenharmony_ci__find_pending_exception(struct dm_snapshot *s,
189262306a36Sopenharmony_ci			 struct dm_snap_pending_exception *pe, chunk_t chunk)
189362306a36Sopenharmony_ci{
189462306a36Sopenharmony_ci	struct dm_snap_pending_exception *pe2;
189562306a36Sopenharmony_ci
189662306a36Sopenharmony_ci	pe2 = __lookup_pending_exception(s, chunk);
189762306a36Sopenharmony_ci	if (pe2) {
189862306a36Sopenharmony_ci		free_pending_exception(pe);
189962306a36Sopenharmony_ci		return pe2;
190062306a36Sopenharmony_ci	}
190162306a36Sopenharmony_ci
190262306a36Sopenharmony_ci	return __insert_pending_exception(s, pe, chunk);
190362306a36Sopenharmony_ci}
190462306a36Sopenharmony_ci
190562306a36Sopenharmony_cistatic void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
190662306a36Sopenharmony_ci			    struct bio *bio, chunk_t chunk)
190762306a36Sopenharmony_ci{
190862306a36Sopenharmony_ci	bio_set_dev(bio, s->cow->bdev);
190962306a36Sopenharmony_ci	bio->bi_iter.bi_sector =
191062306a36Sopenharmony_ci		chunk_to_sector(s->store, dm_chunk_number(e->new_chunk) +
191162306a36Sopenharmony_ci				(chunk - e->old_chunk)) +
191262306a36Sopenharmony_ci		(bio->bi_iter.bi_sector & s->store->chunk_mask);
191362306a36Sopenharmony_ci}
191462306a36Sopenharmony_ci
191562306a36Sopenharmony_cistatic void zero_callback(int read_err, unsigned long write_err, void *context)
191662306a36Sopenharmony_ci{
191762306a36Sopenharmony_ci	struct bio *bio = context;
191862306a36Sopenharmony_ci	struct dm_snapshot *s = bio->bi_private;
191962306a36Sopenharmony_ci
192062306a36Sopenharmony_ci	account_end_copy(s);
192162306a36Sopenharmony_ci	bio->bi_status = write_err ? BLK_STS_IOERR : 0;
192262306a36Sopenharmony_ci	bio_endio(bio);
192362306a36Sopenharmony_ci}
192462306a36Sopenharmony_ci
192562306a36Sopenharmony_cistatic void zero_exception(struct dm_snapshot *s, struct dm_exception *e,
192662306a36Sopenharmony_ci			   struct bio *bio, chunk_t chunk)
192762306a36Sopenharmony_ci{
192862306a36Sopenharmony_ci	struct dm_io_region dest;
192962306a36Sopenharmony_ci
193062306a36Sopenharmony_ci	dest.bdev = s->cow->bdev;
193162306a36Sopenharmony_ci	dest.sector = bio->bi_iter.bi_sector;
193262306a36Sopenharmony_ci	dest.count = s->store->chunk_size;
193362306a36Sopenharmony_ci
193462306a36Sopenharmony_ci	account_start_copy(s);
193562306a36Sopenharmony_ci	WARN_ON_ONCE(bio->bi_private);
193662306a36Sopenharmony_ci	bio->bi_private = s;
193762306a36Sopenharmony_ci	dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio);
193862306a36Sopenharmony_ci}
193962306a36Sopenharmony_ci
194062306a36Sopenharmony_cistatic bool io_overlaps_chunk(struct dm_snapshot *s, struct bio *bio)
194162306a36Sopenharmony_ci{
194262306a36Sopenharmony_ci	return bio->bi_iter.bi_size ==
194362306a36Sopenharmony_ci		(s->store->chunk_size << SECTOR_SHIFT);
194462306a36Sopenharmony_ci}
194562306a36Sopenharmony_ci
194662306a36Sopenharmony_cistatic int snapshot_map(struct dm_target *ti, struct bio *bio)
194762306a36Sopenharmony_ci{
194862306a36Sopenharmony_ci	struct dm_exception *e;
194962306a36Sopenharmony_ci	struct dm_snapshot *s = ti->private;
195062306a36Sopenharmony_ci	int r = DM_MAPIO_REMAPPED;
195162306a36Sopenharmony_ci	chunk_t chunk;
195262306a36Sopenharmony_ci	struct dm_snap_pending_exception *pe = NULL;
195362306a36Sopenharmony_ci	struct dm_exception_table_lock lock;
195462306a36Sopenharmony_ci
195562306a36Sopenharmony_ci	init_tracked_chunk(bio);
195662306a36Sopenharmony_ci
195762306a36Sopenharmony_ci	if (bio->bi_opf & REQ_PREFLUSH) {
195862306a36Sopenharmony_ci		bio_set_dev(bio, s->cow->bdev);
195962306a36Sopenharmony_ci		return DM_MAPIO_REMAPPED;
196062306a36Sopenharmony_ci	}
196162306a36Sopenharmony_ci
196262306a36Sopenharmony_ci	chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
196362306a36Sopenharmony_ci	dm_exception_table_lock_init(s, chunk, &lock);
196462306a36Sopenharmony_ci
196562306a36Sopenharmony_ci	/* Full snapshots are not usable */
196662306a36Sopenharmony_ci	/* To get here the table must be live so s->active is always set. */
196762306a36Sopenharmony_ci	if (!s->valid)
196862306a36Sopenharmony_ci		return DM_MAPIO_KILL;
196962306a36Sopenharmony_ci
197062306a36Sopenharmony_ci	if (bio_data_dir(bio) == WRITE) {
197162306a36Sopenharmony_ci		while (unlikely(!wait_for_in_progress(s, false)))
197262306a36Sopenharmony_ci			; /* wait_for_in_progress() has slept */
197362306a36Sopenharmony_ci	}
197462306a36Sopenharmony_ci
197562306a36Sopenharmony_ci	down_read(&s->lock);
197662306a36Sopenharmony_ci	dm_exception_table_lock(&lock);
197762306a36Sopenharmony_ci
197862306a36Sopenharmony_ci	if (!s->valid || (unlikely(s->snapshot_overflowed) &&
197962306a36Sopenharmony_ci	    bio_data_dir(bio) == WRITE)) {
198062306a36Sopenharmony_ci		r = DM_MAPIO_KILL;
198162306a36Sopenharmony_ci		goto out_unlock;
198262306a36Sopenharmony_ci	}
198362306a36Sopenharmony_ci
198462306a36Sopenharmony_ci	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
198562306a36Sopenharmony_ci		if (s->discard_passdown_origin && dm_bio_get_target_bio_nr(bio)) {
198662306a36Sopenharmony_ci			/*
198762306a36Sopenharmony_ci			 * passdown discard to origin (without triggering
198862306a36Sopenharmony_ci			 * snapshot exceptions via do_origin; doing so would
198962306a36Sopenharmony_ci			 * defeat the goal of freeing space in origin that is
199062306a36Sopenharmony_ci			 * implied by the "discard_passdown_origin" feature)
199162306a36Sopenharmony_ci			 */
199262306a36Sopenharmony_ci			bio_set_dev(bio, s->origin->bdev);
199362306a36Sopenharmony_ci			track_chunk(s, bio, chunk);
199462306a36Sopenharmony_ci			goto out_unlock;
199562306a36Sopenharmony_ci		}
199662306a36Sopenharmony_ci		/* discard to snapshot (target_bio_nr == 0) zeroes exceptions */
199762306a36Sopenharmony_ci	}
199862306a36Sopenharmony_ci
199962306a36Sopenharmony_ci	/* If the block is already remapped - use that, else remap it */
200062306a36Sopenharmony_ci	e = dm_lookup_exception(&s->complete, chunk);
200162306a36Sopenharmony_ci	if (e) {
200262306a36Sopenharmony_ci		remap_exception(s, e, bio, chunk);
200362306a36Sopenharmony_ci		if (unlikely(bio_op(bio) == REQ_OP_DISCARD) &&
200462306a36Sopenharmony_ci		    io_overlaps_chunk(s, bio)) {
200562306a36Sopenharmony_ci			dm_exception_table_unlock(&lock);
200662306a36Sopenharmony_ci			up_read(&s->lock);
200762306a36Sopenharmony_ci			zero_exception(s, e, bio, chunk);
200862306a36Sopenharmony_ci			r = DM_MAPIO_SUBMITTED; /* discard is not issued */
200962306a36Sopenharmony_ci			goto out;
201062306a36Sopenharmony_ci		}
201162306a36Sopenharmony_ci		goto out_unlock;
201262306a36Sopenharmony_ci	}
201362306a36Sopenharmony_ci
201462306a36Sopenharmony_ci	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
201562306a36Sopenharmony_ci		/*
201662306a36Sopenharmony_ci		 * If no exception exists, complete discard immediately
201762306a36Sopenharmony_ci		 * otherwise it'll trigger copy-out.
201862306a36Sopenharmony_ci		 */
201962306a36Sopenharmony_ci		bio_endio(bio);
202062306a36Sopenharmony_ci		r = DM_MAPIO_SUBMITTED;
202162306a36Sopenharmony_ci		goto out_unlock;
202262306a36Sopenharmony_ci	}
202362306a36Sopenharmony_ci
202462306a36Sopenharmony_ci	/*
202562306a36Sopenharmony_ci	 * Write to snapshot - higher level takes care of RW/RO
202662306a36Sopenharmony_ci	 * flags so we should only get this if we are
202762306a36Sopenharmony_ci	 * writable.
202862306a36Sopenharmony_ci	 */
202962306a36Sopenharmony_ci	if (bio_data_dir(bio) == WRITE) {
203062306a36Sopenharmony_ci		pe = __lookup_pending_exception(s, chunk);
203162306a36Sopenharmony_ci		if (!pe) {
203262306a36Sopenharmony_ci			dm_exception_table_unlock(&lock);
203362306a36Sopenharmony_ci			pe = alloc_pending_exception(s);
203462306a36Sopenharmony_ci			dm_exception_table_lock(&lock);
203562306a36Sopenharmony_ci
203662306a36Sopenharmony_ci			e = dm_lookup_exception(&s->complete, chunk);
203762306a36Sopenharmony_ci			if (e) {
203862306a36Sopenharmony_ci				free_pending_exception(pe);
203962306a36Sopenharmony_ci				remap_exception(s, e, bio, chunk);
204062306a36Sopenharmony_ci				goto out_unlock;
204162306a36Sopenharmony_ci			}
204262306a36Sopenharmony_ci
204362306a36Sopenharmony_ci			pe = __find_pending_exception(s, pe, chunk);
204462306a36Sopenharmony_ci			if (!pe) {
204562306a36Sopenharmony_ci				dm_exception_table_unlock(&lock);
204662306a36Sopenharmony_ci				up_read(&s->lock);
204762306a36Sopenharmony_ci
204862306a36Sopenharmony_ci				down_write(&s->lock);
204962306a36Sopenharmony_ci
205062306a36Sopenharmony_ci				if (s->store->userspace_supports_overflow) {
205162306a36Sopenharmony_ci					if (s->valid && !s->snapshot_overflowed) {
205262306a36Sopenharmony_ci						s->snapshot_overflowed = 1;
205362306a36Sopenharmony_ci						DMERR("Snapshot overflowed: Unable to allocate exception.");
205462306a36Sopenharmony_ci					}
205562306a36Sopenharmony_ci				} else
205662306a36Sopenharmony_ci					__invalidate_snapshot(s, -ENOMEM);
205762306a36Sopenharmony_ci				up_write(&s->lock);
205862306a36Sopenharmony_ci
205962306a36Sopenharmony_ci				r = DM_MAPIO_KILL;
206062306a36Sopenharmony_ci				goto out;
206162306a36Sopenharmony_ci			}
206262306a36Sopenharmony_ci		}
206362306a36Sopenharmony_ci
206462306a36Sopenharmony_ci		remap_exception(s, &pe->e, bio, chunk);
206562306a36Sopenharmony_ci
206662306a36Sopenharmony_ci		r = DM_MAPIO_SUBMITTED;
206762306a36Sopenharmony_ci
206862306a36Sopenharmony_ci		if (!pe->started && io_overlaps_chunk(s, bio)) {
206962306a36Sopenharmony_ci			pe->started = 1;
207062306a36Sopenharmony_ci
207162306a36Sopenharmony_ci			dm_exception_table_unlock(&lock);
207262306a36Sopenharmony_ci			up_read(&s->lock);
207362306a36Sopenharmony_ci
207462306a36Sopenharmony_ci			start_full_bio(pe, bio);
207562306a36Sopenharmony_ci			goto out;
207662306a36Sopenharmony_ci		}
207762306a36Sopenharmony_ci
207862306a36Sopenharmony_ci		bio_list_add(&pe->snapshot_bios, bio);
207962306a36Sopenharmony_ci
208062306a36Sopenharmony_ci		if (!pe->started) {
208162306a36Sopenharmony_ci			/* this is protected by the exception table lock */
208262306a36Sopenharmony_ci			pe->started = 1;
208362306a36Sopenharmony_ci
208462306a36Sopenharmony_ci			dm_exception_table_unlock(&lock);
208562306a36Sopenharmony_ci			up_read(&s->lock);
208662306a36Sopenharmony_ci
208762306a36Sopenharmony_ci			start_copy(pe);
208862306a36Sopenharmony_ci			goto out;
208962306a36Sopenharmony_ci		}
209062306a36Sopenharmony_ci	} else {
209162306a36Sopenharmony_ci		bio_set_dev(bio, s->origin->bdev);
209262306a36Sopenharmony_ci		track_chunk(s, bio, chunk);
209362306a36Sopenharmony_ci	}
209462306a36Sopenharmony_ci
209562306a36Sopenharmony_ciout_unlock:
209662306a36Sopenharmony_ci	dm_exception_table_unlock(&lock);
209762306a36Sopenharmony_ci	up_read(&s->lock);
209862306a36Sopenharmony_ciout:
209962306a36Sopenharmony_ci	return r;
210062306a36Sopenharmony_ci}
210162306a36Sopenharmony_ci
210262306a36Sopenharmony_ci/*
210362306a36Sopenharmony_ci * A snapshot-merge target behaves like a combination of a snapshot
210462306a36Sopenharmony_ci * target and a snapshot-origin target.  It only generates new
210562306a36Sopenharmony_ci * exceptions in other snapshots and not in the one that is being
210662306a36Sopenharmony_ci * merged.
210762306a36Sopenharmony_ci *
210862306a36Sopenharmony_ci * For each chunk, if there is an existing exception, it is used to
210962306a36Sopenharmony_ci * redirect I/O to the cow device.  Otherwise I/O is sent to the origin,
211062306a36Sopenharmony_ci * which in turn might generate exceptions in other snapshots.
211162306a36Sopenharmony_ci * If merging is currently taking place on the chunk in question, the
211262306a36Sopenharmony_ci * I/O is deferred by adding it to s->bios_queued_during_merge.
211362306a36Sopenharmony_ci */
211462306a36Sopenharmony_cistatic int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
211562306a36Sopenharmony_ci{
211662306a36Sopenharmony_ci	struct dm_exception *e;
211762306a36Sopenharmony_ci	struct dm_snapshot *s = ti->private;
211862306a36Sopenharmony_ci	int r = DM_MAPIO_REMAPPED;
211962306a36Sopenharmony_ci	chunk_t chunk;
212062306a36Sopenharmony_ci
212162306a36Sopenharmony_ci	init_tracked_chunk(bio);
212262306a36Sopenharmony_ci
212362306a36Sopenharmony_ci	if (bio->bi_opf & REQ_PREFLUSH) {
212462306a36Sopenharmony_ci		if (!dm_bio_get_target_bio_nr(bio))
212562306a36Sopenharmony_ci			bio_set_dev(bio, s->origin->bdev);
212662306a36Sopenharmony_ci		else
212762306a36Sopenharmony_ci			bio_set_dev(bio, s->cow->bdev);
212862306a36Sopenharmony_ci		return DM_MAPIO_REMAPPED;
212962306a36Sopenharmony_ci	}
213062306a36Sopenharmony_ci
213162306a36Sopenharmony_ci	if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
213262306a36Sopenharmony_ci		/* Once merging, discards no longer effect change */
213362306a36Sopenharmony_ci		bio_endio(bio);
213462306a36Sopenharmony_ci		return DM_MAPIO_SUBMITTED;
213562306a36Sopenharmony_ci	}
213662306a36Sopenharmony_ci
213762306a36Sopenharmony_ci	chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
213862306a36Sopenharmony_ci
213962306a36Sopenharmony_ci	down_write(&s->lock);
214062306a36Sopenharmony_ci
214162306a36Sopenharmony_ci	/* Full merging snapshots are redirected to the origin */
214262306a36Sopenharmony_ci	if (!s->valid)
214362306a36Sopenharmony_ci		goto redirect_to_origin;
214462306a36Sopenharmony_ci
214562306a36Sopenharmony_ci	/* If the block is already remapped - use that */
214662306a36Sopenharmony_ci	e = dm_lookup_exception(&s->complete, chunk);
214762306a36Sopenharmony_ci	if (e) {
214862306a36Sopenharmony_ci		/* Queue writes overlapping with chunks being merged */
214962306a36Sopenharmony_ci		if (bio_data_dir(bio) == WRITE &&
215062306a36Sopenharmony_ci		    chunk >= s->first_merging_chunk &&
215162306a36Sopenharmony_ci		    chunk < (s->first_merging_chunk +
215262306a36Sopenharmony_ci			     s->num_merging_chunks)) {
215362306a36Sopenharmony_ci			bio_set_dev(bio, s->origin->bdev);
215462306a36Sopenharmony_ci			bio_list_add(&s->bios_queued_during_merge, bio);
215562306a36Sopenharmony_ci			r = DM_MAPIO_SUBMITTED;
215662306a36Sopenharmony_ci			goto out_unlock;
215762306a36Sopenharmony_ci		}
215862306a36Sopenharmony_ci
215962306a36Sopenharmony_ci		remap_exception(s, e, bio, chunk);
216062306a36Sopenharmony_ci
216162306a36Sopenharmony_ci		if (bio_data_dir(bio) == WRITE)
216262306a36Sopenharmony_ci			track_chunk(s, bio, chunk);
216362306a36Sopenharmony_ci		goto out_unlock;
216462306a36Sopenharmony_ci	}
216562306a36Sopenharmony_ci
216662306a36Sopenharmony_ciredirect_to_origin:
216762306a36Sopenharmony_ci	bio_set_dev(bio, s->origin->bdev);
216862306a36Sopenharmony_ci
216962306a36Sopenharmony_ci	if (bio_data_dir(bio) == WRITE) {
217062306a36Sopenharmony_ci		up_write(&s->lock);
217162306a36Sopenharmony_ci		return do_origin(s->origin, bio, false);
217262306a36Sopenharmony_ci	}
217362306a36Sopenharmony_ci
217462306a36Sopenharmony_ciout_unlock:
217562306a36Sopenharmony_ci	up_write(&s->lock);
217662306a36Sopenharmony_ci
217762306a36Sopenharmony_ci	return r;
217862306a36Sopenharmony_ci}
217962306a36Sopenharmony_ci
218062306a36Sopenharmony_cistatic int snapshot_end_io(struct dm_target *ti, struct bio *bio,
218162306a36Sopenharmony_ci		blk_status_t *error)
218262306a36Sopenharmony_ci{
218362306a36Sopenharmony_ci	struct dm_snapshot *s = ti->private;
218462306a36Sopenharmony_ci
218562306a36Sopenharmony_ci	if (is_bio_tracked(bio))
218662306a36Sopenharmony_ci		stop_tracking_chunk(s, bio);
218762306a36Sopenharmony_ci
218862306a36Sopenharmony_ci	return DM_ENDIO_DONE;
218962306a36Sopenharmony_ci}
219062306a36Sopenharmony_ci
219162306a36Sopenharmony_cistatic void snapshot_merge_presuspend(struct dm_target *ti)
219262306a36Sopenharmony_ci{
219362306a36Sopenharmony_ci	struct dm_snapshot *s = ti->private;
219462306a36Sopenharmony_ci
219562306a36Sopenharmony_ci	stop_merge(s);
219662306a36Sopenharmony_ci}
219762306a36Sopenharmony_ci
219862306a36Sopenharmony_cistatic int snapshot_preresume(struct dm_target *ti)
219962306a36Sopenharmony_ci{
220062306a36Sopenharmony_ci	int r = 0;
220162306a36Sopenharmony_ci	struct dm_snapshot *s = ti->private;
220262306a36Sopenharmony_ci	struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
220362306a36Sopenharmony_ci
220462306a36Sopenharmony_ci	down_read(&_origins_lock);
220562306a36Sopenharmony_ci	(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
220662306a36Sopenharmony_ci	if (snap_src && snap_dest) {
220762306a36Sopenharmony_ci		down_read(&snap_src->lock);
220862306a36Sopenharmony_ci		if (s == snap_src) {
220962306a36Sopenharmony_ci			DMERR("Unable to resume snapshot source until handover completes.");
221062306a36Sopenharmony_ci			r = -EINVAL;
221162306a36Sopenharmony_ci		} else if (!dm_suspended(snap_src->ti)) {
221262306a36Sopenharmony_ci			DMERR("Unable to perform snapshot handover until source is suspended.");
221362306a36Sopenharmony_ci			r = -EINVAL;
221462306a36Sopenharmony_ci		}
221562306a36Sopenharmony_ci		up_read(&snap_src->lock);
221662306a36Sopenharmony_ci	}
221762306a36Sopenharmony_ci	up_read(&_origins_lock);
221862306a36Sopenharmony_ci
221962306a36Sopenharmony_ci	return r;
222062306a36Sopenharmony_ci}
222162306a36Sopenharmony_ci
222262306a36Sopenharmony_cistatic void snapshot_resume(struct dm_target *ti)
222362306a36Sopenharmony_ci{
222462306a36Sopenharmony_ci	struct dm_snapshot *s = ti->private;
222562306a36Sopenharmony_ci	struct dm_snapshot *snap_src = NULL, *snap_dest = NULL, *snap_merging = NULL;
222662306a36Sopenharmony_ci	struct dm_origin *o;
222762306a36Sopenharmony_ci	struct mapped_device *origin_md = NULL;
222862306a36Sopenharmony_ci	bool must_restart_merging = false;
222962306a36Sopenharmony_ci
223062306a36Sopenharmony_ci	down_read(&_origins_lock);
223162306a36Sopenharmony_ci
223262306a36Sopenharmony_ci	o = __lookup_dm_origin(s->origin->bdev);
223362306a36Sopenharmony_ci	if (o)
223462306a36Sopenharmony_ci		origin_md = dm_table_get_md(o->ti->table);
223562306a36Sopenharmony_ci	if (!origin_md) {
223662306a36Sopenharmony_ci		(void) __find_snapshots_sharing_cow(s, NULL, NULL, &snap_merging);
223762306a36Sopenharmony_ci		if (snap_merging)
223862306a36Sopenharmony_ci			origin_md = dm_table_get_md(snap_merging->ti->table);
223962306a36Sopenharmony_ci	}
224062306a36Sopenharmony_ci	if (origin_md == dm_table_get_md(ti->table))
224162306a36Sopenharmony_ci		origin_md = NULL;
224262306a36Sopenharmony_ci	if (origin_md) {
224362306a36Sopenharmony_ci		if (dm_hold(origin_md))
224462306a36Sopenharmony_ci			origin_md = NULL;
224562306a36Sopenharmony_ci	}
224662306a36Sopenharmony_ci
224762306a36Sopenharmony_ci	up_read(&_origins_lock);
224862306a36Sopenharmony_ci
224962306a36Sopenharmony_ci	if (origin_md) {
225062306a36Sopenharmony_ci		dm_internal_suspend_fast(origin_md);
225162306a36Sopenharmony_ci		if (snap_merging && test_bit(RUNNING_MERGE, &snap_merging->state_bits)) {
225262306a36Sopenharmony_ci			must_restart_merging = true;
225362306a36Sopenharmony_ci			stop_merge(snap_merging);
225462306a36Sopenharmony_ci		}
225562306a36Sopenharmony_ci	}
225662306a36Sopenharmony_ci
225762306a36Sopenharmony_ci	down_read(&_origins_lock);
225862306a36Sopenharmony_ci
225962306a36Sopenharmony_ci	(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
226062306a36Sopenharmony_ci	if (snap_src && snap_dest) {
226162306a36Sopenharmony_ci		down_write(&snap_src->lock);
226262306a36Sopenharmony_ci		down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING);
226362306a36Sopenharmony_ci		__handover_exceptions(snap_src, snap_dest);
226462306a36Sopenharmony_ci		up_write(&snap_dest->lock);
226562306a36Sopenharmony_ci		up_write(&snap_src->lock);
226662306a36Sopenharmony_ci	}
226762306a36Sopenharmony_ci
226862306a36Sopenharmony_ci	up_read(&_origins_lock);
226962306a36Sopenharmony_ci
227062306a36Sopenharmony_ci	if (origin_md) {
227162306a36Sopenharmony_ci		if (must_restart_merging)
227262306a36Sopenharmony_ci			start_merge(snap_merging);
227362306a36Sopenharmony_ci		dm_internal_resume_fast(origin_md);
227462306a36Sopenharmony_ci		dm_put(origin_md);
227562306a36Sopenharmony_ci	}
227662306a36Sopenharmony_ci
227762306a36Sopenharmony_ci	/* Now we have correct chunk size, reregister */
227862306a36Sopenharmony_ci	reregister_snapshot(s);
227962306a36Sopenharmony_ci
228062306a36Sopenharmony_ci	down_write(&s->lock);
228162306a36Sopenharmony_ci	s->active = 1;
228262306a36Sopenharmony_ci	up_write(&s->lock);
228362306a36Sopenharmony_ci}
228462306a36Sopenharmony_ci
228562306a36Sopenharmony_cistatic uint32_t get_origin_minimum_chunksize(struct block_device *bdev)
228662306a36Sopenharmony_ci{
228762306a36Sopenharmony_ci	uint32_t min_chunksize;
228862306a36Sopenharmony_ci
228962306a36Sopenharmony_ci	down_read(&_origins_lock);
229062306a36Sopenharmony_ci	min_chunksize = __minimum_chunk_size(__lookup_origin(bdev));
229162306a36Sopenharmony_ci	up_read(&_origins_lock);
229262306a36Sopenharmony_ci
229362306a36Sopenharmony_ci	return min_chunksize;
229462306a36Sopenharmony_ci}
229562306a36Sopenharmony_ci
229662306a36Sopenharmony_cistatic void snapshot_merge_resume(struct dm_target *ti)
229762306a36Sopenharmony_ci{
229862306a36Sopenharmony_ci	struct dm_snapshot *s = ti->private;
229962306a36Sopenharmony_ci
230062306a36Sopenharmony_ci	/*
230162306a36Sopenharmony_ci	 * Handover exceptions from existing snapshot.
230262306a36Sopenharmony_ci	 */
230362306a36Sopenharmony_ci	snapshot_resume(ti);
230462306a36Sopenharmony_ci
230562306a36Sopenharmony_ci	/*
230662306a36Sopenharmony_ci	 * snapshot-merge acts as an origin, so set ti->max_io_len
230762306a36Sopenharmony_ci	 */
230862306a36Sopenharmony_ci	ti->max_io_len = get_origin_minimum_chunksize(s->origin->bdev);
230962306a36Sopenharmony_ci
231062306a36Sopenharmony_ci	start_merge(s);
231162306a36Sopenharmony_ci}
231262306a36Sopenharmony_ci
231362306a36Sopenharmony_cistatic void snapshot_status(struct dm_target *ti, status_type_t type,
231462306a36Sopenharmony_ci			    unsigned int status_flags, char *result, unsigned int maxlen)
231562306a36Sopenharmony_ci{
231662306a36Sopenharmony_ci	unsigned int sz = 0;
231762306a36Sopenharmony_ci	struct dm_snapshot *snap = ti->private;
231862306a36Sopenharmony_ci	unsigned int num_features;
231962306a36Sopenharmony_ci
232062306a36Sopenharmony_ci	switch (type) {
232162306a36Sopenharmony_ci	case STATUSTYPE_INFO:
232262306a36Sopenharmony_ci
232362306a36Sopenharmony_ci		down_write(&snap->lock);
232462306a36Sopenharmony_ci
232562306a36Sopenharmony_ci		if (!snap->valid)
232662306a36Sopenharmony_ci			DMEMIT("Invalid");
232762306a36Sopenharmony_ci		else if (snap->merge_failed)
232862306a36Sopenharmony_ci			DMEMIT("Merge failed");
232962306a36Sopenharmony_ci		else if (snap->snapshot_overflowed)
233062306a36Sopenharmony_ci			DMEMIT("Overflow");
233162306a36Sopenharmony_ci		else {
233262306a36Sopenharmony_ci			if (snap->store->type->usage) {
233362306a36Sopenharmony_ci				sector_t total_sectors, sectors_allocated,
233462306a36Sopenharmony_ci					 metadata_sectors;
233562306a36Sopenharmony_ci				snap->store->type->usage(snap->store,
233662306a36Sopenharmony_ci							 &total_sectors,
233762306a36Sopenharmony_ci							 &sectors_allocated,
233862306a36Sopenharmony_ci							 &metadata_sectors);
233962306a36Sopenharmony_ci				DMEMIT("%llu/%llu %llu",
234062306a36Sopenharmony_ci				       (unsigned long long)sectors_allocated,
234162306a36Sopenharmony_ci				       (unsigned long long)total_sectors,
234262306a36Sopenharmony_ci				       (unsigned long long)metadata_sectors);
234362306a36Sopenharmony_ci			} else
234462306a36Sopenharmony_ci				DMEMIT("Unknown");
234562306a36Sopenharmony_ci		}
234662306a36Sopenharmony_ci
234762306a36Sopenharmony_ci		up_write(&snap->lock);
234862306a36Sopenharmony_ci
234962306a36Sopenharmony_ci		break;
235062306a36Sopenharmony_ci
235162306a36Sopenharmony_ci	case STATUSTYPE_TABLE:
235262306a36Sopenharmony_ci		/*
235362306a36Sopenharmony_ci		 * kdevname returns a static pointer so we need
235462306a36Sopenharmony_ci		 * to make private copies if the output is to
235562306a36Sopenharmony_ci		 * make sense.
235662306a36Sopenharmony_ci		 */
235762306a36Sopenharmony_ci		DMEMIT("%s %s", snap->origin->name, snap->cow->name);
235862306a36Sopenharmony_ci		sz += snap->store->type->status(snap->store, type, result + sz,
235962306a36Sopenharmony_ci						maxlen - sz);
236062306a36Sopenharmony_ci		num_features = snap->discard_zeroes_cow + snap->discard_passdown_origin;
236162306a36Sopenharmony_ci		if (num_features) {
236262306a36Sopenharmony_ci			DMEMIT(" %u", num_features);
236362306a36Sopenharmony_ci			if (snap->discard_zeroes_cow)
236462306a36Sopenharmony_ci				DMEMIT(" discard_zeroes_cow");
236562306a36Sopenharmony_ci			if (snap->discard_passdown_origin)
236662306a36Sopenharmony_ci				DMEMIT(" discard_passdown_origin");
236762306a36Sopenharmony_ci		}
236862306a36Sopenharmony_ci		break;
236962306a36Sopenharmony_ci
237062306a36Sopenharmony_ci	case STATUSTYPE_IMA:
237162306a36Sopenharmony_ci		DMEMIT_TARGET_NAME_VERSION(ti->type);
237262306a36Sopenharmony_ci		DMEMIT(",snap_origin_name=%s", snap->origin->name);
237362306a36Sopenharmony_ci		DMEMIT(",snap_cow_name=%s", snap->cow->name);
237462306a36Sopenharmony_ci		DMEMIT(",snap_valid=%c", snap->valid ? 'y' : 'n');
237562306a36Sopenharmony_ci		DMEMIT(",snap_merge_failed=%c", snap->merge_failed ? 'y' : 'n');
237662306a36Sopenharmony_ci		DMEMIT(",snapshot_overflowed=%c", snap->snapshot_overflowed ? 'y' : 'n');
237762306a36Sopenharmony_ci		DMEMIT(";");
237862306a36Sopenharmony_ci		break;
237962306a36Sopenharmony_ci	}
238062306a36Sopenharmony_ci}
238162306a36Sopenharmony_ci
238262306a36Sopenharmony_cistatic int snapshot_iterate_devices(struct dm_target *ti,
238362306a36Sopenharmony_ci				    iterate_devices_callout_fn fn, void *data)
238462306a36Sopenharmony_ci{
238562306a36Sopenharmony_ci	struct dm_snapshot *snap = ti->private;
238662306a36Sopenharmony_ci	int r;
238762306a36Sopenharmony_ci
238862306a36Sopenharmony_ci	r = fn(ti, snap->origin, 0, ti->len, data);
238962306a36Sopenharmony_ci
239062306a36Sopenharmony_ci	if (!r)
239162306a36Sopenharmony_ci		r = fn(ti, snap->cow, 0, get_dev_size(snap->cow->bdev), data);
239262306a36Sopenharmony_ci
239362306a36Sopenharmony_ci	return r;
239462306a36Sopenharmony_ci}
239562306a36Sopenharmony_ci
239662306a36Sopenharmony_cistatic void snapshot_io_hints(struct dm_target *ti, struct queue_limits *limits)
239762306a36Sopenharmony_ci{
239862306a36Sopenharmony_ci	struct dm_snapshot *snap = ti->private;
239962306a36Sopenharmony_ci
240062306a36Sopenharmony_ci	if (snap->discard_zeroes_cow) {
240162306a36Sopenharmony_ci		struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
240262306a36Sopenharmony_ci
240362306a36Sopenharmony_ci		down_read(&_origins_lock);
240462306a36Sopenharmony_ci
240562306a36Sopenharmony_ci		(void) __find_snapshots_sharing_cow(snap, &snap_src, &snap_dest, NULL);
240662306a36Sopenharmony_ci		if (snap_src && snap_dest)
240762306a36Sopenharmony_ci			snap = snap_src;
240862306a36Sopenharmony_ci
240962306a36Sopenharmony_ci		/* All discards are split on chunk_size boundary */
241062306a36Sopenharmony_ci		limits->discard_granularity = snap->store->chunk_size;
241162306a36Sopenharmony_ci		limits->max_discard_sectors = snap->store->chunk_size;
241262306a36Sopenharmony_ci
241362306a36Sopenharmony_ci		up_read(&_origins_lock);
241462306a36Sopenharmony_ci	}
241562306a36Sopenharmony_ci}
241662306a36Sopenharmony_ci
241762306a36Sopenharmony_ci/*
241862306a36Sopenharmony_ci *---------------------------------------------------------------
241962306a36Sopenharmony_ci * Origin methods
242062306a36Sopenharmony_ci *---------------------------------------------------------------
242162306a36Sopenharmony_ci */
242262306a36Sopenharmony_ci/*
242362306a36Sopenharmony_ci * If no exceptions need creating, DM_MAPIO_REMAPPED is returned and any
242462306a36Sopenharmony_ci * supplied bio was ignored.  The caller may submit it immediately.
242562306a36Sopenharmony_ci * (No remapping actually occurs as the origin is always a direct linear
242662306a36Sopenharmony_ci * map.)
242762306a36Sopenharmony_ci *
242862306a36Sopenharmony_ci * If further exceptions are required, DM_MAPIO_SUBMITTED is returned
242962306a36Sopenharmony_ci * and any supplied bio is added to a list to be submitted once all
243062306a36Sopenharmony_ci * the necessary exceptions exist.
243162306a36Sopenharmony_ci */
243262306a36Sopenharmony_cistatic int __origin_write(struct list_head *snapshots, sector_t sector,
243362306a36Sopenharmony_ci			  struct bio *bio)
243462306a36Sopenharmony_ci{
243562306a36Sopenharmony_ci	int r = DM_MAPIO_REMAPPED;
243662306a36Sopenharmony_ci	struct dm_snapshot *snap;
243762306a36Sopenharmony_ci	struct dm_exception *e;
243862306a36Sopenharmony_ci	struct dm_snap_pending_exception *pe, *pe2;
243962306a36Sopenharmony_ci	struct dm_snap_pending_exception *pe_to_start_now = NULL;
244062306a36Sopenharmony_ci	struct dm_snap_pending_exception *pe_to_start_last = NULL;
244162306a36Sopenharmony_ci	struct dm_exception_table_lock lock;
244262306a36Sopenharmony_ci	chunk_t chunk;
244362306a36Sopenharmony_ci
244462306a36Sopenharmony_ci	/* Do all the snapshots on this origin */
244562306a36Sopenharmony_ci	list_for_each_entry(snap, snapshots, list) {
244662306a36Sopenharmony_ci		/*
244762306a36Sopenharmony_ci		 * Don't make new exceptions in a merging snapshot
244862306a36Sopenharmony_ci		 * because it has effectively been deleted
244962306a36Sopenharmony_ci		 */
245062306a36Sopenharmony_ci		if (dm_target_is_snapshot_merge(snap->ti))
245162306a36Sopenharmony_ci			continue;
245262306a36Sopenharmony_ci
245362306a36Sopenharmony_ci		/* Nothing to do if writing beyond end of snapshot */
245462306a36Sopenharmony_ci		if (sector >= dm_table_get_size(snap->ti->table))
245562306a36Sopenharmony_ci			continue;
245662306a36Sopenharmony_ci
245762306a36Sopenharmony_ci		/*
245862306a36Sopenharmony_ci		 * Remember, different snapshots can have
245962306a36Sopenharmony_ci		 * different chunk sizes.
246062306a36Sopenharmony_ci		 */
246162306a36Sopenharmony_ci		chunk = sector_to_chunk(snap->store, sector);
246262306a36Sopenharmony_ci		dm_exception_table_lock_init(snap, chunk, &lock);
246362306a36Sopenharmony_ci
246462306a36Sopenharmony_ci		down_read(&snap->lock);
246562306a36Sopenharmony_ci		dm_exception_table_lock(&lock);
246662306a36Sopenharmony_ci
246762306a36Sopenharmony_ci		/* Only deal with valid and active snapshots */
246862306a36Sopenharmony_ci		if (!snap->valid || !snap->active)
246962306a36Sopenharmony_ci			goto next_snapshot;
247062306a36Sopenharmony_ci
247162306a36Sopenharmony_ci		pe = __lookup_pending_exception(snap, chunk);
247262306a36Sopenharmony_ci		if (!pe) {
247362306a36Sopenharmony_ci			/*
247462306a36Sopenharmony_ci			 * Check exception table to see if block is already
247562306a36Sopenharmony_ci			 * remapped in this snapshot and trigger an exception
247662306a36Sopenharmony_ci			 * if not.
247762306a36Sopenharmony_ci			 */
247862306a36Sopenharmony_ci			e = dm_lookup_exception(&snap->complete, chunk);
247962306a36Sopenharmony_ci			if (e)
248062306a36Sopenharmony_ci				goto next_snapshot;
248162306a36Sopenharmony_ci
248262306a36Sopenharmony_ci			dm_exception_table_unlock(&lock);
248362306a36Sopenharmony_ci			pe = alloc_pending_exception(snap);
248462306a36Sopenharmony_ci			dm_exception_table_lock(&lock);
248562306a36Sopenharmony_ci
248662306a36Sopenharmony_ci			pe2 = __lookup_pending_exception(snap, chunk);
248762306a36Sopenharmony_ci
248862306a36Sopenharmony_ci			if (!pe2) {
248962306a36Sopenharmony_ci				e = dm_lookup_exception(&snap->complete, chunk);
249062306a36Sopenharmony_ci				if (e) {
249162306a36Sopenharmony_ci					free_pending_exception(pe);
249262306a36Sopenharmony_ci					goto next_snapshot;
249362306a36Sopenharmony_ci				}
249462306a36Sopenharmony_ci
249562306a36Sopenharmony_ci				pe = __insert_pending_exception(snap, pe, chunk);
249662306a36Sopenharmony_ci				if (!pe) {
249762306a36Sopenharmony_ci					dm_exception_table_unlock(&lock);
249862306a36Sopenharmony_ci					up_read(&snap->lock);
249962306a36Sopenharmony_ci
250062306a36Sopenharmony_ci					invalidate_snapshot(snap, -ENOMEM);
250162306a36Sopenharmony_ci					continue;
250262306a36Sopenharmony_ci				}
250362306a36Sopenharmony_ci			} else {
250462306a36Sopenharmony_ci				free_pending_exception(pe);
250562306a36Sopenharmony_ci				pe = pe2;
250662306a36Sopenharmony_ci			}
250762306a36Sopenharmony_ci		}
250862306a36Sopenharmony_ci
250962306a36Sopenharmony_ci		r = DM_MAPIO_SUBMITTED;
251062306a36Sopenharmony_ci
251162306a36Sopenharmony_ci		/*
251262306a36Sopenharmony_ci		 * If an origin bio was supplied, queue it to wait for the
251362306a36Sopenharmony_ci		 * completion of this exception, and start this one last,
251462306a36Sopenharmony_ci		 * at the end of the function.
251562306a36Sopenharmony_ci		 */
251662306a36Sopenharmony_ci		if (bio) {
251762306a36Sopenharmony_ci			bio_list_add(&pe->origin_bios, bio);
251862306a36Sopenharmony_ci			bio = NULL;
251962306a36Sopenharmony_ci
252062306a36Sopenharmony_ci			if (!pe->started) {
252162306a36Sopenharmony_ci				pe->started = 1;
252262306a36Sopenharmony_ci				pe_to_start_last = pe;
252362306a36Sopenharmony_ci			}
252462306a36Sopenharmony_ci		}
252562306a36Sopenharmony_ci
252662306a36Sopenharmony_ci		if (!pe->started) {
252762306a36Sopenharmony_ci			pe->started = 1;
252862306a36Sopenharmony_ci			pe_to_start_now = pe;
252962306a36Sopenharmony_ci		}
253062306a36Sopenharmony_ci
253162306a36Sopenharmony_cinext_snapshot:
253262306a36Sopenharmony_ci		dm_exception_table_unlock(&lock);
253362306a36Sopenharmony_ci		up_read(&snap->lock);
253462306a36Sopenharmony_ci
253562306a36Sopenharmony_ci		if (pe_to_start_now) {
253662306a36Sopenharmony_ci			start_copy(pe_to_start_now);
253762306a36Sopenharmony_ci			pe_to_start_now = NULL;
253862306a36Sopenharmony_ci		}
253962306a36Sopenharmony_ci	}
254062306a36Sopenharmony_ci
254162306a36Sopenharmony_ci	/*
254262306a36Sopenharmony_ci	 * Submit the exception against which the bio is queued last,
254362306a36Sopenharmony_ci	 * to give the other exceptions a head start.
254462306a36Sopenharmony_ci	 */
254562306a36Sopenharmony_ci	if (pe_to_start_last)
254662306a36Sopenharmony_ci		start_copy(pe_to_start_last);
254762306a36Sopenharmony_ci
254862306a36Sopenharmony_ci	return r;
254962306a36Sopenharmony_ci}
255062306a36Sopenharmony_ci
255162306a36Sopenharmony_ci/*
255262306a36Sopenharmony_ci * Called on a write from the origin driver.
255362306a36Sopenharmony_ci */
255462306a36Sopenharmony_cistatic int do_origin(struct dm_dev *origin, struct bio *bio, bool limit)
255562306a36Sopenharmony_ci{
255662306a36Sopenharmony_ci	struct origin *o;
255762306a36Sopenharmony_ci	int r = DM_MAPIO_REMAPPED;
255862306a36Sopenharmony_ci
255962306a36Sopenharmony_ciagain:
256062306a36Sopenharmony_ci	down_read(&_origins_lock);
256162306a36Sopenharmony_ci	o = __lookup_origin(origin->bdev);
256262306a36Sopenharmony_ci	if (o) {
256362306a36Sopenharmony_ci		if (limit) {
256462306a36Sopenharmony_ci			struct dm_snapshot *s;
256562306a36Sopenharmony_ci
256662306a36Sopenharmony_ci			list_for_each_entry(s, &o->snapshots, list)
256762306a36Sopenharmony_ci				if (unlikely(!wait_for_in_progress(s, true)))
256862306a36Sopenharmony_ci					goto again;
256962306a36Sopenharmony_ci		}
257062306a36Sopenharmony_ci
257162306a36Sopenharmony_ci		r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
257262306a36Sopenharmony_ci	}
257362306a36Sopenharmony_ci	up_read(&_origins_lock);
257462306a36Sopenharmony_ci
257562306a36Sopenharmony_ci	return r;
257662306a36Sopenharmony_ci}
257762306a36Sopenharmony_ci
257862306a36Sopenharmony_ci/*
257962306a36Sopenharmony_ci * Trigger exceptions in all non-merging snapshots.
258062306a36Sopenharmony_ci *
258162306a36Sopenharmony_ci * The chunk size of the merging snapshot may be larger than the chunk
258262306a36Sopenharmony_ci * size of some other snapshot so we may need to reallocate multiple
258362306a36Sopenharmony_ci * chunks in other snapshots.
258462306a36Sopenharmony_ci *
258562306a36Sopenharmony_ci * We scan all the overlapping exceptions in the other snapshots.
258662306a36Sopenharmony_ci * Returns 1 if anything was reallocated and must be waited for,
258762306a36Sopenharmony_ci * otherwise returns 0.
258862306a36Sopenharmony_ci *
258962306a36Sopenharmony_ci * size must be a multiple of merging_snap's chunk_size.
259062306a36Sopenharmony_ci */
259162306a36Sopenharmony_cistatic int origin_write_extent(struct dm_snapshot *merging_snap,
259262306a36Sopenharmony_ci			       sector_t sector, unsigned int size)
259362306a36Sopenharmony_ci{
259462306a36Sopenharmony_ci	int must_wait = 0;
259562306a36Sopenharmony_ci	sector_t n;
259662306a36Sopenharmony_ci	struct origin *o;
259762306a36Sopenharmony_ci
259862306a36Sopenharmony_ci	/*
259962306a36Sopenharmony_ci	 * The origin's __minimum_chunk_size() got stored in max_io_len
260062306a36Sopenharmony_ci	 * by snapshot_merge_resume().
260162306a36Sopenharmony_ci	 */
260262306a36Sopenharmony_ci	down_read(&_origins_lock);
260362306a36Sopenharmony_ci	o = __lookup_origin(merging_snap->origin->bdev);
260462306a36Sopenharmony_ci	for (n = 0; n < size; n += merging_snap->ti->max_io_len)
260562306a36Sopenharmony_ci		if (__origin_write(&o->snapshots, sector + n, NULL) ==
260662306a36Sopenharmony_ci		    DM_MAPIO_SUBMITTED)
260762306a36Sopenharmony_ci			must_wait = 1;
260862306a36Sopenharmony_ci	up_read(&_origins_lock);
260962306a36Sopenharmony_ci
261062306a36Sopenharmony_ci	return must_wait;
261162306a36Sopenharmony_ci}
261262306a36Sopenharmony_ci
261362306a36Sopenharmony_ci/*
261462306a36Sopenharmony_ci * Origin: maps a linear range of a device, with hooks for snapshotting.
261562306a36Sopenharmony_ci */
261662306a36Sopenharmony_ci
261762306a36Sopenharmony_ci/*
261862306a36Sopenharmony_ci * Construct an origin mapping: <dev_path>
261962306a36Sopenharmony_ci * The context for an origin is merely a 'struct dm_dev *'
262062306a36Sopenharmony_ci * pointing to the real device.
262162306a36Sopenharmony_ci */
262262306a36Sopenharmony_cistatic int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
262362306a36Sopenharmony_ci{
262462306a36Sopenharmony_ci	int r;
262562306a36Sopenharmony_ci	struct dm_origin *o;
262662306a36Sopenharmony_ci
262762306a36Sopenharmony_ci	if (argc != 1) {
262862306a36Sopenharmony_ci		ti->error = "origin: incorrect number of arguments";
262962306a36Sopenharmony_ci		return -EINVAL;
263062306a36Sopenharmony_ci	}
263162306a36Sopenharmony_ci
263262306a36Sopenharmony_ci	o = kmalloc(sizeof(struct dm_origin), GFP_KERNEL);
263362306a36Sopenharmony_ci	if (!o) {
263462306a36Sopenharmony_ci		ti->error = "Cannot allocate private origin structure";
263562306a36Sopenharmony_ci		r = -ENOMEM;
263662306a36Sopenharmony_ci		goto bad_alloc;
263762306a36Sopenharmony_ci	}
263862306a36Sopenharmony_ci
263962306a36Sopenharmony_ci	r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &o->dev);
264062306a36Sopenharmony_ci	if (r) {
264162306a36Sopenharmony_ci		ti->error = "Cannot get target device";
264262306a36Sopenharmony_ci		goto bad_open;
264362306a36Sopenharmony_ci	}
264462306a36Sopenharmony_ci
264562306a36Sopenharmony_ci	o->ti = ti;
264662306a36Sopenharmony_ci	ti->private = o;
264762306a36Sopenharmony_ci	ti->num_flush_bios = 1;
264862306a36Sopenharmony_ci
264962306a36Sopenharmony_ci	return 0;
265062306a36Sopenharmony_ci
265162306a36Sopenharmony_cibad_open:
265262306a36Sopenharmony_ci	kfree(o);
265362306a36Sopenharmony_cibad_alloc:
265462306a36Sopenharmony_ci	return r;
265562306a36Sopenharmony_ci}
265662306a36Sopenharmony_ci
265762306a36Sopenharmony_cistatic void origin_dtr(struct dm_target *ti)
265862306a36Sopenharmony_ci{
265962306a36Sopenharmony_ci	struct dm_origin *o = ti->private;
266062306a36Sopenharmony_ci
266162306a36Sopenharmony_ci	dm_put_device(ti, o->dev);
266262306a36Sopenharmony_ci	kfree(o);
266362306a36Sopenharmony_ci}
266462306a36Sopenharmony_ci
266562306a36Sopenharmony_cistatic int origin_map(struct dm_target *ti, struct bio *bio)
266662306a36Sopenharmony_ci{
266762306a36Sopenharmony_ci	struct dm_origin *o = ti->private;
266862306a36Sopenharmony_ci	unsigned int available_sectors;
266962306a36Sopenharmony_ci
267062306a36Sopenharmony_ci	bio_set_dev(bio, o->dev->bdev);
267162306a36Sopenharmony_ci
267262306a36Sopenharmony_ci	if (unlikely(bio->bi_opf & REQ_PREFLUSH))
267362306a36Sopenharmony_ci		return DM_MAPIO_REMAPPED;
267462306a36Sopenharmony_ci
267562306a36Sopenharmony_ci	if (bio_data_dir(bio) != WRITE)
267662306a36Sopenharmony_ci		return DM_MAPIO_REMAPPED;
267762306a36Sopenharmony_ci
267862306a36Sopenharmony_ci	available_sectors = o->split_boundary -
267962306a36Sopenharmony_ci		((unsigned int)bio->bi_iter.bi_sector & (o->split_boundary - 1));
268062306a36Sopenharmony_ci
268162306a36Sopenharmony_ci	if (bio_sectors(bio) > available_sectors)
268262306a36Sopenharmony_ci		dm_accept_partial_bio(bio, available_sectors);
268362306a36Sopenharmony_ci
268462306a36Sopenharmony_ci	/* Only tell snapshots if this is a write */
268562306a36Sopenharmony_ci	return do_origin(o->dev, bio, true);
268662306a36Sopenharmony_ci}
268762306a36Sopenharmony_ci
268862306a36Sopenharmony_ci/*
268962306a36Sopenharmony_ci * Set the target "max_io_len" field to the minimum of all the snapshots'
269062306a36Sopenharmony_ci * chunk sizes.
269162306a36Sopenharmony_ci */
269262306a36Sopenharmony_cistatic void origin_resume(struct dm_target *ti)
269362306a36Sopenharmony_ci{
269462306a36Sopenharmony_ci	struct dm_origin *o = ti->private;
269562306a36Sopenharmony_ci
269662306a36Sopenharmony_ci	o->split_boundary = get_origin_minimum_chunksize(o->dev->bdev);
269762306a36Sopenharmony_ci
269862306a36Sopenharmony_ci	down_write(&_origins_lock);
269962306a36Sopenharmony_ci	__insert_dm_origin(o);
270062306a36Sopenharmony_ci	up_write(&_origins_lock);
270162306a36Sopenharmony_ci}
270262306a36Sopenharmony_ci
270362306a36Sopenharmony_cistatic void origin_postsuspend(struct dm_target *ti)
270462306a36Sopenharmony_ci{
270562306a36Sopenharmony_ci	struct dm_origin *o = ti->private;
270662306a36Sopenharmony_ci
270762306a36Sopenharmony_ci	down_write(&_origins_lock);
270862306a36Sopenharmony_ci	__remove_dm_origin(o);
270962306a36Sopenharmony_ci	up_write(&_origins_lock);
271062306a36Sopenharmony_ci}
271162306a36Sopenharmony_ci
271262306a36Sopenharmony_cistatic void origin_status(struct dm_target *ti, status_type_t type,
271362306a36Sopenharmony_ci			  unsigned int status_flags, char *result, unsigned int maxlen)
271462306a36Sopenharmony_ci{
271562306a36Sopenharmony_ci	struct dm_origin *o = ti->private;
271662306a36Sopenharmony_ci
271762306a36Sopenharmony_ci	switch (type) {
271862306a36Sopenharmony_ci	case STATUSTYPE_INFO:
271962306a36Sopenharmony_ci		result[0] = '\0';
272062306a36Sopenharmony_ci		break;
272162306a36Sopenharmony_ci
272262306a36Sopenharmony_ci	case STATUSTYPE_TABLE:
272362306a36Sopenharmony_ci		snprintf(result, maxlen, "%s", o->dev->name);
272462306a36Sopenharmony_ci		break;
272562306a36Sopenharmony_ci	case STATUSTYPE_IMA:
272662306a36Sopenharmony_ci		result[0] = '\0';
272762306a36Sopenharmony_ci		break;
272862306a36Sopenharmony_ci	}
272962306a36Sopenharmony_ci}
273062306a36Sopenharmony_ci
273162306a36Sopenharmony_cistatic int origin_iterate_devices(struct dm_target *ti,
273262306a36Sopenharmony_ci				  iterate_devices_callout_fn fn, void *data)
273362306a36Sopenharmony_ci{
273462306a36Sopenharmony_ci	struct dm_origin *o = ti->private;
273562306a36Sopenharmony_ci
273662306a36Sopenharmony_ci	return fn(ti, o->dev, 0, ti->len, data);
273762306a36Sopenharmony_ci}
273862306a36Sopenharmony_ci
273962306a36Sopenharmony_cistatic struct target_type origin_target = {
274062306a36Sopenharmony_ci	.name    = "snapshot-origin",
274162306a36Sopenharmony_ci	.version = {1, 9, 0},
274262306a36Sopenharmony_ci	.module  = THIS_MODULE,
274362306a36Sopenharmony_ci	.ctr     = origin_ctr,
274462306a36Sopenharmony_ci	.dtr     = origin_dtr,
274562306a36Sopenharmony_ci	.map     = origin_map,
274662306a36Sopenharmony_ci	.resume  = origin_resume,
274762306a36Sopenharmony_ci	.postsuspend = origin_postsuspend,
274862306a36Sopenharmony_ci	.status  = origin_status,
274962306a36Sopenharmony_ci	.iterate_devices = origin_iterate_devices,
275062306a36Sopenharmony_ci};
275162306a36Sopenharmony_ci
275262306a36Sopenharmony_cistatic struct target_type snapshot_target = {
275362306a36Sopenharmony_ci	.name    = "snapshot",
275462306a36Sopenharmony_ci	.version = {1, 16, 0},
275562306a36Sopenharmony_ci	.module  = THIS_MODULE,
275662306a36Sopenharmony_ci	.ctr     = snapshot_ctr,
275762306a36Sopenharmony_ci	.dtr     = snapshot_dtr,
275862306a36Sopenharmony_ci	.map     = snapshot_map,
275962306a36Sopenharmony_ci	.end_io  = snapshot_end_io,
276062306a36Sopenharmony_ci	.preresume  = snapshot_preresume,
276162306a36Sopenharmony_ci	.resume  = snapshot_resume,
276262306a36Sopenharmony_ci	.status  = snapshot_status,
276362306a36Sopenharmony_ci	.iterate_devices = snapshot_iterate_devices,
276462306a36Sopenharmony_ci	.io_hints = snapshot_io_hints,
276562306a36Sopenharmony_ci};
276662306a36Sopenharmony_ci
276762306a36Sopenharmony_cistatic struct target_type merge_target = {
276862306a36Sopenharmony_ci	.name    = dm_snapshot_merge_target_name,
276962306a36Sopenharmony_ci	.version = {1, 5, 0},
277062306a36Sopenharmony_ci	.module  = THIS_MODULE,
277162306a36Sopenharmony_ci	.ctr     = snapshot_ctr,
277262306a36Sopenharmony_ci	.dtr     = snapshot_dtr,
277362306a36Sopenharmony_ci	.map     = snapshot_merge_map,
277462306a36Sopenharmony_ci	.end_io  = snapshot_end_io,
277562306a36Sopenharmony_ci	.presuspend = snapshot_merge_presuspend,
277662306a36Sopenharmony_ci	.preresume  = snapshot_preresume,
277762306a36Sopenharmony_ci	.resume  = snapshot_merge_resume,
277862306a36Sopenharmony_ci	.status  = snapshot_status,
277962306a36Sopenharmony_ci	.iterate_devices = snapshot_iterate_devices,
278062306a36Sopenharmony_ci	.io_hints = snapshot_io_hints,
278162306a36Sopenharmony_ci};
278262306a36Sopenharmony_ci
278362306a36Sopenharmony_cistatic int __init dm_snapshot_init(void)
278462306a36Sopenharmony_ci{
278562306a36Sopenharmony_ci	int r;
278662306a36Sopenharmony_ci
278762306a36Sopenharmony_ci	r = dm_exception_store_init();
278862306a36Sopenharmony_ci	if (r) {
278962306a36Sopenharmony_ci		DMERR("Failed to initialize exception stores");
279062306a36Sopenharmony_ci		return r;
279162306a36Sopenharmony_ci	}
279262306a36Sopenharmony_ci
279362306a36Sopenharmony_ci	r = init_origin_hash();
279462306a36Sopenharmony_ci	if (r) {
279562306a36Sopenharmony_ci		DMERR("init_origin_hash failed.");
279662306a36Sopenharmony_ci		goto bad_origin_hash;
279762306a36Sopenharmony_ci	}
279862306a36Sopenharmony_ci
279962306a36Sopenharmony_ci	exception_cache = KMEM_CACHE(dm_exception, 0);
280062306a36Sopenharmony_ci	if (!exception_cache) {
280162306a36Sopenharmony_ci		DMERR("Couldn't create exception cache.");
280262306a36Sopenharmony_ci		r = -ENOMEM;
280362306a36Sopenharmony_ci		goto bad_exception_cache;
280462306a36Sopenharmony_ci	}
280562306a36Sopenharmony_ci
280662306a36Sopenharmony_ci	pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0);
280762306a36Sopenharmony_ci	if (!pending_cache) {
280862306a36Sopenharmony_ci		DMERR("Couldn't create pending cache.");
280962306a36Sopenharmony_ci		r = -ENOMEM;
281062306a36Sopenharmony_ci		goto bad_pending_cache;
281162306a36Sopenharmony_ci	}
281262306a36Sopenharmony_ci
281362306a36Sopenharmony_ci	r = dm_register_target(&snapshot_target);
281462306a36Sopenharmony_ci	if (r < 0)
281562306a36Sopenharmony_ci		goto bad_register_snapshot_target;
281662306a36Sopenharmony_ci
281762306a36Sopenharmony_ci	r = dm_register_target(&origin_target);
281862306a36Sopenharmony_ci	if (r < 0)
281962306a36Sopenharmony_ci		goto bad_register_origin_target;
282062306a36Sopenharmony_ci
282162306a36Sopenharmony_ci	r = dm_register_target(&merge_target);
282262306a36Sopenharmony_ci	if (r < 0)
282362306a36Sopenharmony_ci		goto bad_register_merge_target;
282462306a36Sopenharmony_ci
282562306a36Sopenharmony_ci	return 0;
282662306a36Sopenharmony_ci
282762306a36Sopenharmony_cibad_register_merge_target:
282862306a36Sopenharmony_ci	dm_unregister_target(&origin_target);
282962306a36Sopenharmony_cibad_register_origin_target:
283062306a36Sopenharmony_ci	dm_unregister_target(&snapshot_target);
283162306a36Sopenharmony_cibad_register_snapshot_target:
283262306a36Sopenharmony_ci	kmem_cache_destroy(pending_cache);
283362306a36Sopenharmony_cibad_pending_cache:
283462306a36Sopenharmony_ci	kmem_cache_destroy(exception_cache);
283562306a36Sopenharmony_cibad_exception_cache:
283662306a36Sopenharmony_ci	exit_origin_hash();
283762306a36Sopenharmony_cibad_origin_hash:
283862306a36Sopenharmony_ci	dm_exception_store_exit();
283962306a36Sopenharmony_ci
284062306a36Sopenharmony_ci	return r;
284162306a36Sopenharmony_ci}
284262306a36Sopenharmony_ci
284362306a36Sopenharmony_cistatic void __exit dm_snapshot_exit(void)
284462306a36Sopenharmony_ci{
284562306a36Sopenharmony_ci	dm_unregister_target(&snapshot_target);
284662306a36Sopenharmony_ci	dm_unregister_target(&origin_target);
284762306a36Sopenharmony_ci	dm_unregister_target(&merge_target);
284862306a36Sopenharmony_ci
284962306a36Sopenharmony_ci	exit_origin_hash();
285062306a36Sopenharmony_ci	kmem_cache_destroy(pending_cache);
285162306a36Sopenharmony_ci	kmem_cache_destroy(exception_cache);
285262306a36Sopenharmony_ci
285362306a36Sopenharmony_ci	dm_exception_store_exit();
285462306a36Sopenharmony_ci}
285562306a36Sopenharmony_ci
285662306a36Sopenharmony_ci/* Module hooks */
285762306a36Sopenharmony_cimodule_init(dm_snapshot_init);
285862306a36Sopenharmony_cimodule_exit(dm_snapshot_exit);
285962306a36Sopenharmony_ci
286062306a36Sopenharmony_ciMODULE_DESCRIPTION(DM_NAME " snapshot target");
286162306a36Sopenharmony_ciMODULE_AUTHOR("Joe Thornber");
286262306a36Sopenharmony_ciMODULE_LICENSE("GPL");
286362306a36Sopenharmony_ciMODULE_ALIAS("dm-snapshot-origin");
286462306a36Sopenharmony_ciMODULE_ALIAS("dm-snapshot-merge");
2865