162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2019 Arrikto, Inc. All Rights Reserved.
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/mm.h>
762306a36Sopenharmony_ci#include <linux/err.h>
862306a36Sopenharmony_ci#include <linux/slab.h>
962306a36Sopenharmony_ci#include <linux/rwsem.h>
1062306a36Sopenharmony_ci#include <linux/bitops.h>
1162306a36Sopenharmony_ci#include <linux/bitmap.h>
1262306a36Sopenharmony_ci#include <linux/device-mapper.h>
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci#include "persistent-data/dm-bitset.h"
1562306a36Sopenharmony_ci#include "persistent-data/dm-space-map.h"
1662306a36Sopenharmony_ci#include "persistent-data/dm-block-manager.h"
1762306a36Sopenharmony_ci#include "persistent-data/dm-transaction-manager.h"
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci#include "dm-clone-metadata.h"
2062306a36Sopenharmony_ci
/* Message prefix for this file (presumably picked up by the DMERR() logging macro). */
#define DM_MSG_PREFIX "clone metadata"

/* Block number of the superblock on the metadata device. */
#define SUPERBLOCK_LOCATION 0
/* Value sb_check() expects to find in superblock_disk.magic. */
#define SUPERBLOCK_MAGIC 0x8af27f64
/* Passed to dm_bm_checksum() whenever the superblock checksum is computed. */
#define SUPERBLOCK_CSUM_XOR 257649492

/* Lock limit handed to dm_block_manager_create(). */
#define DM_CLONE_MAX_CONCURRENT_LOCKS 5

/* Size in bytes of the superblock's uuid field. */
#define UUID_LEN 16

/* Min and max dm-clone metadata versions supported */
#define DM_CLONE_MIN_METADATA_VERSION 1
#define DM_CLONE_MAX_METADATA_VERSION 1
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci/*
3662306a36Sopenharmony_ci * On-disk metadata layout
3762306a36Sopenharmony_ci */
struct superblock_disk {
	/* Checksum of the block contents from 'flags' onwards */
	__le32 csum;
	/* Written as 0 by __prepare_superblock() */
	__le32 flags;
	/* The block's own location; filled in before write, verified on read */
	__le64 blocknr;

	/* Currently unused; zero-filled by __prepare_superblock() */
	__u8 uuid[UUID_LEN];
	/* Must equal SUPERBLOCK_MAGIC */
	__le64 magic;
	/* Metadata format version; range-checked in sb_check() */
	__le32 version;

	/* Copy of the metadata space map root */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	/* Geometry recorded at format time; compared again in __open_metadata() */
	__le64 region_size;
	__le64 target_size;

	/* Root block of the on-disk region bitset */
	__le64 bitset_root;
} __packed;
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_ci/*
5662306a36Sopenharmony_ci * Region and Dirty bitmaps.
5762306a36Sopenharmony_ci *
5862306a36Sopenharmony_ci * dm-clone logically splits the source and destination devices in regions of
5962306a36Sopenharmony_ci * fixed size. The destination device's regions are gradually hydrated, i.e.,
6062306a36Sopenharmony_ci * we copy (clone) the source's regions to the destination device. Eventually,
6162306a36Sopenharmony_ci * all regions will get hydrated and all I/O will be served from the
6262306a36Sopenharmony_ci * destination device.
6362306a36Sopenharmony_ci *
6462306a36Sopenharmony_ci * We maintain an on-disk bitmap which tracks the state of each of the
6562306a36Sopenharmony_ci * destination device's regions, i.e., whether they are hydrated or not.
6662306a36Sopenharmony_ci *
6762306a36Sopenharmony_ci * To save constantly doing look ups on disk we keep an in core copy of the
6862306a36Sopenharmony_ci * on-disk bitmap, the region_map.
6962306a36Sopenharmony_ci *
7062306a36Sopenharmony_ci * In order to track which regions are hydrated during a metadata transaction,
7162306a36Sopenharmony_ci * we use a second set of bitmaps, the dmap (dirty bitmap), which includes two
7262306a36Sopenharmony_ci * bitmaps, namely dirty_regions and dirty_words. The dirty_regions bitmap
7362306a36Sopenharmony_ci * tracks the regions that got hydrated during the current metadata
7462306a36Sopenharmony_ci * transaction. The dirty_words bitmap tracks the dirty words, i.e. longs, of
7562306a36Sopenharmony_ci * the dirty_regions bitmap.
7662306a36Sopenharmony_ci *
7762306a36Sopenharmony_ci * This allows us to precisely track the regions that were hydrated during the
7862306a36Sopenharmony_ci * current metadata transaction and update the metadata accordingly, when we
7962306a36Sopenharmony_ci * commit the current transaction. This is important because dm-clone should
8062306a36Sopenharmony_ci * only commit the metadata of regions that were properly flushed to the
8162306a36Sopenharmony_ci * destination device beforehand. Otherwise, in case of a crash, we could end
8262306a36Sopenharmony_ci * up with a corrupted dm-clone device.
8362306a36Sopenharmony_ci *
8462306a36Sopenharmony_ci * When a region finishes hydrating dm-clone calls
8562306a36Sopenharmony_ci * dm_clone_set_region_hydrated(), or for discard requests
8662306a36Sopenharmony_ci * dm_clone_cond_set_range(), which sets the corresponding bits in region_map
8762306a36Sopenharmony_ci * and dmap.
8862306a36Sopenharmony_ci *
8962306a36Sopenharmony_ci * During a metadata commit we scan dmap->dirty_words and dmap->dirty_regions
9062306a36Sopenharmony_ci * and update the on-disk metadata accordingly. Thus, we don't have to flush to
9162306a36Sopenharmony_ci * disk the whole region_map. We can just flush the dirty region_map bits.
9262306a36Sopenharmony_ci *
9362306a36Sopenharmony_ci * We use the helper dmap->dirty_words bitmap, which is smaller than the
9462306a36Sopenharmony_ci * original region_map, to reduce the amount of memory accesses during a
9562306a36Sopenharmony_ci * metadata commit. Moreover, as dm-bitset also accesses the on-disk bitmap in
9662306a36Sopenharmony_ci * 64-bit word granularity, the dirty_words bitmap helps us avoid useless disk
9762306a36Sopenharmony_ci * accesses.
9862306a36Sopenharmony_ci *
 * We could update the on-disk bitmap directly, when dm-clone calls either
 * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), but this
 * inserts significant metadata I/O overhead in dm-clone's I/O path. Also, as
10262306a36Sopenharmony_ci * these two functions don't block, we can call them in interrupt context,
10362306a36Sopenharmony_ci * e.g., in a hooked overwrite bio's completion routine, and further reduce the
10462306a36Sopenharmony_ci * I/O completion latency.
10562306a36Sopenharmony_ci *
10662306a36Sopenharmony_ci * We maintain two dirty bitmap sets. During a metadata commit we atomically
10762306a36Sopenharmony_ci * swap the currently used dmap with the unused one. This allows the metadata
10862306a36Sopenharmony_ci * update functions to run concurrently with an ongoing commit.
10962306a36Sopenharmony_ci */
struct dirty_map {
	/* Bitmap allocated with nr_words bits: one bit per long of dirty_regions */
	unsigned long *dirty_words;
	/* Bitmap allocated with nr_regions bits: one bit per region */
	unsigned long *dirty_regions;
	/* Zeroed by __dirty_map_init(). NOTE(review): readers are outside this part of the file */
	unsigned int changed;
};
11562306a36Sopenharmony_ci
struct dm_clone_metadata {
	/* The metadata block device */
	struct block_device *bdev;

	/* Target and region sizes, as passed to dm_clone_metadata_open() */
	sector_t target_size;
	sector_t region_size;
	/* target_size divided by region_size, rounded up */
	unsigned long nr_regions;
	/* Number of longs needed to hold nr_regions bits */
	unsigned long nr_words;

	/* Spinlock protecting the region and dirty bitmaps. */
	spinlock_t bitmap_lock;
	struct dirty_map dmap[2];
	/* Points at dmap[0] after dirty_map_init() */
	struct dirty_map *current_dmap;

	/* Protected by lock */
	struct dirty_map *committing_dmap;

	/*
	 * In core copy of the on-disk bitmap to save constantly doing look ups
	 * on disk.
	 */
	unsigned long *region_map;

	/* Protected by bitmap_lock */
	unsigned int read_only;

	/* Block manager, space map and transaction manager of the metadata device */
	struct dm_block_manager *bm;
	struct dm_space_map *sm;
	struct dm_transaction_manager *tm;

	struct rw_semaphore lock;

	/* Handle and current root of the on-disk region bitset */
	struct dm_disk_bitset bitset_info;
	dm_block_t bitset_root;

	/*
	 * Reading the space map root can fail, so we read it into this
	 * buffer before the superblock is locked and updated.
	 */
	__u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];

	/* Set at open time when every bit of region_map is already set */
	bool hydration_done:1;
	/* When set, dm_clone_metadata_close() skips tearing down bm/sm/tm */
	bool fail_io:1;
};
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci/*
16462306a36Sopenharmony_ci * Superblock validation.
16562306a36Sopenharmony_ci */
16662306a36Sopenharmony_cistatic void sb_prepare_for_write(struct dm_block_validator *v,
16762306a36Sopenharmony_ci				 struct dm_block *b, size_t sb_block_size)
16862306a36Sopenharmony_ci{
16962306a36Sopenharmony_ci	struct superblock_disk *sb;
17062306a36Sopenharmony_ci	u32 csum;
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_ci	sb = dm_block_data(b);
17362306a36Sopenharmony_ci	sb->blocknr = cpu_to_le64(dm_block_location(b));
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci	csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32),
17662306a36Sopenharmony_ci			      SUPERBLOCK_CSUM_XOR);
17762306a36Sopenharmony_ci	sb->csum = cpu_to_le32(csum);
17862306a36Sopenharmony_ci}
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_cistatic int sb_check(struct dm_block_validator *v, struct dm_block *b,
18162306a36Sopenharmony_ci		    size_t sb_block_size)
18262306a36Sopenharmony_ci{
18362306a36Sopenharmony_ci	struct superblock_disk *sb;
18462306a36Sopenharmony_ci	u32 csum, metadata_version;
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci	sb = dm_block_data(b);
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	if (dm_block_location(b) != le64_to_cpu(sb->blocknr)) {
18962306a36Sopenharmony_ci		DMERR("Superblock check failed: blocknr %llu, expected %llu",
19062306a36Sopenharmony_ci		      le64_to_cpu(sb->blocknr),
19162306a36Sopenharmony_ci		      (unsigned long long)dm_block_location(b));
19262306a36Sopenharmony_ci		return -ENOTBLK;
19362306a36Sopenharmony_ci	}
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	if (le64_to_cpu(sb->magic) != SUPERBLOCK_MAGIC) {
19662306a36Sopenharmony_ci		DMERR("Superblock check failed: magic %llu, expected %llu",
19762306a36Sopenharmony_ci		      le64_to_cpu(sb->magic),
19862306a36Sopenharmony_ci		      (unsigned long long)SUPERBLOCK_MAGIC);
19962306a36Sopenharmony_ci		return -EILSEQ;
20062306a36Sopenharmony_ci	}
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci	csum = dm_bm_checksum(&sb->flags, sb_block_size - sizeof(__le32),
20362306a36Sopenharmony_ci			      SUPERBLOCK_CSUM_XOR);
20462306a36Sopenharmony_ci	if (sb->csum != cpu_to_le32(csum)) {
20562306a36Sopenharmony_ci		DMERR("Superblock check failed: checksum %u, expected %u",
20662306a36Sopenharmony_ci		      csum, le32_to_cpu(sb->csum));
20762306a36Sopenharmony_ci		return -EILSEQ;
20862306a36Sopenharmony_ci	}
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_ci	/* Check metadata version */
21162306a36Sopenharmony_ci	metadata_version = le32_to_cpu(sb->version);
21262306a36Sopenharmony_ci	if (metadata_version < DM_CLONE_MIN_METADATA_VERSION ||
21362306a36Sopenharmony_ci	    metadata_version > DM_CLONE_MAX_METADATA_VERSION) {
21462306a36Sopenharmony_ci		DMERR("Clone metadata version %u found, but only versions between %u and %u supported.",
21562306a36Sopenharmony_ci		      metadata_version, DM_CLONE_MIN_METADATA_VERSION,
21662306a36Sopenharmony_ci		      DM_CLONE_MAX_METADATA_VERSION);
21762306a36Sopenharmony_ci		return -EINVAL;
21862306a36Sopenharmony_ci	}
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci	return 0;
22162306a36Sopenharmony_ci}
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_cistatic struct dm_block_validator sb_validator = {
22462306a36Sopenharmony_ci	.name = "superblock",
22562306a36Sopenharmony_ci	.prepare_for_write = sb_prepare_for_write,
22662306a36Sopenharmony_ci	.check = sb_check
22762306a36Sopenharmony_ci};
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ci/*
23062306a36Sopenharmony_ci * Check if the superblock is formatted or not. We consider the superblock to
23162306a36Sopenharmony_ci * be formatted in case we find non-zero bytes in it.
23262306a36Sopenharmony_ci */
23362306a36Sopenharmony_cistatic int __superblock_all_zeroes(struct dm_block_manager *bm, bool *formatted)
23462306a36Sopenharmony_ci{
23562306a36Sopenharmony_ci	int r;
23662306a36Sopenharmony_ci	unsigned int i, nr_words;
23762306a36Sopenharmony_ci	struct dm_block *sblock;
23862306a36Sopenharmony_ci	__le64 *data_le, zero = cpu_to_le64(0);
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ci	/*
24162306a36Sopenharmony_ci	 * We don't use a validator here because the superblock could be all
24262306a36Sopenharmony_ci	 * zeroes.
24362306a36Sopenharmony_ci	 */
24462306a36Sopenharmony_ci	r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &sblock);
24562306a36Sopenharmony_ci	if (r) {
24662306a36Sopenharmony_ci		DMERR("Failed to read_lock superblock");
24762306a36Sopenharmony_ci		return r;
24862306a36Sopenharmony_ci	}
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_ci	data_le = dm_block_data(sblock);
25162306a36Sopenharmony_ci	*formatted = false;
25262306a36Sopenharmony_ci
25362306a36Sopenharmony_ci	/* This assumes that the block size is a multiple of 8 bytes */
25462306a36Sopenharmony_ci	BUG_ON(dm_bm_block_size(bm) % sizeof(__le64));
25562306a36Sopenharmony_ci	nr_words = dm_bm_block_size(bm) / sizeof(__le64);
25662306a36Sopenharmony_ci	for (i = 0; i < nr_words; i++) {
25762306a36Sopenharmony_ci		if (data_le[i] != zero) {
25862306a36Sopenharmony_ci			*formatted = true;
25962306a36Sopenharmony_ci			break;
26062306a36Sopenharmony_ci		}
26162306a36Sopenharmony_ci	}
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_ci	dm_bm_unlock(sblock);
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci	return 0;
26662306a36Sopenharmony_ci}
26762306a36Sopenharmony_ci
26862306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/
26962306a36Sopenharmony_ci
27062306a36Sopenharmony_ci/*
27162306a36Sopenharmony_ci * Low-level metadata handling.
27262306a36Sopenharmony_ci */
27362306a36Sopenharmony_cistatic inline int superblock_read_lock(struct dm_clone_metadata *cmd,
27462306a36Sopenharmony_ci				       struct dm_block **sblock)
27562306a36Sopenharmony_ci{
27662306a36Sopenharmony_ci	return dm_bm_read_lock(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
27762306a36Sopenharmony_ci}
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_cistatic inline int superblock_write_lock_zero(struct dm_clone_metadata *cmd,
28062306a36Sopenharmony_ci					     struct dm_block **sblock)
28162306a36Sopenharmony_ci{
28262306a36Sopenharmony_ci	return dm_bm_write_lock_zero(cmd->bm, SUPERBLOCK_LOCATION, &sb_validator, sblock);
28362306a36Sopenharmony_ci}
28462306a36Sopenharmony_ci
28562306a36Sopenharmony_cistatic int __copy_sm_root(struct dm_clone_metadata *cmd)
28662306a36Sopenharmony_ci{
28762306a36Sopenharmony_ci	int r;
28862306a36Sopenharmony_ci	size_t root_size;
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ci	r = dm_sm_root_size(cmd->sm, &root_size);
29162306a36Sopenharmony_ci	if (r)
29262306a36Sopenharmony_ci		return r;
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_ci	return dm_sm_copy_root(cmd->sm, &cmd->metadata_space_map_root, root_size);
29562306a36Sopenharmony_ci}
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci/* Save dm-clone metadata in superblock */
29862306a36Sopenharmony_cistatic void __prepare_superblock(struct dm_clone_metadata *cmd,
29962306a36Sopenharmony_ci				 struct superblock_disk *sb)
30062306a36Sopenharmony_ci{
30162306a36Sopenharmony_ci	sb->flags = cpu_to_le32(0UL);
30262306a36Sopenharmony_ci
30362306a36Sopenharmony_ci	/* FIXME: UUID is currently unused */
30462306a36Sopenharmony_ci	memset(sb->uuid, 0, sizeof(sb->uuid));
30562306a36Sopenharmony_ci
30662306a36Sopenharmony_ci	sb->magic = cpu_to_le64(SUPERBLOCK_MAGIC);
30762306a36Sopenharmony_ci	sb->version = cpu_to_le32(DM_CLONE_MAX_METADATA_VERSION);
30862306a36Sopenharmony_ci
30962306a36Sopenharmony_ci	/* Save the metadata space_map root */
31062306a36Sopenharmony_ci	memcpy(&sb->metadata_space_map_root, &cmd->metadata_space_map_root,
31162306a36Sopenharmony_ci	       sizeof(cmd->metadata_space_map_root));
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci	sb->region_size = cpu_to_le64(cmd->region_size);
31462306a36Sopenharmony_ci	sb->target_size = cpu_to_le64(cmd->target_size);
31562306a36Sopenharmony_ci	sb->bitset_root = cpu_to_le64(cmd->bitset_root);
31662306a36Sopenharmony_ci}
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_cistatic int __open_metadata(struct dm_clone_metadata *cmd)
31962306a36Sopenharmony_ci{
32062306a36Sopenharmony_ci	int r;
32162306a36Sopenharmony_ci	struct dm_block *sblock;
32262306a36Sopenharmony_ci	struct superblock_disk *sb;
32362306a36Sopenharmony_ci
32462306a36Sopenharmony_ci	r = superblock_read_lock(cmd, &sblock);
32562306a36Sopenharmony_ci
32662306a36Sopenharmony_ci	if (r) {
32762306a36Sopenharmony_ci		DMERR("Failed to read_lock superblock");
32862306a36Sopenharmony_ci		return r;
32962306a36Sopenharmony_ci	}
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_ci	sb = dm_block_data(sblock);
33262306a36Sopenharmony_ci
33362306a36Sopenharmony_ci	/* Verify that target_size and region_size haven't changed. */
33462306a36Sopenharmony_ci	if (cmd->region_size != le64_to_cpu(sb->region_size) ||
33562306a36Sopenharmony_ci	    cmd->target_size != le64_to_cpu(sb->target_size)) {
33662306a36Sopenharmony_ci		DMERR("Region and/or target size don't match the ones in metadata");
33762306a36Sopenharmony_ci		r = -EINVAL;
33862306a36Sopenharmony_ci		goto out_with_lock;
33962306a36Sopenharmony_ci	}
34062306a36Sopenharmony_ci
34162306a36Sopenharmony_ci	r = dm_tm_open_with_sm(cmd->bm, SUPERBLOCK_LOCATION,
34262306a36Sopenharmony_ci			       sb->metadata_space_map_root,
34362306a36Sopenharmony_ci			       sizeof(sb->metadata_space_map_root),
34462306a36Sopenharmony_ci			       &cmd->tm, &cmd->sm);
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	if (r) {
34762306a36Sopenharmony_ci		DMERR("dm_tm_open_with_sm failed");
34862306a36Sopenharmony_ci		goto out_with_lock;
34962306a36Sopenharmony_ci	}
35062306a36Sopenharmony_ci
35162306a36Sopenharmony_ci	dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
35262306a36Sopenharmony_ci	cmd->bitset_root = le64_to_cpu(sb->bitset_root);
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ciout_with_lock:
35562306a36Sopenharmony_ci	dm_bm_unlock(sblock);
35662306a36Sopenharmony_ci
35762306a36Sopenharmony_ci	return r;
35862306a36Sopenharmony_ci}
35962306a36Sopenharmony_ci
36062306a36Sopenharmony_cistatic int __format_metadata(struct dm_clone_metadata *cmd)
36162306a36Sopenharmony_ci{
36262306a36Sopenharmony_ci	int r;
36362306a36Sopenharmony_ci	struct dm_block *sblock;
36462306a36Sopenharmony_ci	struct superblock_disk *sb;
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_ci	r = dm_tm_create_with_sm(cmd->bm, SUPERBLOCK_LOCATION, &cmd->tm, &cmd->sm);
36762306a36Sopenharmony_ci	if (r) {
36862306a36Sopenharmony_ci		DMERR("Failed to create transaction manager");
36962306a36Sopenharmony_ci		return r;
37062306a36Sopenharmony_ci	}
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_ci	dm_disk_bitset_init(cmd->tm, &cmd->bitset_info);
37362306a36Sopenharmony_ci
37462306a36Sopenharmony_ci	r = dm_bitset_empty(&cmd->bitset_info, &cmd->bitset_root);
37562306a36Sopenharmony_ci	if (r) {
37662306a36Sopenharmony_ci		DMERR("Failed to create empty on-disk bitset");
37762306a36Sopenharmony_ci		goto err_with_tm;
37862306a36Sopenharmony_ci	}
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_ci	r = dm_bitset_resize(&cmd->bitset_info, cmd->bitset_root, 0,
38162306a36Sopenharmony_ci			     cmd->nr_regions, false, &cmd->bitset_root);
38262306a36Sopenharmony_ci	if (r) {
38362306a36Sopenharmony_ci		DMERR("Failed to resize on-disk bitset to %lu entries", cmd->nr_regions);
38462306a36Sopenharmony_ci		goto err_with_tm;
38562306a36Sopenharmony_ci	}
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_ci	/* Flush to disk all blocks, except the superblock */
38862306a36Sopenharmony_ci	r = dm_tm_pre_commit(cmd->tm);
38962306a36Sopenharmony_ci	if (r) {
39062306a36Sopenharmony_ci		DMERR("dm_tm_pre_commit failed");
39162306a36Sopenharmony_ci		goto err_with_tm;
39262306a36Sopenharmony_ci	}
39362306a36Sopenharmony_ci
39462306a36Sopenharmony_ci	r = __copy_sm_root(cmd);
39562306a36Sopenharmony_ci	if (r) {
39662306a36Sopenharmony_ci		DMERR("__copy_sm_root failed");
39762306a36Sopenharmony_ci		goto err_with_tm;
39862306a36Sopenharmony_ci	}
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_ci	r = superblock_write_lock_zero(cmd, &sblock);
40162306a36Sopenharmony_ci	if (r) {
40262306a36Sopenharmony_ci		DMERR("Failed to write_lock superblock");
40362306a36Sopenharmony_ci		goto err_with_tm;
40462306a36Sopenharmony_ci	}
40562306a36Sopenharmony_ci
40662306a36Sopenharmony_ci	sb = dm_block_data(sblock);
40762306a36Sopenharmony_ci	__prepare_superblock(cmd, sb);
40862306a36Sopenharmony_ci	r = dm_tm_commit(cmd->tm, sblock);
40962306a36Sopenharmony_ci	if (r) {
41062306a36Sopenharmony_ci		DMERR("Failed to commit superblock");
41162306a36Sopenharmony_ci		goto err_with_tm;
41262306a36Sopenharmony_ci	}
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_ci	return 0;
41562306a36Sopenharmony_ci
41662306a36Sopenharmony_cierr_with_tm:
41762306a36Sopenharmony_ci	dm_sm_destroy(cmd->sm);
41862306a36Sopenharmony_ci	dm_tm_destroy(cmd->tm);
41962306a36Sopenharmony_ci
42062306a36Sopenharmony_ci	return r;
42162306a36Sopenharmony_ci}
42262306a36Sopenharmony_ci
42362306a36Sopenharmony_cistatic int __open_or_format_metadata(struct dm_clone_metadata *cmd, bool may_format_device)
42462306a36Sopenharmony_ci{
42562306a36Sopenharmony_ci	int r;
42662306a36Sopenharmony_ci	bool formatted = false;
42762306a36Sopenharmony_ci
42862306a36Sopenharmony_ci	r = __superblock_all_zeroes(cmd->bm, &formatted);
42962306a36Sopenharmony_ci	if (r)
43062306a36Sopenharmony_ci		return r;
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	if (!formatted)
43362306a36Sopenharmony_ci		return may_format_device ? __format_metadata(cmd) : -EPERM;
43462306a36Sopenharmony_ci
43562306a36Sopenharmony_ci	return __open_metadata(cmd);
43662306a36Sopenharmony_ci}
43762306a36Sopenharmony_ci
43862306a36Sopenharmony_cistatic int __create_persistent_data_structures(struct dm_clone_metadata *cmd,
43962306a36Sopenharmony_ci					       bool may_format_device)
44062306a36Sopenharmony_ci{
44162306a36Sopenharmony_ci	int r;
44262306a36Sopenharmony_ci
44362306a36Sopenharmony_ci	/* Create block manager */
44462306a36Sopenharmony_ci	cmd->bm = dm_block_manager_create(cmd->bdev,
44562306a36Sopenharmony_ci					 DM_CLONE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
44662306a36Sopenharmony_ci					 DM_CLONE_MAX_CONCURRENT_LOCKS);
44762306a36Sopenharmony_ci	if (IS_ERR(cmd->bm)) {
44862306a36Sopenharmony_ci		DMERR("Failed to create block manager");
44962306a36Sopenharmony_ci		return PTR_ERR(cmd->bm);
45062306a36Sopenharmony_ci	}
45162306a36Sopenharmony_ci
45262306a36Sopenharmony_ci	r = __open_or_format_metadata(cmd, may_format_device);
45362306a36Sopenharmony_ci	if (r)
45462306a36Sopenharmony_ci		dm_block_manager_destroy(cmd->bm);
45562306a36Sopenharmony_ci
45662306a36Sopenharmony_ci	return r;
45762306a36Sopenharmony_ci}
45862306a36Sopenharmony_ci
45962306a36Sopenharmony_cistatic void __destroy_persistent_data_structures(struct dm_clone_metadata *cmd)
46062306a36Sopenharmony_ci{
46162306a36Sopenharmony_ci	dm_sm_destroy(cmd->sm);
46262306a36Sopenharmony_ci	dm_tm_destroy(cmd->tm);
46362306a36Sopenharmony_ci	dm_block_manager_destroy(cmd->bm);
46462306a36Sopenharmony_ci}
46562306a36Sopenharmony_ci
46662306a36Sopenharmony_ci/*---------------------------------------------------------------------------*/
46762306a36Sopenharmony_ci
/* Number of bytes needed for a bitmap of @nr_bits bits, in whole longs. */
static size_t bitmap_size(unsigned long nr_bits)
{
	size_t nr_longs = BITS_TO_LONGS(nr_bits);

	return nr_longs * sizeof(unsigned long);
}
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_cistatic int __dirty_map_init(struct dirty_map *dmap, unsigned long nr_words,
47462306a36Sopenharmony_ci			    unsigned long nr_regions)
47562306a36Sopenharmony_ci{
47662306a36Sopenharmony_ci	dmap->changed = 0;
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_ci	dmap->dirty_words = kvzalloc(bitmap_size(nr_words), GFP_KERNEL);
47962306a36Sopenharmony_ci	if (!dmap->dirty_words)
48062306a36Sopenharmony_ci		return -ENOMEM;
48162306a36Sopenharmony_ci
48262306a36Sopenharmony_ci	dmap->dirty_regions = kvzalloc(bitmap_size(nr_regions), GFP_KERNEL);
48362306a36Sopenharmony_ci	if (!dmap->dirty_regions) {
48462306a36Sopenharmony_ci		kvfree(dmap->dirty_words);
48562306a36Sopenharmony_ci		return -ENOMEM;
48662306a36Sopenharmony_ci	}
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci	return 0;
48962306a36Sopenharmony_ci}
49062306a36Sopenharmony_ci
49162306a36Sopenharmony_cistatic void __dirty_map_exit(struct dirty_map *dmap)
49262306a36Sopenharmony_ci{
49362306a36Sopenharmony_ci	kvfree(dmap->dirty_words);
49462306a36Sopenharmony_ci	kvfree(dmap->dirty_regions);
49562306a36Sopenharmony_ci}
49662306a36Sopenharmony_ci
49762306a36Sopenharmony_cistatic int dirty_map_init(struct dm_clone_metadata *cmd)
49862306a36Sopenharmony_ci{
49962306a36Sopenharmony_ci	if (__dirty_map_init(&cmd->dmap[0], cmd->nr_words, cmd->nr_regions)) {
50062306a36Sopenharmony_ci		DMERR("Failed to allocate dirty bitmap");
50162306a36Sopenharmony_ci		return -ENOMEM;
50262306a36Sopenharmony_ci	}
50362306a36Sopenharmony_ci
50462306a36Sopenharmony_ci	if (__dirty_map_init(&cmd->dmap[1], cmd->nr_words, cmd->nr_regions)) {
50562306a36Sopenharmony_ci		DMERR("Failed to allocate dirty bitmap");
50662306a36Sopenharmony_ci		__dirty_map_exit(&cmd->dmap[0]);
50762306a36Sopenharmony_ci		return -ENOMEM;
50862306a36Sopenharmony_ci	}
50962306a36Sopenharmony_ci
51062306a36Sopenharmony_ci	cmd->current_dmap = &cmd->dmap[0];
51162306a36Sopenharmony_ci	cmd->committing_dmap = NULL;
51262306a36Sopenharmony_ci
51362306a36Sopenharmony_ci	return 0;
51462306a36Sopenharmony_ci}
51562306a36Sopenharmony_ci
51662306a36Sopenharmony_cistatic void dirty_map_exit(struct dm_clone_metadata *cmd)
51762306a36Sopenharmony_ci{
51862306a36Sopenharmony_ci	__dirty_map_exit(&cmd->dmap[0]);
51962306a36Sopenharmony_ci	__dirty_map_exit(&cmd->dmap[1]);
52062306a36Sopenharmony_ci}
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_cistatic int __load_bitset_in_core(struct dm_clone_metadata *cmd)
52362306a36Sopenharmony_ci{
52462306a36Sopenharmony_ci	int r;
52562306a36Sopenharmony_ci	unsigned long i;
52662306a36Sopenharmony_ci	struct dm_bitset_cursor c;
52762306a36Sopenharmony_ci
52862306a36Sopenharmony_ci	/* Flush bitset cache */
52962306a36Sopenharmony_ci	r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
53062306a36Sopenharmony_ci	if (r)
53162306a36Sopenharmony_ci		return r;
53262306a36Sopenharmony_ci
53362306a36Sopenharmony_ci	r = dm_bitset_cursor_begin(&cmd->bitset_info, cmd->bitset_root, cmd->nr_regions, &c);
53462306a36Sopenharmony_ci	if (r)
53562306a36Sopenharmony_ci		return r;
53662306a36Sopenharmony_ci
53762306a36Sopenharmony_ci	for (i = 0; ; i++) {
53862306a36Sopenharmony_ci		if (dm_bitset_cursor_get_value(&c))
53962306a36Sopenharmony_ci			__set_bit(i, cmd->region_map);
54062306a36Sopenharmony_ci		else
54162306a36Sopenharmony_ci			__clear_bit(i, cmd->region_map);
54262306a36Sopenharmony_ci
54362306a36Sopenharmony_ci		if (i >= (cmd->nr_regions - 1))
54462306a36Sopenharmony_ci			break;
54562306a36Sopenharmony_ci
54662306a36Sopenharmony_ci		r = dm_bitset_cursor_next(&c);
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_ci		if (r)
54962306a36Sopenharmony_ci			break;
55062306a36Sopenharmony_ci	}
55162306a36Sopenharmony_ci
55262306a36Sopenharmony_ci	dm_bitset_cursor_end(&c);
55362306a36Sopenharmony_ci
55462306a36Sopenharmony_ci	return r;
55562306a36Sopenharmony_ci}
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_cistruct dm_clone_metadata *dm_clone_metadata_open(struct block_device *bdev,
55862306a36Sopenharmony_ci						 sector_t target_size,
55962306a36Sopenharmony_ci						 sector_t region_size)
56062306a36Sopenharmony_ci{
56162306a36Sopenharmony_ci	int r;
56262306a36Sopenharmony_ci	struct dm_clone_metadata *cmd;
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
56562306a36Sopenharmony_ci	if (!cmd) {
56662306a36Sopenharmony_ci		DMERR("Failed to allocate memory for dm-clone metadata");
56762306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
56862306a36Sopenharmony_ci	}
56962306a36Sopenharmony_ci
57062306a36Sopenharmony_ci	cmd->bdev = bdev;
57162306a36Sopenharmony_ci	cmd->target_size = target_size;
57262306a36Sopenharmony_ci	cmd->region_size = region_size;
57362306a36Sopenharmony_ci	cmd->nr_regions = dm_sector_div_up(cmd->target_size, cmd->region_size);
57462306a36Sopenharmony_ci	cmd->nr_words = BITS_TO_LONGS(cmd->nr_regions);
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci	init_rwsem(&cmd->lock);
57762306a36Sopenharmony_ci	spin_lock_init(&cmd->bitmap_lock);
57862306a36Sopenharmony_ci	cmd->read_only = 0;
57962306a36Sopenharmony_ci	cmd->fail_io = false;
58062306a36Sopenharmony_ci	cmd->hydration_done = false;
58162306a36Sopenharmony_ci
58262306a36Sopenharmony_ci	cmd->region_map = kvmalloc(bitmap_size(cmd->nr_regions), GFP_KERNEL);
58362306a36Sopenharmony_ci	if (!cmd->region_map) {
58462306a36Sopenharmony_ci		DMERR("Failed to allocate memory for region bitmap");
58562306a36Sopenharmony_ci		r = -ENOMEM;
58662306a36Sopenharmony_ci		goto out_with_md;
58762306a36Sopenharmony_ci	}
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ci	r = __create_persistent_data_structures(cmd, true);
59062306a36Sopenharmony_ci	if (r)
59162306a36Sopenharmony_ci		goto out_with_region_map;
59262306a36Sopenharmony_ci
59362306a36Sopenharmony_ci	r = __load_bitset_in_core(cmd);
59462306a36Sopenharmony_ci	if (r) {
59562306a36Sopenharmony_ci		DMERR("Failed to load on-disk region map");
59662306a36Sopenharmony_ci		goto out_with_pds;
59762306a36Sopenharmony_ci	}
59862306a36Sopenharmony_ci
59962306a36Sopenharmony_ci	r = dirty_map_init(cmd);
60062306a36Sopenharmony_ci	if (r)
60162306a36Sopenharmony_ci		goto out_with_pds;
60262306a36Sopenharmony_ci
60362306a36Sopenharmony_ci	if (bitmap_full(cmd->region_map, cmd->nr_regions))
60462306a36Sopenharmony_ci		cmd->hydration_done = true;
60562306a36Sopenharmony_ci
60662306a36Sopenharmony_ci	return cmd;
60762306a36Sopenharmony_ci
60862306a36Sopenharmony_ciout_with_pds:
60962306a36Sopenharmony_ci	__destroy_persistent_data_structures(cmd);
61062306a36Sopenharmony_ci
61162306a36Sopenharmony_ciout_with_region_map:
61262306a36Sopenharmony_ci	kvfree(cmd->region_map);
61362306a36Sopenharmony_ci
61462306a36Sopenharmony_ciout_with_md:
61562306a36Sopenharmony_ci	kfree(cmd);
61662306a36Sopenharmony_ci
61762306a36Sopenharmony_ci	return ERR_PTR(r);
61862306a36Sopenharmony_ci}
61962306a36Sopenharmony_ci
62062306a36Sopenharmony_civoid dm_clone_metadata_close(struct dm_clone_metadata *cmd)
62162306a36Sopenharmony_ci{
62262306a36Sopenharmony_ci	if (!cmd->fail_io)
62362306a36Sopenharmony_ci		__destroy_persistent_data_structures(cmd);
62462306a36Sopenharmony_ci
62562306a36Sopenharmony_ci	dirty_map_exit(cmd);
62662306a36Sopenharmony_ci	kvfree(cmd->region_map);
62762306a36Sopenharmony_ci	kfree(cmd);
62862306a36Sopenharmony_ci}
62962306a36Sopenharmony_ci
63062306a36Sopenharmony_cibool dm_clone_is_hydration_done(struct dm_clone_metadata *cmd)
63162306a36Sopenharmony_ci{
63262306a36Sopenharmony_ci	return cmd->hydration_done;
63362306a36Sopenharmony_ci}
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_cibool dm_clone_is_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
63662306a36Sopenharmony_ci{
63762306a36Sopenharmony_ci	return dm_clone_is_hydration_done(cmd) || test_bit(region_nr, cmd->region_map);
63862306a36Sopenharmony_ci}
63962306a36Sopenharmony_ci
64062306a36Sopenharmony_cibool dm_clone_is_range_hydrated(struct dm_clone_metadata *cmd,
64162306a36Sopenharmony_ci				unsigned long start, unsigned long nr_regions)
64262306a36Sopenharmony_ci{
64362306a36Sopenharmony_ci	unsigned long bit;
64462306a36Sopenharmony_ci
64562306a36Sopenharmony_ci	if (dm_clone_is_hydration_done(cmd))
64662306a36Sopenharmony_ci		return true;
64762306a36Sopenharmony_ci
64862306a36Sopenharmony_ci	bit = find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
64962306a36Sopenharmony_ci
65062306a36Sopenharmony_ci	return (bit >= (start + nr_regions));
65162306a36Sopenharmony_ci}
65262306a36Sopenharmony_ci
65362306a36Sopenharmony_ciunsigned int dm_clone_nr_of_hydrated_regions(struct dm_clone_metadata *cmd)
65462306a36Sopenharmony_ci{
65562306a36Sopenharmony_ci	return bitmap_weight(cmd->region_map, cmd->nr_regions);
65662306a36Sopenharmony_ci}
65762306a36Sopenharmony_ci
65862306a36Sopenharmony_ciunsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd,
65962306a36Sopenharmony_ci						   unsigned long start)
66062306a36Sopenharmony_ci{
66162306a36Sopenharmony_ci	return find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
66262306a36Sopenharmony_ci}
66362306a36Sopenharmony_ci
66462306a36Sopenharmony_cistatic int __update_metadata_word(struct dm_clone_metadata *cmd,
66562306a36Sopenharmony_ci				  unsigned long *dirty_regions,
66662306a36Sopenharmony_ci				  unsigned long word)
66762306a36Sopenharmony_ci{
66862306a36Sopenharmony_ci	int r;
66962306a36Sopenharmony_ci	unsigned long index = word * BITS_PER_LONG;
67062306a36Sopenharmony_ci	unsigned long max_index = min(cmd->nr_regions, (word + 1) * BITS_PER_LONG);
67162306a36Sopenharmony_ci
67262306a36Sopenharmony_ci	while (index < max_index) {
67362306a36Sopenharmony_ci		if (test_bit(index, dirty_regions)) {
67462306a36Sopenharmony_ci			r = dm_bitset_set_bit(&cmd->bitset_info, cmd->bitset_root,
67562306a36Sopenharmony_ci					      index, &cmd->bitset_root);
67662306a36Sopenharmony_ci			if (r) {
67762306a36Sopenharmony_ci				DMERR("dm_bitset_set_bit failed");
67862306a36Sopenharmony_ci				return r;
67962306a36Sopenharmony_ci			}
68062306a36Sopenharmony_ci			__clear_bit(index, dirty_regions);
68162306a36Sopenharmony_ci		}
68262306a36Sopenharmony_ci		index++;
68362306a36Sopenharmony_ci	}
68462306a36Sopenharmony_ci
68562306a36Sopenharmony_ci	return 0;
68662306a36Sopenharmony_ci}
68762306a36Sopenharmony_ci
68862306a36Sopenharmony_cistatic int __metadata_commit(struct dm_clone_metadata *cmd)
68962306a36Sopenharmony_ci{
69062306a36Sopenharmony_ci	int r;
69162306a36Sopenharmony_ci	struct dm_block *sblock;
69262306a36Sopenharmony_ci	struct superblock_disk *sb;
69362306a36Sopenharmony_ci
69462306a36Sopenharmony_ci	/* Flush bitset cache */
69562306a36Sopenharmony_ci	r = dm_bitset_flush(&cmd->bitset_info, cmd->bitset_root, &cmd->bitset_root);
69662306a36Sopenharmony_ci	if (r) {
69762306a36Sopenharmony_ci		DMERR("dm_bitset_flush failed");
69862306a36Sopenharmony_ci		return r;
69962306a36Sopenharmony_ci	}
70062306a36Sopenharmony_ci
70162306a36Sopenharmony_ci	/* Flush to disk all blocks, except the superblock */
70262306a36Sopenharmony_ci	r = dm_tm_pre_commit(cmd->tm);
70362306a36Sopenharmony_ci	if (r) {
70462306a36Sopenharmony_ci		DMERR("dm_tm_pre_commit failed");
70562306a36Sopenharmony_ci		return r;
70662306a36Sopenharmony_ci	}
70762306a36Sopenharmony_ci
70862306a36Sopenharmony_ci	/* Save the space map root in cmd->metadata_space_map_root */
70962306a36Sopenharmony_ci	r = __copy_sm_root(cmd);
71062306a36Sopenharmony_ci	if (r) {
71162306a36Sopenharmony_ci		DMERR("__copy_sm_root failed");
71262306a36Sopenharmony_ci		return r;
71362306a36Sopenharmony_ci	}
71462306a36Sopenharmony_ci
71562306a36Sopenharmony_ci	/* Lock the superblock */
71662306a36Sopenharmony_ci	r = superblock_write_lock_zero(cmd, &sblock);
71762306a36Sopenharmony_ci	if (r) {
71862306a36Sopenharmony_ci		DMERR("Failed to write_lock superblock");
71962306a36Sopenharmony_ci		return r;
72062306a36Sopenharmony_ci	}
72162306a36Sopenharmony_ci
72262306a36Sopenharmony_ci	/* Save the metadata in superblock */
72362306a36Sopenharmony_ci	sb = dm_block_data(sblock);
72462306a36Sopenharmony_ci	__prepare_superblock(cmd, sb);
72562306a36Sopenharmony_ci
72662306a36Sopenharmony_ci	/* Unlock superblock and commit it to disk */
72762306a36Sopenharmony_ci	r = dm_tm_commit(cmd->tm, sblock);
72862306a36Sopenharmony_ci	if (r) {
72962306a36Sopenharmony_ci		DMERR("Failed to commit superblock");
73062306a36Sopenharmony_ci		return r;
73162306a36Sopenharmony_ci	}
73262306a36Sopenharmony_ci
73362306a36Sopenharmony_ci	/*
73462306a36Sopenharmony_ci	 * FIXME: Find a more efficient way to check if the hydration is done.
73562306a36Sopenharmony_ci	 */
73662306a36Sopenharmony_ci	if (bitmap_full(cmd->region_map, cmd->nr_regions))
73762306a36Sopenharmony_ci		cmd->hydration_done = true;
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_ci	return 0;
74062306a36Sopenharmony_ci}
74162306a36Sopenharmony_ci
74262306a36Sopenharmony_cistatic int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
74362306a36Sopenharmony_ci{
74462306a36Sopenharmony_ci	int r;
74562306a36Sopenharmony_ci	unsigned long word;
74662306a36Sopenharmony_ci
74762306a36Sopenharmony_ci	word = 0;
74862306a36Sopenharmony_ci	do {
74962306a36Sopenharmony_ci		word = find_next_bit(dmap->dirty_words, cmd->nr_words, word);
75062306a36Sopenharmony_ci
75162306a36Sopenharmony_ci		if (word == cmd->nr_words)
75262306a36Sopenharmony_ci			break;
75362306a36Sopenharmony_ci
75462306a36Sopenharmony_ci		r = __update_metadata_word(cmd, dmap->dirty_regions, word);
75562306a36Sopenharmony_ci
75662306a36Sopenharmony_ci		if (r)
75762306a36Sopenharmony_ci			return r;
75862306a36Sopenharmony_ci
75962306a36Sopenharmony_ci		__clear_bit(word, dmap->dirty_words);
76062306a36Sopenharmony_ci		word++;
76162306a36Sopenharmony_ci	} while (word < cmd->nr_words);
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_ci	r = __metadata_commit(cmd);
76462306a36Sopenharmony_ci
76562306a36Sopenharmony_ci	if (r)
76662306a36Sopenharmony_ci		return r;
76762306a36Sopenharmony_ci
76862306a36Sopenharmony_ci	/* Update the changed flag */
76962306a36Sopenharmony_ci	spin_lock_irq(&cmd->bitmap_lock);
77062306a36Sopenharmony_ci	dmap->changed = 0;
77162306a36Sopenharmony_ci	spin_unlock_irq(&cmd->bitmap_lock);
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_ci	return 0;
77462306a36Sopenharmony_ci}
77562306a36Sopenharmony_ci
77662306a36Sopenharmony_ciint dm_clone_metadata_pre_commit(struct dm_clone_metadata *cmd)
77762306a36Sopenharmony_ci{
77862306a36Sopenharmony_ci	int r = 0;
77962306a36Sopenharmony_ci	struct dirty_map *dmap, *next_dmap;
78062306a36Sopenharmony_ci
78162306a36Sopenharmony_ci	down_write(&cmd->lock);
78262306a36Sopenharmony_ci
78362306a36Sopenharmony_ci	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
78462306a36Sopenharmony_ci		r = -EPERM;
78562306a36Sopenharmony_ci		goto out;
78662306a36Sopenharmony_ci	}
78762306a36Sopenharmony_ci
78862306a36Sopenharmony_ci	/* Get current dirty bitmap */
78962306a36Sopenharmony_ci	dmap = cmd->current_dmap;
79062306a36Sopenharmony_ci
79162306a36Sopenharmony_ci	/* Get next dirty bitmap */
79262306a36Sopenharmony_ci	next_dmap = (dmap == &cmd->dmap[0]) ? &cmd->dmap[1] : &cmd->dmap[0];
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci	/*
79562306a36Sopenharmony_ci	 * The last commit failed, so we don't have a clean dirty-bitmap to
79662306a36Sopenharmony_ci	 * use.
79762306a36Sopenharmony_ci	 */
79862306a36Sopenharmony_ci	if (WARN_ON(next_dmap->changed || cmd->committing_dmap)) {
79962306a36Sopenharmony_ci		r = -EINVAL;
80062306a36Sopenharmony_ci		goto out;
80162306a36Sopenharmony_ci	}
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ci	/* Swap dirty bitmaps */
80462306a36Sopenharmony_ci	spin_lock_irq(&cmd->bitmap_lock);
80562306a36Sopenharmony_ci	cmd->current_dmap = next_dmap;
80662306a36Sopenharmony_ci	spin_unlock_irq(&cmd->bitmap_lock);
80762306a36Sopenharmony_ci
80862306a36Sopenharmony_ci	/* Set old dirty bitmap as currently committing */
80962306a36Sopenharmony_ci	cmd->committing_dmap = dmap;
81062306a36Sopenharmony_ciout:
81162306a36Sopenharmony_ci	up_write(&cmd->lock);
81262306a36Sopenharmony_ci
81362306a36Sopenharmony_ci	return r;
81462306a36Sopenharmony_ci}
81562306a36Sopenharmony_ci
81662306a36Sopenharmony_ciint dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
81762306a36Sopenharmony_ci{
81862306a36Sopenharmony_ci	int r = -EPERM;
81962306a36Sopenharmony_ci
82062306a36Sopenharmony_ci	down_write(&cmd->lock);
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_ci	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
82362306a36Sopenharmony_ci		goto out;
82462306a36Sopenharmony_ci
82562306a36Sopenharmony_ci	if (WARN_ON(!cmd->committing_dmap)) {
82662306a36Sopenharmony_ci		r = -EINVAL;
82762306a36Sopenharmony_ci		goto out;
82862306a36Sopenharmony_ci	}
82962306a36Sopenharmony_ci
83062306a36Sopenharmony_ci	r = __flush_dmap(cmd, cmd->committing_dmap);
83162306a36Sopenharmony_ci	if (!r) {
83262306a36Sopenharmony_ci		/* Clear committing dmap */
83362306a36Sopenharmony_ci		cmd->committing_dmap = NULL;
83462306a36Sopenharmony_ci	}
83562306a36Sopenharmony_ciout:
83662306a36Sopenharmony_ci	up_write(&cmd->lock);
83762306a36Sopenharmony_ci
83862306a36Sopenharmony_ci	return r;
83962306a36Sopenharmony_ci}
84062306a36Sopenharmony_ci
84162306a36Sopenharmony_ciint dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long region_nr)
84262306a36Sopenharmony_ci{
84362306a36Sopenharmony_ci	int r = 0;
84462306a36Sopenharmony_ci	struct dirty_map *dmap;
84562306a36Sopenharmony_ci	unsigned long word, flags;
84662306a36Sopenharmony_ci
84762306a36Sopenharmony_ci	if (unlikely(region_nr >= cmd->nr_regions)) {
84862306a36Sopenharmony_ci		DMERR("Region %lu out of range (total number of regions %lu)",
84962306a36Sopenharmony_ci		      region_nr, cmd->nr_regions);
85062306a36Sopenharmony_ci		return -ERANGE;
85162306a36Sopenharmony_ci	}
85262306a36Sopenharmony_ci
85362306a36Sopenharmony_ci	word = region_nr / BITS_PER_LONG;
85462306a36Sopenharmony_ci
85562306a36Sopenharmony_ci	spin_lock_irqsave(&cmd->bitmap_lock, flags);
85662306a36Sopenharmony_ci
85762306a36Sopenharmony_ci	if (cmd->read_only) {
85862306a36Sopenharmony_ci		r = -EPERM;
85962306a36Sopenharmony_ci		goto out;
86062306a36Sopenharmony_ci	}
86162306a36Sopenharmony_ci
86262306a36Sopenharmony_ci	dmap = cmd->current_dmap;
86362306a36Sopenharmony_ci
86462306a36Sopenharmony_ci	__set_bit(word, dmap->dirty_words);
86562306a36Sopenharmony_ci	__set_bit(region_nr, dmap->dirty_regions);
86662306a36Sopenharmony_ci	__set_bit(region_nr, cmd->region_map);
86762306a36Sopenharmony_ci	dmap->changed = 1;
86862306a36Sopenharmony_ci
86962306a36Sopenharmony_ciout:
87062306a36Sopenharmony_ci	spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
87162306a36Sopenharmony_ci
87262306a36Sopenharmony_ci	return r;
87362306a36Sopenharmony_ci}
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_ciint dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
87662306a36Sopenharmony_ci			    unsigned long nr_regions)
87762306a36Sopenharmony_ci{
87862306a36Sopenharmony_ci	int r = 0;
87962306a36Sopenharmony_ci	struct dirty_map *dmap;
88062306a36Sopenharmony_ci	unsigned long word, region_nr;
88162306a36Sopenharmony_ci
88262306a36Sopenharmony_ci	if (unlikely(start >= cmd->nr_regions || (start + nr_regions) < start ||
88362306a36Sopenharmony_ci		     (start + nr_regions) > cmd->nr_regions)) {
88462306a36Sopenharmony_ci		DMERR("Invalid region range: start %lu, nr_regions %lu (total number of regions %lu)",
88562306a36Sopenharmony_ci		      start, nr_regions, cmd->nr_regions);
88662306a36Sopenharmony_ci		return -ERANGE;
88762306a36Sopenharmony_ci	}
88862306a36Sopenharmony_ci
88962306a36Sopenharmony_ci	spin_lock_irq(&cmd->bitmap_lock);
89062306a36Sopenharmony_ci
89162306a36Sopenharmony_ci	if (cmd->read_only) {
89262306a36Sopenharmony_ci		r = -EPERM;
89362306a36Sopenharmony_ci		goto out;
89462306a36Sopenharmony_ci	}
89562306a36Sopenharmony_ci
89662306a36Sopenharmony_ci	dmap = cmd->current_dmap;
89762306a36Sopenharmony_ci	for (region_nr = start; region_nr < (start + nr_regions); region_nr++) {
89862306a36Sopenharmony_ci		if (!test_bit(region_nr, cmd->region_map)) {
89962306a36Sopenharmony_ci			word = region_nr / BITS_PER_LONG;
90062306a36Sopenharmony_ci			__set_bit(word, dmap->dirty_words);
90162306a36Sopenharmony_ci			__set_bit(region_nr, dmap->dirty_regions);
90262306a36Sopenharmony_ci			__set_bit(region_nr, cmd->region_map);
90362306a36Sopenharmony_ci			dmap->changed = 1;
90462306a36Sopenharmony_ci		}
90562306a36Sopenharmony_ci	}
90662306a36Sopenharmony_ciout:
90762306a36Sopenharmony_ci	spin_unlock_irq(&cmd->bitmap_lock);
90862306a36Sopenharmony_ci
90962306a36Sopenharmony_ci	return r;
91062306a36Sopenharmony_ci}
91162306a36Sopenharmony_ci
91262306a36Sopenharmony_ci/*
91362306a36Sopenharmony_ci * WARNING: This must not be called concurrently with either
91462306a36Sopenharmony_ci * dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), as it changes
91562306a36Sopenharmony_ci * cmd->region_map without taking the cmd->bitmap_lock spinlock. The only
91662306a36Sopenharmony_ci * exception is after setting the metadata to read-only mode, using
91762306a36Sopenharmony_ci * dm_clone_metadata_set_read_only().
91862306a36Sopenharmony_ci *
91962306a36Sopenharmony_ci * We don't take the spinlock because __load_bitset_in_core() does I/O, so it
92062306a36Sopenharmony_ci * may block.
92162306a36Sopenharmony_ci */
92262306a36Sopenharmony_ciint dm_clone_reload_in_core_bitset(struct dm_clone_metadata *cmd)
92362306a36Sopenharmony_ci{
92462306a36Sopenharmony_ci	int r = -EINVAL;
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_ci	down_write(&cmd->lock);
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_ci	if (cmd->fail_io)
92962306a36Sopenharmony_ci		goto out;
93062306a36Sopenharmony_ci
93162306a36Sopenharmony_ci	r = __load_bitset_in_core(cmd);
93262306a36Sopenharmony_ciout:
93362306a36Sopenharmony_ci	up_write(&cmd->lock);
93462306a36Sopenharmony_ci
93562306a36Sopenharmony_ci	return r;
93662306a36Sopenharmony_ci}
93762306a36Sopenharmony_ci
93862306a36Sopenharmony_cibool dm_clone_changed_this_transaction(struct dm_clone_metadata *cmd)
93962306a36Sopenharmony_ci{
94062306a36Sopenharmony_ci	bool r;
94162306a36Sopenharmony_ci	unsigned long flags;
94262306a36Sopenharmony_ci
94362306a36Sopenharmony_ci	spin_lock_irqsave(&cmd->bitmap_lock, flags);
94462306a36Sopenharmony_ci	r = cmd->dmap[0].changed || cmd->dmap[1].changed;
94562306a36Sopenharmony_ci	spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
94662306a36Sopenharmony_ci
94762306a36Sopenharmony_ci	return r;
94862306a36Sopenharmony_ci}
94962306a36Sopenharmony_ci
95062306a36Sopenharmony_ciint dm_clone_metadata_abort(struct dm_clone_metadata *cmd)
95162306a36Sopenharmony_ci{
95262306a36Sopenharmony_ci	int r = -EPERM;
95362306a36Sopenharmony_ci
95462306a36Sopenharmony_ci	down_write(&cmd->lock);
95562306a36Sopenharmony_ci
95662306a36Sopenharmony_ci	if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
95762306a36Sopenharmony_ci		goto out;
95862306a36Sopenharmony_ci
95962306a36Sopenharmony_ci	__destroy_persistent_data_structures(cmd);
96062306a36Sopenharmony_ci
96162306a36Sopenharmony_ci	r = __create_persistent_data_structures(cmd, false);
96262306a36Sopenharmony_ci	if (r) {
96362306a36Sopenharmony_ci		/* If something went wrong we can neither write nor read the metadata */
96462306a36Sopenharmony_ci		cmd->fail_io = true;
96562306a36Sopenharmony_ci	}
96662306a36Sopenharmony_ciout:
96762306a36Sopenharmony_ci	up_write(&cmd->lock);
96862306a36Sopenharmony_ci
96962306a36Sopenharmony_ci	return r;
97062306a36Sopenharmony_ci}
97162306a36Sopenharmony_ci
97262306a36Sopenharmony_civoid dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd)
97362306a36Sopenharmony_ci{
97462306a36Sopenharmony_ci	down_write(&cmd->lock);
97562306a36Sopenharmony_ci
97662306a36Sopenharmony_ci	spin_lock_irq(&cmd->bitmap_lock);
97762306a36Sopenharmony_ci	cmd->read_only = 1;
97862306a36Sopenharmony_ci	spin_unlock_irq(&cmd->bitmap_lock);
97962306a36Sopenharmony_ci
98062306a36Sopenharmony_ci	if (!cmd->fail_io)
98162306a36Sopenharmony_ci		dm_bm_set_read_only(cmd->bm);
98262306a36Sopenharmony_ci
98362306a36Sopenharmony_ci	up_write(&cmd->lock);
98462306a36Sopenharmony_ci}
98562306a36Sopenharmony_ci
98662306a36Sopenharmony_civoid dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd)
98762306a36Sopenharmony_ci{
98862306a36Sopenharmony_ci	down_write(&cmd->lock);
98962306a36Sopenharmony_ci
99062306a36Sopenharmony_ci	spin_lock_irq(&cmd->bitmap_lock);
99162306a36Sopenharmony_ci	cmd->read_only = 0;
99262306a36Sopenharmony_ci	spin_unlock_irq(&cmd->bitmap_lock);
99362306a36Sopenharmony_ci
99462306a36Sopenharmony_ci	if (!cmd->fail_io)
99562306a36Sopenharmony_ci		dm_bm_set_read_write(cmd->bm);
99662306a36Sopenharmony_ci
99762306a36Sopenharmony_ci	up_write(&cmd->lock);
99862306a36Sopenharmony_ci}
99962306a36Sopenharmony_ci
100062306a36Sopenharmony_ciint dm_clone_get_free_metadata_block_count(struct dm_clone_metadata *cmd,
100162306a36Sopenharmony_ci					   dm_block_t *result)
100262306a36Sopenharmony_ci{
100362306a36Sopenharmony_ci	int r = -EINVAL;
100462306a36Sopenharmony_ci
100562306a36Sopenharmony_ci	down_read(&cmd->lock);
100662306a36Sopenharmony_ci
100762306a36Sopenharmony_ci	if (!cmd->fail_io)
100862306a36Sopenharmony_ci		r = dm_sm_get_nr_free(cmd->sm, result);
100962306a36Sopenharmony_ci
101062306a36Sopenharmony_ci	up_read(&cmd->lock);
101162306a36Sopenharmony_ci
101262306a36Sopenharmony_ci	return r;
101362306a36Sopenharmony_ci}
101462306a36Sopenharmony_ci
101562306a36Sopenharmony_ciint dm_clone_get_metadata_dev_size(struct dm_clone_metadata *cmd,
101662306a36Sopenharmony_ci				   dm_block_t *result)
101762306a36Sopenharmony_ci{
101862306a36Sopenharmony_ci	int r = -EINVAL;
101962306a36Sopenharmony_ci
102062306a36Sopenharmony_ci	down_read(&cmd->lock);
102162306a36Sopenharmony_ci
102262306a36Sopenharmony_ci	if (!cmd->fail_io)
102362306a36Sopenharmony_ci		r = dm_sm_get_nr_blocks(cmd->sm, result);
102462306a36Sopenharmony_ci
102562306a36Sopenharmony_ci	up_read(&cmd->lock);
102662306a36Sopenharmony_ci
102762306a36Sopenharmony_ci	return r;
102862306a36Sopenharmony_ci}
1029