162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci#include "dm.h" 362306a36Sopenharmony_ci#include "persistent-data/dm-transaction-manager.h" 462306a36Sopenharmony_ci#include "persistent-data/dm-bitset.h" 562306a36Sopenharmony_ci#include "persistent-data/dm-space-map.h" 662306a36Sopenharmony_ci 762306a36Sopenharmony_ci#include <linux/dm-io.h> 862306a36Sopenharmony_ci#include <linux/dm-kcopyd.h> 962306a36Sopenharmony_ci#include <linux/init.h> 1062306a36Sopenharmony_ci#include <linux/mempool.h> 1162306a36Sopenharmony_ci#include <linux/module.h> 1262306a36Sopenharmony_ci#include <linux/slab.h> 1362306a36Sopenharmony_ci#include <linux/vmalloc.h> 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_ci#define DM_MSG_PREFIX "era" 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ci#define SUPERBLOCK_LOCATION 0 1862306a36Sopenharmony_ci#define SUPERBLOCK_MAGIC 2126579579 1962306a36Sopenharmony_ci#define SUPERBLOCK_CSUM_XOR 146538381 2062306a36Sopenharmony_ci#define MIN_ERA_VERSION 1 2162306a36Sopenharmony_ci#define MAX_ERA_VERSION 1 2262306a36Sopenharmony_ci#define INVALID_WRITESET_ROOT SUPERBLOCK_LOCATION 2362306a36Sopenharmony_ci#define MIN_BLOCK_SIZE 8 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci/* 2662306a36Sopenharmony_ci *-------------------------------------------------------------- 2762306a36Sopenharmony_ci * Writeset 2862306a36Sopenharmony_ci *-------------------------------------------------------------- 2962306a36Sopenharmony_ci */ 3062306a36Sopenharmony_cistruct writeset_metadata { 3162306a36Sopenharmony_ci uint32_t nr_bits; 3262306a36Sopenharmony_ci dm_block_t root; 3362306a36Sopenharmony_ci}; 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_cistruct writeset { 3662306a36Sopenharmony_ci struct writeset_metadata md; 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci /* 3962306a36Sopenharmony_ci * An in core copy of the bits to save constantly doing look ups on 4062306a36Sopenharmony_ci * disk. 4162306a36Sopenharmony_ci */ 4262306a36Sopenharmony_ci unsigned long *bits; 4362306a36Sopenharmony_ci}; 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci/* 4662306a36Sopenharmony_ci * This does not free off the on disk bitset as this will normally be done 4762306a36Sopenharmony_ci * after digesting into the era array. 4862306a36Sopenharmony_ci */ 4962306a36Sopenharmony_cistatic void writeset_free(struct writeset *ws) 5062306a36Sopenharmony_ci{ 5162306a36Sopenharmony_ci vfree(ws->bits); 5262306a36Sopenharmony_ci ws->bits = NULL; 5362306a36Sopenharmony_ci} 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_cistatic int setup_on_disk_bitset(struct dm_disk_bitset *info, 5662306a36Sopenharmony_ci unsigned int nr_bits, dm_block_t *root) 5762306a36Sopenharmony_ci{ 5862306a36Sopenharmony_ci int r; 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci r = dm_bitset_empty(info, root); 6162306a36Sopenharmony_ci if (r) 6262306a36Sopenharmony_ci return r; 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci return dm_bitset_resize(info, *root, 0, nr_bits, false, root); 6562306a36Sopenharmony_ci} 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_cistatic size_t bitset_size(unsigned int nr_bits) 6862306a36Sopenharmony_ci{ 6962306a36Sopenharmony_ci return sizeof(unsigned long) * dm_div_up(nr_bits, BITS_PER_LONG); 7062306a36Sopenharmony_ci} 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci/* 7362306a36Sopenharmony_ci * Allocates memory for the in core bitset. 7462306a36Sopenharmony_ci */ 7562306a36Sopenharmony_cistatic int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks) 7662306a36Sopenharmony_ci{ 7762306a36Sopenharmony_ci ws->bits = vzalloc(bitset_size(nr_blocks)); 7862306a36Sopenharmony_ci if (!ws->bits) { 7962306a36Sopenharmony_ci DMERR("%s: couldn't allocate in memory bitset", __func__); 8062306a36Sopenharmony_ci return -ENOMEM; 8162306a36Sopenharmony_ci } 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci return 0; 8462306a36Sopenharmony_ci} 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_ci/* 8762306a36Sopenharmony_ci * Wipes the in-core bitset, and creates a new on disk bitset. 8862306a36Sopenharmony_ci */ 8962306a36Sopenharmony_cistatic int writeset_init(struct dm_disk_bitset *info, struct writeset *ws, 9062306a36Sopenharmony_ci dm_block_t nr_blocks) 9162306a36Sopenharmony_ci{ 9262306a36Sopenharmony_ci int r; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci memset(ws->bits, 0, bitset_size(nr_blocks)); 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci ws->md.nr_bits = nr_blocks; 9762306a36Sopenharmony_ci r = setup_on_disk_bitset(info, ws->md.nr_bits, &ws->md.root); 9862306a36Sopenharmony_ci if (r) { 9962306a36Sopenharmony_ci DMERR("%s: setup_on_disk_bitset failed", __func__); 10062306a36Sopenharmony_ci return r; 10162306a36Sopenharmony_ci } 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci return 0; 10462306a36Sopenharmony_ci} 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_cistatic bool writeset_marked(struct writeset *ws, dm_block_t block) 10762306a36Sopenharmony_ci{ 10862306a36Sopenharmony_ci return test_bit(block, ws->bits); 10962306a36Sopenharmony_ci} 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_cistatic int writeset_marked_on_disk(struct dm_disk_bitset *info, 11262306a36Sopenharmony_ci struct writeset_metadata *m, dm_block_t block, 11362306a36Sopenharmony_ci bool *result) 11462306a36Sopenharmony_ci{ 11562306a36Sopenharmony_ci int r; 11662306a36Sopenharmony_ci dm_block_t old = m->root; 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_ci /* 11962306a36Sopenharmony_ci * The bitset was flushed when it was archived, so we know there'll 12062306a36Sopenharmony_ci * be no change to the root. 12162306a36Sopenharmony_ci */ 12262306a36Sopenharmony_ci r = dm_bitset_test_bit(info, m->root, block, &m->root, result); 12362306a36Sopenharmony_ci if (r) { 12462306a36Sopenharmony_ci DMERR("%s: dm_bitset_test_bit failed", __func__); 12562306a36Sopenharmony_ci return r; 12662306a36Sopenharmony_ci } 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci BUG_ON(m->root != old); 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci return r; 13162306a36Sopenharmony_ci} 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci/* 13462306a36Sopenharmony_ci * Returns < 0 on error, 0 if the bit wasn't previously set, 1 if it was. 13562306a36Sopenharmony_ci */ 13662306a36Sopenharmony_cistatic int writeset_test_and_set(struct dm_disk_bitset *info, 13762306a36Sopenharmony_ci struct writeset *ws, uint32_t block) 13862306a36Sopenharmony_ci{ 13962306a36Sopenharmony_ci int r; 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_ci if (!test_bit(block, ws->bits)) { 14262306a36Sopenharmony_ci r = dm_bitset_set_bit(info, ws->md.root, block, &ws->md.root); 14362306a36Sopenharmony_ci if (r) { 14462306a36Sopenharmony_ci /* FIXME: fail mode */ 14562306a36Sopenharmony_ci return r; 14662306a36Sopenharmony_ci } 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci return 0; 14962306a36Sopenharmony_ci } 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci return 1; 15262306a36Sopenharmony_ci} 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci/* 15562306a36Sopenharmony_ci *-------------------------------------------------------------- 15662306a36Sopenharmony_ci * On disk metadata layout 15762306a36Sopenharmony_ci *-------------------------------------------------------------- 15862306a36Sopenharmony_ci */ 15962306a36Sopenharmony_ci#define SPACE_MAP_ROOT_SIZE 128 16062306a36Sopenharmony_ci#define UUID_LEN 16 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_cistruct writeset_disk { 16362306a36Sopenharmony_ci __le32 nr_bits; 16462306a36Sopenharmony_ci __le64 root; 16562306a36Sopenharmony_ci} __packed; 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_cistruct superblock_disk { 16862306a36Sopenharmony_ci __le32 csum; 16962306a36Sopenharmony_ci __le32 flags; 17062306a36Sopenharmony_ci __le64 blocknr; 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ci __u8 uuid[UUID_LEN]; 17362306a36Sopenharmony_ci __le64 magic; 17462306a36Sopenharmony_ci __le32 version; 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci __le32 data_block_size; 17962306a36Sopenharmony_ci __le32 metadata_block_size; 18062306a36Sopenharmony_ci __le32 nr_blocks; 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci __le32 current_era; 18362306a36Sopenharmony_ci struct writeset_disk current_writeset; 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci /* 18662306a36Sopenharmony_ci * Only these two fields are valid within the metadata snapshot. 18762306a36Sopenharmony_ci */ 18862306a36Sopenharmony_ci __le64 writeset_tree_root; 18962306a36Sopenharmony_ci __le64 era_array_root; 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci __le64 metadata_snap; 19262306a36Sopenharmony_ci} __packed; 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci/* 19562306a36Sopenharmony_ci *-------------------------------------------------------------- 19662306a36Sopenharmony_ci * Superblock validation 19762306a36Sopenharmony_ci *-------------------------------------------------------------- 19862306a36Sopenharmony_ci */ 19962306a36Sopenharmony_cistatic void sb_prepare_for_write(struct dm_block_validator *v, 20062306a36Sopenharmony_ci struct dm_block *b, 20162306a36Sopenharmony_ci size_t sb_block_size) 20262306a36Sopenharmony_ci{ 20362306a36Sopenharmony_ci struct superblock_disk *disk = dm_block_data(b); 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci disk->blocknr = cpu_to_le64(dm_block_location(b)); 20662306a36Sopenharmony_ci disk->csum = cpu_to_le32(dm_bm_checksum(&disk->flags, 20762306a36Sopenharmony_ci sb_block_size - sizeof(__le32), 20862306a36Sopenharmony_ci SUPERBLOCK_CSUM_XOR)); 20962306a36Sopenharmony_ci} 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_cistatic int check_metadata_version(struct superblock_disk *disk) 21262306a36Sopenharmony_ci{ 21362306a36Sopenharmony_ci uint32_t metadata_version = le32_to_cpu(disk->version); 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci if (metadata_version < MIN_ERA_VERSION || metadata_version > MAX_ERA_VERSION) { 21662306a36Sopenharmony_ci DMERR("Era metadata version %u found, but only versions between %u and %u supported.", 21762306a36Sopenharmony_ci metadata_version, MIN_ERA_VERSION, MAX_ERA_VERSION); 21862306a36Sopenharmony_ci return -EINVAL; 21962306a36Sopenharmony_ci } 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci return 0; 22262306a36Sopenharmony_ci} 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_cistatic int sb_check(struct dm_block_validator *v, 22562306a36Sopenharmony_ci struct dm_block *b, 22662306a36Sopenharmony_ci size_t sb_block_size) 22762306a36Sopenharmony_ci{ 22862306a36Sopenharmony_ci struct superblock_disk *disk = dm_block_data(b); 22962306a36Sopenharmony_ci __le32 csum_le; 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci if (dm_block_location(b) != le64_to_cpu(disk->blocknr)) { 23262306a36Sopenharmony_ci DMERR("%s failed: blocknr %llu: wanted %llu", 23362306a36Sopenharmony_ci __func__, le64_to_cpu(disk->blocknr), 23462306a36Sopenharmony_ci (unsigned long long)dm_block_location(b)); 23562306a36Sopenharmony_ci return -ENOTBLK; 23662306a36Sopenharmony_ci } 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci if (le64_to_cpu(disk->magic) != SUPERBLOCK_MAGIC) { 23962306a36Sopenharmony_ci DMERR("%s failed: magic %llu: wanted %llu", 24062306a36Sopenharmony_ci __func__, le64_to_cpu(disk->magic), 24162306a36Sopenharmony_ci (unsigned long long) SUPERBLOCK_MAGIC); 24262306a36Sopenharmony_ci return -EILSEQ; 24362306a36Sopenharmony_ci } 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci csum_le = cpu_to_le32(dm_bm_checksum(&disk->flags, 24662306a36Sopenharmony_ci sb_block_size - sizeof(__le32), 24762306a36Sopenharmony_ci SUPERBLOCK_CSUM_XOR)); 24862306a36Sopenharmony_ci if (csum_le != disk->csum) { 24962306a36Sopenharmony_ci DMERR("%s failed: csum %u: wanted %u", 25062306a36Sopenharmony_ci __func__, le32_to_cpu(csum_le), le32_to_cpu(disk->csum)); 25162306a36Sopenharmony_ci return -EILSEQ; 25262306a36Sopenharmony_ci } 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci return check_metadata_version(disk); 25562306a36Sopenharmony_ci} 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_cistatic struct dm_block_validator sb_validator = { 25862306a36Sopenharmony_ci .name = "superblock", 25962306a36Sopenharmony_ci .prepare_for_write = sb_prepare_for_write, 26062306a36Sopenharmony_ci .check = sb_check 26162306a36Sopenharmony_ci}; 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_ci/* 26462306a36Sopenharmony_ci *-------------------------------------------------------------- 26562306a36Sopenharmony_ci * Low level metadata handling 26662306a36Sopenharmony_ci *-------------------------------------------------------------- 26762306a36Sopenharmony_ci */ 26862306a36Sopenharmony_ci#define DM_ERA_METADATA_BLOCK_SIZE 4096 26962306a36Sopenharmony_ci#define ERA_MAX_CONCURRENT_LOCKS 5 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_cistruct era_metadata { 27262306a36Sopenharmony_ci struct block_device *bdev; 27362306a36Sopenharmony_ci struct dm_block_manager *bm; 27462306a36Sopenharmony_ci struct dm_space_map *sm; 27562306a36Sopenharmony_ci struct dm_transaction_manager *tm; 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci dm_block_t block_size; 27862306a36Sopenharmony_ci uint32_t nr_blocks; 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci uint32_t current_era; 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci /* 28362306a36Sopenharmony_ci * We preallocate 2 writesets. When an era rolls over we 28462306a36Sopenharmony_ci * switch between them. This means the allocation is done at 28562306a36Sopenharmony_ci * preresume time, rather than on the io path. 28662306a36Sopenharmony_ci */ 28762306a36Sopenharmony_ci struct writeset writesets[2]; 28862306a36Sopenharmony_ci struct writeset *current_writeset; 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci dm_block_t writeset_tree_root; 29162306a36Sopenharmony_ci dm_block_t era_array_root; 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci struct dm_disk_bitset bitset_info; 29462306a36Sopenharmony_ci struct dm_btree_info writeset_tree_info; 29562306a36Sopenharmony_ci struct dm_array_info era_array_info; 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_ci dm_block_t metadata_snap; 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_ci /* 30062306a36Sopenharmony_ci * A flag that is set whenever a writeset has been archived. 30162306a36Sopenharmony_ci */ 30262306a36Sopenharmony_ci bool archived_writesets; 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci /* 30562306a36Sopenharmony_ci * Reading the space map root can fail, so we read it into this 30662306a36Sopenharmony_ci * buffer before the superblock is locked and updated. 30762306a36Sopenharmony_ci */ 30862306a36Sopenharmony_ci __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; 30962306a36Sopenharmony_ci}; 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_cistatic int superblock_read_lock(struct era_metadata *md, 31262306a36Sopenharmony_ci struct dm_block **sblock) 31362306a36Sopenharmony_ci{ 31462306a36Sopenharmony_ci return dm_bm_read_lock(md->bm, SUPERBLOCK_LOCATION, 31562306a36Sopenharmony_ci &sb_validator, sblock); 31662306a36Sopenharmony_ci} 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_cistatic int superblock_lock_zero(struct era_metadata *md, 31962306a36Sopenharmony_ci struct dm_block **sblock) 32062306a36Sopenharmony_ci{ 32162306a36Sopenharmony_ci return dm_bm_write_lock_zero(md->bm, SUPERBLOCK_LOCATION, 32262306a36Sopenharmony_ci &sb_validator, sblock); 32362306a36Sopenharmony_ci} 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_cistatic int superblock_lock(struct era_metadata *md, 32662306a36Sopenharmony_ci struct dm_block **sblock) 32762306a36Sopenharmony_ci{ 32862306a36Sopenharmony_ci return dm_bm_write_lock(md->bm, SUPERBLOCK_LOCATION, 32962306a36Sopenharmony_ci &sb_validator, sblock); 33062306a36Sopenharmony_ci} 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci/* FIXME: duplication with cache and thin */ 33362306a36Sopenharmony_cistatic int superblock_all_zeroes(struct dm_block_manager *bm, bool *result) 33462306a36Sopenharmony_ci{ 33562306a36Sopenharmony_ci int r; 33662306a36Sopenharmony_ci unsigned int i; 33762306a36Sopenharmony_ci struct dm_block *b; 33862306a36Sopenharmony_ci __le64 *data_le, zero = cpu_to_le64(0); 33962306a36Sopenharmony_ci unsigned int sb_block_size = dm_bm_block_size(bm) / sizeof(__le64); 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci /* 34262306a36Sopenharmony_ci * We can't use a validator here - it may be all zeroes. 34362306a36Sopenharmony_ci */ 34462306a36Sopenharmony_ci r = dm_bm_read_lock(bm, SUPERBLOCK_LOCATION, NULL, &b); 34562306a36Sopenharmony_ci if (r) 34662306a36Sopenharmony_ci return r; 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci data_le = dm_block_data(b); 34962306a36Sopenharmony_ci *result = true; 35062306a36Sopenharmony_ci for (i = 0; i < sb_block_size; i++) { 35162306a36Sopenharmony_ci if (data_le[i] != zero) { 35262306a36Sopenharmony_ci *result = false; 35362306a36Sopenharmony_ci break; 35462306a36Sopenharmony_ci } 35562306a36Sopenharmony_ci } 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ci dm_bm_unlock(b); 35862306a36Sopenharmony_ci 35962306a36Sopenharmony_ci return 0; 36062306a36Sopenharmony_ci} 36162306a36Sopenharmony_ci 36262306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_cistatic void ws_pack(const struct writeset_metadata *core, struct writeset_disk *disk) 36562306a36Sopenharmony_ci{ 36662306a36Sopenharmony_ci disk->nr_bits = cpu_to_le32(core->nr_bits); 36762306a36Sopenharmony_ci disk->root = cpu_to_le64(core->root); 36862306a36Sopenharmony_ci} 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_cistatic void ws_unpack(const struct writeset_disk *disk, struct writeset_metadata *core) 37162306a36Sopenharmony_ci{ 37262306a36Sopenharmony_ci core->nr_bits = le32_to_cpu(disk->nr_bits); 37362306a36Sopenharmony_ci core->root = le64_to_cpu(disk->root); 37462306a36Sopenharmony_ci} 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_cistatic void ws_inc(void *context, const void *value, unsigned int count) 37762306a36Sopenharmony_ci{ 37862306a36Sopenharmony_ci struct era_metadata *md = context; 37962306a36Sopenharmony_ci struct writeset_disk ws_d; 38062306a36Sopenharmony_ci dm_block_t b; 38162306a36Sopenharmony_ci unsigned int i; 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_ci for (i = 0; i < count; i++) { 38462306a36Sopenharmony_ci memcpy(&ws_d, value + (i * sizeof(ws_d)), sizeof(ws_d)); 38562306a36Sopenharmony_ci b = le64_to_cpu(ws_d.root); 38662306a36Sopenharmony_ci dm_tm_inc(md->tm, b); 38762306a36Sopenharmony_ci } 38862306a36Sopenharmony_ci} 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_cistatic void ws_dec(void *context, const void *value, unsigned int count) 39162306a36Sopenharmony_ci{ 39262306a36Sopenharmony_ci struct era_metadata *md = context; 39362306a36Sopenharmony_ci struct writeset_disk ws_d; 39462306a36Sopenharmony_ci dm_block_t b; 39562306a36Sopenharmony_ci unsigned int i; 39662306a36Sopenharmony_ci 39762306a36Sopenharmony_ci for (i = 0; i < count; i++) { 39862306a36Sopenharmony_ci memcpy(&ws_d, value + (i * sizeof(ws_d)), sizeof(ws_d)); 39962306a36Sopenharmony_ci b = le64_to_cpu(ws_d.root); 40062306a36Sopenharmony_ci dm_bitset_del(&md->bitset_info, b); 40162306a36Sopenharmony_ci } 40262306a36Sopenharmony_ci} 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_cistatic int ws_eq(void *context, const void *value1, const void *value2) 40562306a36Sopenharmony_ci{ 40662306a36Sopenharmony_ci return !memcmp(value1, value2, sizeof(struct writeset_disk)); 40762306a36Sopenharmony_ci} 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 41062306a36Sopenharmony_ci 41162306a36Sopenharmony_cistatic void setup_writeset_tree_info(struct era_metadata *md) 41262306a36Sopenharmony_ci{ 41362306a36Sopenharmony_ci struct dm_btree_value_type *vt = &md->writeset_tree_info.value_type; 41462306a36Sopenharmony_ci 41562306a36Sopenharmony_ci md->writeset_tree_info.tm = md->tm; 41662306a36Sopenharmony_ci md->writeset_tree_info.levels = 1; 41762306a36Sopenharmony_ci vt->context = md; 41862306a36Sopenharmony_ci vt->size = sizeof(struct writeset_disk); 41962306a36Sopenharmony_ci vt->inc = ws_inc; 42062306a36Sopenharmony_ci vt->dec = ws_dec; 42162306a36Sopenharmony_ci vt->equal = ws_eq; 42262306a36Sopenharmony_ci} 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_cistatic void setup_era_array_info(struct era_metadata *md) 42562306a36Sopenharmony_ci{ 42662306a36Sopenharmony_ci struct dm_btree_value_type vt; 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_ci vt.context = NULL; 42962306a36Sopenharmony_ci vt.size = sizeof(__le32); 43062306a36Sopenharmony_ci vt.inc = NULL; 43162306a36Sopenharmony_ci vt.dec = NULL; 43262306a36Sopenharmony_ci vt.equal = NULL; 43362306a36Sopenharmony_ci 43462306a36Sopenharmony_ci dm_array_info_init(&md->era_array_info, md->tm, &vt); 43562306a36Sopenharmony_ci} 43662306a36Sopenharmony_ci 43762306a36Sopenharmony_cistatic void setup_infos(struct era_metadata *md) 43862306a36Sopenharmony_ci{ 43962306a36Sopenharmony_ci dm_disk_bitset_init(md->tm, &md->bitset_info); 44062306a36Sopenharmony_ci setup_writeset_tree_info(md); 44162306a36Sopenharmony_ci setup_era_array_info(md); 44262306a36Sopenharmony_ci} 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 44562306a36Sopenharmony_ci 44662306a36Sopenharmony_cistatic int create_fresh_metadata(struct era_metadata *md) 44762306a36Sopenharmony_ci{ 44862306a36Sopenharmony_ci int r; 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci r = dm_tm_create_with_sm(md->bm, SUPERBLOCK_LOCATION, 45162306a36Sopenharmony_ci &md->tm, &md->sm); 45262306a36Sopenharmony_ci if (r < 0) { 45362306a36Sopenharmony_ci DMERR("dm_tm_create_with_sm failed"); 45462306a36Sopenharmony_ci return r; 45562306a36Sopenharmony_ci } 45662306a36Sopenharmony_ci 45762306a36Sopenharmony_ci setup_infos(md); 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_ci r = dm_btree_empty(&md->writeset_tree_info, &md->writeset_tree_root); 46062306a36Sopenharmony_ci if (r) { 46162306a36Sopenharmony_ci DMERR("couldn't create new writeset tree"); 46262306a36Sopenharmony_ci goto bad; 46362306a36Sopenharmony_ci } 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_ci r = dm_array_empty(&md->era_array_info, &md->era_array_root); 46662306a36Sopenharmony_ci if (r) { 46762306a36Sopenharmony_ci DMERR("couldn't create era array"); 46862306a36Sopenharmony_ci goto bad; 46962306a36Sopenharmony_ci } 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_ci return 0; 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_cibad: 47462306a36Sopenharmony_ci dm_sm_destroy(md->sm); 47562306a36Sopenharmony_ci dm_tm_destroy(md->tm); 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci return r; 47862306a36Sopenharmony_ci} 47962306a36Sopenharmony_ci 48062306a36Sopenharmony_cistatic int save_sm_root(struct era_metadata *md) 48162306a36Sopenharmony_ci{ 48262306a36Sopenharmony_ci int r; 48362306a36Sopenharmony_ci size_t metadata_len; 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci r = dm_sm_root_size(md->sm, &metadata_len); 48662306a36Sopenharmony_ci if (r < 0) 48762306a36Sopenharmony_ci return r; 48862306a36Sopenharmony_ci 48962306a36Sopenharmony_ci return dm_sm_copy_root(md->sm, &md->metadata_space_map_root, 49062306a36Sopenharmony_ci metadata_len); 49162306a36Sopenharmony_ci} 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_cistatic void copy_sm_root(struct era_metadata *md, struct superblock_disk *disk) 49462306a36Sopenharmony_ci{ 49562306a36Sopenharmony_ci memcpy(&disk->metadata_space_map_root, 49662306a36Sopenharmony_ci &md->metadata_space_map_root, 49762306a36Sopenharmony_ci sizeof(md->metadata_space_map_root)); 49862306a36Sopenharmony_ci} 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_ci/* 50162306a36Sopenharmony_ci * Writes a superblock, including the static fields that don't get updated 50262306a36Sopenharmony_ci * with every commit (possible optimisation here). 'md' should be fully 50362306a36Sopenharmony_ci * constructed when this is called. 50462306a36Sopenharmony_ci */ 50562306a36Sopenharmony_cistatic void prepare_superblock(struct era_metadata *md, struct superblock_disk *disk) 50662306a36Sopenharmony_ci{ 50762306a36Sopenharmony_ci disk->magic = cpu_to_le64(SUPERBLOCK_MAGIC); 50862306a36Sopenharmony_ci disk->flags = cpu_to_le32(0ul); 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_ci /* FIXME: can't keep blanking the uuid (uuid is currently unused though) */ 51162306a36Sopenharmony_ci memset(disk->uuid, 0, sizeof(disk->uuid)); 51262306a36Sopenharmony_ci disk->version = cpu_to_le32(MAX_ERA_VERSION); 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_ci copy_sm_root(md, disk); 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci disk->data_block_size = cpu_to_le32(md->block_size); 51762306a36Sopenharmony_ci disk->metadata_block_size = cpu_to_le32(DM_ERA_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); 51862306a36Sopenharmony_ci disk->nr_blocks = cpu_to_le32(md->nr_blocks); 51962306a36Sopenharmony_ci disk->current_era = cpu_to_le32(md->current_era); 52062306a36Sopenharmony_ci 52162306a36Sopenharmony_ci ws_pack(&md->current_writeset->md, &disk->current_writeset); 52262306a36Sopenharmony_ci disk->writeset_tree_root = cpu_to_le64(md->writeset_tree_root); 52362306a36Sopenharmony_ci disk->era_array_root = cpu_to_le64(md->era_array_root); 52462306a36Sopenharmony_ci disk->metadata_snap = cpu_to_le64(md->metadata_snap); 52562306a36Sopenharmony_ci} 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_cistatic int write_superblock(struct era_metadata *md) 52862306a36Sopenharmony_ci{ 52962306a36Sopenharmony_ci int r; 53062306a36Sopenharmony_ci struct dm_block *sblock; 53162306a36Sopenharmony_ci struct superblock_disk *disk; 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci r = save_sm_root(md); 53462306a36Sopenharmony_ci if (r) { 53562306a36Sopenharmony_ci DMERR("%s: save_sm_root failed", __func__); 53662306a36Sopenharmony_ci return r; 53762306a36Sopenharmony_ci } 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci r = superblock_lock_zero(md, &sblock); 54062306a36Sopenharmony_ci if (r) 54162306a36Sopenharmony_ci return r; 54262306a36Sopenharmony_ci 54362306a36Sopenharmony_ci disk = dm_block_data(sblock); 54462306a36Sopenharmony_ci prepare_superblock(md, disk); 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_ci return dm_tm_commit(md->tm, sblock); 54762306a36Sopenharmony_ci} 54862306a36Sopenharmony_ci 54962306a36Sopenharmony_ci/* 55062306a36Sopenharmony_ci * Assumes block_size and the infos are set. 55162306a36Sopenharmony_ci */ 55262306a36Sopenharmony_cistatic int format_metadata(struct era_metadata *md) 55362306a36Sopenharmony_ci{ 55462306a36Sopenharmony_ci int r; 55562306a36Sopenharmony_ci 55662306a36Sopenharmony_ci r = create_fresh_metadata(md); 55762306a36Sopenharmony_ci if (r) 55862306a36Sopenharmony_ci return r; 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci r = write_superblock(md); 56162306a36Sopenharmony_ci if (r) { 56262306a36Sopenharmony_ci dm_sm_destroy(md->sm); 56362306a36Sopenharmony_ci dm_tm_destroy(md->tm); 56462306a36Sopenharmony_ci return r; 56562306a36Sopenharmony_ci } 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_ci return 0; 56862306a36Sopenharmony_ci} 56962306a36Sopenharmony_ci 57062306a36Sopenharmony_cistatic int open_metadata(struct era_metadata *md) 57162306a36Sopenharmony_ci{ 57262306a36Sopenharmony_ci int r; 57362306a36Sopenharmony_ci struct dm_block *sblock; 57462306a36Sopenharmony_ci struct superblock_disk *disk; 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_ci r = superblock_read_lock(md, &sblock); 57762306a36Sopenharmony_ci if (r) { 57862306a36Sopenharmony_ci DMERR("couldn't read_lock superblock"); 57962306a36Sopenharmony_ci return r; 58062306a36Sopenharmony_ci } 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_ci disk = dm_block_data(sblock); 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci /* Verify the data block size hasn't changed */ 58562306a36Sopenharmony_ci if (le32_to_cpu(disk->data_block_size) != md->block_size) { 58662306a36Sopenharmony_ci DMERR("changing the data block size (from %u to %llu) is not supported", 58762306a36Sopenharmony_ci le32_to_cpu(disk->data_block_size), md->block_size); 58862306a36Sopenharmony_ci r = -EINVAL; 58962306a36Sopenharmony_ci goto bad; 59062306a36Sopenharmony_ci } 59162306a36Sopenharmony_ci 59262306a36Sopenharmony_ci r = dm_tm_open_with_sm(md->bm, SUPERBLOCK_LOCATION, 59362306a36Sopenharmony_ci disk->metadata_space_map_root, 59462306a36Sopenharmony_ci sizeof(disk->metadata_space_map_root), 59562306a36Sopenharmony_ci &md->tm, &md->sm); 59662306a36Sopenharmony_ci if (r) { 59762306a36Sopenharmony_ci DMERR("dm_tm_open_with_sm failed"); 59862306a36Sopenharmony_ci goto bad; 59962306a36Sopenharmony_ci } 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci setup_infos(md); 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_ci md->nr_blocks = le32_to_cpu(disk->nr_blocks); 60462306a36Sopenharmony_ci md->current_era = le32_to_cpu(disk->current_era); 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_ci ws_unpack(&disk->current_writeset, &md->current_writeset->md); 60762306a36Sopenharmony_ci md->writeset_tree_root = le64_to_cpu(disk->writeset_tree_root); 60862306a36Sopenharmony_ci md->era_array_root = le64_to_cpu(disk->era_array_root); 60962306a36Sopenharmony_ci md->metadata_snap = le64_to_cpu(disk->metadata_snap); 61062306a36Sopenharmony_ci md->archived_writesets = true; 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_ci dm_bm_unlock(sblock); 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci return 0; 61562306a36Sopenharmony_ci 61662306a36Sopenharmony_cibad: 61762306a36Sopenharmony_ci dm_bm_unlock(sblock); 61862306a36Sopenharmony_ci return r; 61962306a36Sopenharmony_ci} 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_cistatic int open_or_format_metadata(struct era_metadata *md, 62262306a36Sopenharmony_ci bool may_format) 62362306a36Sopenharmony_ci{ 62462306a36Sopenharmony_ci int r; 62562306a36Sopenharmony_ci bool unformatted = false; 62662306a36Sopenharmony_ci 62762306a36Sopenharmony_ci r = superblock_all_zeroes(md->bm, &unformatted); 62862306a36Sopenharmony_ci if (r) 62962306a36Sopenharmony_ci return r; 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_ci if (unformatted) 63262306a36Sopenharmony_ci return may_format ? format_metadata(md) : -EPERM; 63362306a36Sopenharmony_ci 63462306a36Sopenharmony_ci return open_metadata(md); 63562306a36Sopenharmony_ci} 63662306a36Sopenharmony_ci 63762306a36Sopenharmony_cistatic int create_persistent_data_objects(struct era_metadata *md, 63862306a36Sopenharmony_ci bool may_format) 63962306a36Sopenharmony_ci{ 64062306a36Sopenharmony_ci int r; 64162306a36Sopenharmony_ci 64262306a36Sopenharmony_ci md->bm = dm_block_manager_create(md->bdev, DM_ERA_METADATA_BLOCK_SIZE, 64362306a36Sopenharmony_ci ERA_MAX_CONCURRENT_LOCKS); 64462306a36Sopenharmony_ci if (IS_ERR(md->bm)) { 64562306a36Sopenharmony_ci DMERR("could not create block manager"); 64662306a36Sopenharmony_ci return PTR_ERR(md->bm); 64762306a36Sopenharmony_ci } 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_ci r = open_or_format_metadata(md, may_format); 65062306a36Sopenharmony_ci if (r) 65162306a36Sopenharmony_ci dm_block_manager_destroy(md->bm); 65262306a36Sopenharmony_ci 65362306a36Sopenharmony_ci return r; 65462306a36Sopenharmony_ci} 65562306a36Sopenharmony_ci 65662306a36Sopenharmony_cistatic void destroy_persistent_data_objects(struct era_metadata *md) 65762306a36Sopenharmony_ci{ 65862306a36Sopenharmony_ci dm_sm_destroy(md->sm); 65962306a36Sopenharmony_ci dm_tm_destroy(md->tm); 66062306a36Sopenharmony_ci dm_block_manager_destroy(md->bm); 66162306a36Sopenharmony_ci} 66262306a36Sopenharmony_ci 66362306a36Sopenharmony_ci/* 66462306a36Sopenharmony_ci * This waits until all era_map threads have picked up the new filter. 66562306a36Sopenharmony_ci */ 66662306a36Sopenharmony_cistatic void swap_writeset(struct era_metadata *md, struct writeset *new_writeset) 66762306a36Sopenharmony_ci{ 66862306a36Sopenharmony_ci rcu_assign_pointer(md->current_writeset, new_writeset); 66962306a36Sopenharmony_ci synchronize_rcu(); 67062306a36Sopenharmony_ci} 67162306a36Sopenharmony_ci 67262306a36Sopenharmony_ci/* 67362306a36Sopenharmony_ci *------------------------------------------------------------------------ 67462306a36Sopenharmony_ci * Writesets get 'digested' into the main era array. 67562306a36Sopenharmony_ci * 67662306a36Sopenharmony_ci * We're using a coroutine here so the worker thread can do the digestion, 67762306a36Sopenharmony_ci * thus avoiding synchronisation of the metadata. Digesting a whole 67862306a36Sopenharmony_ci * writeset in one go would cause too much latency. 67962306a36Sopenharmony_ci *------------------------------------------------------------------------ 68062306a36Sopenharmony_ci */ 68162306a36Sopenharmony_cistruct digest { 68262306a36Sopenharmony_ci uint32_t era; 68362306a36Sopenharmony_ci unsigned int nr_bits, current_bit; 68462306a36Sopenharmony_ci struct writeset_metadata writeset; 68562306a36Sopenharmony_ci __le32 value; 68662306a36Sopenharmony_ci struct dm_disk_bitset info; 68762306a36Sopenharmony_ci 68862306a36Sopenharmony_ci int (*step)(struct era_metadata *md, struct digest *d); 68962306a36Sopenharmony_ci}; 69062306a36Sopenharmony_ci 69162306a36Sopenharmony_cistatic int metadata_digest_lookup_writeset(struct era_metadata *md, 69262306a36Sopenharmony_ci struct digest *d); 69362306a36Sopenharmony_ci 69462306a36Sopenharmony_cistatic int metadata_digest_remove_writeset(struct era_metadata *md, 69562306a36Sopenharmony_ci struct digest *d) 69662306a36Sopenharmony_ci{ 69762306a36Sopenharmony_ci int r; 69862306a36Sopenharmony_ci uint64_t key = d->era; 69962306a36Sopenharmony_ci 70062306a36Sopenharmony_ci r = dm_btree_remove(&md->writeset_tree_info, md->writeset_tree_root, 70162306a36Sopenharmony_ci &key, &md->writeset_tree_root); 70262306a36Sopenharmony_ci if (r) { 70362306a36Sopenharmony_ci DMERR("%s: dm_btree_remove failed", __func__); 70462306a36Sopenharmony_ci return r; 70562306a36Sopenharmony_ci } 70662306a36Sopenharmony_ci 70762306a36Sopenharmony_ci d->step = metadata_digest_lookup_writeset; 70862306a36Sopenharmony_ci return 0; 70962306a36Sopenharmony_ci} 71062306a36Sopenharmony_ci 71162306a36Sopenharmony_ci#define INSERTS_PER_STEP 100 71262306a36Sopenharmony_ci 71362306a36Sopenharmony_cistatic int metadata_digest_transcribe_writeset(struct era_metadata *md, 71462306a36Sopenharmony_ci struct digest *d) 71562306a36Sopenharmony_ci{ 71662306a36Sopenharmony_ci int r; 71762306a36Sopenharmony_ci bool marked; 71862306a36Sopenharmony_ci unsigned int b, e = min(d->current_bit + INSERTS_PER_STEP, d->nr_bits); 71962306a36Sopenharmony_ci 72062306a36Sopenharmony_ci for (b = d->current_bit; b < e; b++) { 72162306a36Sopenharmony_ci r = writeset_marked_on_disk(&d->info, &d->writeset, b, &marked); 72262306a36Sopenharmony_ci if (r) { 72362306a36Sopenharmony_ci DMERR("%s: writeset_marked_on_disk failed", __func__); 72462306a36Sopenharmony_ci return r; 72562306a36Sopenharmony_ci } 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_ci if (!marked) 72862306a36Sopenharmony_ci continue; 72962306a36Sopenharmony_ci 73062306a36Sopenharmony_ci __dm_bless_for_disk(&d->value); 73162306a36Sopenharmony_ci r = dm_array_set_value(&md->era_array_info, md->era_array_root, 73262306a36Sopenharmony_ci b, &d->value, &md->era_array_root); 73362306a36Sopenharmony_ci if (r) { 73462306a36Sopenharmony_ci DMERR("%s: dm_array_set_value failed", __func__); 73562306a36Sopenharmony_ci return r; 73662306a36Sopenharmony_ci } 73762306a36Sopenharmony_ci } 73862306a36Sopenharmony_ci 73962306a36Sopenharmony_ci if (b == d->nr_bits) 74062306a36Sopenharmony_ci d->step = metadata_digest_remove_writeset; 74162306a36Sopenharmony_ci else 74262306a36Sopenharmony_ci d->current_bit = b; 74362306a36Sopenharmony_ci 74462306a36Sopenharmony_ci return 0; 74562306a36Sopenharmony_ci} 74662306a36Sopenharmony_ci 74762306a36Sopenharmony_cistatic int metadata_digest_lookup_writeset(struct era_metadata *md, 74862306a36Sopenharmony_ci struct digest *d) 74962306a36Sopenharmony_ci{ 75062306a36Sopenharmony_ci int r; 75162306a36Sopenharmony_ci uint64_t key; 75262306a36Sopenharmony_ci struct writeset_disk disk; 75362306a36Sopenharmony_ci 75462306a36Sopenharmony_ci r = dm_btree_find_lowest_key(&md->writeset_tree_info, 75562306a36Sopenharmony_ci md->writeset_tree_root, &key); 75662306a36Sopenharmony_ci if (r < 0) 75762306a36Sopenharmony_ci return r; 75862306a36Sopenharmony_ci 75962306a36Sopenharmony_ci d->era = key; 76062306a36Sopenharmony_ci 76162306a36Sopenharmony_ci r = dm_btree_lookup(&md->writeset_tree_info, 76262306a36Sopenharmony_ci md->writeset_tree_root, &key, &disk); 76362306a36Sopenharmony_ci if (r) { 76462306a36Sopenharmony_ci if (r == -ENODATA) { 76562306a36Sopenharmony_ci d->step = NULL; 76662306a36Sopenharmony_ci return 0; 76762306a36Sopenharmony_ci } 76862306a36Sopenharmony_ci 76962306a36Sopenharmony_ci DMERR("%s: dm_btree_lookup failed", __func__); 77062306a36Sopenharmony_ci return r; 77162306a36Sopenharmony_ci } 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_ci ws_unpack(&disk, &d->writeset); 77462306a36Sopenharmony_ci d->value = cpu_to_le32(key); 77562306a36Sopenharmony_ci 77662306a36Sopenharmony_ci /* 77762306a36Sopenharmony_ci * We initialise another bitset info to avoid any caching side effects 77862306a36Sopenharmony_ci * with the previous one. 77962306a36Sopenharmony_ci */ 78062306a36Sopenharmony_ci dm_disk_bitset_init(md->tm, &d->info); 78162306a36Sopenharmony_ci 78262306a36Sopenharmony_ci d->nr_bits = min(d->writeset.nr_bits, md->nr_blocks); 78362306a36Sopenharmony_ci d->current_bit = 0; 78462306a36Sopenharmony_ci d->step = metadata_digest_transcribe_writeset; 78562306a36Sopenharmony_ci 78662306a36Sopenharmony_ci return 0; 78762306a36Sopenharmony_ci} 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_cistatic int metadata_digest_start(struct era_metadata *md, struct digest *d) 79062306a36Sopenharmony_ci{ 79162306a36Sopenharmony_ci if (d->step) 79262306a36Sopenharmony_ci return 0; 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci memset(d, 0, sizeof(*d)); 79562306a36Sopenharmony_ci d->step = metadata_digest_lookup_writeset; 79662306a36Sopenharmony_ci 79762306a36Sopenharmony_ci return 0; 79862306a36Sopenharmony_ci} 79962306a36Sopenharmony_ci 80062306a36Sopenharmony_ci/* 80162306a36Sopenharmony_ci *----------------------------------------------------------------- 80262306a36Sopenharmony_ci * High level metadata interface. Target methods should use these, 80362306a36Sopenharmony_ci * and not the lower level ones. 80462306a36Sopenharmony_ci *----------------------------------------------------------------- 80562306a36Sopenharmony_ci */ 80662306a36Sopenharmony_cistatic struct era_metadata *metadata_open(struct block_device *bdev, 80762306a36Sopenharmony_ci sector_t block_size, 80862306a36Sopenharmony_ci bool may_format) 80962306a36Sopenharmony_ci{ 81062306a36Sopenharmony_ci int r; 81162306a36Sopenharmony_ci struct era_metadata *md = kzalloc(sizeof(*md), GFP_KERNEL); 81262306a36Sopenharmony_ci 81362306a36Sopenharmony_ci if (!md) 81462306a36Sopenharmony_ci return NULL; 81562306a36Sopenharmony_ci 81662306a36Sopenharmony_ci md->bdev = bdev; 81762306a36Sopenharmony_ci md->block_size = block_size; 81862306a36Sopenharmony_ci 81962306a36Sopenharmony_ci md->writesets[0].md.root = INVALID_WRITESET_ROOT; 82062306a36Sopenharmony_ci md->writesets[1].md.root = INVALID_WRITESET_ROOT; 82162306a36Sopenharmony_ci md->current_writeset = &md->writesets[0]; 82262306a36Sopenharmony_ci 82362306a36Sopenharmony_ci r = create_persistent_data_objects(md, may_format); 82462306a36Sopenharmony_ci if (r) { 82562306a36Sopenharmony_ci kfree(md); 82662306a36Sopenharmony_ci return ERR_PTR(r); 82762306a36Sopenharmony_ci } 82862306a36Sopenharmony_ci 82962306a36Sopenharmony_ci return md; 83062306a36Sopenharmony_ci} 83162306a36Sopenharmony_ci 83262306a36Sopenharmony_cistatic void metadata_close(struct era_metadata *md) 83362306a36Sopenharmony_ci{ 83462306a36Sopenharmony_ci writeset_free(&md->writesets[0]); 83562306a36Sopenharmony_ci writeset_free(&md->writesets[1]); 83662306a36Sopenharmony_ci destroy_persistent_data_objects(md); 83762306a36Sopenharmony_ci kfree(md); 83862306a36Sopenharmony_ci} 83962306a36Sopenharmony_ci 84062306a36Sopenharmony_cistatic bool valid_nr_blocks(dm_block_t n) 84162306a36Sopenharmony_ci{ 84262306a36Sopenharmony_ci /* 84362306a36Sopenharmony_ci * dm_bitset restricts us to 2^32. test_bit & co. restrict us 84462306a36Sopenharmony_ci * further to 2^31 - 1 84562306a36Sopenharmony_ci */ 84662306a36Sopenharmony_ci return n < (1ull << 31); 84762306a36Sopenharmony_ci} 84862306a36Sopenharmony_ci 84962306a36Sopenharmony_cistatic int metadata_resize(struct era_metadata *md, void *arg) 85062306a36Sopenharmony_ci{ 85162306a36Sopenharmony_ci int r; 85262306a36Sopenharmony_ci dm_block_t *new_size = arg; 85362306a36Sopenharmony_ci __le32 value; 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_ci if (!valid_nr_blocks(*new_size)) { 85662306a36Sopenharmony_ci DMERR("Invalid number of origin blocks %llu", 85762306a36Sopenharmony_ci (unsigned long long) *new_size); 85862306a36Sopenharmony_ci return -EINVAL; 85962306a36Sopenharmony_ci } 86062306a36Sopenharmony_ci 86162306a36Sopenharmony_ci writeset_free(&md->writesets[0]); 86262306a36Sopenharmony_ci writeset_free(&md->writesets[1]); 86362306a36Sopenharmony_ci 86462306a36Sopenharmony_ci r = writeset_alloc(&md->writesets[0], *new_size); 86562306a36Sopenharmony_ci if (r) { 86662306a36Sopenharmony_ci DMERR("%s: writeset_alloc failed for writeset 0", __func__); 86762306a36Sopenharmony_ci return r; 86862306a36Sopenharmony_ci } 86962306a36Sopenharmony_ci 87062306a36Sopenharmony_ci r = writeset_alloc(&md->writesets[1], *new_size); 87162306a36Sopenharmony_ci if (r) { 87262306a36Sopenharmony_ci DMERR("%s: writeset_alloc failed for writeset 1", __func__); 87362306a36Sopenharmony_ci writeset_free(&md->writesets[0]); 87462306a36Sopenharmony_ci return r; 87562306a36Sopenharmony_ci } 87662306a36Sopenharmony_ci 87762306a36Sopenharmony_ci value = cpu_to_le32(0u); 87862306a36Sopenharmony_ci __dm_bless_for_disk(&value); 87962306a36Sopenharmony_ci r = dm_array_resize(&md->era_array_info, md->era_array_root, 88062306a36Sopenharmony_ci md->nr_blocks, *new_size, 88162306a36Sopenharmony_ci &value, &md->era_array_root); 88262306a36Sopenharmony_ci if (r) { 88362306a36Sopenharmony_ci DMERR("%s: dm_array_resize failed", __func__); 88462306a36Sopenharmony_ci writeset_free(&md->writesets[0]); 88562306a36Sopenharmony_ci writeset_free(&md->writesets[1]); 88662306a36Sopenharmony_ci return r; 88762306a36Sopenharmony_ci } 88862306a36Sopenharmony_ci 88962306a36Sopenharmony_ci md->nr_blocks = *new_size; 89062306a36Sopenharmony_ci return 0; 89162306a36Sopenharmony_ci} 89262306a36Sopenharmony_ci 89362306a36Sopenharmony_cistatic int metadata_era_archive(struct era_metadata *md) 89462306a36Sopenharmony_ci{ 89562306a36Sopenharmony_ci int r; 89662306a36Sopenharmony_ci uint64_t keys[1]; 89762306a36Sopenharmony_ci struct writeset_disk value; 89862306a36Sopenharmony_ci 89962306a36Sopenharmony_ci r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root, 90062306a36Sopenharmony_ci &md->current_writeset->md.root); 90162306a36Sopenharmony_ci if (r) { 90262306a36Sopenharmony_ci DMERR("%s: dm_bitset_flush failed", __func__); 90362306a36Sopenharmony_ci return r; 90462306a36Sopenharmony_ci } 90562306a36Sopenharmony_ci 90662306a36Sopenharmony_ci ws_pack(&md->current_writeset->md, &value); 90762306a36Sopenharmony_ci 90862306a36Sopenharmony_ci keys[0] = md->current_era; 90962306a36Sopenharmony_ci __dm_bless_for_disk(&value); 91062306a36Sopenharmony_ci r = dm_btree_insert(&md->writeset_tree_info, md->writeset_tree_root, 91162306a36Sopenharmony_ci keys, &value, &md->writeset_tree_root); 91262306a36Sopenharmony_ci if (r) { 91362306a36Sopenharmony_ci DMERR("%s: couldn't insert writeset into btree", __func__); 91462306a36Sopenharmony_ci /* FIXME: fail mode */ 91562306a36Sopenharmony_ci return r; 91662306a36Sopenharmony_ci } 91762306a36Sopenharmony_ci 91862306a36Sopenharmony_ci md->current_writeset->md.root = INVALID_WRITESET_ROOT; 91962306a36Sopenharmony_ci md->archived_writesets = true; 92062306a36Sopenharmony_ci 92162306a36Sopenharmony_ci return 0; 92262306a36Sopenharmony_ci} 92362306a36Sopenharmony_ci 92462306a36Sopenharmony_cistatic struct writeset *next_writeset(struct era_metadata *md) 92562306a36Sopenharmony_ci{ 92662306a36Sopenharmony_ci return (md->current_writeset == &md->writesets[0]) ? 92762306a36Sopenharmony_ci &md->writesets[1] : &md->writesets[0]; 92862306a36Sopenharmony_ci} 92962306a36Sopenharmony_ci 93062306a36Sopenharmony_cistatic int metadata_new_era(struct era_metadata *md) 93162306a36Sopenharmony_ci{ 93262306a36Sopenharmony_ci int r; 93362306a36Sopenharmony_ci struct writeset *new_writeset = next_writeset(md); 93462306a36Sopenharmony_ci 93562306a36Sopenharmony_ci r = writeset_init(&md->bitset_info, new_writeset, md->nr_blocks); 93662306a36Sopenharmony_ci if (r) { 93762306a36Sopenharmony_ci DMERR("%s: writeset_init failed", __func__); 93862306a36Sopenharmony_ci return r; 93962306a36Sopenharmony_ci } 94062306a36Sopenharmony_ci 94162306a36Sopenharmony_ci swap_writeset(md, new_writeset); 94262306a36Sopenharmony_ci md->current_era++; 94362306a36Sopenharmony_ci 94462306a36Sopenharmony_ci return 0; 94562306a36Sopenharmony_ci} 94662306a36Sopenharmony_ci 94762306a36Sopenharmony_cistatic int metadata_era_rollover(struct era_metadata *md) 94862306a36Sopenharmony_ci{ 94962306a36Sopenharmony_ci int r; 95062306a36Sopenharmony_ci 95162306a36Sopenharmony_ci if (md->current_writeset->md.root != INVALID_WRITESET_ROOT) { 95262306a36Sopenharmony_ci r = metadata_era_archive(md); 95362306a36Sopenharmony_ci if (r) { 95462306a36Sopenharmony_ci DMERR("%s: metadata_archive_era failed", __func__); 95562306a36Sopenharmony_ci /* FIXME: fail mode? */ 95662306a36Sopenharmony_ci return r; 95762306a36Sopenharmony_ci } 95862306a36Sopenharmony_ci } 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ci r = metadata_new_era(md); 96162306a36Sopenharmony_ci if (r) { 96262306a36Sopenharmony_ci DMERR("%s: new era failed", __func__); 96362306a36Sopenharmony_ci /* FIXME: fail mode */ 96462306a36Sopenharmony_ci return r; 96562306a36Sopenharmony_ci } 96662306a36Sopenharmony_ci 96762306a36Sopenharmony_ci return 0; 96862306a36Sopenharmony_ci} 96962306a36Sopenharmony_ci 97062306a36Sopenharmony_cistatic bool metadata_current_marked(struct era_metadata *md, dm_block_t block) 97162306a36Sopenharmony_ci{ 97262306a36Sopenharmony_ci bool r; 97362306a36Sopenharmony_ci struct writeset *ws; 97462306a36Sopenharmony_ci 97562306a36Sopenharmony_ci rcu_read_lock(); 97662306a36Sopenharmony_ci ws = rcu_dereference(md->current_writeset); 97762306a36Sopenharmony_ci r = writeset_marked(ws, block); 97862306a36Sopenharmony_ci rcu_read_unlock(); 97962306a36Sopenharmony_ci 98062306a36Sopenharmony_ci return r; 98162306a36Sopenharmony_ci} 98262306a36Sopenharmony_ci 98362306a36Sopenharmony_cistatic int metadata_commit(struct era_metadata *md) 98462306a36Sopenharmony_ci{ 98562306a36Sopenharmony_ci int r; 98662306a36Sopenharmony_ci struct dm_block *sblock; 98762306a36Sopenharmony_ci 98862306a36Sopenharmony_ci if (md->current_writeset->md.root != INVALID_WRITESET_ROOT) { 98962306a36Sopenharmony_ci r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root, 99062306a36Sopenharmony_ci &md->current_writeset->md.root); 99162306a36Sopenharmony_ci if (r) { 99262306a36Sopenharmony_ci DMERR("%s: bitset flush failed", __func__); 99362306a36Sopenharmony_ci return r; 99462306a36Sopenharmony_ci } 99562306a36Sopenharmony_ci } 99662306a36Sopenharmony_ci 99762306a36Sopenharmony_ci r = dm_tm_pre_commit(md->tm); 99862306a36Sopenharmony_ci if (r) { 99962306a36Sopenharmony_ci DMERR("%s: pre commit failed", __func__); 100062306a36Sopenharmony_ci return r; 100162306a36Sopenharmony_ci } 100262306a36Sopenharmony_ci 100362306a36Sopenharmony_ci r = save_sm_root(md); 100462306a36Sopenharmony_ci if (r) { 100562306a36Sopenharmony_ci DMERR("%s: save_sm_root failed", __func__); 100662306a36Sopenharmony_ci return r; 100762306a36Sopenharmony_ci } 100862306a36Sopenharmony_ci 100962306a36Sopenharmony_ci r = superblock_lock(md, &sblock); 101062306a36Sopenharmony_ci if (r) { 101162306a36Sopenharmony_ci DMERR("%s: superblock lock failed", __func__); 101262306a36Sopenharmony_ci return r; 101362306a36Sopenharmony_ci } 101462306a36Sopenharmony_ci 101562306a36Sopenharmony_ci prepare_superblock(md, dm_block_data(sblock)); 101662306a36Sopenharmony_ci 101762306a36Sopenharmony_ci return dm_tm_commit(md->tm, sblock); 101862306a36Sopenharmony_ci} 101962306a36Sopenharmony_ci 102062306a36Sopenharmony_cistatic int metadata_checkpoint(struct era_metadata *md) 102162306a36Sopenharmony_ci{ 102262306a36Sopenharmony_ci /* 102362306a36Sopenharmony_ci * For now we just rollover, but later I want to put a check in to 102462306a36Sopenharmony_ci * avoid this if the filter is still pretty fresh. 102562306a36Sopenharmony_ci */ 102662306a36Sopenharmony_ci return metadata_era_rollover(md); 102762306a36Sopenharmony_ci} 102862306a36Sopenharmony_ci 102962306a36Sopenharmony_ci/* 103062306a36Sopenharmony_ci * Metadata snapshots allow userland to access era data. 103162306a36Sopenharmony_ci */ 103262306a36Sopenharmony_cistatic int metadata_take_snap(struct era_metadata *md) 103362306a36Sopenharmony_ci{ 103462306a36Sopenharmony_ci int r, inc; 103562306a36Sopenharmony_ci struct dm_block *clone; 103662306a36Sopenharmony_ci 103762306a36Sopenharmony_ci if (md->metadata_snap != SUPERBLOCK_LOCATION) { 103862306a36Sopenharmony_ci DMERR("%s: metadata snapshot already exists", __func__); 103962306a36Sopenharmony_ci return -EINVAL; 104062306a36Sopenharmony_ci } 104162306a36Sopenharmony_ci 104262306a36Sopenharmony_ci r = metadata_era_rollover(md); 104362306a36Sopenharmony_ci if (r) { 104462306a36Sopenharmony_ci DMERR("%s: era rollover failed", __func__); 104562306a36Sopenharmony_ci return r; 104662306a36Sopenharmony_ci } 104762306a36Sopenharmony_ci 104862306a36Sopenharmony_ci r = metadata_commit(md); 104962306a36Sopenharmony_ci if (r) { 105062306a36Sopenharmony_ci DMERR("%s: pre commit failed", __func__); 105162306a36Sopenharmony_ci return r; 105262306a36Sopenharmony_ci } 105362306a36Sopenharmony_ci 105462306a36Sopenharmony_ci r = dm_sm_inc_block(md->sm, SUPERBLOCK_LOCATION); 105562306a36Sopenharmony_ci if (r) { 105662306a36Sopenharmony_ci DMERR("%s: couldn't increment superblock", __func__); 105762306a36Sopenharmony_ci return r; 105862306a36Sopenharmony_ci } 105962306a36Sopenharmony_ci 106062306a36Sopenharmony_ci r = dm_tm_shadow_block(md->tm, SUPERBLOCK_LOCATION, 106162306a36Sopenharmony_ci &sb_validator, &clone, &inc); 106262306a36Sopenharmony_ci if (r) { 106362306a36Sopenharmony_ci DMERR("%s: couldn't shadow superblock", __func__); 106462306a36Sopenharmony_ci dm_sm_dec_block(md->sm, SUPERBLOCK_LOCATION); 106562306a36Sopenharmony_ci return r; 106662306a36Sopenharmony_ci } 106762306a36Sopenharmony_ci BUG_ON(!inc); 106862306a36Sopenharmony_ci 106962306a36Sopenharmony_ci r = dm_sm_inc_block(md->sm, md->writeset_tree_root); 107062306a36Sopenharmony_ci if (r) { 107162306a36Sopenharmony_ci DMERR("%s: couldn't inc writeset tree root", __func__); 107262306a36Sopenharmony_ci dm_tm_unlock(md->tm, clone); 107362306a36Sopenharmony_ci return r; 107462306a36Sopenharmony_ci } 107562306a36Sopenharmony_ci 107662306a36Sopenharmony_ci r = dm_sm_inc_block(md->sm, md->era_array_root); 107762306a36Sopenharmony_ci if (r) { 107862306a36Sopenharmony_ci DMERR("%s: couldn't inc era tree root", __func__); 107962306a36Sopenharmony_ci dm_sm_dec_block(md->sm, md->writeset_tree_root); 108062306a36Sopenharmony_ci dm_tm_unlock(md->tm, clone); 108162306a36Sopenharmony_ci return r; 108262306a36Sopenharmony_ci } 108362306a36Sopenharmony_ci 108462306a36Sopenharmony_ci md->metadata_snap = dm_block_location(clone); 108562306a36Sopenharmony_ci 108662306a36Sopenharmony_ci dm_tm_unlock(md->tm, clone); 108762306a36Sopenharmony_ci 108862306a36Sopenharmony_ci return 0; 108962306a36Sopenharmony_ci} 109062306a36Sopenharmony_ci 109162306a36Sopenharmony_cistatic int metadata_drop_snap(struct era_metadata *md) 109262306a36Sopenharmony_ci{ 109362306a36Sopenharmony_ci int r; 109462306a36Sopenharmony_ci dm_block_t location; 109562306a36Sopenharmony_ci struct dm_block *clone; 109662306a36Sopenharmony_ci struct superblock_disk *disk; 109762306a36Sopenharmony_ci 109862306a36Sopenharmony_ci if (md->metadata_snap == SUPERBLOCK_LOCATION) { 109962306a36Sopenharmony_ci DMERR("%s: no snap to drop", __func__); 110062306a36Sopenharmony_ci return -EINVAL; 110162306a36Sopenharmony_ci } 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci r = dm_tm_read_lock(md->tm, md->metadata_snap, &sb_validator, &clone); 110462306a36Sopenharmony_ci if (r) { 110562306a36Sopenharmony_ci DMERR("%s: couldn't read lock superblock clone", __func__); 110662306a36Sopenharmony_ci return r; 110762306a36Sopenharmony_ci } 110862306a36Sopenharmony_ci 110962306a36Sopenharmony_ci /* 111062306a36Sopenharmony_ci * Whatever happens now we'll commit with no record of the metadata 111162306a36Sopenharmony_ci * snap. 111262306a36Sopenharmony_ci */ 111362306a36Sopenharmony_ci md->metadata_snap = SUPERBLOCK_LOCATION; 111462306a36Sopenharmony_ci 111562306a36Sopenharmony_ci disk = dm_block_data(clone); 111662306a36Sopenharmony_ci r = dm_btree_del(&md->writeset_tree_info, 111762306a36Sopenharmony_ci le64_to_cpu(disk->writeset_tree_root)); 111862306a36Sopenharmony_ci if (r) { 111962306a36Sopenharmony_ci DMERR("%s: error deleting writeset tree clone", __func__); 112062306a36Sopenharmony_ci dm_tm_unlock(md->tm, clone); 112162306a36Sopenharmony_ci return r; 112262306a36Sopenharmony_ci } 112362306a36Sopenharmony_ci 112462306a36Sopenharmony_ci r = dm_array_del(&md->era_array_info, le64_to_cpu(disk->era_array_root)); 112562306a36Sopenharmony_ci if (r) { 112662306a36Sopenharmony_ci DMERR("%s: error deleting era array clone", __func__); 112762306a36Sopenharmony_ci dm_tm_unlock(md->tm, clone); 112862306a36Sopenharmony_ci return r; 112962306a36Sopenharmony_ci } 113062306a36Sopenharmony_ci 113162306a36Sopenharmony_ci location = dm_block_location(clone); 113262306a36Sopenharmony_ci dm_tm_unlock(md->tm, clone); 113362306a36Sopenharmony_ci 113462306a36Sopenharmony_ci return dm_sm_dec_block(md->sm, location); 113562306a36Sopenharmony_ci} 113662306a36Sopenharmony_ci 113762306a36Sopenharmony_cistruct metadata_stats { 113862306a36Sopenharmony_ci dm_block_t used; 113962306a36Sopenharmony_ci dm_block_t total; 114062306a36Sopenharmony_ci dm_block_t snap; 114162306a36Sopenharmony_ci uint32_t era; 114262306a36Sopenharmony_ci}; 114362306a36Sopenharmony_ci 114462306a36Sopenharmony_cistatic int metadata_get_stats(struct era_metadata *md, void *ptr) 114562306a36Sopenharmony_ci{ 114662306a36Sopenharmony_ci int r; 114762306a36Sopenharmony_ci struct metadata_stats *s = ptr; 114862306a36Sopenharmony_ci dm_block_t nr_free, nr_total; 114962306a36Sopenharmony_ci 115062306a36Sopenharmony_ci r = dm_sm_get_nr_free(md->sm, &nr_free); 115162306a36Sopenharmony_ci if (r) { 115262306a36Sopenharmony_ci DMERR("dm_sm_get_nr_free returned %d", r); 115362306a36Sopenharmony_ci return r; 115462306a36Sopenharmony_ci } 115562306a36Sopenharmony_ci 115662306a36Sopenharmony_ci r = dm_sm_get_nr_blocks(md->sm, &nr_total); 115762306a36Sopenharmony_ci if (r) { 115862306a36Sopenharmony_ci DMERR("dm_pool_get_metadata_dev_size returned %d", r); 115962306a36Sopenharmony_ci return r; 116062306a36Sopenharmony_ci } 116162306a36Sopenharmony_ci 116262306a36Sopenharmony_ci s->used = nr_total - nr_free; 116362306a36Sopenharmony_ci s->total = nr_total; 116462306a36Sopenharmony_ci s->snap = md->metadata_snap; 116562306a36Sopenharmony_ci s->era = md->current_era; 116662306a36Sopenharmony_ci 116762306a36Sopenharmony_ci return 0; 116862306a36Sopenharmony_ci} 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 117162306a36Sopenharmony_ci 117262306a36Sopenharmony_cistruct era { 117362306a36Sopenharmony_ci struct dm_target *ti; 117462306a36Sopenharmony_ci 117562306a36Sopenharmony_ci struct dm_dev *metadata_dev; 117662306a36Sopenharmony_ci struct dm_dev *origin_dev; 117762306a36Sopenharmony_ci 117862306a36Sopenharmony_ci dm_block_t nr_blocks; 117962306a36Sopenharmony_ci uint32_t sectors_per_block; 118062306a36Sopenharmony_ci int sectors_per_block_shift; 118162306a36Sopenharmony_ci struct era_metadata *md; 118262306a36Sopenharmony_ci 118362306a36Sopenharmony_ci struct workqueue_struct *wq; 118462306a36Sopenharmony_ci struct work_struct worker; 118562306a36Sopenharmony_ci 118662306a36Sopenharmony_ci spinlock_t deferred_lock; 118762306a36Sopenharmony_ci struct bio_list deferred_bios; 118862306a36Sopenharmony_ci 118962306a36Sopenharmony_ci spinlock_t rpc_lock; 119062306a36Sopenharmony_ci struct list_head rpc_calls; 119162306a36Sopenharmony_ci 119262306a36Sopenharmony_ci struct digest digest; 119362306a36Sopenharmony_ci atomic_t suspended; 119462306a36Sopenharmony_ci}; 119562306a36Sopenharmony_ci 119662306a36Sopenharmony_cistruct rpc { 119762306a36Sopenharmony_ci struct list_head list; 119862306a36Sopenharmony_ci 119962306a36Sopenharmony_ci int (*fn0)(struct era_metadata *md); 120062306a36Sopenharmony_ci int (*fn1)(struct era_metadata *md, void *ref); 120162306a36Sopenharmony_ci void *arg; 120262306a36Sopenharmony_ci int result; 120362306a36Sopenharmony_ci 120462306a36Sopenharmony_ci struct completion complete; 120562306a36Sopenharmony_ci}; 120662306a36Sopenharmony_ci 120762306a36Sopenharmony_ci/* 120862306a36Sopenharmony_ci *--------------------------------------------------------------- 120962306a36Sopenharmony_ci * Remapping. 121062306a36Sopenharmony_ci *--------------------------------------------------------------- 121162306a36Sopenharmony_ci */ 121262306a36Sopenharmony_cistatic bool block_size_is_power_of_two(struct era *era) 121362306a36Sopenharmony_ci{ 121462306a36Sopenharmony_ci return era->sectors_per_block_shift >= 0; 121562306a36Sopenharmony_ci} 121662306a36Sopenharmony_ci 121762306a36Sopenharmony_cistatic dm_block_t get_block(struct era *era, struct bio *bio) 121862306a36Sopenharmony_ci{ 121962306a36Sopenharmony_ci sector_t block_nr = bio->bi_iter.bi_sector; 122062306a36Sopenharmony_ci 122162306a36Sopenharmony_ci if (!block_size_is_power_of_two(era)) 122262306a36Sopenharmony_ci (void) sector_div(block_nr, era->sectors_per_block); 122362306a36Sopenharmony_ci else 122462306a36Sopenharmony_ci block_nr >>= era->sectors_per_block_shift; 122562306a36Sopenharmony_ci 122662306a36Sopenharmony_ci return block_nr; 122762306a36Sopenharmony_ci} 122862306a36Sopenharmony_ci 122962306a36Sopenharmony_cistatic void remap_to_origin(struct era *era, struct bio *bio) 123062306a36Sopenharmony_ci{ 123162306a36Sopenharmony_ci bio_set_dev(bio, era->origin_dev->bdev); 123262306a36Sopenharmony_ci} 123362306a36Sopenharmony_ci 123462306a36Sopenharmony_ci/* 123562306a36Sopenharmony_ci *-------------------------------------------------------------- 123662306a36Sopenharmony_ci * Worker thread 123762306a36Sopenharmony_ci *-------------------------------------------------------------- 123862306a36Sopenharmony_ci */ 123962306a36Sopenharmony_cistatic void wake_worker(struct era *era) 124062306a36Sopenharmony_ci{ 124162306a36Sopenharmony_ci if (!atomic_read(&era->suspended)) 124262306a36Sopenharmony_ci queue_work(era->wq, &era->worker); 124362306a36Sopenharmony_ci} 124462306a36Sopenharmony_ci 124562306a36Sopenharmony_cistatic void process_old_eras(struct era *era) 124662306a36Sopenharmony_ci{ 124762306a36Sopenharmony_ci int r; 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_ci if (!era->digest.step) 125062306a36Sopenharmony_ci return; 125162306a36Sopenharmony_ci 125262306a36Sopenharmony_ci r = era->digest.step(era->md, &era->digest); 125362306a36Sopenharmony_ci if (r < 0) { 125462306a36Sopenharmony_ci DMERR("%s: digest step failed, stopping digestion", __func__); 125562306a36Sopenharmony_ci era->digest.step = NULL; 125662306a36Sopenharmony_ci 125762306a36Sopenharmony_ci } else if (era->digest.step) 125862306a36Sopenharmony_ci wake_worker(era); 125962306a36Sopenharmony_ci} 126062306a36Sopenharmony_ci 126162306a36Sopenharmony_cistatic void process_deferred_bios(struct era *era) 126262306a36Sopenharmony_ci{ 126362306a36Sopenharmony_ci int r; 126462306a36Sopenharmony_ci struct bio_list deferred_bios, marked_bios; 126562306a36Sopenharmony_ci struct bio *bio; 126662306a36Sopenharmony_ci struct blk_plug plug; 126762306a36Sopenharmony_ci bool commit_needed = false; 126862306a36Sopenharmony_ci bool failed = false; 126962306a36Sopenharmony_ci struct writeset *ws = era->md->current_writeset; 127062306a36Sopenharmony_ci 127162306a36Sopenharmony_ci bio_list_init(&deferred_bios); 127262306a36Sopenharmony_ci bio_list_init(&marked_bios); 127362306a36Sopenharmony_ci 127462306a36Sopenharmony_ci spin_lock(&era->deferred_lock); 127562306a36Sopenharmony_ci bio_list_merge(&deferred_bios, &era->deferred_bios); 127662306a36Sopenharmony_ci bio_list_init(&era->deferred_bios); 127762306a36Sopenharmony_ci spin_unlock(&era->deferred_lock); 127862306a36Sopenharmony_ci 127962306a36Sopenharmony_ci if (bio_list_empty(&deferred_bios)) 128062306a36Sopenharmony_ci return; 128162306a36Sopenharmony_ci 128262306a36Sopenharmony_ci while ((bio = bio_list_pop(&deferred_bios))) { 128362306a36Sopenharmony_ci r = writeset_test_and_set(&era->md->bitset_info, ws, 128462306a36Sopenharmony_ci get_block(era, bio)); 128562306a36Sopenharmony_ci if (r < 0) { 128662306a36Sopenharmony_ci /* 128762306a36Sopenharmony_ci * This is bad news, we need to rollback. 128862306a36Sopenharmony_ci * FIXME: finish. 128962306a36Sopenharmony_ci */ 129062306a36Sopenharmony_ci failed = true; 129162306a36Sopenharmony_ci } else if (r == 0) 129262306a36Sopenharmony_ci commit_needed = true; 129362306a36Sopenharmony_ci 129462306a36Sopenharmony_ci bio_list_add(&marked_bios, bio); 129562306a36Sopenharmony_ci } 129662306a36Sopenharmony_ci 129762306a36Sopenharmony_ci if (commit_needed) { 129862306a36Sopenharmony_ci r = metadata_commit(era->md); 129962306a36Sopenharmony_ci if (r) 130062306a36Sopenharmony_ci failed = true; 130162306a36Sopenharmony_ci } 130262306a36Sopenharmony_ci 130362306a36Sopenharmony_ci if (failed) 130462306a36Sopenharmony_ci while ((bio = bio_list_pop(&marked_bios))) 130562306a36Sopenharmony_ci bio_io_error(bio); 130662306a36Sopenharmony_ci else { 130762306a36Sopenharmony_ci blk_start_plug(&plug); 130862306a36Sopenharmony_ci while ((bio = bio_list_pop(&marked_bios))) { 130962306a36Sopenharmony_ci /* 131062306a36Sopenharmony_ci * Only update the in-core writeset if the on-disk one 131162306a36Sopenharmony_ci * was updated too. 131262306a36Sopenharmony_ci */ 131362306a36Sopenharmony_ci if (commit_needed) 131462306a36Sopenharmony_ci set_bit(get_block(era, bio), ws->bits); 131562306a36Sopenharmony_ci submit_bio_noacct(bio); 131662306a36Sopenharmony_ci } 131762306a36Sopenharmony_ci blk_finish_plug(&plug); 131862306a36Sopenharmony_ci } 131962306a36Sopenharmony_ci} 132062306a36Sopenharmony_ci 132162306a36Sopenharmony_cistatic void process_rpc_calls(struct era *era) 132262306a36Sopenharmony_ci{ 132362306a36Sopenharmony_ci int r; 132462306a36Sopenharmony_ci bool need_commit = false; 132562306a36Sopenharmony_ci struct list_head calls; 132662306a36Sopenharmony_ci struct rpc *rpc, *tmp; 132762306a36Sopenharmony_ci 132862306a36Sopenharmony_ci INIT_LIST_HEAD(&calls); 132962306a36Sopenharmony_ci spin_lock(&era->rpc_lock); 133062306a36Sopenharmony_ci list_splice_init(&era->rpc_calls, &calls); 133162306a36Sopenharmony_ci spin_unlock(&era->rpc_lock); 133262306a36Sopenharmony_ci 133362306a36Sopenharmony_ci list_for_each_entry_safe(rpc, tmp, &calls, list) { 133462306a36Sopenharmony_ci rpc->result = rpc->fn0 ? rpc->fn0(era->md) : rpc->fn1(era->md, rpc->arg); 133562306a36Sopenharmony_ci need_commit = true; 133662306a36Sopenharmony_ci } 133762306a36Sopenharmony_ci 133862306a36Sopenharmony_ci if (need_commit) { 133962306a36Sopenharmony_ci r = metadata_commit(era->md); 134062306a36Sopenharmony_ci if (r) 134162306a36Sopenharmony_ci list_for_each_entry_safe(rpc, tmp, &calls, list) 134262306a36Sopenharmony_ci rpc->result = r; 134362306a36Sopenharmony_ci } 134462306a36Sopenharmony_ci 134562306a36Sopenharmony_ci list_for_each_entry_safe(rpc, tmp, &calls, list) 134662306a36Sopenharmony_ci complete(&rpc->complete); 134762306a36Sopenharmony_ci} 134862306a36Sopenharmony_ci 134962306a36Sopenharmony_cistatic void kick_off_digest(struct era *era) 135062306a36Sopenharmony_ci{ 135162306a36Sopenharmony_ci if (era->md->archived_writesets) { 135262306a36Sopenharmony_ci era->md->archived_writesets = false; 135362306a36Sopenharmony_ci metadata_digest_start(era->md, &era->digest); 135462306a36Sopenharmony_ci } 135562306a36Sopenharmony_ci} 135662306a36Sopenharmony_ci 135762306a36Sopenharmony_cistatic void do_work(struct work_struct *ws) 135862306a36Sopenharmony_ci{ 135962306a36Sopenharmony_ci struct era *era = container_of(ws, struct era, worker); 136062306a36Sopenharmony_ci 136162306a36Sopenharmony_ci kick_off_digest(era); 136262306a36Sopenharmony_ci process_old_eras(era); 136362306a36Sopenharmony_ci process_deferred_bios(era); 136462306a36Sopenharmony_ci process_rpc_calls(era); 136562306a36Sopenharmony_ci} 136662306a36Sopenharmony_ci 136762306a36Sopenharmony_cistatic void defer_bio(struct era *era, struct bio *bio) 136862306a36Sopenharmony_ci{ 136962306a36Sopenharmony_ci spin_lock(&era->deferred_lock); 137062306a36Sopenharmony_ci bio_list_add(&era->deferred_bios, bio); 137162306a36Sopenharmony_ci spin_unlock(&era->deferred_lock); 137262306a36Sopenharmony_ci 137362306a36Sopenharmony_ci wake_worker(era); 137462306a36Sopenharmony_ci} 137562306a36Sopenharmony_ci 137662306a36Sopenharmony_ci/* 137762306a36Sopenharmony_ci * Make an rpc call to the worker to change the metadata. 137862306a36Sopenharmony_ci */ 137962306a36Sopenharmony_cistatic int perform_rpc(struct era *era, struct rpc *rpc) 138062306a36Sopenharmony_ci{ 138162306a36Sopenharmony_ci rpc->result = 0; 138262306a36Sopenharmony_ci init_completion(&rpc->complete); 138362306a36Sopenharmony_ci 138462306a36Sopenharmony_ci spin_lock(&era->rpc_lock); 138562306a36Sopenharmony_ci list_add(&rpc->list, &era->rpc_calls); 138662306a36Sopenharmony_ci spin_unlock(&era->rpc_lock); 138762306a36Sopenharmony_ci 138862306a36Sopenharmony_ci wake_worker(era); 138962306a36Sopenharmony_ci wait_for_completion(&rpc->complete); 139062306a36Sopenharmony_ci 139162306a36Sopenharmony_ci return rpc->result; 139262306a36Sopenharmony_ci} 139362306a36Sopenharmony_ci 139462306a36Sopenharmony_cistatic int in_worker0(struct era *era, int (*fn)(struct era_metadata *md)) 139562306a36Sopenharmony_ci{ 139662306a36Sopenharmony_ci struct rpc rpc; 139762306a36Sopenharmony_ci 139862306a36Sopenharmony_ci rpc.fn0 = fn; 139962306a36Sopenharmony_ci rpc.fn1 = NULL; 140062306a36Sopenharmony_ci 140162306a36Sopenharmony_ci return perform_rpc(era, &rpc); 140262306a36Sopenharmony_ci} 140362306a36Sopenharmony_ci 140462306a36Sopenharmony_cistatic int in_worker1(struct era *era, 140562306a36Sopenharmony_ci int (*fn)(struct era_metadata *md, void *ref), void *arg) 140662306a36Sopenharmony_ci{ 140762306a36Sopenharmony_ci struct rpc rpc; 140862306a36Sopenharmony_ci 140962306a36Sopenharmony_ci rpc.fn0 = NULL; 141062306a36Sopenharmony_ci rpc.fn1 = fn; 141162306a36Sopenharmony_ci rpc.arg = arg; 141262306a36Sopenharmony_ci 141362306a36Sopenharmony_ci return perform_rpc(era, &rpc); 141462306a36Sopenharmony_ci} 141562306a36Sopenharmony_ci 141662306a36Sopenharmony_cistatic void start_worker(struct era *era) 141762306a36Sopenharmony_ci{ 141862306a36Sopenharmony_ci atomic_set(&era->suspended, 0); 141962306a36Sopenharmony_ci} 142062306a36Sopenharmony_ci 142162306a36Sopenharmony_cistatic void stop_worker(struct era *era) 142262306a36Sopenharmony_ci{ 142362306a36Sopenharmony_ci atomic_set(&era->suspended, 1); 142462306a36Sopenharmony_ci drain_workqueue(era->wq); 142562306a36Sopenharmony_ci} 142662306a36Sopenharmony_ci 142762306a36Sopenharmony_ci/* 142862306a36Sopenharmony_ci *-------------------------------------------------------------- 142962306a36Sopenharmony_ci * Target methods 143062306a36Sopenharmony_ci *-------------------------------------------------------------- 143162306a36Sopenharmony_ci */ 143262306a36Sopenharmony_cistatic void era_destroy(struct era *era) 143362306a36Sopenharmony_ci{ 143462306a36Sopenharmony_ci if (era->md) 143562306a36Sopenharmony_ci metadata_close(era->md); 143662306a36Sopenharmony_ci 143762306a36Sopenharmony_ci if (era->wq) 143862306a36Sopenharmony_ci destroy_workqueue(era->wq); 143962306a36Sopenharmony_ci 144062306a36Sopenharmony_ci if (era->origin_dev) 144162306a36Sopenharmony_ci dm_put_device(era->ti, era->origin_dev); 144262306a36Sopenharmony_ci 144362306a36Sopenharmony_ci if (era->metadata_dev) 144462306a36Sopenharmony_ci dm_put_device(era->ti, era->metadata_dev); 144562306a36Sopenharmony_ci 144662306a36Sopenharmony_ci kfree(era); 144762306a36Sopenharmony_ci} 144862306a36Sopenharmony_ci 144962306a36Sopenharmony_cistatic dm_block_t calc_nr_blocks(struct era *era) 145062306a36Sopenharmony_ci{ 145162306a36Sopenharmony_ci return dm_sector_div_up(era->ti->len, era->sectors_per_block); 145262306a36Sopenharmony_ci} 145362306a36Sopenharmony_ci 145462306a36Sopenharmony_cistatic bool valid_block_size(dm_block_t block_size) 145562306a36Sopenharmony_ci{ 145662306a36Sopenharmony_ci bool greater_than_zero = block_size > 0; 145762306a36Sopenharmony_ci bool multiple_of_min_block_size = (block_size & (MIN_BLOCK_SIZE - 1)) == 0; 145862306a36Sopenharmony_ci 145962306a36Sopenharmony_ci return greater_than_zero && multiple_of_min_block_size; 146062306a36Sopenharmony_ci} 146162306a36Sopenharmony_ci 146262306a36Sopenharmony_ci/* 146362306a36Sopenharmony_ci * <metadata dev> <data dev> <data block size (sectors)> 146462306a36Sopenharmony_ci */ 146562306a36Sopenharmony_cistatic int era_ctr(struct dm_target *ti, unsigned int argc, char **argv) 146662306a36Sopenharmony_ci{ 146762306a36Sopenharmony_ci int r; 146862306a36Sopenharmony_ci char dummy; 146962306a36Sopenharmony_ci struct era *era; 147062306a36Sopenharmony_ci struct era_metadata *md; 147162306a36Sopenharmony_ci 147262306a36Sopenharmony_ci if (argc != 3) { 147362306a36Sopenharmony_ci ti->error = "Invalid argument count"; 147462306a36Sopenharmony_ci return -EINVAL; 147562306a36Sopenharmony_ci } 147662306a36Sopenharmony_ci 147762306a36Sopenharmony_ci era = kzalloc(sizeof(*era), GFP_KERNEL); 147862306a36Sopenharmony_ci if (!era) { 147962306a36Sopenharmony_ci ti->error = "Error allocating era structure"; 148062306a36Sopenharmony_ci return -ENOMEM; 148162306a36Sopenharmony_ci } 148262306a36Sopenharmony_ci 148362306a36Sopenharmony_ci era->ti = ti; 148462306a36Sopenharmony_ci 148562306a36Sopenharmony_ci r = dm_get_device(ti, argv[0], BLK_OPEN_READ | BLK_OPEN_WRITE, 148662306a36Sopenharmony_ci &era->metadata_dev); 148762306a36Sopenharmony_ci if (r) { 148862306a36Sopenharmony_ci ti->error = "Error opening metadata device"; 148962306a36Sopenharmony_ci era_destroy(era); 149062306a36Sopenharmony_ci return -EINVAL; 149162306a36Sopenharmony_ci } 149262306a36Sopenharmony_ci 149362306a36Sopenharmony_ci r = dm_get_device(ti, argv[1], BLK_OPEN_READ | BLK_OPEN_WRITE, 149462306a36Sopenharmony_ci &era->origin_dev); 149562306a36Sopenharmony_ci if (r) { 149662306a36Sopenharmony_ci ti->error = "Error opening data device"; 149762306a36Sopenharmony_ci era_destroy(era); 149862306a36Sopenharmony_ci return -EINVAL; 149962306a36Sopenharmony_ci } 150062306a36Sopenharmony_ci 150162306a36Sopenharmony_ci r = sscanf(argv[2], "%u%c", &era->sectors_per_block, &dummy); 150262306a36Sopenharmony_ci if (r != 1) { 150362306a36Sopenharmony_ci ti->error = "Error parsing block size"; 150462306a36Sopenharmony_ci era_destroy(era); 150562306a36Sopenharmony_ci return -EINVAL; 150662306a36Sopenharmony_ci } 150762306a36Sopenharmony_ci 150862306a36Sopenharmony_ci r = dm_set_target_max_io_len(ti, era->sectors_per_block); 150962306a36Sopenharmony_ci if (r) { 151062306a36Sopenharmony_ci ti->error = "could not set max io len"; 151162306a36Sopenharmony_ci era_destroy(era); 151262306a36Sopenharmony_ci return -EINVAL; 151362306a36Sopenharmony_ci } 151462306a36Sopenharmony_ci 151562306a36Sopenharmony_ci if (!valid_block_size(era->sectors_per_block)) { 151662306a36Sopenharmony_ci ti->error = "Invalid block size"; 151762306a36Sopenharmony_ci era_destroy(era); 151862306a36Sopenharmony_ci return -EINVAL; 151962306a36Sopenharmony_ci } 152062306a36Sopenharmony_ci if (era->sectors_per_block & (era->sectors_per_block - 1)) 152162306a36Sopenharmony_ci era->sectors_per_block_shift = -1; 152262306a36Sopenharmony_ci else 152362306a36Sopenharmony_ci era->sectors_per_block_shift = __ffs(era->sectors_per_block); 152462306a36Sopenharmony_ci 152562306a36Sopenharmony_ci md = metadata_open(era->metadata_dev->bdev, era->sectors_per_block, true); 152662306a36Sopenharmony_ci if (IS_ERR(md)) { 152762306a36Sopenharmony_ci ti->error = "Error reading metadata"; 152862306a36Sopenharmony_ci era_destroy(era); 152962306a36Sopenharmony_ci return PTR_ERR(md); 153062306a36Sopenharmony_ci } 153162306a36Sopenharmony_ci era->md = md; 153262306a36Sopenharmony_ci 153362306a36Sopenharmony_ci era->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM); 153462306a36Sopenharmony_ci if (!era->wq) { 153562306a36Sopenharmony_ci ti->error = "could not create workqueue for metadata object"; 153662306a36Sopenharmony_ci era_destroy(era); 153762306a36Sopenharmony_ci return -ENOMEM; 153862306a36Sopenharmony_ci } 153962306a36Sopenharmony_ci INIT_WORK(&era->worker, do_work); 154062306a36Sopenharmony_ci 154162306a36Sopenharmony_ci spin_lock_init(&era->deferred_lock); 154262306a36Sopenharmony_ci bio_list_init(&era->deferred_bios); 154362306a36Sopenharmony_ci 154462306a36Sopenharmony_ci spin_lock_init(&era->rpc_lock); 154562306a36Sopenharmony_ci INIT_LIST_HEAD(&era->rpc_calls); 154662306a36Sopenharmony_ci 154762306a36Sopenharmony_ci ti->private = era; 154862306a36Sopenharmony_ci ti->num_flush_bios = 1; 154962306a36Sopenharmony_ci ti->flush_supported = true; 155062306a36Sopenharmony_ci 155162306a36Sopenharmony_ci ti->num_discard_bios = 1; 155262306a36Sopenharmony_ci 155362306a36Sopenharmony_ci return 0; 155462306a36Sopenharmony_ci} 155562306a36Sopenharmony_ci 155662306a36Sopenharmony_cistatic void era_dtr(struct dm_target *ti) 155762306a36Sopenharmony_ci{ 155862306a36Sopenharmony_ci era_destroy(ti->private); 155962306a36Sopenharmony_ci} 156062306a36Sopenharmony_ci 156162306a36Sopenharmony_cistatic int era_map(struct dm_target *ti, struct bio *bio) 156262306a36Sopenharmony_ci{ 156362306a36Sopenharmony_ci struct era *era = ti->private; 156462306a36Sopenharmony_ci dm_block_t block = get_block(era, bio); 156562306a36Sopenharmony_ci 156662306a36Sopenharmony_ci /* 156762306a36Sopenharmony_ci * All bios get remapped to the origin device. We do this now, but 156862306a36Sopenharmony_ci * it may not get issued until later. Depending on whether the 156962306a36Sopenharmony_ci * block is marked in this era. 157062306a36Sopenharmony_ci */ 157162306a36Sopenharmony_ci remap_to_origin(era, bio); 157262306a36Sopenharmony_ci 157362306a36Sopenharmony_ci /* 157462306a36Sopenharmony_ci * REQ_PREFLUSH bios carry no data, so we're not interested in them. 157562306a36Sopenharmony_ci */ 157662306a36Sopenharmony_ci if (!(bio->bi_opf & REQ_PREFLUSH) && 157762306a36Sopenharmony_ci (bio_data_dir(bio) == WRITE) && 157862306a36Sopenharmony_ci !metadata_current_marked(era->md, block)) { 157962306a36Sopenharmony_ci defer_bio(era, bio); 158062306a36Sopenharmony_ci return DM_MAPIO_SUBMITTED; 158162306a36Sopenharmony_ci } 158262306a36Sopenharmony_ci 158362306a36Sopenharmony_ci return DM_MAPIO_REMAPPED; 158462306a36Sopenharmony_ci} 158562306a36Sopenharmony_ci 158662306a36Sopenharmony_cistatic void era_postsuspend(struct dm_target *ti) 158762306a36Sopenharmony_ci{ 158862306a36Sopenharmony_ci int r; 158962306a36Sopenharmony_ci struct era *era = ti->private; 159062306a36Sopenharmony_ci 159162306a36Sopenharmony_ci r = in_worker0(era, metadata_era_archive); 159262306a36Sopenharmony_ci if (r) { 159362306a36Sopenharmony_ci DMERR("%s: couldn't archive current era", __func__); 159462306a36Sopenharmony_ci /* FIXME: fail mode */ 159562306a36Sopenharmony_ci } 159662306a36Sopenharmony_ci 159762306a36Sopenharmony_ci stop_worker(era); 159862306a36Sopenharmony_ci 159962306a36Sopenharmony_ci r = metadata_commit(era->md); 160062306a36Sopenharmony_ci if (r) { 160162306a36Sopenharmony_ci DMERR("%s: metadata_commit failed", __func__); 160262306a36Sopenharmony_ci /* FIXME: fail mode */ 160362306a36Sopenharmony_ci } 160462306a36Sopenharmony_ci} 160562306a36Sopenharmony_ci 160662306a36Sopenharmony_cistatic int era_preresume(struct dm_target *ti) 160762306a36Sopenharmony_ci{ 160862306a36Sopenharmony_ci int r; 160962306a36Sopenharmony_ci struct era *era = ti->private; 161062306a36Sopenharmony_ci dm_block_t new_size = calc_nr_blocks(era); 161162306a36Sopenharmony_ci 161262306a36Sopenharmony_ci if (era->nr_blocks != new_size) { 161362306a36Sopenharmony_ci r = metadata_resize(era->md, &new_size); 161462306a36Sopenharmony_ci if (r) { 161562306a36Sopenharmony_ci DMERR("%s: metadata_resize failed", __func__); 161662306a36Sopenharmony_ci return r; 161762306a36Sopenharmony_ci } 161862306a36Sopenharmony_ci 161962306a36Sopenharmony_ci r = metadata_commit(era->md); 162062306a36Sopenharmony_ci if (r) { 162162306a36Sopenharmony_ci DMERR("%s: metadata_commit failed", __func__); 162262306a36Sopenharmony_ci return r; 162362306a36Sopenharmony_ci } 162462306a36Sopenharmony_ci 162562306a36Sopenharmony_ci era->nr_blocks = new_size; 162662306a36Sopenharmony_ci } 162762306a36Sopenharmony_ci 162862306a36Sopenharmony_ci start_worker(era); 162962306a36Sopenharmony_ci 163062306a36Sopenharmony_ci r = in_worker0(era, metadata_era_rollover); 163162306a36Sopenharmony_ci if (r) { 163262306a36Sopenharmony_ci DMERR("%s: metadata_era_rollover failed", __func__); 163362306a36Sopenharmony_ci return r; 163462306a36Sopenharmony_ci } 163562306a36Sopenharmony_ci 163662306a36Sopenharmony_ci return 0; 163762306a36Sopenharmony_ci} 163862306a36Sopenharmony_ci 163962306a36Sopenharmony_ci/* 164062306a36Sopenharmony_ci * Status format: 164162306a36Sopenharmony_ci * 164262306a36Sopenharmony_ci * <metadata block size> <#used metadata blocks>/<#total metadata blocks> 164362306a36Sopenharmony_ci * <current era> <held metadata root | '-'> 164462306a36Sopenharmony_ci */ 164562306a36Sopenharmony_cistatic void era_status(struct dm_target *ti, status_type_t type, 164662306a36Sopenharmony_ci unsigned int status_flags, char *result, unsigned int maxlen) 164762306a36Sopenharmony_ci{ 164862306a36Sopenharmony_ci int r; 164962306a36Sopenharmony_ci struct era *era = ti->private; 165062306a36Sopenharmony_ci ssize_t sz = 0; 165162306a36Sopenharmony_ci struct metadata_stats stats; 165262306a36Sopenharmony_ci char buf[BDEVNAME_SIZE]; 165362306a36Sopenharmony_ci 165462306a36Sopenharmony_ci switch (type) { 165562306a36Sopenharmony_ci case STATUSTYPE_INFO: 165662306a36Sopenharmony_ci r = in_worker1(era, metadata_get_stats, &stats); 165762306a36Sopenharmony_ci if (r) 165862306a36Sopenharmony_ci goto err; 165962306a36Sopenharmony_ci 166062306a36Sopenharmony_ci DMEMIT("%u %llu/%llu %u", 166162306a36Sopenharmony_ci (unsigned int) (DM_ERA_METADATA_BLOCK_SIZE >> SECTOR_SHIFT), 166262306a36Sopenharmony_ci (unsigned long long) stats.used, 166362306a36Sopenharmony_ci (unsigned long long) stats.total, 166462306a36Sopenharmony_ci (unsigned int) stats.era); 166562306a36Sopenharmony_ci 166662306a36Sopenharmony_ci if (stats.snap != SUPERBLOCK_LOCATION) 166762306a36Sopenharmony_ci DMEMIT(" %llu", stats.snap); 166862306a36Sopenharmony_ci else 166962306a36Sopenharmony_ci DMEMIT(" -"); 167062306a36Sopenharmony_ci break; 167162306a36Sopenharmony_ci 167262306a36Sopenharmony_ci case STATUSTYPE_TABLE: 167362306a36Sopenharmony_ci format_dev_t(buf, era->metadata_dev->bdev->bd_dev); 167462306a36Sopenharmony_ci DMEMIT("%s ", buf); 167562306a36Sopenharmony_ci format_dev_t(buf, era->origin_dev->bdev->bd_dev); 167662306a36Sopenharmony_ci DMEMIT("%s %u", buf, era->sectors_per_block); 167762306a36Sopenharmony_ci break; 167862306a36Sopenharmony_ci 167962306a36Sopenharmony_ci case STATUSTYPE_IMA: 168062306a36Sopenharmony_ci *result = '\0'; 168162306a36Sopenharmony_ci break; 168262306a36Sopenharmony_ci } 168362306a36Sopenharmony_ci 168462306a36Sopenharmony_ci return; 168562306a36Sopenharmony_ci 168662306a36Sopenharmony_cierr: 168762306a36Sopenharmony_ci DMEMIT("Error"); 168862306a36Sopenharmony_ci} 168962306a36Sopenharmony_ci 169062306a36Sopenharmony_cistatic int era_message(struct dm_target *ti, unsigned int argc, char **argv, 169162306a36Sopenharmony_ci char *result, unsigned int maxlen) 169262306a36Sopenharmony_ci{ 169362306a36Sopenharmony_ci struct era *era = ti->private; 169462306a36Sopenharmony_ci 169562306a36Sopenharmony_ci if (argc != 1) { 169662306a36Sopenharmony_ci DMERR("incorrect number of message arguments"); 169762306a36Sopenharmony_ci return -EINVAL; 169862306a36Sopenharmony_ci } 169962306a36Sopenharmony_ci 170062306a36Sopenharmony_ci if (!strcasecmp(argv[0], "checkpoint")) 170162306a36Sopenharmony_ci return in_worker0(era, metadata_checkpoint); 170262306a36Sopenharmony_ci 170362306a36Sopenharmony_ci if (!strcasecmp(argv[0], "take_metadata_snap")) 170462306a36Sopenharmony_ci return in_worker0(era, metadata_take_snap); 170562306a36Sopenharmony_ci 170662306a36Sopenharmony_ci if (!strcasecmp(argv[0], "drop_metadata_snap")) 170762306a36Sopenharmony_ci return in_worker0(era, metadata_drop_snap); 170862306a36Sopenharmony_ci 170962306a36Sopenharmony_ci DMERR("unsupported message '%s'", argv[0]); 171062306a36Sopenharmony_ci return -EINVAL; 171162306a36Sopenharmony_ci} 171262306a36Sopenharmony_ci 171362306a36Sopenharmony_cistatic sector_t get_dev_size(struct dm_dev *dev) 171462306a36Sopenharmony_ci{ 171562306a36Sopenharmony_ci return bdev_nr_sectors(dev->bdev); 171662306a36Sopenharmony_ci} 171762306a36Sopenharmony_ci 171862306a36Sopenharmony_cistatic int era_iterate_devices(struct dm_target *ti, 171962306a36Sopenharmony_ci iterate_devices_callout_fn fn, void *data) 172062306a36Sopenharmony_ci{ 172162306a36Sopenharmony_ci struct era *era = ti->private; 172262306a36Sopenharmony_ci 172362306a36Sopenharmony_ci return fn(ti, era->origin_dev, 0, get_dev_size(era->origin_dev), data); 172462306a36Sopenharmony_ci} 172562306a36Sopenharmony_ci 172662306a36Sopenharmony_cistatic void era_io_hints(struct dm_target *ti, struct queue_limits *limits) 172762306a36Sopenharmony_ci{ 172862306a36Sopenharmony_ci struct era *era = ti->private; 172962306a36Sopenharmony_ci uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT; 173062306a36Sopenharmony_ci 173162306a36Sopenharmony_ci /* 173262306a36Sopenharmony_ci * If the system-determined stacked limits are compatible with the 173362306a36Sopenharmony_ci * era device's blocksize (io_opt is a factor) do not override them. 173462306a36Sopenharmony_ci */ 173562306a36Sopenharmony_ci if (io_opt_sectors < era->sectors_per_block || 173662306a36Sopenharmony_ci do_div(io_opt_sectors, era->sectors_per_block)) { 173762306a36Sopenharmony_ci blk_limits_io_min(limits, 0); 173862306a36Sopenharmony_ci blk_limits_io_opt(limits, era->sectors_per_block << SECTOR_SHIFT); 173962306a36Sopenharmony_ci } 174062306a36Sopenharmony_ci} 174162306a36Sopenharmony_ci 174262306a36Sopenharmony_ci/*----------------------------------------------------------------*/ 174362306a36Sopenharmony_ci 174462306a36Sopenharmony_cistatic struct target_type era_target = { 174562306a36Sopenharmony_ci .name = "era", 174662306a36Sopenharmony_ci .version = {1, 0, 0}, 174762306a36Sopenharmony_ci .module = THIS_MODULE, 174862306a36Sopenharmony_ci .ctr = era_ctr, 174962306a36Sopenharmony_ci .dtr = era_dtr, 175062306a36Sopenharmony_ci .map = era_map, 175162306a36Sopenharmony_ci .postsuspend = era_postsuspend, 175262306a36Sopenharmony_ci .preresume = era_preresume, 175362306a36Sopenharmony_ci .status = era_status, 175462306a36Sopenharmony_ci .message = era_message, 175562306a36Sopenharmony_ci .iterate_devices = era_iterate_devices, 175662306a36Sopenharmony_ci .io_hints = era_io_hints 175762306a36Sopenharmony_ci}; 175862306a36Sopenharmony_cimodule_dm(era); 175962306a36Sopenharmony_ci 176062306a36Sopenharmony_ciMODULE_DESCRIPTION(DM_NAME " era target"); 176162306a36Sopenharmony_ciMODULE_AUTHOR("Joe Thornber <ejt@redhat.com>"); 176262306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 1763