18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright (C) 2001-2002 Sistina Software (UK) Limited. 38c2ecf20Sopenharmony_ci * Copyright (C) 2006-2008 Red Hat GmbH 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * This file is released under the GPL. 68c2ecf20Sopenharmony_ci */ 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci#include "dm-exception-store.h" 98c2ecf20Sopenharmony_ci 108c2ecf20Sopenharmony_ci#include <linux/ctype.h> 118c2ecf20Sopenharmony_ci#include <linux/mm.h> 128c2ecf20Sopenharmony_ci#include <linux/pagemap.h> 138c2ecf20Sopenharmony_ci#include <linux/vmalloc.h> 148c2ecf20Sopenharmony_ci#include <linux/export.h> 158c2ecf20Sopenharmony_ci#include <linux/slab.h> 168c2ecf20Sopenharmony_ci#include <linux/dm-io.h> 178c2ecf20Sopenharmony_ci#include <linux/dm-bufio.h> 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci#define DM_MSG_PREFIX "persistent snapshot" 208c2ecf20Sopenharmony_ci#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32U /* 16KB */ 218c2ecf20Sopenharmony_ci 228c2ecf20Sopenharmony_ci#define DM_PREFETCH_CHUNKS 12 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci/*----------------------------------------------------------------- 258c2ecf20Sopenharmony_ci * Persistent snapshots, by persistent we mean that the snapshot 268c2ecf20Sopenharmony_ci * will survive a reboot. 278c2ecf20Sopenharmony_ci *---------------------------------------------------------------*/ 288c2ecf20Sopenharmony_ci 298c2ecf20Sopenharmony_ci/* 308c2ecf20Sopenharmony_ci * We need to store a record of which parts of the origin have 318c2ecf20Sopenharmony_ci * been copied to the snapshot device. The snapshot code 328c2ecf20Sopenharmony_ci * requires that we copy exception chunks to chunk aligned areas 338c2ecf20Sopenharmony_ci * of the COW store. It makes sense therefore, to store the 348c2ecf20Sopenharmony_ci * metadata in chunk size blocks. 358c2ecf20Sopenharmony_ci * 368c2ecf20Sopenharmony_ci * There is no backward or forward compatibility implemented, 378c2ecf20Sopenharmony_ci * snapshots with different disk versions than the kernel will 388c2ecf20Sopenharmony_ci * not be usable. It is expected that "lvcreate" will blank out 398c2ecf20Sopenharmony_ci * the start of a fresh COW device before calling the snapshot 408c2ecf20Sopenharmony_ci * constructor. 418c2ecf20Sopenharmony_ci * 428c2ecf20Sopenharmony_ci * The first chunk of the COW device just contains the header. 438c2ecf20Sopenharmony_ci * After this there is a chunk filled with exception metadata, 448c2ecf20Sopenharmony_ci * followed by as many exception chunks as can fit in the 458c2ecf20Sopenharmony_ci * metadata areas. 468c2ecf20Sopenharmony_ci * 478c2ecf20Sopenharmony_ci * All on disk structures are in little-endian format. The end 488c2ecf20Sopenharmony_ci * of the exceptions info is indicated by an exception with a 498c2ecf20Sopenharmony_ci * new_chunk of 0, which is invalid since it would point to the 508c2ecf20Sopenharmony_ci * header chunk. 518c2ecf20Sopenharmony_ci */ 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_ci/* 548c2ecf20Sopenharmony_ci * Magic for persistent snapshots: "SnAp" - Feeble isn't it. 558c2ecf20Sopenharmony_ci */ 568c2ecf20Sopenharmony_ci#define SNAP_MAGIC 0x70416e53 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_ci/* 598c2ecf20Sopenharmony_ci * The on-disk version of the metadata. 608c2ecf20Sopenharmony_ci */ 618c2ecf20Sopenharmony_ci#define SNAPSHOT_DISK_VERSION 1 628c2ecf20Sopenharmony_ci 638c2ecf20Sopenharmony_ci#define NUM_SNAPSHOT_HDR_CHUNKS 1 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_cistruct disk_header { 668c2ecf20Sopenharmony_ci __le32 magic; 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_ci /* 698c2ecf20Sopenharmony_ci * Is this snapshot valid. There is no way of recovering 708c2ecf20Sopenharmony_ci * an invalid snapshot. 718c2ecf20Sopenharmony_ci */ 728c2ecf20Sopenharmony_ci __le32 valid; 738c2ecf20Sopenharmony_ci 748c2ecf20Sopenharmony_ci /* 758c2ecf20Sopenharmony_ci * Simple, incrementing version. no backward 768c2ecf20Sopenharmony_ci * compatibility. 778c2ecf20Sopenharmony_ci */ 788c2ecf20Sopenharmony_ci __le32 version; 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci /* In sectors */ 818c2ecf20Sopenharmony_ci __le32 chunk_size; 828c2ecf20Sopenharmony_ci} __packed; 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_cistruct disk_exception { 858c2ecf20Sopenharmony_ci __le64 old_chunk; 868c2ecf20Sopenharmony_ci __le64 new_chunk; 878c2ecf20Sopenharmony_ci} __packed; 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_cistruct core_exception { 908c2ecf20Sopenharmony_ci uint64_t old_chunk; 918c2ecf20Sopenharmony_ci uint64_t new_chunk; 928c2ecf20Sopenharmony_ci}; 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_cistruct commit_callback { 958c2ecf20Sopenharmony_ci void (*callback)(void *, int success); 968c2ecf20Sopenharmony_ci void *context; 978c2ecf20Sopenharmony_ci}; 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci/* 1008c2ecf20Sopenharmony_ci * The top level structure for a persistent exception store. 1018c2ecf20Sopenharmony_ci */ 1028c2ecf20Sopenharmony_cistruct pstore { 1038c2ecf20Sopenharmony_ci struct dm_exception_store *store; 1048c2ecf20Sopenharmony_ci int version; 1058c2ecf20Sopenharmony_ci int valid; 1068c2ecf20Sopenharmony_ci uint32_t exceptions_per_area; 1078c2ecf20Sopenharmony_ci 1088c2ecf20Sopenharmony_ci /* 1098c2ecf20Sopenharmony_ci * Now that we have an asynchronous kcopyd there is no 1108c2ecf20Sopenharmony_ci * need for large chunk sizes, so it wont hurt to have a 1118c2ecf20Sopenharmony_ci * whole chunks worth of metadata in memory at once. 1128c2ecf20Sopenharmony_ci */ 1138c2ecf20Sopenharmony_ci void *area; 1148c2ecf20Sopenharmony_ci 1158c2ecf20Sopenharmony_ci /* 1168c2ecf20Sopenharmony_ci * An area of zeros used to clear the next area. 1178c2ecf20Sopenharmony_ci */ 1188c2ecf20Sopenharmony_ci void *zero_area; 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci /* 1218c2ecf20Sopenharmony_ci * An area used for header. The header can be written 1228c2ecf20Sopenharmony_ci * concurrently with metadata (when invalidating the snapshot), 1238c2ecf20Sopenharmony_ci * so it needs a separate buffer. 1248c2ecf20Sopenharmony_ci */ 1258c2ecf20Sopenharmony_ci void *header_area; 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci /* 1288c2ecf20Sopenharmony_ci * Used to keep track of which metadata area the data in 1298c2ecf20Sopenharmony_ci * 'chunk' refers to. 1308c2ecf20Sopenharmony_ci */ 1318c2ecf20Sopenharmony_ci chunk_t current_area; 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_ci /* 1348c2ecf20Sopenharmony_ci * The next free chunk for an exception. 1358c2ecf20Sopenharmony_ci * 1368c2ecf20Sopenharmony_ci * When creating exceptions, all the chunks here and above are 1378c2ecf20Sopenharmony_ci * free. It holds the next chunk to be allocated. On rare 1388c2ecf20Sopenharmony_ci * occasions (e.g. after a system crash) holes can be left in 1398c2ecf20Sopenharmony_ci * the exception store because chunks can be committed out of 1408c2ecf20Sopenharmony_ci * order. 1418c2ecf20Sopenharmony_ci * 1428c2ecf20Sopenharmony_ci * When merging exceptions, it does not necessarily mean all the 1438c2ecf20Sopenharmony_ci * chunks here and above are free. It holds the value it would 1448c2ecf20Sopenharmony_ci * have held if all chunks had been committed in order of 1458c2ecf20Sopenharmony_ci * allocation. Consequently the value may occasionally be 1468c2ecf20Sopenharmony_ci * slightly too low, but since it's only used for 'status' and 1478c2ecf20Sopenharmony_ci * it can never reach its minimum value too early this doesn't 1488c2ecf20Sopenharmony_ci * matter. 1498c2ecf20Sopenharmony_ci */ 1508c2ecf20Sopenharmony_ci 1518c2ecf20Sopenharmony_ci chunk_t next_free; 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci /* 1548c2ecf20Sopenharmony_ci * The index of next free exception in the current 1558c2ecf20Sopenharmony_ci * metadata area. 1568c2ecf20Sopenharmony_ci */ 1578c2ecf20Sopenharmony_ci uint32_t current_committed; 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci atomic_t pending_count; 1608c2ecf20Sopenharmony_ci uint32_t callback_count; 1618c2ecf20Sopenharmony_ci struct commit_callback *callbacks; 1628c2ecf20Sopenharmony_ci struct dm_io_client *io_client; 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_ci struct workqueue_struct *metadata_wq; 1658c2ecf20Sopenharmony_ci}; 1668c2ecf20Sopenharmony_ci 1678c2ecf20Sopenharmony_cistatic int alloc_area(struct pstore *ps) 1688c2ecf20Sopenharmony_ci{ 1698c2ecf20Sopenharmony_ci int r = -ENOMEM; 1708c2ecf20Sopenharmony_ci size_t len; 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci len = ps->store->chunk_size << SECTOR_SHIFT; 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_ci /* 1758c2ecf20Sopenharmony_ci * Allocate the chunk_size block of memory that will hold 1768c2ecf20Sopenharmony_ci * a single metadata area. 1778c2ecf20Sopenharmony_ci */ 1788c2ecf20Sopenharmony_ci ps->area = vmalloc(len); 1798c2ecf20Sopenharmony_ci if (!ps->area) 1808c2ecf20Sopenharmony_ci goto err_area; 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_ci ps->zero_area = vzalloc(len); 1838c2ecf20Sopenharmony_ci if (!ps->zero_area) 1848c2ecf20Sopenharmony_ci goto err_zero_area; 1858c2ecf20Sopenharmony_ci 1868c2ecf20Sopenharmony_ci ps->header_area = vmalloc(len); 1878c2ecf20Sopenharmony_ci if (!ps->header_area) 1888c2ecf20Sopenharmony_ci goto err_header_area; 1898c2ecf20Sopenharmony_ci 1908c2ecf20Sopenharmony_ci return 0; 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_cierr_header_area: 1938c2ecf20Sopenharmony_ci vfree(ps->zero_area); 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_cierr_zero_area: 1968c2ecf20Sopenharmony_ci vfree(ps->area); 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_cierr_area: 1998c2ecf20Sopenharmony_ci return r; 2008c2ecf20Sopenharmony_ci} 2018c2ecf20Sopenharmony_ci 2028c2ecf20Sopenharmony_cistatic void free_area(struct pstore *ps) 2038c2ecf20Sopenharmony_ci{ 2048c2ecf20Sopenharmony_ci vfree(ps->area); 2058c2ecf20Sopenharmony_ci ps->area = NULL; 2068c2ecf20Sopenharmony_ci vfree(ps->zero_area); 2078c2ecf20Sopenharmony_ci ps->zero_area = NULL; 2088c2ecf20Sopenharmony_ci vfree(ps->header_area); 2098c2ecf20Sopenharmony_ci ps->header_area = NULL; 2108c2ecf20Sopenharmony_ci} 2118c2ecf20Sopenharmony_ci 2128c2ecf20Sopenharmony_cistruct mdata_req { 2138c2ecf20Sopenharmony_ci struct dm_io_region *where; 2148c2ecf20Sopenharmony_ci struct dm_io_request *io_req; 2158c2ecf20Sopenharmony_ci struct work_struct work; 2168c2ecf20Sopenharmony_ci int result; 2178c2ecf20Sopenharmony_ci}; 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_cistatic void do_metadata(struct work_struct *work) 2208c2ecf20Sopenharmony_ci{ 2218c2ecf20Sopenharmony_ci struct mdata_req *req = container_of(work, struct mdata_req, work); 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci req->result = dm_io(req->io_req, 1, req->where, NULL); 2248c2ecf20Sopenharmony_ci} 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_ci/* 2278c2ecf20Sopenharmony_ci * Read or write a chunk aligned and sized block of data from a device. 2288c2ecf20Sopenharmony_ci */ 2298c2ecf20Sopenharmony_cistatic int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int op, 2308c2ecf20Sopenharmony_ci int op_flags, int metadata) 2318c2ecf20Sopenharmony_ci{ 2328c2ecf20Sopenharmony_ci struct dm_io_region where = { 2338c2ecf20Sopenharmony_ci .bdev = dm_snap_cow(ps->store->snap)->bdev, 2348c2ecf20Sopenharmony_ci .sector = ps->store->chunk_size * chunk, 2358c2ecf20Sopenharmony_ci .count = ps->store->chunk_size, 2368c2ecf20Sopenharmony_ci }; 2378c2ecf20Sopenharmony_ci struct dm_io_request io_req = { 2388c2ecf20Sopenharmony_ci .bi_op = op, 2398c2ecf20Sopenharmony_ci .bi_op_flags = op_flags, 2408c2ecf20Sopenharmony_ci .mem.type = DM_IO_VMA, 2418c2ecf20Sopenharmony_ci .mem.ptr.vma = area, 2428c2ecf20Sopenharmony_ci .client = ps->io_client, 2438c2ecf20Sopenharmony_ci .notify.fn = NULL, 2448c2ecf20Sopenharmony_ci }; 2458c2ecf20Sopenharmony_ci struct mdata_req req; 2468c2ecf20Sopenharmony_ci 2478c2ecf20Sopenharmony_ci if (!metadata) 2488c2ecf20Sopenharmony_ci return dm_io(&io_req, 1, &where, NULL); 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_ci req.where = &where; 2518c2ecf20Sopenharmony_ci req.io_req = &io_req; 2528c2ecf20Sopenharmony_ci 2538c2ecf20Sopenharmony_ci /* 2548c2ecf20Sopenharmony_ci * Issue the synchronous I/O from a different thread 2558c2ecf20Sopenharmony_ci * to avoid submit_bio_noacct recursion. 2568c2ecf20Sopenharmony_ci */ 2578c2ecf20Sopenharmony_ci INIT_WORK_ONSTACK(&req.work, do_metadata); 2588c2ecf20Sopenharmony_ci queue_work(ps->metadata_wq, &req.work); 2598c2ecf20Sopenharmony_ci flush_workqueue(ps->metadata_wq); 2608c2ecf20Sopenharmony_ci destroy_work_on_stack(&req.work); 2618c2ecf20Sopenharmony_ci 2628c2ecf20Sopenharmony_ci return req.result; 2638c2ecf20Sopenharmony_ci} 2648c2ecf20Sopenharmony_ci 2658c2ecf20Sopenharmony_ci/* 2668c2ecf20Sopenharmony_ci * Convert a metadata area index to a chunk index. 2678c2ecf20Sopenharmony_ci */ 2688c2ecf20Sopenharmony_cistatic chunk_t area_location(struct pstore *ps, chunk_t area) 2698c2ecf20Sopenharmony_ci{ 2708c2ecf20Sopenharmony_ci return NUM_SNAPSHOT_HDR_CHUNKS + ((ps->exceptions_per_area + 1) * area); 2718c2ecf20Sopenharmony_ci} 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_cistatic void skip_metadata(struct pstore *ps) 2748c2ecf20Sopenharmony_ci{ 2758c2ecf20Sopenharmony_ci uint32_t stride = ps->exceptions_per_area + 1; 2768c2ecf20Sopenharmony_ci chunk_t next_free = ps->next_free; 2778c2ecf20Sopenharmony_ci if (sector_div(next_free, stride) == NUM_SNAPSHOT_HDR_CHUNKS) 2788c2ecf20Sopenharmony_ci ps->next_free++; 2798c2ecf20Sopenharmony_ci} 2808c2ecf20Sopenharmony_ci 2818c2ecf20Sopenharmony_ci/* 2828c2ecf20Sopenharmony_ci * Read or write a metadata area. Remembering to skip the first 2838c2ecf20Sopenharmony_ci * chunk which holds the header. 2848c2ecf20Sopenharmony_ci */ 2858c2ecf20Sopenharmony_cistatic int area_io(struct pstore *ps, int op, int op_flags) 2868c2ecf20Sopenharmony_ci{ 2878c2ecf20Sopenharmony_ci chunk_t chunk = area_location(ps, ps->current_area); 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci return chunk_io(ps, ps->area, chunk, op, op_flags, 0); 2908c2ecf20Sopenharmony_ci} 2918c2ecf20Sopenharmony_ci 2928c2ecf20Sopenharmony_cistatic void zero_memory_area(struct pstore *ps) 2938c2ecf20Sopenharmony_ci{ 2948c2ecf20Sopenharmony_ci memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT); 2958c2ecf20Sopenharmony_ci} 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_cistatic int zero_disk_area(struct pstore *ps, chunk_t area) 2988c2ecf20Sopenharmony_ci{ 2998c2ecf20Sopenharmony_ci return chunk_io(ps, ps->zero_area, area_location(ps, area), 3008c2ecf20Sopenharmony_ci REQ_OP_WRITE, 0, 0); 3018c2ecf20Sopenharmony_ci} 3028c2ecf20Sopenharmony_ci 3038c2ecf20Sopenharmony_cistatic int read_header(struct pstore *ps, int *new_snapshot) 3048c2ecf20Sopenharmony_ci{ 3058c2ecf20Sopenharmony_ci int r; 3068c2ecf20Sopenharmony_ci struct disk_header *dh; 3078c2ecf20Sopenharmony_ci unsigned chunk_size; 3088c2ecf20Sopenharmony_ci int chunk_size_supplied = 1; 3098c2ecf20Sopenharmony_ci char *chunk_err; 3108c2ecf20Sopenharmony_ci 3118c2ecf20Sopenharmony_ci /* 3128c2ecf20Sopenharmony_ci * Use default chunk size (or logical_block_size, if larger) 3138c2ecf20Sopenharmony_ci * if none supplied 3148c2ecf20Sopenharmony_ci */ 3158c2ecf20Sopenharmony_ci if (!ps->store->chunk_size) { 3168c2ecf20Sopenharmony_ci ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, 3178c2ecf20Sopenharmony_ci bdev_logical_block_size(dm_snap_cow(ps->store->snap)-> 3188c2ecf20Sopenharmony_ci bdev) >> 9); 3198c2ecf20Sopenharmony_ci ps->store->chunk_mask = ps->store->chunk_size - 1; 3208c2ecf20Sopenharmony_ci ps->store->chunk_shift = __ffs(ps->store->chunk_size); 3218c2ecf20Sopenharmony_ci chunk_size_supplied = 0; 3228c2ecf20Sopenharmony_ci } 3238c2ecf20Sopenharmony_ci 3248c2ecf20Sopenharmony_ci ps->io_client = dm_io_client_create(); 3258c2ecf20Sopenharmony_ci if (IS_ERR(ps->io_client)) 3268c2ecf20Sopenharmony_ci return PTR_ERR(ps->io_client); 3278c2ecf20Sopenharmony_ci 3288c2ecf20Sopenharmony_ci r = alloc_area(ps); 3298c2ecf20Sopenharmony_ci if (r) 3308c2ecf20Sopenharmony_ci return r; 3318c2ecf20Sopenharmony_ci 3328c2ecf20Sopenharmony_ci r = chunk_io(ps, ps->header_area, 0, REQ_OP_READ, 0, 1); 3338c2ecf20Sopenharmony_ci if (r) 3348c2ecf20Sopenharmony_ci goto bad; 3358c2ecf20Sopenharmony_ci 3368c2ecf20Sopenharmony_ci dh = ps->header_area; 3378c2ecf20Sopenharmony_ci 3388c2ecf20Sopenharmony_ci if (le32_to_cpu(dh->magic) == 0) { 3398c2ecf20Sopenharmony_ci *new_snapshot = 1; 3408c2ecf20Sopenharmony_ci return 0; 3418c2ecf20Sopenharmony_ci } 3428c2ecf20Sopenharmony_ci 3438c2ecf20Sopenharmony_ci if (le32_to_cpu(dh->magic) != SNAP_MAGIC) { 3448c2ecf20Sopenharmony_ci DMWARN("Invalid or corrupt snapshot"); 3458c2ecf20Sopenharmony_ci r = -ENXIO; 3468c2ecf20Sopenharmony_ci goto bad; 3478c2ecf20Sopenharmony_ci } 3488c2ecf20Sopenharmony_ci 3498c2ecf20Sopenharmony_ci *new_snapshot = 0; 3508c2ecf20Sopenharmony_ci ps->valid = le32_to_cpu(dh->valid); 3518c2ecf20Sopenharmony_ci ps->version = le32_to_cpu(dh->version); 3528c2ecf20Sopenharmony_ci chunk_size = le32_to_cpu(dh->chunk_size); 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci if (ps->store->chunk_size == chunk_size) 3558c2ecf20Sopenharmony_ci return 0; 3568c2ecf20Sopenharmony_ci 3578c2ecf20Sopenharmony_ci if (chunk_size_supplied) 3588c2ecf20Sopenharmony_ci DMWARN("chunk size %u in device metadata overrides " 3598c2ecf20Sopenharmony_ci "table chunk size of %u.", 3608c2ecf20Sopenharmony_ci chunk_size, ps->store->chunk_size); 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_ci /* We had a bogus chunk_size. Fix stuff up. */ 3638c2ecf20Sopenharmony_ci free_area(ps); 3648c2ecf20Sopenharmony_ci 3658c2ecf20Sopenharmony_ci r = dm_exception_store_set_chunk_size(ps->store, chunk_size, 3668c2ecf20Sopenharmony_ci &chunk_err); 3678c2ecf20Sopenharmony_ci if (r) { 3688c2ecf20Sopenharmony_ci DMERR("invalid on-disk chunk size %u: %s.", 3698c2ecf20Sopenharmony_ci chunk_size, chunk_err); 3708c2ecf20Sopenharmony_ci return r; 3718c2ecf20Sopenharmony_ci } 3728c2ecf20Sopenharmony_ci 3738c2ecf20Sopenharmony_ci r = alloc_area(ps); 3748c2ecf20Sopenharmony_ci return r; 3758c2ecf20Sopenharmony_ci 3768c2ecf20Sopenharmony_cibad: 3778c2ecf20Sopenharmony_ci free_area(ps); 3788c2ecf20Sopenharmony_ci return r; 3798c2ecf20Sopenharmony_ci} 3808c2ecf20Sopenharmony_ci 3818c2ecf20Sopenharmony_cistatic int write_header(struct pstore *ps) 3828c2ecf20Sopenharmony_ci{ 3838c2ecf20Sopenharmony_ci struct disk_header *dh; 3848c2ecf20Sopenharmony_ci 3858c2ecf20Sopenharmony_ci memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT); 3868c2ecf20Sopenharmony_ci 3878c2ecf20Sopenharmony_ci dh = ps->header_area; 3888c2ecf20Sopenharmony_ci dh->magic = cpu_to_le32(SNAP_MAGIC); 3898c2ecf20Sopenharmony_ci dh->valid = cpu_to_le32(ps->valid); 3908c2ecf20Sopenharmony_ci dh->version = cpu_to_le32(ps->version); 3918c2ecf20Sopenharmony_ci dh->chunk_size = cpu_to_le32(ps->store->chunk_size); 3928c2ecf20Sopenharmony_ci 3938c2ecf20Sopenharmony_ci return chunk_io(ps, ps->header_area, 0, REQ_OP_WRITE, 0, 1); 3948c2ecf20Sopenharmony_ci} 3958c2ecf20Sopenharmony_ci 3968c2ecf20Sopenharmony_ci/* 3978c2ecf20Sopenharmony_ci * Access functions for the disk exceptions, these do the endian conversions. 3988c2ecf20Sopenharmony_ci */ 3998c2ecf20Sopenharmony_cistatic struct disk_exception *get_exception(struct pstore *ps, void *ps_area, 4008c2ecf20Sopenharmony_ci uint32_t index) 4018c2ecf20Sopenharmony_ci{ 4028c2ecf20Sopenharmony_ci BUG_ON(index >= ps->exceptions_per_area); 4038c2ecf20Sopenharmony_ci 4048c2ecf20Sopenharmony_ci return ((struct disk_exception *) ps_area) + index; 4058c2ecf20Sopenharmony_ci} 4068c2ecf20Sopenharmony_ci 4078c2ecf20Sopenharmony_cistatic void read_exception(struct pstore *ps, void *ps_area, 4088c2ecf20Sopenharmony_ci uint32_t index, struct core_exception *result) 4098c2ecf20Sopenharmony_ci{ 4108c2ecf20Sopenharmony_ci struct disk_exception *de = get_exception(ps, ps_area, index); 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_ci /* copy it */ 4138c2ecf20Sopenharmony_ci result->old_chunk = le64_to_cpu(de->old_chunk); 4148c2ecf20Sopenharmony_ci result->new_chunk = le64_to_cpu(de->new_chunk); 4158c2ecf20Sopenharmony_ci} 4168c2ecf20Sopenharmony_ci 4178c2ecf20Sopenharmony_cistatic void write_exception(struct pstore *ps, 4188c2ecf20Sopenharmony_ci uint32_t index, struct core_exception *e) 4198c2ecf20Sopenharmony_ci{ 4208c2ecf20Sopenharmony_ci struct disk_exception *de = get_exception(ps, ps->area, index); 4218c2ecf20Sopenharmony_ci 4228c2ecf20Sopenharmony_ci /* copy it */ 4238c2ecf20Sopenharmony_ci de->old_chunk = cpu_to_le64(e->old_chunk); 4248c2ecf20Sopenharmony_ci de->new_chunk = cpu_to_le64(e->new_chunk); 4258c2ecf20Sopenharmony_ci} 4268c2ecf20Sopenharmony_ci 4278c2ecf20Sopenharmony_cistatic void clear_exception(struct pstore *ps, uint32_t index) 4288c2ecf20Sopenharmony_ci{ 4298c2ecf20Sopenharmony_ci struct disk_exception *de = get_exception(ps, ps->area, index); 4308c2ecf20Sopenharmony_ci 4318c2ecf20Sopenharmony_ci /* clear it */ 4328c2ecf20Sopenharmony_ci de->old_chunk = 0; 4338c2ecf20Sopenharmony_ci de->new_chunk = 0; 4348c2ecf20Sopenharmony_ci} 4358c2ecf20Sopenharmony_ci 4368c2ecf20Sopenharmony_ci/* 4378c2ecf20Sopenharmony_ci * Registers the exceptions that are present in the current area. 4388c2ecf20Sopenharmony_ci * 'full' is filled in to indicate if the area has been 4398c2ecf20Sopenharmony_ci * filled. 4408c2ecf20Sopenharmony_ci */ 4418c2ecf20Sopenharmony_cistatic int insert_exceptions(struct pstore *ps, void *ps_area, 4428c2ecf20Sopenharmony_ci int (*callback)(void *callback_context, 4438c2ecf20Sopenharmony_ci chunk_t old, chunk_t new), 4448c2ecf20Sopenharmony_ci void *callback_context, 4458c2ecf20Sopenharmony_ci int *full) 4468c2ecf20Sopenharmony_ci{ 4478c2ecf20Sopenharmony_ci int r; 4488c2ecf20Sopenharmony_ci unsigned int i; 4498c2ecf20Sopenharmony_ci struct core_exception e; 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_ci /* presume the area is full */ 4528c2ecf20Sopenharmony_ci *full = 1; 4538c2ecf20Sopenharmony_ci 4548c2ecf20Sopenharmony_ci for (i = 0; i < ps->exceptions_per_area; i++) { 4558c2ecf20Sopenharmony_ci read_exception(ps, ps_area, i, &e); 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_ci /* 4588c2ecf20Sopenharmony_ci * If the new_chunk is pointing at the start of 4598c2ecf20Sopenharmony_ci * the COW device, where the first metadata area 4608c2ecf20Sopenharmony_ci * is we know that we've hit the end of the 4618c2ecf20Sopenharmony_ci * exceptions. Therefore the area is not full. 4628c2ecf20Sopenharmony_ci */ 4638c2ecf20Sopenharmony_ci if (e.new_chunk == 0LL) { 4648c2ecf20Sopenharmony_ci ps->current_committed = i; 4658c2ecf20Sopenharmony_ci *full = 0; 4668c2ecf20Sopenharmony_ci break; 4678c2ecf20Sopenharmony_ci } 4688c2ecf20Sopenharmony_ci 4698c2ecf20Sopenharmony_ci /* 4708c2ecf20Sopenharmony_ci * Keep track of the start of the free chunks. 4718c2ecf20Sopenharmony_ci */ 4728c2ecf20Sopenharmony_ci if (ps->next_free <= e.new_chunk) 4738c2ecf20Sopenharmony_ci ps->next_free = e.new_chunk + 1; 4748c2ecf20Sopenharmony_ci 4758c2ecf20Sopenharmony_ci /* 4768c2ecf20Sopenharmony_ci * Otherwise we add the exception to the snapshot. 4778c2ecf20Sopenharmony_ci */ 4788c2ecf20Sopenharmony_ci r = callback(callback_context, e.old_chunk, e.new_chunk); 4798c2ecf20Sopenharmony_ci if (r) 4808c2ecf20Sopenharmony_ci return r; 4818c2ecf20Sopenharmony_ci } 4828c2ecf20Sopenharmony_ci 4838c2ecf20Sopenharmony_ci return 0; 4848c2ecf20Sopenharmony_ci} 4858c2ecf20Sopenharmony_ci 4868c2ecf20Sopenharmony_cistatic int read_exceptions(struct pstore *ps, 4878c2ecf20Sopenharmony_ci int (*callback)(void *callback_context, chunk_t old, 4888c2ecf20Sopenharmony_ci chunk_t new), 4898c2ecf20Sopenharmony_ci void *callback_context) 4908c2ecf20Sopenharmony_ci{ 4918c2ecf20Sopenharmony_ci int r, full = 1; 4928c2ecf20Sopenharmony_ci struct dm_bufio_client *client; 4938c2ecf20Sopenharmony_ci chunk_t prefetch_area = 0; 4948c2ecf20Sopenharmony_ci 4958c2ecf20Sopenharmony_ci client = dm_bufio_client_create(dm_snap_cow(ps->store->snap)->bdev, 4968c2ecf20Sopenharmony_ci ps->store->chunk_size << SECTOR_SHIFT, 4978c2ecf20Sopenharmony_ci 1, 0, NULL, NULL); 4988c2ecf20Sopenharmony_ci 4998c2ecf20Sopenharmony_ci if (IS_ERR(client)) 5008c2ecf20Sopenharmony_ci return PTR_ERR(client); 5018c2ecf20Sopenharmony_ci 5028c2ecf20Sopenharmony_ci /* 5038c2ecf20Sopenharmony_ci * Setup for one current buffer + desired readahead buffers. 5048c2ecf20Sopenharmony_ci */ 5058c2ecf20Sopenharmony_ci dm_bufio_set_minimum_buffers(client, 1 + DM_PREFETCH_CHUNKS); 5068c2ecf20Sopenharmony_ci 5078c2ecf20Sopenharmony_ci /* 5088c2ecf20Sopenharmony_ci * Keeping reading chunks and inserting exceptions until 5098c2ecf20Sopenharmony_ci * we find a partially full area. 5108c2ecf20Sopenharmony_ci */ 5118c2ecf20Sopenharmony_ci for (ps->current_area = 0; full; ps->current_area++) { 5128c2ecf20Sopenharmony_ci struct dm_buffer *bp; 5138c2ecf20Sopenharmony_ci void *area; 5148c2ecf20Sopenharmony_ci chunk_t chunk; 5158c2ecf20Sopenharmony_ci 5168c2ecf20Sopenharmony_ci if (unlikely(prefetch_area < ps->current_area)) 5178c2ecf20Sopenharmony_ci prefetch_area = ps->current_area; 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_ci if (DM_PREFETCH_CHUNKS) do { 5208c2ecf20Sopenharmony_ci chunk_t pf_chunk = area_location(ps, prefetch_area); 5218c2ecf20Sopenharmony_ci if (unlikely(pf_chunk >= dm_bufio_get_device_size(client))) 5228c2ecf20Sopenharmony_ci break; 5238c2ecf20Sopenharmony_ci dm_bufio_prefetch(client, pf_chunk, 1); 5248c2ecf20Sopenharmony_ci prefetch_area++; 5258c2ecf20Sopenharmony_ci if (unlikely(!prefetch_area)) 5268c2ecf20Sopenharmony_ci break; 5278c2ecf20Sopenharmony_ci } while (prefetch_area <= ps->current_area + DM_PREFETCH_CHUNKS); 5288c2ecf20Sopenharmony_ci 5298c2ecf20Sopenharmony_ci chunk = area_location(ps, ps->current_area); 5308c2ecf20Sopenharmony_ci 5318c2ecf20Sopenharmony_ci area = dm_bufio_read(client, chunk, &bp); 5328c2ecf20Sopenharmony_ci if (IS_ERR(area)) { 5338c2ecf20Sopenharmony_ci r = PTR_ERR(area); 5348c2ecf20Sopenharmony_ci goto ret_destroy_bufio; 5358c2ecf20Sopenharmony_ci } 5368c2ecf20Sopenharmony_ci 5378c2ecf20Sopenharmony_ci r = insert_exceptions(ps, area, callback, callback_context, 5388c2ecf20Sopenharmony_ci &full); 5398c2ecf20Sopenharmony_ci 5408c2ecf20Sopenharmony_ci if (!full) 5418c2ecf20Sopenharmony_ci memcpy(ps->area, area, ps->store->chunk_size << SECTOR_SHIFT); 5428c2ecf20Sopenharmony_ci 5438c2ecf20Sopenharmony_ci dm_bufio_release(bp); 5448c2ecf20Sopenharmony_ci 5458c2ecf20Sopenharmony_ci dm_bufio_forget(client, chunk); 5468c2ecf20Sopenharmony_ci 5478c2ecf20Sopenharmony_ci if (unlikely(r)) 5488c2ecf20Sopenharmony_ci goto ret_destroy_bufio; 5498c2ecf20Sopenharmony_ci } 5508c2ecf20Sopenharmony_ci 5518c2ecf20Sopenharmony_ci ps->current_area--; 5528c2ecf20Sopenharmony_ci 5538c2ecf20Sopenharmony_ci skip_metadata(ps); 5548c2ecf20Sopenharmony_ci 5558c2ecf20Sopenharmony_ci r = 0; 5568c2ecf20Sopenharmony_ci 5578c2ecf20Sopenharmony_ciret_destroy_bufio: 5588c2ecf20Sopenharmony_ci dm_bufio_client_destroy(client); 5598c2ecf20Sopenharmony_ci 5608c2ecf20Sopenharmony_ci return r; 5618c2ecf20Sopenharmony_ci} 5628c2ecf20Sopenharmony_ci 5638c2ecf20Sopenharmony_cistatic struct pstore *get_info(struct dm_exception_store *store) 5648c2ecf20Sopenharmony_ci{ 5658c2ecf20Sopenharmony_ci return (struct pstore *) store->context; 5668c2ecf20Sopenharmony_ci} 5678c2ecf20Sopenharmony_ci 5688c2ecf20Sopenharmony_cistatic void persistent_usage(struct dm_exception_store *store, 5698c2ecf20Sopenharmony_ci sector_t *total_sectors, 5708c2ecf20Sopenharmony_ci sector_t *sectors_allocated, 5718c2ecf20Sopenharmony_ci sector_t *metadata_sectors) 5728c2ecf20Sopenharmony_ci{ 5738c2ecf20Sopenharmony_ci struct pstore *ps = get_info(store); 5748c2ecf20Sopenharmony_ci 5758c2ecf20Sopenharmony_ci *sectors_allocated = ps->next_free * store->chunk_size; 5768c2ecf20Sopenharmony_ci *total_sectors = get_dev_size(dm_snap_cow(store->snap)->bdev); 5778c2ecf20Sopenharmony_ci 5788c2ecf20Sopenharmony_ci /* 5798c2ecf20Sopenharmony_ci * First chunk is the fixed header. 5808c2ecf20Sopenharmony_ci * Then there are (ps->current_area + 1) metadata chunks, each one 5818c2ecf20Sopenharmony_ci * separated from the next by ps->exceptions_per_area data chunks. 5828c2ecf20Sopenharmony_ci */ 5838c2ecf20Sopenharmony_ci *metadata_sectors = (ps->current_area + 1 + NUM_SNAPSHOT_HDR_CHUNKS) * 5848c2ecf20Sopenharmony_ci store->chunk_size; 5858c2ecf20Sopenharmony_ci} 5868c2ecf20Sopenharmony_ci 5878c2ecf20Sopenharmony_cistatic void persistent_dtr(struct dm_exception_store *store) 5888c2ecf20Sopenharmony_ci{ 5898c2ecf20Sopenharmony_ci struct pstore *ps = get_info(store); 5908c2ecf20Sopenharmony_ci 5918c2ecf20Sopenharmony_ci destroy_workqueue(ps->metadata_wq); 5928c2ecf20Sopenharmony_ci 5938c2ecf20Sopenharmony_ci /* Created in read_header */ 5948c2ecf20Sopenharmony_ci if (ps->io_client) 5958c2ecf20Sopenharmony_ci dm_io_client_destroy(ps->io_client); 5968c2ecf20Sopenharmony_ci free_area(ps); 5978c2ecf20Sopenharmony_ci 5988c2ecf20Sopenharmony_ci /* Allocated in persistent_read_metadata */ 5998c2ecf20Sopenharmony_ci vfree(ps->callbacks); 6008c2ecf20Sopenharmony_ci 6018c2ecf20Sopenharmony_ci kfree(ps); 6028c2ecf20Sopenharmony_ci} 6038c2ecf20Sopenharmony_ci 6048c2ecf20Sopenharmony_cistatic int persistent_read_metadata(struct dm_exception_store *store, 6058c2ecf20Sopenharmony_ci int (*callback)(void *callback_context, 6068c2ecf20Sopenharmony_ci chunk_t old, chunk_t new), 6078c2ecf20Sopenharmony_ci void *callback_context) 6088c2ecf20Sopenharmony_ci{ 6098c2ecf20Sopenharmony_ci int r, new_snapshot; 6108c2ecf20Sopenharmony_ci struct pstore *ps = get_info(store); 6118c2ecf20Sopenharmony_ci 6128c2ecf20Sopenharmony_ci /* 6138c2ecf20Sopenharmony_ci * Read the snapshot header. 6148c2ecf20Sopenharmony_ci */ 6158c2ecf20Sopenharmony_ci r = read_header(ps, &new_snapshot); 6168c2ecf20Sopenharmony_ci if (r) 6178c2ecf20Sopenharmony_ci return r; 6188c2ecf20Sopenharmony_ci 6198c2ecf20Sopenharmony_ci /* 6208c2ecf20Sopenharmony_ci * Now we know correct chunk_size, complete the initialisation. 6218c2ecf20Sopenharmony_ci */ 6228c2ecf20Sopenharmony_ci ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) / 6238c2ecf20Sopenharmony_ci sizeof(struct disk_exception); 6248c2ecf20Sopenharmony_ci ps->callbacks = dm_vcalloc(ps->exceptions_per_area, 6258c2ecf20Sopenharmony_ci sizeof(*ps->callbacks)); 6268c2ecf20Sopenharmony_ci if (!ps->callbacks) 6278c2ecf20Sopenharmony_ci return -ENOMEM; 6288c2ecf20Sopenharmony_ci 6298c2ecf20Sopenharmony_ci /* 6308c2ecf20Sopenharmony_ci * Do we need to setup a new snapshot ? 6318c2ecf20Sopenharmony_ci */ 6328c2ecf20Sopenharmony_ci if (new_snapshot) { 6338c2ecf20Sopenharmony_ci r = write_header(ps); 6348c2ecf20Sopenharmony_ci if (r) { 6358c2ecf20Sopenharmony_ci DMWARN("write_header failed"); 6368c2ecf20Sopenharmony_ci return r; 6378c2ecf20Sopenharmony_ci } 6388c2ecf20Sopenharmony_ci 6398c2ecf20Sopenharmony_ci ps->current_area = 0; 6408c2ecf20Sopenharmony_ci zero_memory_area(ps); 6418c2ecf20Sopenharmony_ci r = zero_disk_area(ps, 0); 6428c2ecf20Sopenharmony_ci if (r) 6438c2ecf20Sopenharmony_ci DMWARN("zero_disk_area(0) failed"); 6448c2ecf20Sopenharmony_ci return r; 6458c2ecf20Sopenharmony_ci } 6468c2ecf20Sopenharmony_ci /* 6478c2ecf20Sopenharmony_ci * Sanity checks. 6488c2ecf20Sopenharmony_ci */ 6498c2ecf20Sopenharmony_ci if (ps->version != SNAPSHOT_DISK_VERSION) { 6508c2ecf20Sopenharmony_ci DMWARN("unable to handle snapshot disk version %d", 6518c2ecf20Sopenharmony_ci ps->version); 6528c2ecf20Sopenharmony_ci return -EINVAL; 6538c2ecf20Sopenharmony_ci } 6548c2ecf20Sopenharmony_ci 6558c2ecf20Sopenharmony_ci /* 6568c2ecf20Sopenharmony_ci * Metadata are valid, but snapshot is invalidated 6578c2ecf20Sopenharmony_ci */ 6588c2ecf20Sopenharmony_ci if (!ps->valid) 6598c2ecf20Sopenharmony_ci return 1; 6608c2ecf20Sopenharmony_ci 6618c2ecf20Sopenharmony_ci /* 6628c2ecf20Sopenharmony_ci * Read the metadata. 6638c2ecf20Sopenharmony_ci */ 6648c2ecf20Sopenharmony_ci r = read_exceptions(ps, callback, callback_context); 6658c2ecf20Sopenharmony_ci 6668c2ecf20Sopenharmony_ci return r; 6678c2ecf20Sopenharmony_ci} 6688c2ecf20Sopenharmony_ci 6698c2ecf20Sopenharmony_cistatic int persistent_prepare_exception(struct dm_exception_store *store, 6708c2ecf20Sopenharmony_ci struct dm_exception *e) 6718c2ecf20Sopenharmony_ci{ 6728c2ecf20Sopenharmony_ci struct pstore *ps = get_info(store); 6738c2ecf20Sopenharmony_ci sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev); 6748c2ecf20Sopenharmony_ci 6758c2ecf20Sopenharmony_ci /* Is there enough room ? */ 6768c2ecf20Sopenharmony_ci if (size < ((ps->next_free + 1) * store->chunk_size)) 6778c2ecf20Sopenharmony_ci return -ENOSPC; 6788c2ecf20Sopenharmony_ci 6798c2ecf20Sopenharmony_ci e->new_chunk = ps->next_free; 6808c2ecf20Sopenharmony_ci 6818c2ecf20Sopenharmony_ci /* 6828c2ecf20Sopenharmony_ci * Move onto the next free pending, making sure to take 6838c2ecf20Sopenharmony_ci * into account the location of the metadata chunks. 6848c2ecf20Sopenharmony_ci */ 6858c2ecf20Sopenharmony_ci ps->next_free++; 6868c2ecf20Sopenharmony_ci skip_metadata(ps); 6878c2ecf20Sopenharmony_ci 6888c2ecf20Sopenharmony_ci atomic_inc(&ps->pending_count); 6898c2ecf20Sopenharmony_ci return 0; 6908c2ecf20Sopenharmony_ci} 6918c2ecf20Sopenharmony_ci 6928c2ecf20Sopenharmony_cistatic void persistent_commit_exception(struct dm_exception_store *store, 6938c2ecf20Sopenharmony_ci struct dm_exception *e, int valid, 6948c2ecf20Sopenharmony_ci void (*callback) (void *, int success), 6958c2ecf20Sopenharmony_ci void *callback_context) 6968c2ecf20Sopenharmony_ci{ 6978c2ecf20Sopenharmony_ci unsigned int i; 6988c2ecf20Sopenharmony_ci struct pstore *ps = get_info(store); 6998c2ecf20Sopenharmony_ci struct core_exception ce; 7008c2ecf20Sopenharmony_ci struct commit_callback *cb; 7018c2ecf20Sopenharmony_ci 7028c2ecf20Sopenharmony_ci if (!valid) 7038c2ecf20Sopenharmony_ci ps->valid = 0; 7048c2ecf20Sopenharmony_ci 7058c2ecf20Sopenharmony_ci ce.old_chunk = e->old_chunk; 7068c2ecf20Sopenharmony_ci ce.new_chunk = e->new_chunk; 7078c2ecf20Sopenharmony_ci write_exception(ps, ps->current_committed++, &ce); 7088c2ecf20Sopenharmony_ci 7098c2ecf20Sopenharmony_ci /* 7108c2ecf20Sopenharmony_ci * Add the callback to the back of the array. This code 7118c2ecf20Sopenharmony_ci * is the only place where the callback array is 7128c2ecf20Sopenharmony_ci * manipulated, and we know that it will never be called 7138c2ecf20Sopenharmony_ci * multiple times concurrently. 7148c2ecf20Sopenharmony_ci */ 7158c2ecf20Sopenharmony_ci cb = ps->callbacks + ps->callback_count++; 7168c2ecf20Sopenharmony_ci cb->callback = callback; 7178c2ecf20Sopenharmony_ci cb->context = callback_context; 7188c2ecf20Sopenharmony_ci 7198c2ecf20Sopenharmony_ci /* 7208c2ecf20Sopenharmony_ci * If there are exceptions in flight and we have not yet 7218c2ecf20Sopenharmony_ci * filled this metadata area there's nothing more to do. 7228c2ecf20Sopenharmony_ci */ 7238c2ecf20Sopenharmony_ci if (!atomic_dec_and_test(&ps->pending_count) && 7248c2ecf20Sopenharmony_ci (ps->current_committed != ps->exceptions_per_area)) 7258c2ecf20Sopenharmony_ci return; 7268c2ecf20Sopenharmony_ci 7278c2ecf20Sopenharmony_ci /* 7288c2ecf20Sopenharmony_ci * If we completely filled the current area, then wipe the next one. 7298c2ecf20Sopenharmony_ci */ 7308c2ecf20Sopenharmony_ci if ((ps->current_committed == ps->exceptions_per_area) && 7318c2ecf20Sopenharmony_ci zero_disk_area(ps, ps->current_area + 1)) 7328c2ecf20Sopenharmony_ci ps->valid = 0; 7338c2ecf20Sopenharmony_ci 7348c2ecf20Sopenharmony_ci /* 7358c2ecf20Sopenharmony_ci * Commit exceptions to disk. 7368c2ecf20Sopenharmony_ci */ 7378c2ecf20Sopenharmony_ci if (ps->valid && area_io(ps, REQ_OP_WRITE, 7388c2ecf20Sopenharmony_ci REQ_PREFLUSH | REQ_FUA | REQ_SYNC)) 7398c2ecf20Sopenharmony_ci ps->valid = 0; 7408c2ecf20Sopenharmony_ci 7418c2ecf20Sopenharmony_ci /* 7428c2ecf20Sopenharmony_ci * Advance to the next area if this one is full. 7438c2ecf20Sopenharmony_ci */ 7448c2ecf20Sopenharmony_ci if (ps->current_committed == ps->exceptions_per_area) { 7458c2ecf20Sopenharmony_ci ps->current_committed = 0; 7468c2ecf20Sopenharmony_ci ps->current_area++; 7478c2ecf20Sopenharmony_ci zero_memory_area(ps); 7488c2ecf20Sopenharmony_ci } 7498c2ecf20Sopenharmony_ci 7508c2ecf20Sopenharmony_ci for (i = 0; i < ps->callback_count; i++) { 7518c2ecf20Sopenharmony_ci cb = ps->callbacks + i; 7528c2ecf20Sopenharmony_ci cb->callback(cb->context, ps->valid); 7538c2ecf20Sopenharmony_ci } 7548c2ecf20Sopenharmony_ci 7558c2ecf20Sopenharmony_ci ps->callback_count = 0; 7568c2ecf20Sopenharmony_ci} 7578c2ecf20Sopenharmony_ci 7588c2ecf20Sopenharmony_cistatic int persistent_prepare_merge(struct dm_exception_store *store, 7598c2ecf20Sopenharmony_ci chunk_t *last_old_chunk, 7608c2ecf20Sopenharmony_ci chunk_t *last_new_chunk) 7618c2ecf20Sopenharmony_ci{ 7628c2ecf20Sopenharmony_ci struct pstore *ps = get_info(store); 7638c2ecf20Sopenharmony_ci struct core_exception ce; 7648c2ecf20Sopenharmony_ci int nr_consecutive; 7658c2ecf20Sopenharmony_ci int r; 7668c2ecf20Sopenharmony_ci 7678c2ecf20Sopenharmony_ci /* 7688c2ecf20Sopenharmony_ci * When current area is empty, move back to preceding area. 7698c2ecf20Sopenharmony_ci */ 7708c2ecf20Sopenharmony_ci if (!ps->current_committed) { 7718c2ecf20Sopenharmony_ci /* 7728c2ecf20Sopenharmony_ci * Have we finished? 7738c2ecf20Sopenharmony_ci */ 7748c2ecf20Sopenharmony_ci if (!ps->current_area) 7758c2ecf20Sopenharmony_ci return 0; 7768c2ecf20Sopenharmony_ci 7778c2ecf20Sopenharmony_ci ps->current_area--; 7788c2ecf20Sopenharmony_ci r = area_io(ps, REQ_OP_READ, 0); 7798c2ecf20Sopenharmony_ci if (r < 0) 7808c2ecf20Sopenharmony_ci return r; 7818c2ecf20Sopenharmony_ci ps->current_committed = ps->exceptions_per_area; 7828c2ecf20Sopenharmony_ci } 7838c2ecf20Sopenharmony_ci 7848c2ecf20Sopenharmony_ci read_exception(ps, ps->area, ps->current_committed - 1, &ce); 7858c2ecf20Sopenharmony_ci *last_old_chunk = ce.old_chunk; 7868c2ecf20Sopenharmony_ci *last_new_chunk = ce.new_chunk; 7878c2ecf20Sopenharmony_ci 7888c2ecf20Sopenharmony_ci /* 7898c2ecf20Sopenharmony_ci * Find number of consecutive chunks within the current area, 7908c2ecf20Sopenharmony_ci * working backwards. 7918c2ecf20Sopenharmony_ci */ 7928c2ecf20Sopenharmony_ci for (nr_consecutive = 1; nr_consecutive < ps->current_committed; 7938c2ecf20Sopenharmony_ci nr_consecutive++) { 7948c2ecf20Sopenharmony_ci read_exception(ps, ps->area, 7958c2ecf20Sopenharmony_ci ps->current_committed - 1 - nr_consecutive, &ce); 7968c2ecf20Sopenharmony_ci if (ce.old_chunk != *last_old_chunk - nr_consecutive || 7978c2ecf20Sopenharmony_ci ce.new_chunk != *last_new_chunk - nr_consecutive) 7988c2ecf20Sopenharmony_ci break; 7998c2ecf20Sopenharmony_ci } 8008c2ecf20Sopenharmony_ci 8018c2ecf20Sopenharmony_ci return nr_consecutive; 8028c2ecf20Sopenharmony_ci} 8038c2ecf20Sopenharmony_ci 8048c2ecf20Sopenharmony_cistatic int persistent_commit_merge(struct dm_exception_store *store, 8058c2ecf20Sopenharmony_ci int nr_merged) 8068c2ecf20Sopenharmony_ci{ 8078c2ecf20Sopenharmony_ci int r, i; 8088c2ecf20Sopenharmony_ci struct pstore *ps = get_info(store); 8098c2ecf20Sopenharmony_ci 8108c2ecf20Sopenharmony_ci BUG_ON(nr_merged > ps->current_committed); 8118c2ecf20Sopenharmony_ci 8128c2ecf20Sopenharmony_ci for (i = 0; i < nr_merged; i++) 8138c2ecf20Sopenharmony_ci clear_exception(ps, ps->current_committed - 1 - i); 8148c2ecf20Sopenharmony_ci 8158c2ecf20Sopenharmony_ci r = area_io(ps, REQ_OP_WRITE, REQ_PREFLUSH | REQ_FUA); 8168c2ecf20Sopenharmony_ci if (r < 0) 8178c2ecf20Sopenharmony_ci return r; 8188c2ecf20Sopenharmony_ci 8198c2ecf20Sopenharmony_ci ps->current_committed -= nr_merged; 8208c2ecf20Sopenharmony_ci 8218c2ecf20Sopenharmony_ci /* 8228c2ecf20Sopenharmony_ci * At this stage, only persistent_usage() uses ps->next_free, so 8238c2ecf20Sopenharmony_ci * we make no attempt to keep ps->next_free strictly accurate 8248c2ecf20Sopenharmony_ci * as exceptions may have been committed out-of-order originally. 8258c2ecf20Sopenharmony_ci * Once a snapshot has become merging, we set it to the value it 8268c2ecf20Sopenharmony_ci * would have held had all the exceptions been committed in order. 8278c2ecf20Sopenharmony_ci * 8288c2ecf20Sopenharmony_ci * ps->current_area does not get reduced by prepare_merge() until 8298c2ecf20Sopenharmony_ci * after commit_merge() has removed the nr_merged previous exceptions. 8308c2ecf20Sopenharmony_ci */ 8318c2ecf20Sopenharmony_ci ps->next_free = area_location(ps, ps->current_area) + 8328c2ecf20Sopenharmony_ci ps->current_committed + 1; 8338c2ecf20Sopenharmony_ci 8348c2ecf20Sopenharmony_ci return 0; 8358c2ecf20Sopenharmony_ci} 8368c2ecf20Sopenharmony_ci 8378c2ecf20Sopenharmony_cistatic void persistent_drop_snapshot(struct dm_exception_store *store) 8388c2ecf20Sopenharmony_ci{ 8398c2ecf20Sopenharmony_ci struct pstore *ps = get_info(store); 8408c2ecf20Sopenharmony_ci 8418c2ecf20Sopenharmony_ci ps->valid = 0; 8428c2ecf20Sopenharmony_ci if (write_header(ps)) 8438c2ecf20Sopenharmony_ci DMWARN("write header failed"); 8448c2ecf20Sopenharmony_ci} 8458c2ecf20Sopenharmony_ci 8468c2ecf20Sopenharmony_cistatic int persistent_ctr(struct dm_exception_store *store, char *options) 8478c2ecf20Sopenharmony_ci{ 8488c2ecf20Sopenharmony_ci struct pstore *ps; 8498c2ecf20Sopenharmony_ci int r; 8508c2ecf20Sopenharmony_ci 8518c2ecf20Sopenharmony_ci /* allocate the pstore */ 8528c2ecf20Sopenharmony_ci ps = kzalloc(sizeof(*ps), GFP_KERNEL); 8538c2ecf20Sopenharmony_ci if (!ps) 8548c2ecf20Sopenharmony_ci return -ENOMEM; 8558c2ecf20Sopenharmony_ci 8568c2ecf20Sopenharmony_ci ps->store = store; 8578c2ecf20Sopenharmony_ci ps->valid = 1; 8588c2ecf20Sopenharmony_ci ps->version = SNAPSHOT_DISK_VERSION; 8598c2ecf20Sopenharmony_ci ps->area = NULL; 8608c2ecf20Sopenharmony_ci ps->zero_area = NULL; 8618c2ecf20Sopenharmony_ci ps->header_area = NULL; 8628c2ecf20Sopenharmony_ci ps->next_free = NUM_SNAPSHOT_HDR_CHUNKS + 1; /* header and 1st area */ 8638c2ecf20Sopenharmony_ci ps->current_committed = 0; 8648c2ecf20Sopenharmony_ci 8658c2ecf20Sopenharmony_ci ps->callback_count = 0; 8668c2ecf20Sopenharmony_ci atomic_set(&ps->pending_count, 0); 8678c2ecf20Sopenharmony_ci ps->callbacks = NULL; 8688c2ecf20Sopenharmony_ci 8698c2ecf20Sopenharmony_ci ps->metadata_wq = alloc_workqueue("ksnaphd", WQ_MEM_RECLAIM, 0); 8708c2ecf20Sopenharmony_ci if (!ps->metadata_wq) { 8718c2ecf20Sopenharmony_ci DMERR("couldn't start header metadata update thread"); 8728c2ecf20Sopenharmony_ci r = -ENOMEM; 8738c2ecf20Sopenharmony_ci goto err_workqueue; 8748c2ecf20Sopenharmony_ci } 8758c2ecf20Sopenharmony_ci 8768c2ecf20Sopenharmony_ci if (options) { 8778c2ecf20Sopenharmony_ci char overflow = toupper(options[0]); 8788c2ecf20Sopenharmony_ci if (overflow == 'O') 8798c2ecf20Sopenharmony_ci store->userspace_supports_overflow = true; 8808c2ecf20Sopenharmony_ci else { 8818c2ecf20Sopenharmony_ci DMERR("Unsupported persistent store option: %s", options); 8828c2ecf20Sopenharmony_ci r = -EINVAL; 8838c2ecf20Sopenharmony_ci goto err_options; 8848c2ecf20Sopenharmony_ci } 8858c2ecf20Sopenharmony_ci } 8868c2ecf20Sopenharmony_ci 8878c2ecf20Sopenharmony_ci store->context = ps; 8888c2ecf20Sopenharmony_ci 8898c2ecf20Sopenharmony_ci return 0; 8908c2ecf20Sopenharmony_ci 8918c2ecf20Sopenharmony_cierr_options: 8928c2ecf20Sopenharmony_ci destroy_workqueue(ps->metadata_wq); 8938c2ecf20Sopenharmony_cierr_workqueue: 8948c2ecf20Sopenharmony_ci kfree(ps); 8958c2ecf20Sopenharmony_ci 8968c2ecf20Sopenharmony_ci return r; 8978c2ecf20Sopenharmony_ci} 8988c2ecf20Sopenharmony_ci 8998c2ecf20Sopenharmony_cistatic unsigned persistent_status(struct dm_exception_store *store, 9008c2ecf20Sopenharmony_ci status_type_t status, char *result, 9018c2ecf20Sopenharmony_ci unsigned maxlen) 9028c2ecf20Sopenharmony_ci{ 9038c2ecf20Sopenharmony_ci unsigned sz = 0; 9048c2ecf20Sopenharmony_ci 9058c2ecf20Sopenharmony_ci switch (status) { 9068c2ecf20Sopenharmony_ci case STATUSTYPE_INFO: 9078c2ecf20Sopenharmony_ci break; 9088c2ecf20Sopenharmony_ci case STATUSTYPE_TABLE: 9098c2ecf20Sopenharmony_ci DMEMIT(" %s %llu", store->userspace_supports_overflow ? "PO" : "P", 9108c2ecf20Sopenharmony_ci (unsigned long long)store->chunk_size); 9118c2ecf20Sopenharmony_ci } 9128c2ecf20Sopenharmony_ci 9138c2ecf20Sopenharmony_ci return sz; 9148c2ecf20Sopenharmony_ci} 9158c2ecf20Sopenharmony_ci 9168c2ecf20Sopenharmony_cistatic struct dm_exception_store_type _persistent_type = { 9178c2ecf20Sopenharmony_ci .name = "persistent", 9188c2ecf20Sopenharmony_ci .module = THIS_MODULE, 9198c2ecf20Sopenharmony_ci .ctr = persistent_ctr, 9208c2ecf20Sopenharmony_ci .dtr = persistent_dtr, 9218c2ecf20Sopenharmony_ci .read_metadata = persistent_read_metadata, 9228c2ecf20Sopenharmony_ci .prepare_exception = persistent_prepare_exception, 9238c2ecf20Sopenharmony_ci .commit_exception = persistent_commit_exception, 9248c2ecf20Sopenharmony_ci .prepare_merge = persistent_prepare_merge, 9258c2ecf20Sopenharmony_ci .commit_merge = persistent_commit_merge, 9268c2ecf20Sopenharmony_ci .drop_snapshot = persistent_drop_snapshot, 9278c2ecf20Sopenharmony_ci .usage = persistent_usage, 9288c2ecf20Sopenharmony_ci .status = persistent_status, 9298c2ecf20Sopenharmony_ci}; 9308c2ecf20Sopenharmony_ci 9318c2ecf20Sopenharmony_cistatic struct dm_exception_store_type _persistent_compat_type = { 9328c2ecf20Sopenharmony_ci .name = "P", 9338c2ecf20Sopenharmony_ci .module = THIS_MODULE, 9348c2ecf20Sopenharmony_ci .ctr = persistent_ctr, 9358c2ecf20Sopenharmony_ci .dtr = persistent_dtr, 9368c2ecf20Sopenharmony_ci .read_metadata = persistent_read_metadata, 9378c2ecf20Sopenharmony_ci .prepare_exception = persistent_prepare_exception, 9388c2ecf20Sopenharmony_ci .commit_exception = persistent_commit_exception, 9398c2ecf20Sopenharmony_ci .prepare_merge = persistent_prepare_merge, 9408c2ecf20Sopenharmony_ci .commit_merge = persistent_commit_merge, 9418c2ecf20Sopenharmony_ci .drop_snapshot = persistent_drop_snapshot, 9428c2ecf20Sopenharmony_ci .usage = persistent_usage, 9438c2ecf20Sopenharmony_ci .status = persistent_status, 9448c2ecf20Sopenharmony_ci}; 9458c2ecf20Sopenharmony_ci 9468c2ecf20Sopenharmony_ciint dm_persistent_snapshot_init(void) 9478c2ecf20Sopenharmony_ci{ 9488c2ecf20Sopenharmony_ci int r; 9498c2ecf20Sopenharmony_ci 9508c2ecf20Sopenharmony_ci r = dm_exception_store_type_register(&_persistent_type); 9518c2ecf20Sopenharmony_ci if (r) { 9528c2ecf20Sopenharmony_ci DMERR("Unable to register persistent exception store type"); 9538c2ecf20Sopenharmony_ci return r; 9548c2ecf20Sopenharmony_ci } 9558c2ecf20Sopenharmony_ci 9568c2ecf20Sopenharmony_ci r = dm_exception_store_type_register(&_persistent_compat_type); 9578c2ecf20Sopenharmony_ci if (r) { 9588c2ecf20Sopenharmony_ci DMERR("Unable to register old-style persistent exception " 9598c2ecf20Sopenharmony_ci "store type"); 9608c2ecf20Sopenharmony_ci dm_exception_store_type_unregister(&_persistent_type); 9618c2ecf20Sopenharmony_ci return r; 9628c2ecf20Sopenharmony_ci } 9638c2ecf20Sopenharmony_ci 9648c2ecf20Sopenharmony_ci return r; 9658c2ecf20Sopenharmony_ci} 9668c2ecf20Sopenharmony_ci 9678c2ecf20Sopenharmony_civoid dm_persistent_snapshot_exit(void) 9688c2ecf20Sopenharmony_ci{ 9698c2ecf20Sopenharmony_ci dm_exception_store_type_unregister(&_persistent_type); 9708c2ecf20Sopenharmony_ci dm_exception_store_type_unregister(&_persistent_compat_type); 9718c2ecf20Sopenharmony_ci} 972