162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
462306a36Sopenharmony_ci * Copyright (C) 2006-2008 Red Hat GmbH
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci * This file is released under the GPL.
762306a36Sopenharmony_ci */
862306a36Sopenharmony_ci
962306a36Sopenharmony_ci#include "dm-exception-store.h"
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci#include <linux/ctype.h>
1262306a36Sopenharmony_ci#include <linux/mm.h>
1362306a36Sopenharmony_ci#include <linux/pagemap.h>
1462306a36Sopenharmony_ci#include <linux/vmalloc.h>
1562306a36Sopenharmony_ci#include <linux/export.h>
1662306a36Sopenharmony_ci#include <linux/slab.h>
1762306a36Sopenharmony_ci#include <linux/dm-io.h>
1862306a36Sopenharmony_ci#include <linux/dm-bufio.h>
1962306a36Sopenharmony_ci
/* Log-message prefix for this file. */
#define DM_MSG_PREFIX "persistent snapshot"

/* Chunk size, in sectors, used when none is supplied (16KB). */
#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32U

/* How many metadata areas read_exceptions() tries to keep prefetched. */
#define DM_PREFETCH_CHUNKS		12
2462306a36Sopenharmony_ci
/*
 *---------------------------------------------------------------
 * Persistent snapshots.  By persistent we mean that the snapshot
 * will survive a reboot.
 *---------------------------------------------------------------
 */
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_ci/*
3362306a36Sopenharmony_ci * We need to store a record of which parts of the origin have
3462306a36Sopenharmony_ci * been copied to the snapshot device.  The snapshot code
3562306a36Sopenharmony_ci * requires that we copy exception chunks to chunk aligned areas
3662306a36Sopenharmony_ci * of the COW store.  It makes sense therefore, to store the
3762306a36Sopenharmony_ci * metadata in chunk size blocks.
3862306a36Sopenharmony_ci *
3962306a36Sopenharmony_ci * There is no backward or forward compatibility implemented,
4062306a36Sopenharmony_ci * snapshots with different disk versions than the kernel will
4162306a36Sopenharmony_ci * not be usable.  It is expected that "lvcreate" will blank out
4262306a36Sopenharmony_ci * the start of a fresh COW device before calling the snapshot
4362306a36Sopenharmony_ci * constructor.
4462306a36Sopenharmony_ci *
4562306a36Sopenharmony_ci * The first chunk of the COW device just contains the header.
4662306a36Sopenharmony_ci * After this there is a chunk filled with exception metadata,
4762306a36Sopenharmony_ci * followed by as many exception chunks as can fit in the
4862306a36Sopenharmony_ci * metadata areas.
4962306a36Sopenharmony_ci *
5062306a36Sopenharmony_ci * All on disk structures are in little-endian format.  The end
5162306a36Sopenharmony_ci * of the exceptions info is indicated by an exception with a
5262306a36Sopenharmony_ci * new_chunk of 0, which is invalid since it would point to the
5362306a36Sopenharmony_ci * header chunk.
5462306a36Sopenharmony_ci */
5562306a36Sopenharmony_ci
/*
 * Magic number marking a persistent snapshot header.  Stored
 * little-endian, its bytes spell "SnAp" - feeble, isn't it.
 */
#define SNAP_MAGIC 0x70416e53

/*
 * Version of the on-disk metadata format handled by this code.
 */
#define SNAPSHOT_DISK_VERSION 1

/* Chunks at the start of the COW device occupied by the header. */
#define NUM_SNAPSHOT_HDR_CHUNKS 1
6762306a36Sopenharmony_ci
6862306a36Sopenharmony_cistruct disk_header {
6962306a36Sopenharmony_ci	__le32 magic;
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci	/*
7262306a36Sopenharmony_ci	 * Is this snapshot valid.  There is no way of recovering
7362306a36Sopenharmony_ci	 * an invalid snapshot.
7462306a36Sopenharmony_ci	 */
7562306a36Sopenharmony_ci	__le32 valid;
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci	/*
7862306a36Sopenharmony_ci	 * Simple, incrementing version. no backward
7962306a36Sopenharmony_ci	 * compatibility.
8062306a36Sopenharmony_ci	 */
8162306a36Sopenharmony_ci	__le32 version;
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci	/* In sectors */
8462306a36Sopenharmony_ci	__le32 chunk_size;
8562306a36Sopenharmony_ci} __packed;
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_cistruct disk_exception {
8862306a36Sopenharmony_ci	__le64 old_chunk;
8962306a36Sopenharmony_ci	__le64 new_chunk;
9062306a36Sopenharmony_ci} __packed;
9162306a36Sopenharmony_ci
/* CPU-endian counterpart of struct disk_exception. */
struct core_exception {
	uint64_t old_chunk;
	uint64_t new_chunk;
};
9662306a36Sopenharmony_ci
/*
 * A completion callback paired with the opaque pointer stored for it.
 * NOTE(review): the code that invokes these is outside this excerpt;
 * presumably 'context' is passed as 'ref' - confirm against the caller.
 */
struct commit_callback {
	void (*callback)(void *ref, int success);
	void *context;
};
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci/*
10362306a36Sopenharmony_ci * The top level structure for a persistent exception store.
10462306a36Sopenharmony_ci */
10562306a36Sopenharmony_cistruct pstore {
10662306a36Sopenharmony_ci	struct dm_exception_store *store;
10762306a36Sopenharmony_ci	int version;
10862306a36Sopenharmony_ci	int valid;
10962306a36Sopenharmony_ci	uint32_t exceptions_per_area;
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci	/*
11262306a36Sopenharmony_ci	 * Now that we have an asynchronous kcopyd there is no
11362306a36Sopenharmony_ci	 * need for large chunk sizes, so it wont hurt to have a
11462306a36Sopenharmony_ci	 * whole chunks worth of metadata in memory at once.
11562306a36Sopenharmony_ci	 */
11662306a36Sopenharmony_ci	void *area;
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci	/*
11962306a36Sopenharmony_ci	 * An area of zeros used to clear the next area.
12062306a36Sopenharmony_ci	 */
12162306a36Sopenharmony_ci	void *zero_area;
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	/*
12462306a36Sopenharmony_ci	 * An area used for header. The header can be written
12562306a36Sopenharmony_ci	 * concurrently with metadata (when invalidating the snapshot),
12662306a36Sopenharmony_ci	 * so it needs a separate buffer.
12762306a36Sopenharmony_ci	 */
12862306a36Sopenharmony_ci	void *header_area;
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	/*
13162306a36Sopenharmony_ci	 * Used to keep track of which metadata area the data in
13262306a36Sopenharmony_ci	 * 'chunk' refers to.
13362306a36Sopenharmony_ci	 */
13462306a36Sopenharmony_ci	chunk_t current_area;
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci	/*
13762306a36Sopenharmony_ci	 * The next free chunk for an exception.
13862306a36Sopenharmony_ci	 *
13962306a36Sopenharmony_ci	 * When creating exceptions, all the chunks here and above are
14062306a36Sopenharmony_ci	 * free.  It holds the next chunk to be allocated.  On rare
14162306a36Sopenharmony_ci	 * occasions (e.g. after a system crash) holes can be left in
14262306a36Sopenharmony_ci	 * the exception store because chunks can be committed out of
14362306a36Sopenharmony_ci	 * order.
14462306a36Sopenharmony_ci	 *
14562306a36Sopenharmony_ci	 * When merging exceptions, it does not necessarily mean all the
14662306a36Sopenharmony_ci	 * chunks here and above are free.  It holds the value it would
14762306a36Sopenharmony_ci	 * have held if all chunks had been committed in order of
14862306a36Sopenharmony_ci	 * allocation.  Consequently the value may occasionally be
14962306a36Sopenharmony_ci	 * slightly too low, but since it's only used for 'status' and
15062306a36Sopenharmony_ci	 * it can never reach its minimum value too early this doesn't
15162306a36Sopenharmony_ci	 * matter.
15262306a36Sopenharmony_ci	 */
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci	chunk_t next_free;
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci	/*
15762306a36Sopenharmony_ci	 * The index of next free exception in the current
15862306a36Sopenharmony_ci	 * metadata area.
15962306a36Sopenharmony_ci	 */
16062306a36Sopenharmony_ci	uint32_t current_committed;
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci	atomic_t pending_count;
16362306a36Sopenharmony_ci	uint32_t callback_count;
16462306a36Sopenharmony_ci	struct commit_callback *callbacks;
16562306a36Sopenharmony_ci	struct dm_io_client *io_client;
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_ci	struct workqueue_struct *metadata_wq;
16862306a36Sopenharmony_ci};
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_cistatic int alloc_area(struct pstore *ps)
17162306a36Sopenharmony_ci{
17262306a36Sopenharmony_ci	int r = -ENOMEM;
17362306a36Sopenharmony_ci	size_t len;
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci	len = ps->store->chunk_size << SECTOR_SHIFT;
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_ci	/*
17862306a36Sopenharmony_ci	 * Allocate the chunk_size block of memory that will hold
17962306a36Sopenharmony_ci	 * a single metadata area.
18062306a36Sopenharmony_ci	 */
18162306a36Sopenharmony_ci	ps->area = vmalloc(len);
18262306a36Sopenharmony_ci	if (!ps->area)
18362306a36Sopenharmony_ci		goto err_area;
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci	ps->zero_area = vzalloc(len);
18662306a36Sopenharmony_ci	if (!ps->zero_area)
18762306a36Sopenharmony_ci		goto err_zero_area;
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci	ps->header_area = vmalloc(len);
19062306a36Sopenharmony_ci	if (!ps->header_area)
19162306a36Sopenharmony_ci		goto err_header_area;
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_ci	return 0;
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_cierr_header_area:
19662306a36Sopenharmony_ci	vfree(ps->zero_area);
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_cierr_zero_area:
19962306a36Sopenharmony_ci	vfree(ps->area);
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_cierr_area:
20262306a36Sopenharmony_ci	return r;
20362306a36Sopenharmony_ci}
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_cistatic void free_area(struct pstore *ps)
20662306a36Sopenharmony_ci{
20762306a36Sopenharmony_ci	vfree(ps->area);
20862306a36Sopenharmony_ci	ps->area = NULL;
20962306a36Sopenharmony_ci	vfree(ps->zero_area);
21062306a36Sopenharmony_ci	ps->zero_area = NULL;
21162306a36Sopenharmony_ci	vfree(ps->header_area);
21262306a36Sopenharmony_ci	ps->header_area = NULL;
21362306a36Sopenharmony_ci}
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_cistruct mdata_req {
21662306a36Sopenharmony_ci	struct dm_io_region *where;
21762306a36Sopenharmony_ci	struct dm_io_request *io_req;
21862306a36Sopenharmony_ci	struct work_struct work;
21962306a36Sopenharmony_ci	int result;
22062306a36Sopenharmony_ci};
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_cistatic void do_metadata(struct work_struct *work)
22362306a36Sopenharmony_ci{
22462306a36Sopenharmony_ci	struct mdata_req *req = container_of(work, struct mdata_req, work);
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ci	req->result = dm_io(req->io_req, 1, req->where, NULL, IOPRIO_DEFAULT);
22762306a36Sopenharmony_ci}
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ci/*
23062306a36Sopenharmony_ci * Read or write a chunk aligned and sized block of data from a device.
23162306a36Sopenharmony_ci */
23262306a36Sopenharmony_cistatic int chunk_io(struct pstore *ps, void *area, chunk_t chunk, blk_opf_t opf,
23362306a36Sopenharmony_ci		    int metadata)
23462306a36Sopenharmony_ci{
23562306a36Sopenharmony_ci	struct dm_io_region where = {
23662306a36Sopenharmony_ci		.bdev = dm_snap_cow(ps->store->snap)->bdev,
23762306a36Sopenharmony_ci		.sector = ps->store->chunk_size * chunk,
23862306a36Sopenharmony_ci		.count = ps->store->chunk_size,
23962306a36Sopenharmony_ci	};
24062306a36Sopenharmony_ci	struct dm_io_request io_req = {
24162306a36Sopenharmony_ci		.bi_opf = opf,
24262306a36Sopenharmony_ci		.mem.type = DM_IO_VMA,
24362306a36Sopenharmony_ci		.mem.ptr.vma = area,
24462306a36Sopenharmony_ci		.client = ps->io_client,
24562306a36Sopenharmony_ci		.notify.fn = NULL,
24662306a36Sopenharmony_ci	};
24762306a36Sopenharmony_ci	struct mdata_req req;
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci	if (!metadata)
25062306a36Sopenharmony_ci		return dm_io(&io_req, 1, &where, NULL, IOPRIO_DEFAULT);
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci	req.where = &where;
25362306a36Sopenharmony_ci	req.io_req = &io_req;
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_ci	/*
25662306a36Sopenharmony_ci	 * Issue the synchronous I/O from a different thread
25762306a36Sopenharmony_ci	 * to avoid submit_bio_noacct recursion.
25862306a36Sopenharmony_ci	 */
25962306a36Sopenharmony_ci	INIT_WORK_ONSTACK(&req.work, do_metadata);
26062306a36Sopenharmony_ci	queue_work(ps->metadata_wq, &req.work);
26162306a36Sopenharmony_ci	flush_workqueue(ps->metadata_wq);
26262306a36Sopenharmony_ci	destroy_work_on_stack(&req.work);
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ci	return req.result;
26562306a36Sopenharmony_ci}
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci/*
26862306a36Sopenharmony_ci * Convert a metadata area index to a chunk index.
26962306a36Sopenharmony_ci */
27062306a36Sopenharmony_cistatic chunk_t area_location(struct pstore *ps, chunk_t area)
27162306a36Sopenharmony_ci{
27262306a36Sopenharmony_ci	return NUM_SNAPSHOT_HDR_CHUNKS + ((ps->exceptions_per_area + 1) * area);
27362306a36Sopenharmony_ci}
27462306a36Sopenharmony_ci
27562306a36Sopenharmony_cistatic void skip_metadata(struct pstore *ps)
27662306a36Sopenharmony_ci{
27762306a36Sopenharmony_ci	uint32_t stride = ps->exceptions_per_area + 1;
27862306a36Sopenharmony_ci	chunk_t next_free = ps->next_free;
27962306a36Sopenharmony_ci
28062306a36Sopenharmony_ci	if (sector_div(next_free, stride) == NUM_SNAPSHOT_HDR_CHUNKS)
28162306a36Sopenharmony_ci		ps->next_free++;
28262306a36Sopenharmony_ci}
28362306a36Sopenharmony_ci
28462306a36Sopenharmony_ci/*
28562306a36Sopenharmony_ci * Read or write a metadata area.  Remembering to skip the first
28662306a36Sopenharmony_ci * chunk which holds the header.
28762306a36Sopenharmony_ci */
28862306a36Sopenharmony_cistatic int area_io(struct pstore *ps, blk_opf_t opf)
28962306a36Sopenharmony_ci{
29062306a36Sopenharmony_ci	chunk_t chunk = area_location(ps, ps->current_area);
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci	return chunk_io(ps, ps->area, chunk, opf, 0);
29362306a36Sopenharmony_ci}
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_cistatic void zero_memory_area(struct pstore *ps)
29662306a36Sopenharmony_ci{
29762306a36Sopenharmony_ci	memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
29862306a36Sopenharmony_ci}
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_cistatic int zero_disk_area(struct pstore *ps, chunk_t area)
30162306a36Sopenharmony_ci{
30262306a36Sopenharmony_ci	return chunk_io(ps, ps->zero_area, area_location(ps, area),
30362306a36Sopenharmony_ci			REQ_OP_WRITE, 0);
30462306a36Sopenharmony_ci}
30562306a36Sopenharmony_ci
30662306a36Sopenharmony_cistatic int read_header(struct pstore *ps, int *new_snapshot)
30762306a36Sopenharmony_ci{
30862306a36Sopenharmony_ci	int r;
30962306a36Sopenharmony_ci	struct disk_header *dh;
31062306a36Sopenharmony_ci	unsigned int chunk_size;
31162306a36Sopenharmony_ci	int chunk_size_supplied = 1;
31262306a36Sopenharmony_ci	char *chunk_err;
31362306a36Sopenharmony_ci
31462306a36Sopenharmony_ci	/*
31562306a36Sopenharmony_ci	 * Use default chunk size (or logical_block_size, if larger)
31662306a36Sopenharmony_ci	 * if none supplied
31762306a36Sopenharmony_ci	 */
31862306a36Sopenharmony_ci	if (!ps->store->chunk_size) {
31962306a36Sopenharmony_ci		ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
32062306a36Sopenharmony_ci		    bdev_logical_block_size(dm_snap_cow(ps->store->snap)->
32162306a36Sopenharmony_ci					    bdev) >> 9);
32262306a36Sopenharmony_ci		ps->store->chunk_mask = ps->store->chunk_size - 1;
32362306a36Sopenharmony_ci		ps->store->chunk_shift = __ffs(ps->store->chunk_size);
32462306a36Sopenharmony_ci		chunk_size_supplied = 0;
32562306a36Sopenharmony_ci	}
32662306a36Sopenharmony_ci
32762306a36Sopenharmony_ci	ps->io_client = dm_io_client_create();
32862306a36Sopenharmony_ci	if (IS_ERR(ps->io_client))
32962306a36Sopenharmony_ci		return PTR_ERR(ps->io_client);
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_ci	r = alloc_area(ps);
33262306a36Sopenharmony_ci	if (r)
33362306a36Sopenharmony_ci		return r;
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci	r = chunk_io(ps, ps->header_area, 0, REQ_OP_READ, 1);
33662306a36Sopenharmony_ci	if (r)
33762306a36Sopenharmony_ci		goto bad;
33862306a36Sopenharmony_ci
33962306a36Sopenharmony_ci	dh = ps->header_area;
34062306a36Sopenharmony_ci
34162306a36Sopenharmony_ci	if (le32_to_cpu(dh->magic) == 0) {
34262306a36Sopenharmony_ci		*new_snapshot = 1;
34362306a36Sopenharmony_ci		return 0;
34462306a36Sopenharmony_ci	}
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
34762306a36Sopenharmony_ci		DMWARN("Invalid or corrupt snapshot");
34862306a36Sopenharmony_ci		r = -ENXIO;
34962306a36Sopenharmony_ci		goto bad;
35062306a36Sopenharmony_ci	}
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_ci	*new_snapshot = 0;
35362306a36Sopenharmony_ci	ps->valid = le32_to_cpu(dh->valid);
35462306a36Sopenharmony_ci	ps->version = le32_to_cpu(dh->version);
35562306a36Sopenharmony_ci	chunk_size = le32_to_cpu(dh->chunk_size);
35662306a36Sopenharmony_ci
35762306a36Sopenharmony_ci	if (ps->store->chunk_size == chunk_size)
35862306a36Sopenharmony_ci		return 0;
35962306a36Sopenharmony_ci
36062306a36Sopenharmony_ci	if (chunk_size_supplied)
36162306a36Sopenharmony_ci		DMWARN("chunk size %u in device metadata overrides table chunk size of %u.",
36262306a36Sopenharmony_ci		       chunk_size, ps->store->chunk_size);
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci	/* We had a bogus chunk_size. Fix stuff up. */
36562306a36Sopenharmony_ci	free_area(ps);
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_ci	r = dm_exception_store_set_chunk_size(ps->store, chunk_size,
36862306a36Sopenharmony_ci					      &chunk_err);
36962306a36Sopenharmony_ci	if (r) {
37062306a36Sopenharmony_ci		DMERR("invalid on-disk chunk size %u: %s.",
37162306a36Sopenharmony_ci		      chunk_size, chunk_err);
37262306a36Sopenharmony_ci		return r;
37362306a36Sopenharmony_ci	}
37462306a36Sopenharmony_ci
37562306a36Sopenharmony_ci	r = alloc_area(ps);
37662306a36Sopenharmony_ci	return r;
37762306a36Sopenharmony_ci
37862306a36Sopenharmony_cibad:
37962306a36Sopenharmony_ci	free_area(ps);
38062306a36Sopenharmony_ci	return r;
38162306a36Sopenharmony_ci}
38262306a36Sopenharmony_ci
38362306a36Sopenharmony_cistatic int write_header(struct pstore *ps)
38462306a36Sopenharmony_ci{
38562306a36Sopenharmony_ci	struct disk_header *dh;
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_ci	memset(ps->header_area, 0, ps->store->chunk_size << SECTOR_SHIFT);
38862306a36Sopenharmony_ci
38962306a36Sopenharmony_ci	dh = ps->header_area;
39062306a36Sopenharmony_ci	dh->magic = cpu_to_le32(SNAP_MAGIC);
39162306a36Sopenharmony_ci	dh->valid = cpu_to_le32(ps->valid);
39262306a36Sopenharmony_ci	dh->version = cpu_to_le32(ps->version);
39362306a36Sopenharmony_ci	dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
39462306a36Sopenharmony_ci
39562306a36Sopenharmony_ci	return chunk_io(ps, ps->header_area, 0, REQ_OP_WRITE, 1);
39662306a36Sopenharmony_ci}
39762306a36Sopenharmony_ci
/*
 * Access functions for the disk exceptions; these do the endian
 * conversions.
 */
40162306a36Sopenharmony_cistatic struct disk_exception *get_exception(struct pstore *ps, void *ps_area,
40262306a36Sopenharmony_ci					    uint32_t index)
40362306a36Sopenharmony_ci{
40462306a36Sopenharmony_ci	BUG_ON(index >= ps->exceptions_per_area);
40562306a36Sopenharmony_ci
40662306a36Sopenharmony_ci	return ((struct disk_exception *) ps_area) + index;
40762306a36Sopenharmony_ci}
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_cistatic void read_exception(struct pstore *ps, void *ps_area,
41062306a36Sopenharmony_ci			   uint32_t index, struct core_exception *result)
41162306a36Sopenharmony_ci{
41262306a36Sopenharmony_ci	struct disk_exception *de = get_exception(ps, ps_area, index);
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_ci	/* copy it */
41562306a36Sopenharmony_ci	result->old_chunk = le64_to_cpu(de->old_chunk);
41662306a36Sopenharmony_ci	result->new_chunk = le64_to_cpu(de->new_chunk);
41762306a36Sopenharmony_ci}
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_cistatic void write_exception(struct pstore *ps,
42062306a36Sopenharmony_ci			    uint32_t index, struct core_exception *e)
42162306a36Sopenharmony_ci{
42262306a36Sopenharmony_ci	struct disk_exception *de = get_exception(ps, ps->area, index);
42362306a36Sopenharmony_ci
42462306a36Sopenharmony_ci	/* copy it */
42562306a36Sopenharmony_ci	de->old_chunk = cpu_to_le64(e->old_chunk);
42662306a36Sopenharmony_ci	de->new_chunk = cpu_to_le64(e->new_chunk);
42762306a36Sopenharmony_ci}
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_cistatic void clear_exception(struct pstore *ps, uint32_t index)
43062306a36Sopenharmony_ci{
43162306a36Sopenharmony_ci	struct disk_exception *de = get_exception(ps, ps->area, index);
43262306a36Sopenharmony_ci
43362306a36Sopenharmony_ci	/* clear it */
43462306a36Sopenharmony_ci	de->old_chunk = 0;
43562306a36Sopenharmony_ci	de->new_chunk = 0;
43662306a36Sopenharmony_ci}
43762306a36Sopenharmony_ci
43862306a36Sopenharmony_ci/*
43962306a36Sopenharmony_ci * Registers the exceptions that are present in the current area.
44062306a36Sopenharmony_ci * 'full' is filled in to indicate if the area has been
44162306a36Sopenharmony_ci * filled.
44262306a36Sopenharmony_ci */
44362306a36Sopenharmony_cistatic int insert_exceptions(struct pstore *ps, void *ps_area,
44462306a36Sopenharmony_ci			     int (*callback)(void *callback_context,
44562306a36Sopenharmony_ci					     chunk_t old, chunk_t new),
44662306a36Sopenharmony_ci			     void *callback_context,
44762306a36Sopenharmony_ci			     int *full)
44862306a36Sopenharmony_ci{
44962306a36Sopenharmony_ci	int r;
45062306a36Sopenharmony_ci	unsigned int i;
45162306a36Sopenharmony_ci	struct core_exception e;
45262306a36Sopenharmony_ci
45362306a36Sopenharmony_ci	/* presume the area is full */
45462306a36Sopenharmony_ci	*full = 1;
45562306a36Sopenharmony_ci
45662306a36Sopenharmony_ci	for (i = 0; i < ps->exceptions_per_area; i++) {
45762306a36Sopenharmony_ci		read_exception(ps, ps_area, i, &e);
45862306a36Sopenharmony_ci
45962306a36Sopenharmony_ci		/*
46062306a36Sopenharmony_ci		 * If the new_chunk is pointing at the start of
46162306a36Sopenharmony_ci		 * the COW device, where the first metadata area
46262306a36Sopenharmony_ci		 * is we know that we've hit the end of the
46362306a36Sopenharmony_ci		 * exceptions.  Therefore the area is not full.
46462306a36Sopenharmony_ci		 */
46562306a36Sopenharmony_ci		if (e.new_chunk == 0LL) {
46662306a36Sopenharmony_ci			ps->current_committed = i;
46762306a36Sopenharmony_ci			*full = 0;
46862306a36Sopenharmony_ci			break;
46962306a36Sopenharmony_ci		}
47062306a36Sopenharmony_ci
47162306a36Sopenharmony_ci		/*
47262306a36Sopenharmony_ci		 * Keep track of the start of the free chunks.
47362306a36Sopenharmony_ci		 */
47462306a36Sopenharmony_ci		if (ps->next_free <= e.new_chunk)
47562306a36Sopenharmony_ci			ps->next_free = e.new_chunk + 1;
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci		/*
47862306a36Sopenharmony_ci		 * Otherwise we add the exception to the snapshot.
47962306a36Sopenharmony_ci		 */
48062306a36Sopenharmony_ci		r = callback(callback_context, e.old_chunk, e.new_chunk);
48162306a36Sopenharmony_ci		if (r)
48262306a36Sopenharmony_ci			return r;
48362306a36Sopenharmony_ci	}
48462306a36Sopenharmony_ci
48562306a36Sopenharmony_ci	return 0;
48662306a36Sopenharmony_ci}
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_cistatic int read_exceptions(struct pstore *ps,
48962306a36Sopenharmony_ci			   int (*callback)(void *callback_context, chunk_t old,
49062306a36Sopenharmony_ci					   chunk_t new),
49162306a36Sopenharmony_ci			   void *callback_context)
49262306a36Sopenharmony_ci{
49362306a36Sopenharmony_ci	int r, full = 1;
49462306a36Sopenharmony_ci	struct dm_bufio_client *client;
49562306a36Sopenharmony_ci	chunk_t prefetch_area = 0;
49662306a36Sopenharmony_ci
49762306a36Sopenharmony_ci	client = dm_bufio_client_create(dm_snap_cow(ps->store->snap)->bdev,
49862306a36Sopenharmony_ci					ps->store->chunk_size << SECTOR_SHIFT,
49962306a36Sopenharmony_ci					1, 0, NULL, NULL, 0);
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci	if (IS_ERR(client))
50262306a36Sopenharmony_ci		return PTR_ERR(client);
50362306a36Sopenharmony_ci
50462306a36Sopenharmony_ci	/*
50562306a36Sopenharmony_ci	 * Setup for one current buffer + desired readahead buffers.
50662306a36Sopenharmony_ci	 */
50762306a36Sopenharmony_ci	dm_bufio_set_minimum_buffers(client, 1 + DM_PREFETCH_CHUNKS);
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_ci	/*
51062306a36Sopenharmony_ci	 * Keeping reading chunks and inserting exceptions until
51162306a36Sopenharmony_ci	 * we find a partially full area.
51262306a36Sopenharmony_ci	 */
51362306a36Sopenharmony_ci	for (ps->current_area = 0; full; ps->current_area++) {
51462306a36Sopenharmony_ci		struct dm_buffer *bp;
51562306a36Sopenharmony_ci		void *area;
51662306a36Sopenharmony_ci		chunk_t chunk;
51762306a36Sopenharmony_ci
51862306a36Sopenharmony_ci		if (unlikely(prefetch_area < ps->current_area))
51962306a36Sopenharmony_ci			prefetch_area = ps->current_area;
52062306a36Sopenharmony_ci
52162306a36Sopenharmony_ci		if (DM_PREFETCH_CHUNKS) {
52262306a36Sopenharmony_ci			do {
52362306a36Sopenharmony_ci				chunk_t pf_chunk = area_location(ps, prefetch_area);
52462306a36Sopenharmony_ci
52562306a36Sopenharmony_ci				if (unlikely(pf_chunk >= dm_bufio_get_device_size(client)))
52662306a36Sopenharmony_ci					break;
52762306a36Sopenharmony_ci				dm_bufio_prefetch(client, pf_chunk, 1);
52862306a36Sopenharmony_ci				prefetch_area++;
52962306a36Sopenharmony_ci				if (unlikely(!prefetch_area))
53062306a36Sopenharmony_ci					break;
53162306a36Sopenharmony_ci			} while (prefetch_area <= ps->current_area + DM_PREFETCH_CHUNKS);
53262306a36Sopenharmony_ci		}
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_ci		chunk = area_location(ps, ps->current_area);
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_ci		area = dm_bufio_read(client, chunk, &bp);
53762306a36Sopenharmony_ci		if (IS_ERR(area)) {
53862306a36Sopenharmony_ci			r = PTR_ERR(area);
53962306a36Sopenharmony_ci			goto ret_destroy_bufio;
54062306a36Sopenharmony_ci		}
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_ci		r = insert_exceptions(ps, area, callback, callback_context,
54362306a36Sopenharmony_ci				      &full);
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ci		if (!full)
54662306a36Sopenharmony_ci			memcpy(ps->area, area, ps->store->chunk_size << SECTOR_SHIFT);
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_ci		dm_bufio_release(bp);
54962306a36Sopenharmony_ci
55062306a36Sopenharmony_ci		dm_bufio_forget(client, chunk);
55162306a36Sopenharmony_ci
55262306a36Sopenharmony_ci		if (unlikely(r))
55362306a36Sopenharmony_ci			goto ret_destroy_bufio;
55462306a36Sopenharmony_ci	}
55562306a36Sopenharmony_ci
55662306a36Sopenharmony_ci	ps->current_area--;
55762306a36Sopenharmony_ci
55862306a36Sopenharmony_ci	skip_metadata(ps);
55962306a36Sopenharmony_ci
56062306a36Sopenharmony_ci	r = 0;
56162306a36Sopenharmony_ci
56262306a36Sopenharmony_ciret_destroy_bufio:
56362306a36Sopenharmony_ci	dm_bufio_client_destroy(client);
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci	return r;
56662306a36Sopenharmony_ci}
56762306a36Sopenharmony_ci
56862306a36Sopenharmony_cistatic struct pstore *get_info(struct dm_exception_store *store)
56962306a36Sopenharmony_ci{
57062306a36Sopenharmony_ci	return store->context;
57162306a36Sopenharmony_ci}
57262306a36Sopenharmony_ci
57362306a36Sopenharmony_cistatic void persistent_usage(struct dm_exception_store *store,
57462306a36Sopenharmony_ci			     sector_t *total_sectors,
57562306a36Sopenharmony_ci			     sector_t *sectors_allocated,
57662306a36Sopenharmony_ci			     sector_t *metadata_sectors)
57762306a36Sopenharmony_ci{
57862306a36Sopenharmony_ci	struct pstore *ps = get_info(store);
57962306a36Sopenharmony_ci
58062306a36Sopenharmony_ci	*sectors_allocated = ps->next_free * store->chunk_size;
58162306a36Sopenharmony_ci	*total_sectors = get_dev_size(dm_snap_cow(store->snap)->bdev);
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_ci	/*
58462306a36Sopenharmony_ci	 * First chunk is the fixed header.
58562306a36Sopenharmony_ci	 * Then there are (ps->current_area + 1) metadata chunks, each one
58662306a36Sopenharmony_ci	 * separated from the next by ps->exceptions_per_area data chunks.
58762306a36Sopenharmony_ci	 */
58862306a36Sopenharmony_ci	*metadata_sectors = (ps->current_area + 1 + NUM_SNAPSHOT_HDR_CHUNKS) *
58962306a36Sopenharmony_ci			    store->chunk_size;
59062306a36Sopenharmony_ci}
59162306a36Sopenharmony_ci
59262306a36Sopenharmony_cistatic void persistent_dtr(struct dm_exception_store *store)
59362306a36Sopenharmony_ci{
59462306a36Sopenharmony_ci	struct pstore *ps = get_info(store);
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_ci	destroy_workqueue(ps->metadata_wq);
59762306a36Sopenharmony_ci
59862306a36Sopenharmony_ci	/* Created in read_header */
59962306a36Sopenharmony_ci	if (ps->io_client)
60062306a36Sopenharmony_ci		dm_io_client_destroy(ps->io_client);
60162306a36Sopenharmony_ci	free_area(ps);
60262306a36Sopenharmony_ci
60362306a36Sopenharmony_ci	/* Allocated in persistent_read_metadata */
60462306a36Sopenharmony_ci	kvfree(ps->callbacks);
60562306a36Sopenharmony_ci
60662306a36Sopenharmony_ci	kfree(ps);
60762306a36Sopenharmony_ci}
60862306a36Sopenharmony_ci
60962306a36Sopenharmony_cistatic int persistent_read_metadata(struct dm_exception_store *store,
61062306a36Sopenharmony_ci				    int (*callback)(void *callback_context,
61162306a36Sopenharmony_ci						    chunk_t old, chunk_t new),
61262306a36Sopenharmony_ci				    void *callback_context)
61362306a36Sopenharmony_ci{
61462306a36Sopenharmony_ci	int r, new_snapshot;
61562306a36Sopenharmony_ci	struct pstore *ps = get_info(store);
61662306a36Sopenharmony_ci
61762306a36Sopenharmony_ci	/*
61862306a36Sopenharmony_ci	 * Read the snapshot header.
61962306a36Sopenharmony_ci	 */
62062306a36Sopenharmony_ci	r = read_header(ps, &new_snapshot);
62162306a36Sopenharmony_ci	if (r)
62262306a36Sopenharmony_ci		return r;
62362306a36Sopenharmony_ci
62462306a36Sopenharmony_ci	/*
62562306a36Sopenharmony_ci	 * Now we know correct chunk_size, complete the initialisation.
62662306a36Sopenharmony_ci	 */
62762306a36Sopenharmony_ci	ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
62862306a36Sopenharmony_ci				  sizeof(struct disk_exception);
62962306a36Sopenharmony_ci	ps->callbacks = kvcalloc(ps->exceptions_per_area,
63062306a36Sopenharmony_ci				 sizeof(*ps->callbacks), GFP_KERNEL);
63162306a36Sopenharmony_ci	if (!ps->callbacks)
63262306a36Sopenharmony_ci		return -ENOMEM;
63362306a36Sopenharmony_ci
63462306a36Sopenharmony_ci	/*
63562306a36Sopenharmony_ci	 * Do we need to setup a new snapshot ?
63662306a36Sopenharmony_ci	 */
63762306a36Sopenharmony_ci	if (new_snapshot) {
63862306a36Sopenharmony_ci		r = write_header(ps);
63962306a36Sopenharmony_ci		if (r) {
64062306a36Sopenharmony_ci			DMWARN("write_header failed");
64162306a36Sopenharmony_ci			return r;
64262306a36Sopenharmony_ci		}
64362306a36Sopenharmony_ci
64462306a36Sopenharmony_ci		ps->current_area = 0;
64562306a36Sopenharmony_ci		zero_memory_area(ps);
64662306a36Sopenharmony_ci		r = zero_disk_area(ps, 0);
64762306a36Sopenharmony_ci		if (r)
64862306a36Sopenharmony_ci			DMWARN("zero_disk_area(0) failed");
64962306a36Sopenharmony_ci		return r;
65062306a36Sopenharmony_ci	}
65162306a36Sopenharmony_ci	/*
65262306a36Sopenharmony_ci	 * Sanity checks.
65362306a36Sopenharmony_ci	 */
65462306a36Sopenharmony_ci	if (ps->version != SNAPSHOT_DISK_VERSION) {
65562306a36Sopenharmony_ci		DMWARN("unable to handle snapshot disk version %d",
65662306a36Sopenharmony_ci		       ps->version);
65762306a36Sopenharmony_ci		return -EINVAL;
65862306a36Sopenharmony_ci	}
65962306a36Sopenharmony_ci
66062306a36Sopenharmony_ci	/*
66162306a36Sopenharmony_ci	 * Metadata are valid, but snapshot is invalidated
66262306a36Sopenharmony_ci	 */
66362306a36Sopenharmony_ci	if (!ps->valid)
66462306a36Sopenharmony_ci		return 1;
66562306a36Sopenharmony_ci
66662306a36Sopenharmony_ci	/*
66762306a36Sopenharmony_ci	 * Read the metadata.
66862306a36Sopenharmony_ci	 */
66962306a36Sopenharmony_ci	r = read_exceptions(ps, callback, callback_context);
67062306a36Sopenharmony_ci
67162306a36Sopenharmony_ci	return r;
67262306a36Sopenharmony_ci}
67362306a36Sopenharmony_ci
67462306a36Sopenharmony_cistatic int persistent_prepare_exception(struct dm_exception_store *store,
67562306a36Sopenharmony_ci					struct dm_exception *e)
67662306a36Sopenharmony_ci{
67762306a36Sopenharmony_ci	struct pstore *ps = get_info(store);
67862306a36Sopenharmony_ci	sector_t size = get_dev_size(dm_snap_cow(store->snap)->bdev);
67962306a36Sopenharmony_ci
68062306a36Sopenharmony_ci	/* Is there enough room ? */
68162306a36Sopenharmony_ci	if (size < ((ps->next_free + 1) * store->chunk_size))
68262306a36Sopenharmony_ci		return -ENOSPC;
68362306a36Sopenharmony_ci
68462306a36Sopenharmony_ci	e->new_chunk = ps->next_free;
68562306a36Sopenharmony_ci
68662306a36Sopenharmony_ci	/*
68762306a36Sopenharmony_ci	 * Move onto the next free pending, making sure to take
68862306a36Sopenharmony_ci	 * into account the location of the metadata chunks.
68962306a36Sopenharmony_ci	 */
69062306a36Sopenharmony_ci	ps->next_free++;
69162306a36Sopenharmony_ci	skip_metadata(ps);
69262306a36Sopenharmony_ci
69362306a36Sopenharmony_ci	atomic_inc(&ps->pending_count);
69462306a36Sopenharmony_ci	return 0;
69562306a36Sopenharmony_ci}
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_cistatic void persistent_commit_exception(struct dm_exception_store *store,
69862306a36Sopenharmony_ci					struct dm_exception *e, int valid,
69962306a36Sopenharmony_ci					void (*callback)(void *, int success),
70062306a36Sopenharmony_ci					void *callback_context)
70162306a36Sopenharmony_ci{
70262306a36Sopenharmony_ci	unsigned int i;
70362306a36Sopenharmony_ci	struct pstore *ps = get_info(store);
70462306a36Sopenharmony_ci	struct core_exception ce;
70562306a36Sopenharmony_ci	struct commit_callback *cb;
70662306a36Sopenharmony_ci
70762306a36Sopenharmony_ci	if (!valid)
70862306a36Sopenharmony_ci		ps->valid = 0;
70962306a36Sopenharmony_ci
71062306a36Sopenharmony_ci	ce.old_chunk = e->old_chunk;
71162306a36Sopenharmony_ci	ce.new_chunk = e->new_chunk;
71262306a36Sopenharmony_ci	write_exception(ps, ps->current_committed++, &ce);
71362306a36Sopenharmony_ci
71462306a36Sopenharmony_ci	/*
71562306a36Sopenharmony_ci	 * Add the callback to the back of the array.  This code
71662306a36Sopenharmony_ci	 * is the only place where the callback array is
71762306a36Sopenharmony_ci	 * manipulated, and we know that it will never be called
71862306a36Sopenharmony_ci	 * multiple times concurrently.
71962306a36Sopenharmony_ci	 */
72062306a36Sopenharmony_ci	cb = ps->callbacks + ps->callback_count++;
72162306a36Sopenharmony_ci	cb->callback = callback;
72262306a36Sopenharmony_ci	cb->context = callback_context;
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci	/*
72562306a36Sopenharmony_ci	 * If there are exceptions in flight and we have not yet
72662306a36Sopenharmony_ci	 * filled this metadata area there's nothing more to do.
72762306a36Sopenharmony_ci	 */
72862306a36Sopenharmony_ci	if (!atomic_dec_and_test(&ps->pending_count) &&
72962306a36Sopenharmony_ci	    (ps->current_committed != ps->exceptions_per_area))
73062306a36Sopenharmony_ci		return;
73162306a36Sopenharmony_ci
73262306a36Sopenharmony_ci	/*
73362306a36Sopenharmony_ci	 * If we completely filled the current area, then wipe the next one.
73462306a36Sopenharmony_ci	 */
73562306a36Sopenharmony_ci	if ((ps->current_committed == ps->exceptions_per_area) &&
73662306a36Sopenharmony_ci	    zero_disk_area(ps, ps->current_area + 1))
73762306a36Sopenharmony_ci		ps->valid = 0;
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_ci	/*
74062306a36Sopenharmony_ci	 * Commit exceptions to disk.
74162306a36Sopenharmony_ci	 */
74262306a36Sopenharmony_ci	if (ps->valid && area_io(ps, REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA |
74362306a36Sopenharmony_ci				 REQ_SYNC))
74462306a36Sopenharmony_ci		ps->valid = 0;
74562306a36Sopenharmony_ci
74662306a36Sopenharmony_ci	/*
74762306a36Sopenharmony_ci	 * Advance to the next area if this one is full.
74862306a36Sopenharmony_ci	 */
74962306a36Sopenharmony_ci	if (ps->current_committed == ps->exceptions_per_area) {
75062306a36Sopenharmony_ci		ps->current_committed = 0;
75162306a36Sopenharmony_ci		ps->current_area++;
75262306a36Sopenharmony_ci		zero_memory_area(ps);
75362306a36Sopenharmony_ci	}
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_ci	for (i = 0; i < ps->callback_count; i++) {
75662306a36Sopenharmony_ci		cb = ps->callbacks + i;
75762306a36Sopenharmony_ci		cb->callback(cb->context, ps->valid);
75862306a36Sopenharmony_ci	}
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_ci	ps->callback_count = 0;
76162306a36Sopenharmony_ci}
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_cistatic int persistent_prepare_merge(struct dm_exception_store *store,
76462306a36Sopenharmony_ci				    chunk_t *last_old_chunk,
76562306a36Sopenharmony_ci				    chunk_t *last_new_chunk)
76662306a36Sopenharmony_ci{
76762306a36Sopenharmony_ci	struct pstore *ps = get_info(store);
76862306a36Sopenharmony_ci	struct core_exception ce;
76962306a36Sopenharmony_ci	int nr_consecutive;
77062306a36Sopenharmony_ci	int r;
77162306a36Sopenharmony_ci
77262306a36Sopenharmony_ci	/*
77362306a36Sopenharmony_ci	 * When current area is empty, move back to preceding area.
77462306a36Sopenharmony_ci	 */
77562306a36Sopenharmony_ci	if (!ps->current_committed) {
77662306a36Sopenharmony_ci		/*
77762306a36Sopenharmony_ci		 * Have we finished?
77862306a36Sopenharmony_ci		 */
77962306a36Sopenharmony_ci		if (!ps->current_area)
78062306a36Sopenharmony_ci			return 0;
78162306a36Sopenharmony_ci
78262306a36Sopenharmony_ci		ps->current_area--;
78362306a36Sopenharmony_ci		r = area_io(ps, REQ_OP_READ);
78462306a36Sopenharmony_ci		if (r < 0)
78562306a36Sopenharmony_ci			return r;
78662306a36Sopenharmony_ci		ps->current_committed = ps->exceptions_per_area;
78762306a36Sopenharmony_ci	}
78862306a36Sopenharmony_ci
78962306a36Sopenharmony_ci	read_exception(ps, ps->area, ps->current_committed - 1, &ce);
79062306a36Sopenharmony_ci	*last_old_chunk = ce.old_chunk;
79162306a36Sopenharmony_ci	*last_new_chunk = ce.new_chunk;
79262306a36Sopenharmony_ci
79362306a36Sopenharmony_ci	/*
79462306a36Sopenharmony_ci	 * Find number of consecutive chunks within the current area,
79562306a36Sopenharmony_ci	 * working backwards.
79662306a36Sopenharmony_ci	 */
79762306a36Sopenharmony_ci	for (nr_consecutive = 1; nr_consecutive < ps->current_committed;
79862306a36Sopenharmony_ci	     nr_consecutive++) {
79962306a36Sopenharmony_ci		read_exception(ps, ps->area,
80062306a36Sopenharmony_ci			       ps->current_committed - 1 - nr_consecutive, &ce);
80162306a36Sopenharmony_ci		if (ce.old_chunk != *last_old_chunk - nr_consecutive ||
80262306a36Sopenharmony_ci		    ce.new_chunk != *last_new_chunk - nr_consecutive)
80362306a36Sopenharmony_ci			break;
80462306a36Sopenharmony_ci	}
80562306a36Sopenharmony_ci
80662306a36Sopenharmony_ci	return nr_consecutive;
80762306a36Sopenharmony_ci}
80862306a36Sopenharmony_ci
80962306a36Sopenharmony_cistatic int persistent_commit_merge(struct dm_exception_store *store,
81062306a36Sopenharmony_ci				   int nr_merged)
81162306a36Sopenharmony_ci{
81262306a36Sopenharmony_ci	int r, i;
81362306a36Sopenharmony_ci	struct pstore *ps = get_info(store);
81462306a36Sopenharmony_ci
81562306a36Sopenharmony_ci	BUG_ON(nr_merged > ps->current_committed);
81662306a36Sopenharmony_ci
81762306a36Sopenharmony_ci	for (i = 0; i < nr_merged; i++)
81862306a36Sopenharmony_ci		clear_exception(ps, ps->current_committed - 1 - i);
81962306a36Sopenharmony_ci
82062306a36Sopenharmony_ci	r = area_io(ps, REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA);
82162306a36Sopenharmony_ci	if (r < 0)
82262306a36Sopenharmony_ci		return r;
82362306a36Sopenharmony_ci
82462306a36Sopenharmony_ci	ps->current_committed -= nr_merged;
82562306a36Sopenharmony_ci
82662306a36Sopenharmony_ci	/*
82762306a36Sopenharmony_ci	 * At this stage, only persistent_usage() uses ps->next_free, so
82862306a36Sopenharmony_ci	 * we make no attempt to keep ps->next_free strictly accurate
82962306a36Sopenharmony_ci	 * as exceptions may have been committed out-of-order originally.
83062306a36Sopenharmony_ci	 * Once a snapshot has become merging, we set it to the value it
83162306a36Sopenharmony_ci	 * would have held had all the exceptions been committed in order.
83262306a36Sopenharmony_ci	 *
83362306a36Sopenharmony_ci	 * ps->current_area does not get reduced by prepare_merge() until
83462306a36Sopenharmony_ci	 * after commit_merge() has removed the nr_merged previous exceptions.
83562306a36Sopenharmony_ci	 */
83662306a36Sopenharmony_ci	ps->next_free = area_location(ps, ps->current_area) +
83762306a36Sopenharmony_ci			ps->current_committed + 1;
83862306a36Sopenharmony_ci
83962306a36Sopenharmony_ci	return 0;
84062306a36Sopenharmony_ci}
84162306a36Sopenharmony_ci
84262306a36Sopenharmony_cistatic void persistent_drop_snapshot(struct dm_exception_store *store)
84362306a36Sopenharmony_ci{
84462306a36Sopenharmony_ci	struct pstore *ps = get_info(store);
84562306a36Sopenharmony_ci
84662306a36Sopenharmony_ci	ps->valid = 0;
84762306a36Sopenharmony_ci	if (write_header(ps))
84862306a36Sopenharmony_ci		DMWARN("write header failed");
84962306a36Sopenharmony_ci}
85062306a36Sopenharmony_ci
85162306a36Sopenharmony_cistatic int persistent_ctr(struct dm_exception_store *store, char *options)
85262306a36Sopenharmony_ci{
85362306a36Sopenharmony_ci	struct pstore *ps;
85462306a36Sopenharmony_ci	int r;
85562306a36Sopenharmony_ci
85662306a36Sopenharmony_ci	/* allocate the pstore */
85762306a36Sopenharmony_ci	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
85862306a36Sopenharmony_ci	if (!ps)
85962306a36Sopenharmony_ci		return -ENOMEM;
86062306a36Sopenharmony_ci
86162306a36Sopenharmony_ci	ps->store = store;
86262306a36Sopenharmony_ci	ps->valid = 1;
86362306a36Sopenharmony_ci	ps->version = SNAPSHOT_DISK_VERSION;
86462306a36Sopenharmony_ci	ps->area = NULL;
86562306a36Sopenharmony_ci	ps->zero_area = NULL;
86662306a36Sopenharmony_ci	ps->header_area = NULL;
86762306a36Sopenharmony_ci	ps->next_free = NUM_SNAPSHOT_HDR_CHUNKS + 1; /* header and 1st area */
86862306a36Sopenharmony_ci	ps->current_committed = 0;
86962306a36Sopenharmony_ci
87062306a36Sopenharmony_ci	ps->callback_count = 0;
87162306a36Sopenharmony_ci	atomic_set(&ps->pending_count, 0);
87262306a36Sopenharmony_ci	ps->callbacks = NULL;
87362306a36Sopenharmony_ci
87462306a36Sopenharmony_ci	ps->metadata_wq = alloc_workqueue("ksnaphd", WQ_MEM_RECLAIM, 0);
87562306a36Sopenharmony_ci	if (!ps->metadata_wq) {
87662306a36Sopenharmony_ci		DMERR("couldn't start header metadata update thread");
87762306a36Sopenharmony_ci		r = -ENOMEM;
87862306a36Sopenharmony_ci		goto err_workqueue;
87962306a36Sopenharmony_ci	}
88062306a36Sopenharmony_ci
88162306a36Sopenharmony_ci	if (options) {
88262306a36Sopenharmony_ci		char overflow = toupper(options[0]);
88362306a36Sopenharmony_ci
88462306a36Sopenharmony_ci		if (overflow == 'O')
88562306a36Sopenharmony_ci			store->userspace_supports_overflow = true;
88662306a36Sopenharmony_ci		else {
88762306a36Sopenharmony_ci			DMERR("Unsupported persistent store option: %s", options);
88862306a36Sopenharmony_ci			r = -EINVAL;
88962306a36Sopenharmony_ci			goto err_options;
89062306a36Sopenharmony_ci		}
89162306a36Sopenharmony_ci	}
89262306a36Sopenharmony_ci
89362306a36Sopenharmony_ci	store->context = ps;
89462306a36Sopenharmony_ci
89562306a36Sopenharmony_ci	return 0;
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_cierr_options:
89862306a36Sopenharmony_ci	destroy_workqueue(ps->metadata_wq);
89962306a36Sopenharmony_cierr_workqueue:
90062306a36Sopenharmony_ci	kfree(ps);
90162306a36Sopenharmony_ci
90262306a36Sopenharmony_ci	return r;
90362306a36Sopenharmony_ci}
90462306a36Sopenharmony_ci
90562306a36Sopenharmony_cistatic unsigned int persistent_status(struct dm_exception_store *store,
90662306a36Sopenharmony_ci				  status_type_t status, char *result,
90762306a36Sopenharmony_ci				  unsigned int maxlen)
90862306a36Sopenharmony_ci{
90962306a36Sopenharmony_ci	unsigned int sz = 0;
91062306a36Sopenharmony_ci
91162306a36Sopenharmony_ci	switch (status) {
91262306a36Sopenharmony_ci	case STATUSTYPE_INFO:
91362306a36Sopenharmony_ci		break;
91462306a36Sopenharmony_ci	case STATUSTYPE_TABLE:
91562306a36Sopenharmony_ci		DMEMIT(" %s %llu", store->userspace_supports_overflow ? "PO" : "P",
91662306a36Sopenharmony_ci		       (unsigned long long)store->chunk_size);
91762306a36Sopenharmony_ci		break;
91862306a36Sopenharmony_ci	case STATUSTYPE_IMA:
91962306a36Sopenharmony_ci		*result = '\0';
92062306a36Sopenharmony_ci		break;
92162306a36Sopenharmony_ci	}
92262306a36Sopenharmony_ci
92362306a36Sopenharmony_ci	return sz;
92462306a36Sopenharmony_ci}
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_cistatic struct dm_exception_store_type _persistent_type = {
92762306a36Sopenharmony_ci	.name = "persistent",
92862306a36Sopenharmony_ci	.module = THIS_MODULE,
92962306a36Sopenharmony_ci	.ctr = persistent_ctr,
93062306a36Sopenharmony_ci	.dtr = persistent_dtr,
93162306a36Sopenharmony_ci	.read_metadata = persistent_read_metadata,
93262306a36Sopenharmony_ci	.prepare_exception = persistent_prepare_exception,
93362306a36Sopenharmony_ci	.commit_exception = persistent_commit_exception,
93462306a36Sopenharmony_ci	.prepare_merge = persistent_prepare_merge,
93562306a36Sopenharmony_ci	.commit_merge = persistent_commit_merge,
93662306a36Sopenharmony_ci	.drop_snapshot = persistent_drop_snapshot,
93762306a36Sopenharmony_ci	.usage = persistent_usage,
93862306a36Sopenharmony_ci	.status = persistent_status,
93962306a36Sopenharmony_ci};
94062306a36Sopenharmony_ci
94162306a36Sopenharmony_cistatic struct dm_exception_store_type _persistent_compat_type = {
94262306a36Sopenharmony_ci	.name = "P",
94362306a36Sopenharmony_ci	.module = THIS_MODULE,
94462306a36Sopenharmony_ci	.ctr = persistent_ctr,
94562306a36Sopenharmony_ci	.dtr = persistent_dtr,
94662306a36Sopenharmony_ci	.read_metadata = persistent_read_metadata,
94762306a36Sopenharmony_ci	.prepare_exception = persistent_prepare_exception,
94862306a36Sopenharmony_ci	.commit_exception = persistent_commit_exception,
94962306a36Sopenharmony_ci	.prepare_merge = persistent_prepare_merge,
95062306a36Sopenharmony_ci	.commit_merge = persistent_commit_merge,
95162306a36Sopenharmony_ci	.drop_snapshot = persistent_drop_snapshot,
95262306a36Sopenharmony_ci	.usage = persistent_usage,
95362306a36Sopenharmony_ci	.status = persistent_status,
95462306a36Sopenharmony_ci};
95562306a36Sopenharmony_ci
95662306a36Sopenharmony_ciint dm_persistent_snapshot_init(void)
95762306a36Sopenharmony_ci{
95862306a36Sopenharmony_ci	int r;
95962306a36Sopenharmony_ci
96062306a36Sopenharmony_ci	r = dm_exception_store_type_register(&_persistent_type);
96162306a36Sopenharmony_ci	if (r) {
96262306a36Sopenharmony_ci		DMERR("Unable to register persistent exception store type");
96362306a36Sopenharmony_ci		return r;
96462306a36Sopenharmony_ci	}
96562306a36Sopenharmony_ci
96662306a36Sopenharmony_ci	r = dm_exception_store_type_register(&_persistent_compat_type);
96762306a36Sopenharmony_ci	if (r) {
96862306a36Sopenharmony_ci		DMERR("Unable to register old-style persistent exception store type");
96962306a36Sopenharmony_ci		dm_exception_store_type_unregister(&_persistent_type);
97062306a36Sopenharmony_ci		return r;
97162306a36Sopenharmony_ci	}
97262306a36Sopenharmony_ci
97362306a36Sopenharmony_ci	return r;
97462306a36Sopenharmony_ci}
97562306a36Sopenharmony_ci
97662306a36Sopenharmony_civoid dm_persistent_snapshot_exit(void)
97762306a36Sopenharmony_ci{
97862306a36Sopenharmony_ci	dm_exception_store_type_unregister(&_persistent_type);
97962306a36Sopenharmony_ci	dm_exception_store_type_unregister(&_persistent_compat_type);
98062306a36Sopenharmony_ci}
981